<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="1.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Plant Sci.</journal-id>
<journal-title-group>
<journal-title>Frontiers in Plant Science</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Plant Sci.</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">1664-462X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fpls.2025.1734247</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Research</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Applying traditional and machine learning-based GWAS approaches for marker-trait identification in wheat</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name><surname>Milek</surname><given-names>Joel Joshua</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/3257142/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Michel</surname><given-names>Sebastian</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/591978/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Buchelt</surname><given-names>Alexander</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Holzinger</surname><given-names>Andreas</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/3198553/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Molin</surname><given-names>Eva Maria</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>*</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/1219282/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role>
</contrib>
</contrib-group>
<aff id="aff1"><label>1</label><institution>Unit Bioresources, Center for Health &amp; Bioresources, AIT Austrian Institute of Technology</institution>, <city>Tulln</city>,&#xa0;<country country="at">Austria</country></aff>
<aff id="aff2"><label>2</label><institution>Human-Centered AI Lab, Institute of Forest Engineering, Department of Ecosystem Management, Climate and Biodiversity, BOKU University</institution>, <city>Vienna</city>,&#xa0;<country country="at">Austria</country></aff>
<aff id="aff3"><label>3</label><institution>Department of Agrobiotechnology, IFA-Tulln, University of Natural Resources and Life Sciences Vienna</institution>, <city>Tulln</city>,&#xa0;<country country="at">Austria</country></aff>
<author-notes>
<corresp id="c001"><label>*</label>Correspondence: Eva Maria Molin, <email xlink:href="mailto:eva-maria.molin@ait.ac.at">eva-maria.molin@ait.ac.at</email></corresp>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-01-28">
<day>28</day>
<month>01</month>
<year>2026</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2025</year>
</pub-date>
<volume>16</volume>
<elocation-id>1734247</elocation-id>
<history>
<date date-type="received">
<day>28</day>
<month>10</month>
<year>2025</year>
</date>
<date date-type="accepted">
<day>29</day>
<month>12</month>
<year>2025</year>
</date>
<date date-type="rev-recd">
<day>16</day>
<month>12</month>
<year>2025</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2026 Milek, Michel, Buchelt, Holzinger and Molin.</copyright-statement>
<copyright-year>2026</copyright-year>
<copyright-holder>Milek, Michel, Buchelt, Holzinger and Molin</copyright-holder>
<license>
<ali:license_ref start_date="2026-01-28">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<sec>
<title>Introduction</title>
<p>Complex traits arise from polygenic and interactive genomic architectures that are difficult to resolve using traditional genome-wide association study (GWAS) approaches. Machine learning (ML) provides complementary methods capable of capturing non-linear effects, improving signal detection, and enhancing the predictive accuracy of marker-trait associations (MTAs).</p>
</sec>
<sec>
<title>Methods</title>
<p>Using a publicly available winter wheat dataset (CIMMYT), we evaluated several widely used traditional GWAS tools, including GAPIT, GCTA, GEMMA, sommer, and TASSEL, with respect to computational efficiency, model performance, and the consistency of detected associations. In parallel, ML approaches, such as Elastic Net, Extreme Gradient Boosting (XGBoost), Random Forest, and the hybrid TSLRF model, were assessed based on feature importance metrics and functional annotation of selected markers.</p>
</sec>
<sec>
<title>Results</title>
<p>Despite a shared reliance on mixed linear models, the traditional GWAS tools exhibited differences in runtime and showed modest but meaningful variability in the number and overlap of MTAs. ML models recovered several associations detected by traditional methods and additionally identified novel markers, potentially reflecting non-linear or epistatic effects.</p>
</sec>
<sec>
<title>Discussion</title>
<p>Our findings demonstrate that ML can effectively complement traditional GWAS approaches for marker-trait identification in wheat. By extending beyond additive effects, ML broadens the scope of detectable genetic signals, providing a practical way to analyze complex traits and support informed marker-assisted breeding strategies.</p>
</sec>
</abstract>
<kwd-group>
<kwd>artificial intelligence</kwd>
<kwd>GWAS</kwd>
<kwd>plant height</kwd>
<kwd>thousand kernel weight</kwd>
<kwd>tool comparison</kwd>
<kwd><italic>Triticum aestivum</italic></kwd>
</kwd-group>
<funding-group>
<funding-statement>The author(s) declared that financial support was received for this work and/or its publication. This work was supported by Amt der Nieder&#xf6;sterreichischen Landesregierung, N&#xd6; Wirtschafts- und Tourismusfonds (Grant numbers WST3-F-5030665/018-2022 and WST3-F-5030665/019-2022) through the d4agrotech initiative (https://www.d4agrotech.at/).</funding-statement>
</funding-group>
<counts>
<fig-count count="5"/>
<table-count count="1"/>
<equation-count count="0"/>
<ref-count count="118"/>
<page-count count="20"/>
<word-count count="11841"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Plant Breeding</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec id="s1" sec-type="intro">
<label>1</label>
<title>Introduction</title>
<p>Genome-wide association studies (GWAS) have significantly advanced plant genetics and breeding by enabling the identification of genetic variants associated with complex agronomic traits, such as yield, stress tolerance, and disease resistance. With the rise of high-throughput genotyping and the availability of reference genomes, GWAS became a widely adopted approach by the 2010s, particularly in major crops, such as wheat (<xref ref-type="bibr" rid="B78">Saini et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B82">Sehgal and Dreisigacker, 2022</xref>). Its application has accelerated marker-assisted selection (MAS) and facilitated the integration of genomic data into modern wheat breeding programs (<xref ref-type="bibr" rid="B44">Korte and Farlow, 2013</xref>). As one of the world&#x2019;s most widely grown cereals and a staple food for over one-third of the global population, wheat is a key component of global food security. However, wheat is particularly vulnerable to abiotic stresses such as drought and heat during critical developmental stages like flowering and grain filling, and is also severely affected by a range of pests and diseases (<xref ref-type="bibr" rid="B27">Figueroa et&#xa0;al., 2018</xref>; <xref ref-type="bibr" rid="B83">Senapati et&#xa0;al., 2021</xref>). These biotic and abiotic stressors are projected to intensify in frequency and severity under future climate change scenarios, posing a significant threat to yield stability affecting global wheat production. Thus, MAS, which relies on genetic markers derived from GWAS, remains fundamental to breeding programs aiming to improve stress resilience and ensure yield stability in adverse environmental conditions (<xref ref-type="bibr" rid="B16">Chang-Brahim et&#xa0;al., 2024</xref>).</p>
<p>Traditionally, GWAS rely on statistical models to detect associations between genetic markers and phenotypic traits while correcting for confounding effects such as population structure and relatedness. Early approaches used general linear models (GLMs), which are computationally simple but prone to false positives due to population stratification (<xref ref-type="bibr" rid="B112">Yu et&#xa0;al., 2006</xref>; <xref ref-type="bibr" rid="B73">Price et&#xa0;al., 2006</xref>). Mixed linear models (MLMs) addressed this limitation by incorporating fixed effects for population structure (Q) and random effects for kinship (K), becoming a widely adopted standard despite their high computational cost for large datasets (<xref ref-type="bibr" rid="B112">Yu et&#xa0;al., 2006</xref>; <xref ref-type="bibr" rid="B98">Wang and Zhang, 2021</xref>). Efficiency improvements led to compressed MLMs (CMLMs), which estimate kinship at the group level (<xref ref-type="bibr" rid="B115">Zhang et&#xa0;al., 2010</xref>), and to the SUPER method, which computes kinship using a subset of trait-associated markers to enhance power (<xref ref-type="bibr" rid="B97">Wang et&#xa0;al., 2014</xref>). Single-locus models, however, often fail to capture the polygenic architecture of complex traits (<xref ref-type="bibr" rid="B81">Segura et&#xa0;al., 2012</xref>). Multi-locus approaches such as MLMM and FarmCPU improve detection power by iteratively including significant markers, although FarmCPU remains computationally demanding for large populations (<xref ref-type="bibr" rid="B81">Segura et&#xa0;al., 2012</xref>; <xref ref-type="bibr" rid="B52">Liu et&#xa0;al., 2016</xref>). BLINK further improves efficiency by replacing kinship-based random effects with a Bayesian Information Criterion&#x2013;based fixed-effect framework, achieving higher power and speed without assuming uniform genomic distribution of causal variants (<xref ref-type="bibr" rid="B39">Huang et&#xa0;al., 2019</xref>). 
Simulation studies consistently rank BLINK among the most powerful GWAS models (GAPIT manual). Despite these advances, traditional GWAS models assume linear, additive effects and struggle with high-dimensional, collinear genomic data, limiting their ability to capture dominance, epistasis, and non-linear interactions and contributing to the problem of missing heritability (<xref ref-type="bibr" rid="B101">Witte, 2010</xref>; <xref ref-type="bibr" rid="B44">Korte and Farlow, 2013</xref>; <xref ref-type="bibr" rid="B92">Tam et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B56">L&#xf3;pez-Cortegano and Caballero, 2019</xref>; <xref ref-type="bibr" rid="B26">Enoma et&#xa0;al., 2022</xref>).</p>
<p>Within artificial intelligence (AI), machine learning (ML) offers promising solutions to these challenges. Unlike traditional GWAS models, algorithms such as Random Forest (RF) and XGBoost (XGB) use ensembles of decision trees to automatically capture non-linear marker effects and complex epistatic interactions, while providing robust feature-importance rankings and handling high-dimensional marker data without strict parametric assumptions (<xref ref-type="bibr" rid="B35">Guo et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B25">Elgart et&#xa0;al., 2022</xref>). Penalized-regression approaches like GLMnet, that apply elastic-net regularization to perform simultaneous feature selection and coefficient shrinkage, can mitigate overfitting and multicollinearity by efficiently identifying informative SNPs in large GWAS datasets (<xref ref-type="bibr" rid="B19">Cho et&#xa0;al., 2009</xref>; <xref ref-type="bibr" rid="B14">B&#xfc;hlmann and Van De Geer, 2011</xref>). Especially in human genetics, ML-based and, more recently, deep learning-based GWAS approaches have been employed for epistasis detection, SNP marker prioritization, and SNP discovery (<xref ref-type="bibr" rid="B4">Arloth et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B32">Ghose et&#xa0;al., 2024</xref>; <xref ref-type="bibr" rid="B66">Mieth et&#xa0;al., 2016</xref>, <xref ref-type="bibr" rid="B67">2021</xref>). Coming back to plant genetics, while ML&#x2010;based genomic prediction has seen widespread success in crop improvement (<xref ref-type="bibr" rid="B34">Grinberg et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B105">Yan et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B33">Gill et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B84">Sirsat et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B71">Montesinos-L&#xf3;pez et&#xa0;al., 2024</xref>), ML&#x2010;mediated GWAS remains relatively underexplored. 
For example, <xref ref-type="bibr" rid="B111">Yoosefzadeh-Najafabadi et&#xa0;al. (2022)</xref> applied Support Vector Machines (SVM) and RF to dissect yield and yield&#x2010;component traits in soybean, successfully uncovering quantitative trait loci (QTL) that co&#x2010;localized with those reported in previous studies. <xref ref-type="bibr" rid="B54">Liu et&#xa0;al. (2019)</xref> extended this work by using deep learning to predict phenotypes and assess genotypic contributions in soybean, demonstrating that their approach efficiently identifies significant SNPs from GWAS data. For wheat, recent studies have used ML approaches to extract top-ranking SNPs (<xref ref-type="bibr" rid="B85">Song et&#xa0;al., 2025</xref>; <xref ref-type="bibr" rid="B99">Wang et&#xa0;al., 2025</xref>). However, at the time of writing, only one study presented an ML-based GWAS approach, including an empirical cutoff, to detect genetic regions associated with cuticular wax ester biosynthesis and early maturity (<xref ref-type="bibr" rid="B93">Tekeu et&#xa0;al., 2023</xref>). These examples demonstrate that ML has the potential to complement or even enhance traditional GWAS by leveraging the capabilities of ML algorithms, revealing previously undetected genetic associations and advancing our understanding of complex traits, thereby accelerating plant breeding.</p>
<p>Despite the growing number of tools and methods available, ranging from traditional ML algorithms to advanced deep learning models, systematic comparisons of their performance, robustness, and biological relevance in plant GWAS are still scarce. An early study evaluated a range of ML algorithms on human genetic data, demonstrating their ability to capture complex genetic architectures (<xref ref-type="bibr" rid="B90">Szymczak et&#xa0;al., 2009</xref>). More recently, <xref ref-type="bibr" rid="B33">Gill et&#xa0;al. (2022)</xref> compared tree classifiers (XGB, RF) with deep learning methods on plant data, revealing that traditional ML algorithms can outperform deep learning. Notably, they observed that XGB and traditional GWAS identified overlapping genomic regions for soybean traits, underscoring the potential of ML to complement traditional approaches. Yet, comprehensive evaluations that assess the strengths and limitations of these methods especially in the context of crop genomics across different plant species, traits, and genomic architectures are lacking. This gap hinders the broader adoption of ML-based GWAS in plant breeding and limits the ability to draw generalizable conclusions about their utility.</p>
<p>This study therefore aims to bridge this gap by providing a systematic, multi-metric comparison of selected traditional GWAS tools and ML algorithms. Our selection encompasses widely used GWAS tools (e.g., GAPIT, sommer, GCTA, GEMMA, and TASSEL) implementing linear models, which we compared to established ML approaches (such as EN, RF, SVM, and XGB) that have already found application in GWAS. For this comparative analysis, we focused on the two traits plant height (PH) and thousand kernel weight (TKW), two key traits in wheat breeding. Unlike prior comparisons, we evaluate not only computational efficiency and MTA detection but also biological relevance, model interpretability, and scalability across a real-world wheat dataset (CIMMYT; <xref ref-type="bibr" rid="B41">Juliana et&#xa0;al., 2019</xref>). By linking associated genes with functional gene annotation, we seek to evaluate how ML approaches can enrich conventional GWAS pipelines, reveal additional candidate loci, and provide interpretable metrics of marker relevance. Our central hypothesis is that ML models, when properly tuned and combined with robust feature-selection approaches, can (i) detect MTAs overlooked by conventional GWAS, (ii) offer stable, biologically meaningful estimates of marker relevance, and (iii) enhance the interpretability and resolution of trait-gene relationships. Ultimately, this application-driven comparative analysis aims to illustrate the potential of ML to enhance trait dissection and to support breeding strategies for complex traits in wheat.</p>
</sec>
<sec id="s2" sec-type="materials|methods">
<label>2</label>
<title>Materials and methods</title>
<sec id="s2_1">
<label>2.1</label>
<title>Data and preprocessing</title>
<p>To conduct our comparative study, we used a large dataset widely recognized in the field of crop genetics, published by <xref ref-type="bibr" rid="B41">Juliana et&#xa0;al. (2019)</xref>. This dataset, provided by CIMMYT (International Maize and Wheat Improvement Center, Mexico), includes a total of 44,400 wheat lines for which 78,606 SNPs were identified through genotyping-by-sequencing (GBS) and 50 phenotypic traits were evaluated. After thoroughly examining the CIMMYT dataset, we created two subsets for our analysis (<xref ref-type="fig" rid="f1"><bold>Figure&#xa0;1A</bold></xref>): the first subset was selected for the trait plant height (PH) including 7,887 individuals evaluated in first-year yield trials conducted in Ciudad Obreg&#xf3;n, Mexico, during the 2014&#x2013;2015 season, with two replicates, resembling Juliana et&#xa0;al.&#x2019;s panel 4. The second subset targeted the trait thousand kernel weight (TKW) and comprised 3,478 individuals derived from multiple trials evaluated in Ciudad Obreg&#xf3;n, Mexico: 766 lines from the elite yield trial (EYT) 2013-2014, 775 lines from EYT 2014-2015, 964 lines from EYT 2015-2016, and 980 lines from EYT 2016-2017, resembling Juliana et&#xa0;al.&#x2019;s panel 1 (but with seven fewer individuals due to the removal of individuals with missing values in the quality traits). For more information on the yield trials, refer to <xref ref-type="bibr" rid="B36">Guzman et&#xa0;al. (2016)</xref> and <xref ref-type="bibr" rid="B41">Juliana et&#xa0;al. (2019)</xref>. These datasets were subsequently filtered using TASSEL.v5 (<xref ref-type="bibr" rid="B13">Bradbury et&#xa0;al., 2007</xref>) as described in <xref ref-type="bibr" rid="B41">Juliana et&#xa0;al. (2019)</xref>: first, unaligned markers (UN) were filtered out, resulting in a set of 78,148 markers (cf. 
Figure&#xa0;1 of Juliana et&#xa0;al.), followed by a second filtering step for markers with less than 40% missing data, a MAF &gt; 0.05, and less than 5% heterozygosity. Missing markers were imputed using the LinkImpute (<xref ref-type="bibr" rid="B70">Money et&#xa0;al., 2015</xref>) module within TASSEL.v5, with the following settings: Linkage Disequilibrium (LD) was set to 60, and k-Nearest Neighbors (kNN) to 20. Using the tool&#x2019;s internal masking procedure to assess accuracy, the imputation error was estimated at 8% (PH) and 9% (TKW), respectively. After the imputation step, SNPs were again filtered for minor allele frequency (MAF &gt; 0.05), resulting in the final datasets as follows: the PH dataset included 7,890 markers and 7,886 individuals, and the TKW dataset comprised 7,287 markers and 3,478 individuals. In our study, the resulting final number of markers differed from that reported by <xref ref-type="bibr" rid="B41">Juliana et&#xa0;al. (2019)</xref>, who identified 6,355 markers for panel 1. The number of markers of panel 4 is not explicitly stated but falls in the range between 9,171 and 9,704 markers. In addition to this ambiguity regarding panel 4, the lack of information on thresholds, settings, and imputation accuracy in particular made it impossible to replicate the filtering procedures. Furthermore, differences in TASSEL software versions may have contributed to this discrepancy. A note on the file formats: the initial dataset was in HapMap format; however, in order to use the data for GEMMA and GCTA, it was converted using PLINK 2.0 (<xref ref-type="bibr" rid="B15">Chang et&#xa0;al., 2015</xref>) into bed/bim/fam format.</p>
<fig id="f1" position="float">
<label>Figure&#xa0;1</label>
<caption>
<p>Methodological overview of data preprocessing and filtering starting from 78,606 SNPs and 44,400 individual samples <bold>(A)</bold> and the herein used traditional GWAS tools and ML-based approaches including used thresholds <bold>(B)</bold>.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-16-1734247-g001.tif">
<alt-text content-type="machine-generated">Flowchart detailing a genetic analysis procedure. Panel A: The CIMMYT dataset includes 44,400 samples, 78,606 SNPs, and 50 traits. Traits PH and TKW are selected, filtered, and adjusted. PH results in 7,890 SNPs and TKW in 7,287 SNPs. Panel B: Input datasets use traditional GWAS tools or ML-based approaches. Tools include GAPIT, GCTA, and GEMMA, with feature importance and summary statistics like Manhattan plots highlighted.</alt-text>
</graphic></fig>
</sec>
<sec id="s2_2">
<label>2.2</label>
<title>Computational workflow</title>
<p>Code including the tools sommer and GAPIT, ML algorithms, TSLRF and plotting was run in RStudio Server version 2025.5.0.496 with R version 4.4.3 (<xref ref-type="bibr" rid="B76">R Core Team, 2025</xref>) on an Ubuntu 22.04.5 LTS server equipped with an Intel(R) Xeon(R) CPU E5-2637 v3 @ 3.50GHz (16 cores, 32 threads). Manhattan and QQ-plots were created with ggplot2 version 3.5.2. The UpSet plot and Venn diagram were visualized using the R package ggVennDiagram version 1.5.2. GCTA, GEMMA and TASSEL were run from the command line. The code (separated into seven R scripts: EN.R [commit tag: 3fcdd80], FeatureSelection [3fcdd80], Gapit.R [3fcdd80], RF.R [2385148], Sommer.R [3fcdd80], TSLRF.R [09f8d51], XBR.R [c20e9f2]) is publicly available on <ext-link ext-link-type="uri" xlink:href="https://github.com/MolinLab/BenchmarkingGWAS">https://github.com/MolinLab/BenchmarkingGWAS</ext-link>. The overall computational workflow is depicted in <xref ref-type="fig" rid="f1"><bold>Figure&#xa0;1B</bold></xref> and is given in more detail below:</p>
<sec id="s2_2_1">
<label>2.2.1</label>
<title>Traditional GWAS tools</title>
<p>In GAPIT version 3, we applied multiple models including GLM, MLM (<xref ref-type="bibr" rid="B115">Zhang et&#xa0;al., 2010</xref>), CMLM (<xref ref-type="bibr" rid="B115">Zhang et&#xa0;al., 2010</xref>), SUPER (<xref ref-type="bibr" rid="B97">Wang et&#xa0;al., 2014</xref>), MLMM (<xref ref-type="bibr" rid="B81">Segura et&#xa0;al., 2012</xref>), FarmCPU (<xref ref-type="bibr" rid="B52">Liu et&#xa0;al., 2016</xref>), and BLINK (<xref ref-type="bibr" rid="B39">Huang et&#xa0;al., 2019</xref>). In all analyses, two principal components (PCs) were included as covariates and the default settings were used, except for the MAF, which was set to 0.05. For the analysis with sommer version 4.3.6 (<xref ref-type="bibr" rid="B20">Covarrubias-Pazaran, 2016</xref>), a standard MLM was used that incorporated a kinship matrix along with two PCs to account for population structure. Similarly, GWAS was performed using GCTA (<xref ref-type="bibr" rid="B107">Yang et&#xa0;al., 2011a</xref>), Linux version 1.94.1, employing the mlma option and integrating two PCs as covariates and a genetic relationship matrix, with the MAF set to 0.05. In TASSEL-5 standalone software, GWAS was carried out using an MLM that also utilized two PCs and a kinship matrix to control for population structure and relatedness. Finally, GEMMA (<xref ref-type="bibr" rid="B118">Zhou and Stephens, 2012</xref>) version 0.98.5, Linux was used to perform GWAS under a univariate mixed linear model framework, utilizing a genetic relationship matrix. A common tool for model diagnostics in association mapping is visual assessment of QQ-plots, which can indicate under- and over-powered models with low detection rates or inflated false positive rates (<xref ref-type="bibr" rid="B96">Wang et&#xa0;al., 2022</xref>). In addition to visually assessing QQ-plots, we calculated the genomic inflation factor (&#x3bb;) from raw p-values using the method described by <xref ref-type="bibr" rid="B109">Yang et&#xa0;al. (2011b)</xref>. 
For well-adjusted models we expect &#x3bb; = 1, for inflated &#x3bb; &gt; 1.1 and deflated &#x3bb; &lt; 0.9.</p>
</sec>
<sec id="s2_2_2">
<label>2.2.2</label>
<title>ML models for GWAS</title>
<p>Machine learning algorithms were implemented using the mlr3 package (<xref ref-type="bibr" rid="B47">Lang et&#xa0;al., 2019</xref>) version 0.22.1 together with the following extra packages: mlr3learners (<xref ref-type="bibr" rid="B46">Lang et&#xa0;al., 2025</xref>) version 0.10.0, mlr3mbo (<xref ref-type="bibr" rid="B80">Schneider et&#xa0;al., 2025</xref>) version 0.2.9, mlr3misc (<xref ref-type="bibr" rid="B7">Becker et&#xa0;al., 2025a</xref>) version 0.18.0, mlr3tuning (<xref ref-type="bibr" rid="B6">Becker et&#xa0;al., 2025b</xref>) version 1.4.0 and mlr3tuningspaces (<xref ref-type="bibr" rid="B10">Bischl et&#xa0;al., 2023</xref>) version 0.5.2. Marker selection (feature selection) was performed using the Elastic Net (EN) algorithm from GLMnet (<xref ref-type="bibr" rid="B29">Friedman et&#xa0;al., 2010</xref>) version 4.1-9, included as a standard learner in the mlr3 package in R. The best-performing model had the following hyperparameters: &#x3b1; = 0.6904 and &#x3bb; = 0.0461 for TKW, and &#x3b1; = 0.2298 and &#x3bb; = 0.1070 for PH. All features with a coefficient of 0 were excluded from further analysis, which led to 4,018 markers for the TKW dataset and 5,582 markers for PH. EN was also used as a GWAS model. The absolute values of the coefficients of the features were used as importance for this model. Stability selection (<xref ref-type="bibr" rid="B63">Meinshausen and B&#xfc;hlmann, 2010</xref>) was performed as a proxy of feature importance due to the empirical threshold not performing well with the algorithm. RF was implemented using the regression.ranger algorithm from the ranger package (<xref ref-type="bibr" rid="B102">Wright and Ziegler, 2017</xref>) version 0.17.0, and decrease of node impurity was selected as feature importance. XGB was performed using the xgboost package (<xref ref-type="bibr" rid="B17">Chen et&#xa0;al., 2025</xref>) version 1.7.8.1, and feature importance was extracted using gain from the integrated xgb.importance function. 
Variable importance measure was chosen as a representative for the SVR algorithm. The regression.svm algorithm was used from the R package e1071 (<xref ref-type="bibr" rid="B65">Meyer et&#xa0;al., 2024</xref>) version 1.7-16. Feature importance was computed by multiplying coefficients with the support vectors. The workflow Two-Stage algorithm based on Least angle regression and Random Forest (TSLRF), which includes population stratification and feature selection, was utilized following <xref ref-type="bibr" rid="B89">Sun et&#xa0;al. (2019)</xref>. Least angle regression (LARS) (<xref ref-type="bibr" rid="B24">Efron et&#xa0;al., 2004</xref>) was applied using the lars package version 1.3 (<xref ref-type="bibr" rid="B37">Hastie and Efron, 2022</xref>); RF for TSLRF was applied using the same settings and tuning procedure described for the standalone RF learner. As feature importance metrics, we selected gain for XGB and decrease of node impurity for RF instead of permutation-based importance. Permutation importance performs poorly in the presence of highly correlated predictors, which is a defining characteristic of genomic marker data. In such settings, permuting a single SNP breaks its correlation structure with neighboring loci, causing the model to redirect predictive importance to correlated markers. This leads to unstable, underestimated, and biologically misleading importance estimates. Additionally, permutation importance is most informative when features differ in scale or cardinality, whereas SNP markers all share the same discrete genotype state space (0/1/2).</p>
</sec>
<sec id="s2_2_3">
<label>2.2.3</label>
<title>Hyperparameters and tuning of ML algorithms</title>
<p>For the ML-based GWAS, we adopted a regression framework in which the respective traits (TKW and PH) served as the prediction targets. Since several ML algorithms require numerical or integer matrices as input, we used numerically encoded genomic data generated by GAPIT during GWAS analysis. Hyperparameter ranges were selected using the recommended settings from mlr3tuningspaces for each learner and were subsequently optimized using 5-fold nested cross-validation with Bayesian optimization, also known as model-based optimization. The best hyperparameters were selected by minimizing the cross-validated performance metric, after which the final model was retrained on the full dataset using these optimized hyperparameters (<xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Table&#xa0;1</bold></xref>). To reduce the impact of algorithmic stochasticity, we retrained the resulting model 100 times, extracted feature-importance scores from each run, and aggregated them to obtain stable estimates.</p>
</sec>
<sec id="s2_2_4">
<label>2.2.4</label>
<title>Significance thresholds</title>
<p>Bonferroni correction (<xref ref-type="bibr" rid="B12">Bonferroni, 1936</xref>) was used as a significance threshold and calculated as explained in <xref ref-type="bibr" rid="B96">Wang et&#xa0;al. (2022)</xref> for &#x3b1; = 0.05. In addition to the Bonferroni correction, a false discovery rate (FDR) correction (<xref ref-type="bibr" rid="B8">Benjamini and Hochberg, 1995</xref>) with a threshold of &#x3b1; = 0.05 was applied. For ML algorithms, an empirical threshold was estimated by a permutation test as described in <xref ref-type="bibr" rid="B96">Wang et&#xa0;al. (2022)</xref>. Instead of the lowest p-value, we stored the highest variable importance, repeated this 1,000 times, and used the &#x3b1; = 0.05 percentile as the threshold. Feature importances exceeding this threshold were subsequently treated as significant.</p>
</sec>
<sec id="s2_2_5">
<label>2.2.5</label>
<title>Identifying markers reflecting population structure and relatedness</title>
<p>To assess the influence of population structure on individual markers, we calculated marker informativeness following the approach of <xref ref-type="bibr" rid="B11">Biswas et&#xa0;al. (2009)</xref>, with the exception that no permutation test was performed. The coefficient of determination (R<sup>2</sup>) values were summed across the first two PCs derived from GAPIT. Higher values indicate stronger influence of population structure on the marker, reflecting its potential association with underlying relatedness patterns.</p>
</sec>
<sec id="s2_2_6">
<label>2.2.6</label>
<title>McNemar&#x2019;s test for pairwise significance</title>
<p>To assess whether the different models identified significantly different sets of MTAs, we conducted corrected pairwise McNemar&#x2019;s tests (built-in function in R) using two significance thresholds: &#x3b1; = 0.05 to evaluate overall differences in MTA discovery patterns and &#x3b1; = 0.001 to identify strongly divergent model pairs.</p>
</sec>
</sec>
<sec id="s2_3">
<label>2.3</label>
<title>Extracting candidate genes</title>
<p>Linkage disequilibrium (LD) calculated with PLINK 2.0 (<xref ref-type="bibr" rid="B15">Chang et&#xa0;al., 2015</xref>) was used to define flanking regions around significant MTAs. Candidate QTLs and genes were identified based on the <italic>Triticum aestivum</italic> reference genome (IWGSC; Ensembl Plants, accessed 18 May 2025; <xref ref-type="bibr" rid="B23">Dyer et&#xa0;al., 2025</xref>). Within each defined window, genes were annotated based on gene ontology, functional classification, and prior literature to prioritize and report the most relevant candidate genes.</p>
</sec>
</sec>
<sec id="s3" sec-type="results">
<label>3</label>
<title>Results</title>
<p>All MTAs identified for both traits under different models are listed with their respective candidate genes and functions in <xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Table&#xa0;2</bold></xref>. An overview of the tools and models used, the number of detected MTAs, computational runtime, and applied significance thresholds is provided in <xref ref-type="table" rid="T1"><bold>Table&#xa0;1</bold></xref>.</p>
<table-wrap id="T1" position="float">
<label>Table&#xa0;1</label>
<caption>
<p>Overview of GWAS models, model form, population structure correction, applied significance thresholds, number of detected marker-trait associations (MTAs), and computational runtime (minutes) for thousand kernel weight (TKW) and plant height (PH).</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" rowspan="2" align="center">Tool</th>
<th valign="top" rowspan="2" align="center">GWAS model</th>
<th valign="top" rowspan="2" align="center">Model form</th>
<th valign="top" rowspan="2" align="center">Population structure correction</th>
<th valign="middle" colspan="3" align="center">TKW</th>
<th valign="middle" colspan="3" align="center">PH</th>
<th valign="top" rowspan="2" align="center">Version</th>
</tr>
<tr>
<th valign="middle" align="left">Thresholds</th>
<th valign="middle" align="right">MTAs</th>
<th valign="middle" align="right">Runtime [min]</th>
<th valign="middle" align="left">Thresholds</th>
<th valign="middle" align="right">MTAs</th>
<th valign="middle" align="right">Runtime [min]</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="left">Sommer</td>
<td valign="middle" align="left">MLM</td>
<td valign="middle" align="left">Mixed linear model</td>
<td valign="middle" align="left">2 PCs + Kinship</td>
<td valign="middle" align="left">Bonferroni &#x3b1;=0.05</td>
<td valign="middle" align="right">4</td>
<td valign="middle" align="right">13.0</td>
<td valign="middle" align="left">FDR &#x3b1;= 0.05</td>
<td valign="middle" align="right">2</td>
<td valign="middle" align="right">311.4</td>
<td valign="middle" align="left">4.3.6</td>
</tr>
<tr>
<td valign="middle" align="left">GCTA</td>
<td valign="middle" align="left">MLM</td>
<td valign="middle" align="left">Mixed linear model</td>
<td valign="middle" align="left">2 PCs + Kinship</td>
<td valign="middle" align="left">Bonferroni &#x3b1;=0.05</td>
<td valign="middle" align="right">4</td>
<td valign="middle" align="right">1.5</td>
<td valign="middle" align="left">FDR &#x3b1;= 0.05</td>
<td valign="middle" align="right">1</td>
<td valign="middle" align="right">10.0</td>
<td valign="middle" align="left">1.94.1</td>
</tr>
<tr>
<td valign="middle" align="left">GEMMA</td>
<td valign="middle" align="left">MLM</td>
<td valign="middle" align="left">Mixed linear model</td>
<td valign="middle" align="left">2 PCs + Kinship</td>
<td valign="middle" align="left">Bonferroni &#x3b1;=0.05</td>
<td valign="middle" align="right">4</td>
<td valign="middle" align="right">3.0</td>
<td valign="middle" align="left">FDR &#x3b1;= 0.05</td>
<td valign="middle" align="right">0</td>
<td valign="middle" align="right">36.0</td>
<td valign="middle" align="left">0.98.5</td>
</tr>
<tr>
<td valign="middle" align="left">TASSEL</td>
<td valign="middle" align="left">MLM</td>
<td valign="middle" align="left">Mixed linear model</td>
<td valign="middle" align="left">2 PCs + Kinship</td>
<td valign="middle" align="left">Bonferroni &#x3b1;=0.05</td>
<td valign="middle" align="right">2</td>
<td valign="middle" align="right">1025.0</td>
<td valign="middle" align="left">FDR &#x3b1;= 0.05</td>
<td valign="middle" align="right">4</td>
<td valign="middle" align="right">26220.0</td>
<td valign="middle" align="left">Tassel-5</td>
</tr>
<tr>
<td valign="middle" align="left">GAPIT</td>
<td valign="middle" align="left">GLM</td>
<td valign="middle" align="left">Generalized linear model</td>
<td valign="middle" align="left">2 PCs (no Kinship)</td>
<td valign="middle" align="left">Bonferroni &#x3b1;=0.05</td>
<td valign="middle" align="right">1352</td>
<td valign="middle" align="right">8.0</td>
<td valign="middle" align="left">FDR &#x3b1;= 0.05</td>
<td valign="middle" align="right">4482</td>
<td valign="middle" align="right">99.9</td>
<td valign="middle" align="left">Gapit version 3</td>
</tr>
<tr>
<td valign="middle" align="left">GAPIT</td>
<td valign="middle" align="left">MLM</td>
<td valign="middle" align="left">Mixed linear model</td>
<td valign="middle" align="left">2 PCs + Kinship</td>
<td valign="middle" align="left">Bonferroni &#x3b1;=0.05</td>
<td valign="middle" align="right">3</td>
<td valign="middle" align="right">26.0</td>
<td valign="middle" align="left">FDR &#x3b1;= 0.05</td>
<td valign="middle" align="right">0</td>
<td valign="middle" align="right">264.2</td>
<td valign="middle" align="left">Gapit version 3</td>
</tr>
<tr>
<td valign="middle" align="left">GAPIT</td>
<td valign="middle" align="left">CMLM</td>
<td valign="middle" align="left">Compressed mixed linear model</td>
<td valign="middle" align="left">2 PCs + Kinship</td>
<td valign="middle" align="left">Bonferroni &#x3b1;=0.05</td>
<td valign="middle" align="right">3</td>
<td valign="middle" align="right">1851.0</td>
<td valign="middle" align="left">FDR &#x3b1;= 0.05</td>
<td valign="middle" align="right">2</td>
<td valign="middle" align="right">19551.0</td>
<td valign="middle" align="left">Gapit version 3</td>
</tr>
<tr>
<td valign="middle" align="left">GAPIT</td>
<td valign="middle" align="left">MLMM</td>
<td valign="middle" align="left">Multi-locus mixed linear model</td>
<td valign="middle" align="left">2 PCs + Kinship</td>
<td valign="middle" align="left">Bonferroni &#x3b1;=0.05</td>
<td valign="middle" align="right">2</td>
<td valign="middle" align="right">136.8</td>
<td valign="middle" align="left">FDR &#x3b1;= 0.05</td>
<td valign="middle" align="right">2</td>
<td valign="middle" align="right">1435.7</td>
<td valign="middle" align="left">Gapit version 3</td>
</tr>
<tr>
<td valign="middle" align="left">GAPIT</td>
<td valign="middle" align="left">SUPER</td>
<td valign="middle" align="left">Mixed linear model with SNP selection</td>
<td valign="middle" align="left">2 PCs + Kinship (selected SNPs)</td>
<td valign="middle" align="left">Bonferroni &#x3b1;=0.05</td>
<td valign="middle" align="right">1101</td>
<td valign="middle" align="right">109.1</td>
<td valign="middle" align="left">FDR &#x3b1;= 0.05</td>
<td valign="middle" align="right">3497</td>
<td valign="middle" align="right">812.6</td>
<td valign="middle" align="left">Gapit version 3</td>
</tr>
<tr>
<td valign="middle" align="left">GAPIT</td>
<td valign="middle" align="left">FarmCPU</td>
<td valign="middle" align="left">Iterative fixed and random effects model</td>
<td valign="middle" align="left">Implicit (iterative pseudo-QTNs)</td>
<td valign="middle" align="left">Bonferroni &#x3b1;=0.05</td>
<td valign="middle" align="right">29</td>
<td valign="middle" align="right">1.5</td>
<td valign="middle" align="left">Bonferroni &#x3b1;=0.05</td>
<td valign="middle" align="right">40</td>
<td valign="middle" align="right">4.0</td>
<td valign="middle" align="left">Gapit version 3</td>
</tr>
<tr>
<td valign="middle" align="left">GAPIT</td>
<td valign="middle" align="left">BLINK</td>
<td valign="middle" align="left">Fixed-effect multi-locus model</td>
<td valign="middle" align="left">Implicit (LD-based pseudo-QTN selection)</td>
<td valign="middle" align="left">Bonferroni &#x3b1;=0.05</td>
<td valign="middle" align="right">45</td>
<td valign="middle" align="right">2.2</td>
<td valign="middle" align="left">Bonferroni &#x3b1;=0.05</td>
<td valign="middle" align="right">58</td>
<td valign="middle" align="right">4.7</td>
<td valign="middle" align="left">Gapit version 3</td>
</tr>
<tr>
<td valign="middle" align="left">TSLRF</td>
<td valign="middle" align="left">EMMA-transformed RF</td>
<td valign="middle" align="left">Random Forest</td>
<td valign="middle" align="left">Kinship</td>
<td valign="middle" align="left">Permutation 1000&#xd7;, &#x3b1;=0.05 quantile</td>
<td valign="middle" align="right">3</td>
<td valign="middle" align="right">28.9</td>
<td valign="middle" align="left">Permutation 1000&#xd7;, &#x3b1;=0.05 quantile</td>
<td valign="middle" align="right">0</td>
<td valign="middle" align="right">27.2</td>
<td valign="middle" align="left">0.17.0 (RF), 1.3 (LARS)</td>
</tr>
<tr>
<td valign="middle" align="left">RF</td>
<td valign="middle" align="left">Random Forest</td>
<td valign="middle" align="left">Random Forest</td>
<td valign="middle" align="left">None</td>
<td valign="middle" align="left">Permutation 1000&#xd7;, &#x3b1;=0.05 quantile</td>
<td valign="middle" align="right">44</td>
<td valign="middle" align="right">25.3</td>
<td valign="middle" align="left">Permutation 1000&#xd7;, &#x3b1;=0.05 quantile</td>
<td valign="middle" align="right">38</td>
<td valign="middle" align="right">343.8</td>
<td valign="middle" align="left">0.17.0</td>
</tr>
<tr>
<td valign="middle" align="left">XGB</td>
<td valign="middle" align="left">XGBoost</td>
<td valign="middle" align="left">Gradient Boosting</td>
<td valign="middle" align="left">None</td>
<td valign="middle" align="left">Permutation 1000&#xd7;, &#x3b1;=0.05 quantile</td>
<td valign="middle" align="right">13</td>
<td valign="middle" align="right">100.6</td>
<td valign="middle" align="left">Permutation 1000&#xd7;, &#x3b1;=0.05 quantile</td>
<td valign="middle" align="right">10</td>
<td valign="middle" align="right">86.8</td>
<td valign="middle" align="left">1.7.8.1</td>
</tr>
<tr>
<td valign="middle" align="left">Elastic Net (EN)</td>
<td valign="middle" align="left">Penalized Regression</td>
<td valign="middle" align="left">Regularized regression</td>
<td valign="middle" align="left">None</td>
<td valign="middle" align="left">Stability Selection 1000&#xd7;, 95% frequency</td>
<td valign="middle" align="right">6</td>
<td valign="middle" align="right">2.4</td>
<td valign="middle" align="left">Stability Selection 1000&#xd7;, 95% frequency</td>
<td valign="middle" align="right">21</td>
<td valign="middle" align="right">12.8</td>
<td valign="middle" align="left">4.1.9</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>The software version used for each tool and model is also reported.</p></fn>
</table-wrap-foot>
</table-wrap>
<sec id="s3_1">
<label>3.1</label>
<title>Comparison of traditional GWAS tools</title>
<sec id="s3_1_1">
<label>3.1.1</label>
<title>Thousand kernel weight</title>
<p>Most tools evaluated in this study use a Q+K model to identify MTAs, which should result in similar behavior of the tools. However, we observed a different number of MTAs (<xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Figures&#xa0;1</bold></xref>, <xref ref-type="supplementary-material" rid="SM1"><bold>2</bold></xref>): TASSEL found two markers, one on chromosome 2A and one on chromosome 6D, when the Bonferroni threshold was applied. The MLM implemented in GAPIT (<xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Figure&#xa0;1F</bold></xref>) detected three associations, including one additional marker at chromosome 6D compared to TASSEL. GCTA, GEMMA and sommer highlighted four associations, one at 2A and three at 6D (<xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Figures&#xa0;1A-D</bold></xref>). Among the MLM approaches, genomic inflation factors varied, ranging from 0.88 for GCTA to 0.99 for TASSEL (<xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Table&#xa0;3</bold></xref>). This shows that TASSEL is a well-adjusted model, while GCTA, GEMMA and sommer show slightly deflated p-values.</p>
<p>Evaluation of QQ-plots and genomic inflation factors revealed that the models implemented in GAPIT, specifically the GLM and SUPER models, exhibited highly inflated p-values, with genomic inflation factors of 10.7 and 9.7, respectively (<xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Figures&#xa0;1E</bold></xref>, <xref ref-type="supplementary-material" rid="SM1"><bold>2C</bold></xref>; <xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Table&#xa0;3</bold></xref>). Consequently, these models produced an excessive number of false positives and were excluded from further analysis. In contrast, FarmCPU and BLINK showed a more modest but still significant inflation, with factors of 1.7 and 1.4, respectively (<xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Figures&#xa0;2D, E</bold></xref>). Although these models will be included in subsequent analyses, their results should be interpreted with caution. The extent of genomic inflation is reflected in the number of MTAs detected: GLM and SUPER identified over 1,000 MTAs, whereas BLINK identified 45 and FarmCPU 29 MTAs (<xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Figure&#xa0;3A</bold></xref>). CMLM found three and MLMM two marker associations. Two markers, S6D_241296319 on chromosome 6D and S2A_143724068 on chromosome 2A, were consistently detected across models (<xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Figure&#xa0;3A</bold></xref>), underscoring their importance for further investigation. Additionally, FarmCPU and BLINK, both known for finding small-effect loci, shared 14 overlapping MTAs, while FarmCPU found 13 and BLINK 29 unique MTAs (<xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Figure&#xa0;3A</bold></xref>). 
These MTAs were detected across almost all chromosomes, namely, 1B, 2A, 2B, 3A, 3B, 3D, 4A, 5A, 5B, 6A, 6B, 6D, 7A, 7B and 7D (<xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Figures&#xa0;2E, F</bold></xref>).</p>
</sec>
<sec id="s3_1_2">
<label>3.1.2</label>
<title>Plant height</title>
<p>For the plant height (PH) dataset, several MLM-based approaches did not reveal any significant associations when applying a Bonferroni threshold. Only TASSEL, GCTA and CMLM identified significant MTAs at chromosome 7A, while other MLM methods revealed a peak that narrowly missed the threshold (<xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Figure&#xa0;4</bold></xref>). Using a less stringent FDR threshold, two markers, S7A_335064522 and S7A_403940100, were significantly associated across TASSEL and sommer (<xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Figure&#xa0;4</bold></xref>). GCTA identified only S7A_335064522, while GAPIT&#x2019;s MLM and GEMMA reported both markers just below the threshold (<xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Figure&#xa0;4</bold></xref>). These results highlight how differences in model implementation and computational assumptions can determine whether one, two, or no MTAs are detected. As observed for TKW, GLM, SUPER, FarmCPU, and BLINK exhibited inflated p-values (<xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Table&#xa0;3</bold></xref>), which led to the exclusion of GLM and SUPER from further analysis. FarmCPU and BLINK showed genomic inflation factors of 2.15 (FarmCPU) and 1.87 (BLINK), identifying a large number of MTAs, 40 for FarmCPU and 58 for BLINK, with an elevated risk of false positives. In contrast, MLM implementations indicated a good model fit, with genomic inflation factors ranging from 0.979 (GAPIT) to 0.99 (GCTA). Among all MLM implementations, TASSEL was the only tool to detect a significant association on chromosome 2B, at an FDR threshold, namely, S2B_770054032 (<xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Figure&#xa0;4D</bold></xref>). The same marker was also highlighted as significant by FarmCPU and BLINK (<xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Figures&#xa0;5D, E</bold></xref>).</p>
</sec>
</sec>
<sec id="s3_2">
<label>3.2</label>
<title>Comparison of ML-based GWAS approaches</title>
<p>To explore ML for GWAS, we employed the R package mlr3, which provides a robust base set of learners that can be extended with additional packages, e.g. EN, RF, and XGB. SVM was implemented with package e1071, but we encountered a complex prediction error and therefore did not investigate this approach further within this study. Additionally, we evaluated TSLRF, which integrates population stratification using the FASTmrEMMA method (<xref ref-type="bibr" rid="B100">Wen et&#xa0;al., 2018</xref>) and feature selection via LARS, followed by a final prediction with a RF model. We acknowledge that our study does not cover all available methods and tools, but the chosen approaches provide a strong foundation for assessing how ML can enhance GWAS in wheat breeding. The selected models provide interpretable feature-importance metrics, which are essential for identifying MTAs. In addition, they represent complementary algorithmic approaches, enabling a diverse yet comparable set of importance metrics that can be directly contrasted with traditional GWAS outputs.</p>
<sec id="s3_2_1">
<label>3.2.1</label>
<title>Thousand kernel weight</title>
<p>Interestingly, despite their differing computational complexities and algorithmic approaches, the models yielded very similar RMSE values for the TKW data, suggesting comparable model performance (<xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Table&#xa0;1</bold></xref>). This finding is somewhat surprising given that each algorithm possesses unique strengths and weaknesses. The real differentiator, therefore, may lie in the features that each model deems important. For EN, six important MTAs were observed on chromosomes 1A, 2A, 3D, 5D and 6D (<xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Figure&#xa0;6D</bold></xref>). The RF model identified 44 highly important markers, with major loci on chromosomes 6A and 6D, and additional MTAs on 2A, 3A, 5A, 6B, 7A, and 7D (<xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Figure&#xa0;6A</bold></xref>). Similarly, the XGB algorithm highlighted highly important loci on chromosomes 6A and 6D, with additional markers on 1B, 2A, 3A, 5A, 6A, and 7A, resulting in 13 MTAs (<xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Figure&#xa0;6B</bold></xref>). The TSLRF approach produced results comparable with MLM-based models, identifying three MTAs, one on chromosome 2A and two on chromosome 6D (<xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Figure&#xa0;6C</bold></xref>).</p>
</sec>
<sec id="s3_2_2">
<label>3.2.2</label>
<title>Plant height</title>
<p>An overview of the outcomes of selected ML models on the PH data is given in <xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Figure&#xa0;7</bold></xref>. RF found 38 important loci distributed across six different chromosomes (<xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Figure&#xa0;7A</bold></xref>); however, several markers on chromosomes 5A and 7A were overlapping, suggesting they may represent the same underlying locus. EN found in total 21 important loci on the chromosomes 1B, 2D, 3A, 3D, 4A, 5A, 5B, 6A, 6D, 7A, and 7B (<xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Figure&#xa0;7D</bold></xref>). XGB highlighted important loci on 2B, 3B, 4A, 4D, 5A, and 7B, but did not identify the major association on chromosome 7A, which was consistently detected by most other models and traditional MLM-based methods (<xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Figure&#xa0;7B</bold></xref>). Nevertheless, XGB achieved the best model performance in terms of RMSE across ML models for TKW and PH. The TSLRF model, which includes population structure adjustment and feature selection, did not detect any significant associations for PH (<xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Figure&#xa0;7C</bold></xref>), similar to MLM implementations with Bonferroni correction.</p>
</sec>
</sec>
<sec id="s3_3">
<label>3.3</label>
<title>Overlap between traditional and ML methods</title>
<sec id="s3_3_1">
<label>3.3.1</label>
<title>Thousand kernel weight</title>
<p>For TKW, we observed an overlap of significant markers between traditional and ML-based methods (<xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Figures&#xa0;3A</bold></xref>, <xref ref-type="supplementary-material" rid="SM1"><bold>8</bold></xref>-<xref ref-type="supplementary-material" rid="SM1"><bold>10</bold></xref>). For example, S6D_241296319 was found using all models, and S6D_143959985 was also significantly associated using the models CMLM, MLM (using GAPIT, GCTA, GEMMA, sommer), TSLRF and RF (<xref ref-type="fig" rid="f2"><bold>Figure&#xa0;2</bold></xref>). In contrast, S2A_143724068, which was found to be associated with TKW using multiple traditional methods, was only significantly associated using the EN and TSLRF models (<xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Figure&#xa0;8</bold></xref>). However, RF detected S2A_143241161 in proximity, suggesting detection of the same biological signal. Another key marker, S6D_82630628, was consistently associated using the MLM from GCTA, GEMMA and sommer as well as with EN, RF, and XGB (<xref ref-type="fig" rid="f2"><bold>Figure&#xa0;2</bold></xref>). Using the genomic inflation value, we were able to estimate the model performance in terms of inflation of p-values. BLINK and FarmCPU showed inflated p-values, which suggests a more careful interpretation of associated markers using these models. Some of these markers were, however, also highly important using ML models: S1B_642678709 (FarmCPU, BLINK, XGB), S3A_642133365 (FarmCPU, BLINK, RF, XGB), S3D_181564815 (FarmCPU, BLINK, EN), S5A_561661320 (BLINK, RF, XGB), S6A_481736675 (FarmCPU, RF, XGB), S6B_157666111 (FarmCPU, BLINK, RF), S7A_672854561 (FarmCPU, BLINK, XGB), and S7A_90336792 (FarmCPU, RF, XGB) (<xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Figures&#xa0;8</bold></xref>-<xref ref-type="supplementary-material" rid="SM1"><bold>10</bold></xref>). 
Therefore, these markers, which would not have been included using the best-fit MLM, should still be investigated due to their relevance across ML algorithms. In total, 96 unique MTAs associated with TKW were detected across all evaluated models, excluding GLM and SUPER due to high inflation rates (<xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Table&#xa0;2</bold></xref>).</p>
<fig id="f2" position="float">
<label>Figure&#xa0;2</label>
<caption>
<p>Detected marker-trait associations (MTA) for thousand kernel weight (TKW) identified on the chromosomes 6A, 6D, 7A. The vertical black bars represent the chromosomes and the horizontal lines the approximate position of each associated marker along the chromosome. Each dot on the right side of each chromosome represents MTA detected by one model, with colors indicating the respective method. Dots positioned to the left of each chromosome summarize the detection method by which each MTA was identified.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-16-1734247-g002.tif">
<alt-text content-type="machine-generated">Diagram showing the distribution of TKW MTAs detected across chromosomes 6A, 6D, 7A in wheat. Each chromosome is represented as a vertical black bar, with MTAs plotted at their approximate genomic positions. Colored points on the right side of each chromosome indicate MTAs identified by different analytical models. Detection type is distinguished by color coding on the left side of the chromosome, indicating MTAs identified by machine learning approaches, traditional GWAS methods, or both. SNPs are labeled alongside their approximate genomic positions.</alt-text>
</graphic></fig>
</sec>
<sec id="s3_3_2">
<label>3.3.2</label>
<title>Plant height</title>
<p>When comparing the detected MTAs for PH, no single marker was identified by all models (<xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Figures&#xa0;3B</bold></xref>, <xref ref-type="supplementary-material" rid="SM1"><bold>11</bold></xref>-<xref ref-type="supplementary-material" rid="SM1"><bold>13</bold></xref>). However, two markers were notable due to appearance across approaches: S7A_403940100 and S7A_335064522, each detected by six different models (<xref ref-type="fig" rid="f3"><bold>Figure&#xa0;3</bold></xref>). Interestingly, the latter marker was not found in FarmCPU and BLINK. Other markers also showed cross-method detection. For example, S3B_757480752 and S5A_514279842 were consistently found using FarmCPU, BLINK, RF, and XGB (<xref ref-type="fig" rid="f3"><bold>Figure&#xa0;3</bold></xref>; <xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Figure&#xa0;12</bold></xref>). Given the inflation of the p-values, their repeated detection by both traditional and ML models verifies the importance of these loci. Several further loci demonstrated high importance across three models: S2B_565068485 was found by FarmCPU, BLINK, and XGB, while S1B_568961025 was detected by FarmCPU, BLINK, and RF (<xref ref-type="fig" rid="f3"><bold>Figure&#xa0;3</bold></xref>; <xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Figure&#xa0;11</bold></xref>). Similarly, S3D_282956863 and S3D_529775477 were jointly identified by FarmCPU, BLINK, and EN, suggesting consistent signals across parametric and regularized regression models (<xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Figure&#xa0;11</bold></xref>). S4A_679187682 was detected by the decision tree models RF and XGB, while also being significantly associated using BLINK. Likewise, S5A_563358779 was identified using four different methods, namely, FarmCPU, BLINK, EN and XGB. 
S2B_770054032 was found by TASSEL&#x2019;s MLM implementation as well as by FarmCPU, BLINK, and XGB. In total, 119 unique MTAs were identified across all models except GLM and SUPER, indicating substantial discovery power when combining traditional and ML-based methods (<xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Table&#xa0;2</bold></xref>).</p>
<fig id="f3" position="float">
<label>Figure&#xa0;3</label>
<caption>
<p>Detected marker-trait associations (MTA) for plant height (PH) identified on the chromosomes 2B, 5A, 7A. The vertical black bars represent the chromosomes and the horizontal lines the approximate position of each associated marker along the chromosome. Each dot on the right side of each chromosome represents MTA detected by one model, with colors indicating the respective method. Dots positioned to the left of each chromosome summarize the detection method by which each MTA was identified.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-16-1734247-g003.tif">
<alt-text content-type="machine-generated">Diagram showing the distribution of PH MTAs detected across chromosomes 2B, 5A, 7A in wheat. Each chromosome is represented as a vertical black bar, with MTAs plotted at their approximate genomic positions. Colored points on the right side of each chromosome indicate MTAs identified by different analytical models. Detection type is distinguished by color coding on the left side of the chromosome, indicating MTAs identified by machine learning approaches, traditional GWAS methods, or both. SNPs are labeled alongside their approximate genomic positions.</alt-text>
</graphic></fig>
</sec>
</sec>
<sec id="s3_4">
<label>3.4</label>
<title>Pairwise comparison</title>
<p>Using McNemar&#x2019;s test (<xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Figures&#xa0;14</bold></xref>, <xref ref-type="supplementary-material" rid="SM1"><bold>15</bold></xref>), we assessed whether the different models identified significantly different sets of MTAs. For TKW, the results revealed a clear methodological separation among the approaches. The MLM-based approaches, together with TSLRF and EN, showed no significant pairwise differences, indicating that these methods detected largely overlapping MTAs. In contrast, FarmCPU, BLINK, RF, and XGB produced MTA sets that differed significantly from the MLM cluster. Within this non-MLM group, BLINK and RF as well as RF and FarmCPU did not differ significantly, whereas most other comparisons were significant. Notably, XGB and EN did not differ significantly, suggesting partially overlapping signals despite their methodological differences. Importantly, XGB differed from most MLM approaches only at &#x3b1; = 0.05, whereas FarmCPU, BLINK, and RF showed much stronger divergence at &#x3b1; = 0.001.</p>
<p>For PH, a similar pattern emerged. The MLM methods that detected MTAs showed no significant pairwise differences, while all other models yielded MTA sets significantly different from the MLM cluster, with the exception of TASSEL, which did not differ from XGB. Again, XGB and EN showed no significant difference, although EN differed significantly from all MLM approaches for PH, in contrast to its alignment with MLM methods for TKW. FarmCPU and BLINK did not show similarity to any other model, with the only non-significant comparison observed between RF and FarmCPU. Similar to the TKW results, XGB differed from MLM approaches only at &#x3b1; = 0.05 for PH, except for the comparison with TASSEL, which was not significant.</p>
<p>The influence of population structure on the detected MTAs was examined separately for each trait, as described in the following subsections.</p>
<sec id="s3_4_1">
<label>3.4.1</label>
<title>Thousand kernel weight</title>
<p>Due to the high inflation rates observed in FarmCPU and BLINK (<xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Figures&#xa0;2D, E</bold></xref>), we investigated marker informativeness along the PCs to estimate if the described MTAs (see <xref ref-type="fig" rid="f3"><bold>Figure&#xa0;3</bold></xref>) might be confounded by population structure. However, associations with population structure alone cannot distinguish between effects driven by demographic history and those potentially shaped by selection. To further explore possible confounding, we overlaid MTAs detected by both the traditional and ML-based models and evaluated whether significant associations were disproportionally enriched among SNPs with high structure-related variance.</p>
<p>Across the genome, MTAs were distributed over a wide range of informativeness values, indicating that most associations are not primarily driven by markers that strongly covary with population structure. Furthermore, no systematic differences were observed between traditional and ML-based approaches in how their detected MTAs aligned with the informativeness spectrum, with the exception of chromosome 6A, where a cluster of MTAs detected exclusively by RF ranked relatively high, whereas MTAs detected by both methods showed lower informativeness values.</p>
<p>Additionally, we identified several MTAs with high informativeness values &gt; 30%, suggesting potential confounding (<xref ref-type="fig" rid="f4"><bold>Figure&#xa0;4</bold></xref>). These include S1B_457535596 and S2B_412778191, detected exclusively by BLINK, and S3A_658239559 and S6A_187295466, detected by both FarmCPU and BLINK. Other markers such as S4A_679160910 and S4A_679187682, detected by FarmCPU and BLINK, respectively, ranked relatively high explaining ~29%, which is around 10% less compared to the top-ranked MTAs on 1A, 2B and 3A. Additionally, S6D_143959985 and S6D_241296319, detected by multiple models including MLMs, explained 28%.</p>
<fig id="f4" position="float">
<label>Figure&#xa0;4</label>
<caption>
<p>Marker informativeness based on the coefficient of determination (R&#xb2;) between all markers and the first two principal components for thousand kernel weight (TKW). The y-axis shows the summed R&#xb2; values across the first and second principal components. The x-axis represents the genomic position of markers across chromosomes. Marker&#x2013;trait associations (MTAs) are highlighted according to the model that detected them: blue for traditional GWAS models, red for ML&#x2013;based models, and purple for markers detected by both approaches.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-16-1734247-g004.tif">
<alt-text content-type="machine-generated">Scatter plot illustrating marker informativeness for TKW based on the coefficient of determination between genetic markers and principal components across wheat chromosomes 1A–7D. Chromosomes are shown on the x-axis and cumulative R² values are shown on the y-axis. Grey points represent all evaluated markers, while colored points highlight markers identified as associations by different analytical approaches. Marker classification is indicated by color coding, distinguishing markers detected by machine learning methods, traditional GWAS approaches, both, or neither.</alt-text>
</graphic></fig>
</sec>
<sec id="s3_4_2">
<label>3.4.2</label>
<title>Plant height</title>
<p>Overall, MTAs for PH were distributed across a wide range of cumulative variance, indicating, similar to TKW, that most associations are not closely aligned with markers that strongly correlate with population structure. Instead, the majority of MTAs, regardless of the method used, exhibited relatively low informativeness. Interestingly, MTAs detected by ML-based methods ranked comparatively lower than those identified by traditional methods, although some MTAs detected by both methods ranked higher.</p>
<p>The highest-ranking MTAs for plant height were S2B_565068485, identified by FarmCPU, BLINK and XGB, S3A_667698488 (FarmCPU and BLINK), and S7B_115377252 (BLINK), each explaining more than 30% of variation individually (<xref ref-type="fig" rid="f5"><bold>Figure&#xa0;5</bold></xref>). Other noteworthy markers include S4A_679160910 (RF) and S4A_679187682, identified by BLINK, XGB and RF, as well as S7B_185017185, detected by BLINK. It is important to note that no thresholds or p-values were calculated to define markers as significantly associated with population structure. Therefore, these results should be interpreted as indicative rather than definitive.</p>
<fig id="f5" position="float">
<label>Figure&#xa0;5</label>
<caption>
<p>Marker informativeness based on the coefficient of determination (R&#xb2;) between all markers and the first two principal components for plant height (PH). The y-axis shows the summed R&#xb2; values across the first and second principal components. The x-axis represents the genomic position of markers across chromosomes. Marker&#x2013;trait associations (MTAs) are highlighted according to the model that detected them: blue for traditional GWAS models, red for ML&#x2013;based models, and purple for markers detected by both approaches.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-16-1734247-g005.tif">
<alt-text content-type="machine-generated">Scatter plot illustrating marker informativeness for PH based on the coefficient of determination between genetic markers and principal components across wheat chromosomes 1A–7D. Chromosomes are shown on the x-axis and cumulative R² values are shown on the y-axis. Grey points represent all evaluated markers, while colored points highlight markers identified as associations by different analytical approaches. Marker classification is indicated by color coding, distinguishing markers detected by machine learning methods, traditional GWAS approaches, both, or neither.</alt-text>
</graphic></fig>
</sec>
</sec>
<sec id="s3_5">
<label>3.5</label>
<title>Computational efficiency</title>
<p>In terms of computational efficiency on the Intel(R) Xeon(R) CPU E5-2637 v3 @ 3.50GHz (16 cores, 32 threads), the tools demonstrated considerable variation. It is important to note that the server was used in parallel by multiple users during these experiments, which may have introduced variability in the reported computation times due to resource contention. Under these conditions, TASSEL exhibited the longest total computation times, ~1,025 minutes (min) for TKW and 26,220 min (~ 18 days) for PH. GAPIT showed large variation across models: for TKW, runtimes ranged from 1.5 min (FarmCPU) to 1,851 min (CMLM), and for PH, from 4 min (FarmCPU) to 19,551 min (CMLM). Notably, FarmCPU and BLINK consistently ran significantly faster than other GAPIT models. Sommer completed in 13 min (TKW) and 311 min (PH). Command-line tools such as GCTA required ~2 min (TKW) and 10 min (PH), while GEMMA took 3 min (TKW) and 36 min (PH). For the ML models implemented in this study, computational times included both hyperparameter tuning and model training, and were strongly influenced by the chosen hyperparameters. In general, the models ranked from fastest to slowest as follows: EN (TKW: ~ 2 min; PH: ~ 12 min) &lt; RF &lt; XGB (TKW: 100 min; PH: 344 min). EN was the fastest, which may be attributed to its efficient regularized regression framework. RF and XGB, on the other hand, incurred higher computational costs, partly due to hyperparameters that were computationally intensive. For instance, we used 1,000 rounds of boosting for XGB; reducing this number would decrease computational time but would also reduce model performance.</p>
</sec>
</sec>
<sec id="s4" sec-type="discussion">
<label>4</label>
<title>Discussion</title>
<p>The overall objectives of this study were to compare traditional and ML-based GWAS tools and methods within and across these two categories and to highlight the potential of integrating ML to enhance the characterization of marker-trait associations by focusing on wheat as an important staple crop. Only a few studies have attempted similar comparisons: some focused only on traditional GWAS applications (<xref ref-type="bibr" rid="B30">Galesloot et&#xa0;al., 2014</xref>; <xref ref-type="bibr" rid="B77">Saber and Jesse Shapiro, 2020</xref>; <xref ref-type="bibr" rid="B106">Yang et&#xa0;al., 2024</xref>), others compared statistical GWAS models (<xref ref-type="bibr" rid="B100">Wen et&#xa0;al., 2018</xref>; <xref ref-type="bibr" rid="B43">Kaler et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B60">Malik et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B64">Merrick et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B79">Sandhu et&#xa0;al., 2024</xref>), and others incorporated ML-based methods (<xref ref-type="bibr" rid="B33">Gill et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B111">Yoosefzadeh-Najafabadi et&#xa0;al., 2022</xref>). However, to the best of our knowledge, there is only one study to date that applied several different ML algorithms in wheat, extracted feature importance metrics, calculated an empirical cutoff, and directly compared their findings with traditional approaches (<xref ref-type="bibr" rid="B93">Tekeu et&#xa0;al., 2023</xref>). Yet, their analysis was conducted on a relatively small dataset (170 samples) and did not include well-established learners such as XGB and EN. 
Thus, in our study, we aimed for a higher sample number by using the well-known and publicly available CIMMYT dataset (<xref ref-type="bibr" rid="B41">Juliana et&#xa0;al., 2019</xref>) by focusing on the traits plant height (PH, 7,886 samples) and thousand kernel weight (TKW, 3,478 samples) in wheat, assessing both statistical results with detected MTAs and the practical usability of different ML-based models. Since MAS continues to play a central role in breeding programs aimed at improving wheat, research into ML-based GWAS is particularly relevant for this crop.</p>
<sec id="s4_1">
<label>4.1</label>
<title>Evaluation of traditional GWAS tools</title>
<p>Even though most of the traditional tools primarily rely on a MLM, subtle differences in implementation and internal computations resulted in variations in significance levels. In most cases, these differences were minor and led to a few MTAs falling just above or below the significance threshold depending on the tool used. However, the biggest distinction across tools was computational performance and model flexibility. TASSEL provided an easy-to-use platform with several in depth filtering options, making it a solid choice for pre-processing, SNP filtering, and imputation, and has been used already several times in wheat GWAS (e.g. <xref ref-type="bibr" rid="B88">Sukumaran et&#xa0;al., 2015</xref>; <xref ref-type="bibr" rid="B53">Liu et&#xa0;al., 2017</xref>; <xref ref-type="bibr" rid="B31">Genc et&#xa0;al., 2019</xref>).</p>
<p>However, in our study, TASSEL exhibited notably long computation times, including a runtime of up to 2.5 weeks for the PH dataset. While this may partly reflect less efficient computation and parallelization constraints, it is important to consider that the server was shared among multiple users, which could have influenced the observed runtimes. TASSEL does offer additional modelling options, such as GLM and Fast Association, which can significantly reduce computation time but may not always adequately account for kinship and population structure. Sommer, implemented in R, provides users with the flexibility to define complex MLM structures and random effects tailored to experimental designs. While its runtime was moderate in our tests, its customizability and compatibility with HapMap-formatted genotype data make it a valuable tool. GCTA and GEMMA, both command-line tools optimized for PLINK input, consistently demonstrated faster performance compared to TASSEL and R-based tools. However, converting HapMap files into the required input format may present a challenge for some users. Among the tools evaluated, GAPIT offered the widest range of model options, allowing users to select the best model based on genomic control or benchmark results. Computational times varied significantly across GAPIT models, with FarmCPU and BLINK running considerably faster than others. While CMLM was introduced to reduce computational time (<xref ref-type="bibr" rid="B115">Zhang et&#xa0;al., 2010</xref>), the process of calculating the optimal compression level, such as the 158 grouping variations for PH, substantially increased runtime compared to other GAPIT models. This observation might be data size-dependent and could be more pronounced with datasets containing several thousand individuals.</p>
<p>GAPIT&#x2019;s variety also highlights how different model assumptions influence results. For instance, the basic GLM tests one marker at a time with only population structure covariates and without a kinship matrix to account for relationships among individuals. As a result, GLM often produces many spurious associations when individuals are related, or population structure is complex (<xref ref-type="bibr" rid="B112">Yu et&#xa0;al., 2006</xref>). This behavior is reflected in our study and other experiments and simulations (<xref ref-type="bibr" rid="B43">Kaler et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B79">Sandhu et&#xa0;al., 2024</xref>). In SUPER, kinship is built from a subset of informative markers, and markers in LD with each test SNP are excluded to avoid confounding (<xref ref-type="bibr" rid="B52">Liu et&#xa0;al., 2016</xref>). However, using only a few pseudo-QTNs often fails to capture the full relatedness structure, reintroducing confounding. Thus, SUPER can still suffer high inflation if the pseudo-QTNs do not fully account for cryptic kinship, especially in structured wheat populations. The high number of spurious associations we observed is in line with simulation studies (<xref ref-type="bibr" rid="B100">Wen et&#xa0;al., 2018</xref>; <xref ref-type="bibr" rid="B43">Kaler et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B79">Sandhu et&#xa0;al., 2024</xref>). MLM approaches, i.e., MLM, CMLM, and MLMM, performed well in controlling for population stratification and relatedness, which is reflected in the genomic inflation values. However, this strict control comes at the cost of power to detect true associations (<xref ref-type="bibr" rid="B43">Kaler et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B39">Huang et&#xa0;al., 2019</xref>). By modelling the polygenic background, MLM-based tests effectively attribute variance to kinship and leave little for each SNP. 
As a result, only very strong MTAs will pass strict significance thresholds (<xref ref-type="bibr" rid="B52">Liu et&#xa0;al., 2016</xref>). Simulations under high heritability showed that MLM/CMLM detected five out of 20 true QTLs, while MLMM was able to find 11 out of 20 (<xref ref-type="bibr" rid="B79">Sandhu et&#xa0;al., 2024</xref>). This result is mirrored in the results for the trait TKW in this study. No association was detected for PH using GAPIT&#x2019;s MLM approach, which could be explained by overfitting of kinship. However, when using different clustering in CMLM, the model detected one MTA, showing a slight increase in power. FarmCPU and BLINK showed moderately inflated &#x3bb;, which might reflect spurious signals. Empirically, FarmCPU often finds more associations than MLM but allows for more false positives, while BLINK efficiently detects associations, often with fewer false positives, outperforming FarmCPU (<xref ref-type="bibr" rid="B39">Huang et&#xa0;al., 2019</xref>). When assessing potential confounding due to population structure, we observed that certain MTAs uniquely detected by FarmCPU and BLINK indeed showed higher informativeness values, indicating a greater likelihood of being influenced by population structure. This may reflect differences in the modeling approaches of these methods and their interaction with the specific characteristics of the analyzed dataset. Nevertheless, not all MTAs detected by BLINK and FarmCPU appear to be related to population structure. The higher number of significant markers observed from these methods likely arises due to application of multi-locus tests in FarmCPU and BLINK, enabling detection of loci that single-marker tests might miss (<xref ref-type="bibr" rid="B64">Merrick et&#xa0;al., 2022</xref>). However, the potential for false positives, as indicated by the model inflation, underscores the value of incorporating ML approaches as a benchmark to further validate and refine these findings.</p>
</sec>
<sec id="s4_2">
<label>4.2</label>
<title>Evaluation of ML-based models for GWAS</title>
<p>The genetic architecture of the traits TKW and PH in wheat is clearly more complex than can be captured by purely linear, single marker models. In order to account for the well&#x2010;known limitations of traditional GWAS, namely their inability to capture non-linear and epistatic interactions, we employed ML-mediated GWAS to complement conventional methods. <xref ref-type="bibr" rid="B111">Yoosefzadeh-Najafabadi et&#xa0;al. (2022)</xref> demonstrated that ML-based GWAS can enhance the detection of MTAs in soybean, providing a broader and more comprehensive genetic insight than conventional methods alone.</p>
<p>Unlike traditional models, ML approaches do not estimate direct allele substitution effects. Instead, we focused on their capacity to identify MTAs, QTLs, and candidate genes. To further assess their predictive performance, we evaluated overall model fit using RMSE, where XGB outperformed other models for both traits, PH and TKW. TSLRF was developed with the same reasoning as MLMs, that control for population structure and polygenic background, which is crucial to reduce false positives, i.e. importance rankings of SNPs (<xref ref-type="bibr" rid="B73">Price et&#xa0;al., 2006</xref>; <xref ref-type="bibr" rid="B89">Sun et&#xa0;al., 2019</xref>). Previous work by <xref ref-type="bibr" rid="B87">Stephan et&#xa0;al. (2015)</xref> showed that RF and Lasso correcting for population structure outperformed their unadjusted counterparts. However, this stringent control might come at the expense of power, which is reflected in high false negative rates of MLM (<xref ref-type="bibr" rid="B52">Liu et&#xa0;al., 2016</xref>). This trade-off was evident in our results: for TKW, TSLRF only detected markers on chromosomes 2A and 6D, consistent with MLM findings. Notably, eight of the ten most important markers overlapped with MTAs from other models, suggesting that a less stringent cutoff might reveal additional meaningful associations, similar to traditional approaches. For PH no marker was significantly associated which is consistent with some MLM approaches used in this study and further supports the notion that stringent control may decrease detection power in traits with more complex architectures.</p>
<p>In contrast, RF, without a control for population structure and relatedness, identified a larger number of MTAs. This notable difference to TSLRF indicates that some of the MTAs detected may partly reflect confounding due to population structure. However, when investigating marker informativeness along the PCs, this does not seem to be the dominant cause. Another factor could be that, unlike EN and XGB, RF lacks feature regularization, resulting in correlated markers receiving similar importance scores. This is more likely, especially because most MTAs identified in RF cluster on chromosome 6A for TKW and on 5A and 7A for PH. Investigation of these clusters revealed that most of the markers are in strong LD, thereby inflating the number of associations. This discrepancy to EN and XGB suggests that regularization might be very important for ML-based GWAS approaches. Despite these limitations, the unadjusted RF model was able to detect MTAs that showed strong biological relevance; with a pipeline that better accounts for correlated features, RF might be a powerful algorithm for GWAS. Previous studies showed the high potential of RF in detecting MTAs: both RF and gradient boosting machines identified regions with multiple candidate genes and some previously not reported in cattle (<xref ref-type="bibr" rid="B3">Alves et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B51">Li et&#xa0;al., 2018</xref>). In soy, RF detected more regions compared to traditional models (MLM and FarmCPU) for yield component traits, with no overlap between the approaches (<xref ref-type="bibr" rid="B111">Yoosefzadeh-Najafabadi et&#xa0;al., 2022</xref>). In contrast, our study revealed partial overlap between RF and traditional approaches, suggesting that while RF uncovers additional loci, it can still capture some of the core signals detected by traditional GWAS methods.</p>
<p>XGB, designed for high-dimensional and complex data, is well-suited for GWAS applications. <xref ref-type="bibr" rid="B33">Gill et&#xa0;al. (2022)</xref> showed that XGB not only outperformed deep learning architectures on most traits but also detected loci associated with flower and seed coat color that are supported by previous research. For complex traits with non-additive gene action, gradient boosting performed better than traditional RF and deep learning architectures (<xref ref-type="bibr" rid="B1">Abdollahi-Arpanahi et&#xa0;al., 2020</xref>). In our study, compared to mixed&#x2010;model single&#x2010;marker tests, XGB identified more MTAs, but found fewer than multi&#x2010;locus methods like FarmCPU and BLINK. Due to efficient regularization, XGB was better able to rank markers compared to RF. For TKW, 13 MTAs were detected, seven of which overlapped with those detected by RF. Notably, three MTAs were exclusively identified by these ML algorithms, two of which were linked to biologically plausible gene candidates, highlighting the potential of ML to uncover possible non&#x2010;linear or epistatic interactions overlooked in single&#x2010;marker mixed models.</p>
<p>The regularized regression model GLMnet is commonly used for feature ranking (<xref ref-type="bibr" rid="B21">Dagasso et&#xa0;al., 2021</xref>) or feature selection (<xref ref-type="bibr" rid="B18">Cho et&#xa0;al., 2010</xref>; <xref ref-type="bibr" rid="B95">Waldmann et&#xa0;al., 2013</xref>). <xref ref-type="bibr" rid="B40">John et&#xa0;al. (2022)</xref> compared several ML algorithms on prediction and feature selection performance and showed that EN performed as one of the best selection algorithms with good prediction performance, outperforming other ML algorithms. However, EN&#x2019;s reliance on regularization can lead to unstable features and may fail to select all informative features. To address this, <xref ref-type="bibr" rid="B63">Meinshausen and B&#xfc;hlmann (2010)</xref> proposed stability selection, a procedure that aggregates selection results across subsamples to improve consistency and control false discoveries. <xref ref-type="bibr" rid="B2">Alexander and Lange (2011)</xref> investigated this procedure on the Wellcome Trust Case-Control Consortium human dataset, albeit using Lasso instead of EN, and found that stability selection effectively controls the family-wise error rate but suffers from a loss of power. Nevertheless, in our study, we implemented stability selection alongside EN because the ridge component of EN preserves groups of correlated predictors, resulting in a less sparse model, while stability selection further improves the robustness and interpretability of our marker rankings. Although EN does not model non&#x2010;linear or interaction terms directly, it uncovered several distinct MTAs linked to genes associated with the traits. For PH, a notable example was S7A_22981667, which neither tree&#x2010;based methods nor MLMs detected. 
These unique SNPs likely represent additive effects that were too subtle to surpass genome-wide thresholds in single-marker MLMs yet survived the regularization and stability&#x2010;selection process.</p>
<p>Overall, ML-based approaches recovered core MTAs identified by traditional GWAS while differing in their sensitivity to additional loci, with overlap patterns strongly dependent on the specific algorithm and trait. Interpretations involving MLM-based comparisons should nevertheless be made cautiously, as the small number of detected MTAs resulted in sparse contingency tables and limited power, despite the use of a continuity-corrected McNemar&#x2019;s test.</p>
</sec>
<sec id="s4_3">
<label>4.3</label>
<title>Functional characterization of shared MTAs identified by traditional and ML-based GWAS</title>
<p>To assess the added value of using ML approaches for GWAS and candidate gene discovery, we examined the MTAs previously identified by ML approaches for their biological function relevant for the analyzed traits. After removing MTAs in strong LD, we found 21 markers with relevant genes, 11 for TKW and ten for PH. First, we will discuss those MTAs that were found in traditional methods and further validated using ML-based GWAS approaches.</p>
<p>The TKW-associated marker S2A_143724068 was identified by all models except RF and XGB and points to the candidate gene <italic>TraesCS2A02G183900</italic>. This gene was previously reported by <xref ref-type="bibr" rid="B55">Liu et&#xa0;al. (2022)</xref> as an auxin-responsive differentially expressed gene involved in phytohormone metabolism and signaling in pairwise near-isogenic lines. Auxin plays a critical role in lateral root initiation and development. Root spatial configuration in the soil is recognized as a promising strategy to enhance crop yield (<xref ref-type="bibr" rid="B57">Lynch, 2022</xref>). Following this reasoning it might be a promising candidate for TKW. Marker S3D_181564815 (FarmCPU, BLINK, EN) co-localizes with three candidate genes potentially influencing TKW. The first one, <italic>TraesCS3D02G191300</italic> encodes an expansin, a cell-wall&#x2013;loosening protein promoting cell expansion in growing tissues such as the developing endosperm. At the molecular level, expansin expression is a key determinant of grain cell enlargement and thus final kernel size. Although 241 expansins have been annotated in wheat, only a subset has been functionally characterized, with some linked to grain traits (<xref ref-type="bibr" rid="B68">Mira et&#xa0;al., 2023</xref>). <italic>TraesCS3D02G189900</italic>, a Class III peroxidase, modulates cell-wall stiffening and has been associated with spikelet traits inversely related to TKW. Overexpression of the related gene <italic>TaPRX-2A</italic> reduced grain number per spike and spike length, indicating potential pleiotropic effects (<xref ref-type="bibr" rid="B113">Zang et&#xa0;al., 2025</xref>). Additionally, <italic>TraesCS3D02G189500</italic>, a putative nitrate transporter (NPF family), is implicated in nitrogen use efficiency which is a key determinant of grain filling and starch accumulation (<xref ref-type="bibr" rid="B45">Kumar et&#xa0;al., 2022</xref>). 
The gene <italic>TraesCS5A02G359900</italic>, annotated as a serine/threonine protein phosphatase, was the most likely candidate for marker S5A_561661320 (BLINK, RF and XGB). <xref ref-type="bibr" rid="B38">Hu et&#xa0;al. (2012)</xref> identified a serine/threonine protein phosphatase gene as a candidate after fine mapping of a grain length, grain width and TKW QTL in rice. A promising candidate for marker S6A_481736675 (FarmCPU, RF, XGB) is <italic>TraesCS6A02G259000</italic>, a MADS-box transcription factor serving as a candidate gene for grain size and weight in <italic>Triticum turgidum</italic> (<xref ref-type="bibr" rid="B61">Mangini et&#xa0;al., 2025</xref>). Marker S6A_484065257 (FarmCPU, RF, XGB) lies near <italic>TraesCS6A02G260600</italic> and <italic>TraesCS6A02G260500</italic> that share a seed&#x2212;storage helical domain and are annotated for lipid transport (GO:0006869). These non-specific lipid transfer proteins have been linked to cuticle formation and seed coat integrity and knock-out plants in rice showed decreased TKW and grain size (<xref ref-type="bibr" rid="B49">Li et&#xa0;al., 2023b</xref>). Located near marker S6A_531394936 (BLINK, RF, XGB) is <italic>GS1</italic> (<italic>TraesCS6A02G298100</italic>), a central enzyme in the glutamine synthetase (GS)/glutamate synthase (GOGAT) cycle which is involved in N assimilation and represents a crucial metabolic step in regulating nitrogen use efficiency and grain yield (<xref ref-type="bibr" rid="B28">Fortunato et&#xa0;al., 2023</xref>). Additional studies on the GS1 gene family in several crops have shown that they play different roles in determining seed size and number, grain filling and development (<xref ref-type="bibr" rid="B91">Tabuchi et&#xa0;al., 2005</xref>; <xref ref-type="bibr" rid="B9">Bernard et&#xa0;al., 2008</xref>; <xref ref-type="bibr" rid="B28">Fortunato et&#xa0;al., 2023</xref>). 
In the LD interval also lie <italic>TraesCS6A02G296500</italic> (<xref ref-type="bibr" rid="B62">Manjunath et&#xa0;al., 2023</xref>) and <italic>TraesCS6A02G296400</italic> (<xref ref-type="bibr" rid="B22">Devate et&#xa0;al., 2023</xref>), which have been reported as QTLs for TKW. Marker S6D_82630628 (GCTA, GEMMA, sommer, RF, XGB, and EN) falls directly on <italic>TraesCS6D02G116200</italic> (<italic>TaPRX-2A</italic>), encoding a Class III Peroxidase gene previously shown to regulate grain number per spike in common wheat (<xref ref-type="bibr" rid="B113">Zang et&#xa0;al., 2025</xref>). Because grain number per spike and TKW are often negatively correlated, variation in <italic>TaPRX-2A</italic> may indirectly influence TKW. Similarly, <italic>TraesCS7A02G136600</italic> encodes a COBRA-like protein; members of this family show a range of functions and participate in various developmental processes in cereals. In rice, for example, it has been associated with QTLs for grain number per spike (<xref ref-type="bibr" rid="B69">Mizuno et&#xa0;al., 2021</xref>). While this gene also showed no direct correlation with TKW, the study highlighted the complex genetic architecture of spike traits. S7A_90336792, linked with <italic>TraesCS7A02G136600</italic>, was significantly associated with TKW using FarmCPU, RF and XGB.</p>
<p>Regarding the trait PH, the marker S5A_563358779 (FarmCPU, BLINK, XGB, EN), which corresponds to a cluster of Cytochrome P450 (CYP450) genes (<italic>TraesCS5A02G362400</italic>, <italic>TraesCS5A02G361800</italic>, <italic>TraesCS5A02G361700</italic>, <italic>TraesCS5A02G363000</italic>, <italic>TraesCS5A02G362900</italic>, <italic>TraesCS5A02G362600</italic>, <italic>TraesCS5A02G362700</italic>), has been detected in our study. Some members of the CYP450 family play a key role in brassinosteroid biosynthesis, which influences plant height through the regulation of cell elongation. The biosynthesis and catabolism of brassinosteroids rely on the enzymatic activity of several CYP450 subfamilies (<xref ref-type="bibr" rid="B50">Li and Wei, 2020</xref>). Manipulating the expression of BR-related genes can result in pleiotropic effects, some of which may be undesirable in crop breeding. For example, changes in BR pathways often lead to altered plant architecture, including reduced or excessive plant height (<xref ref-type="bibr" rid="B104">Yamamuro et&#xa0;al., 2000</xref>). Furthermore, the markers S1B_568961025 (FarmCPU, BLINK, RF) and S7A_403940100 (BLINK, FarmCPU, MLMM, sommer, TASSEL_MLM, RF) lie close to a Growth-Regulating Factor (GRF) and zinc finger GRF, respectively. GRFs are plant-specific transcription factors known to play critical roles in plant growth, development, and responses to environmental stress (<xref ref-type="bibr" rid="B103">Wu et&#xa0;al., 2014</xref>; <xref ref-type="bibr" rid="B94">Van Der Knaap et&#xa0;al., 2000</xref>; <xref ref-type="bibr" rid="B5">Bazhenov et&#xa0;al., 2021</xref>), demonstrating the essential role of GRFs as candidate genes for PH.</p>
<p>The high number of overlapping markers shows that ML-driven GWAS can effectively validate and prioritize potential MTAs that are detected by traditional methods like FarmCPU and BLINK, which show a higher power to detect associations at a cost of more false positives. However, in addition to MTAs that co&#x2010;localize with genes of known function, we also identified several associations without any obvious candidate annotation. Two of these unannotated loci stood out for their consistency across methods: S6D_241296319, associated with TKW, was identified by all approaches, and S7A_335064522, associated with PH, was detected by six methods. These findings reflect the fact that GWAS is only as informative as the available functional annotation, and without high&#x2010;confidence gene models or known pathways, the biological interpretation remains uncertain.</p>
<p>Notably, the marker S2B_565068485, which was detected by FarmCPU, BLINK, and XGB, showed no obvious candidate genes in its vicinity but instead exhibited a signal pattern more closely aligned with population structure, suggesting potential confounding. A similar pattern was observed for S4A_679187682, identified by BLINK and both tree-based learners. These findings indicate that ML-based GWAS can be susceptible to residual confounding when stratification is not accounted for. However, some markers identified by most approaches applied in this study, including the conservative MLM (for instance, S6D_143959985 and S6D_241296319), showed similar patterns. A likely conclusion is that at least some of these signals reflect genuine selection-driven associations rather than structural or noise-related artifacts.</p>
</sec>
<sec id="s4_4">
<label>4.4</label>
<title>Unique MTAs revealed by ML-based approaches</title>
<p>While the overlap between ML-based and traditional GWAS highlights the robustness of the detected MTAs, we now focus on the ability of ML algorithms to detect additional, previously overlooked associations to extend and enrich conventional discovery pipelines, building on prior studies that have already demonstrated this potential (<xref ref-type="bibr" rid="B67">Mieth et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B3">Alves et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B33">Gill et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B32">Ghose et&#xa0;al., 2024</xref>).</p>
<p>Tree-based ML algorithms uncovered three novel MTAs that were not detected by traditional GWAS. For TKW, a candidate behind S2A_507030190 (RF, XGB) is <italic>TraesCS2A02G294600</italic>, blue&#x2010;light inhibitor of cryptochromes 1 (<italic>BIC1</italic>), functioning as a transcriptional coactivator that promotes brassinosteroid signaling and plant growth (<xref ref-type="bibr" rid="B110">Yang et&#xa0;al., 2021b</xref>). Since brassinosteroids might directly influence grain size through cell&#x2010;expansion pathways, this MTA represents a physiologically plausible contributor to TKW variation. For PH, S5A_519892082, found by RF and XGB, and S4D_455321401, solely found by XGB, showed several promising genes in their proximity: the first marker is near <italic>TraesCS5A02G307300</italic>, a PHD-type domain-containing protein, playing an important role in plant growth, development and response to abiotic stress (<xref ref-type="bibr" rid="B72">Pang et&#xa0;al., 2022</xref>) and two CYP450 genes (<italic>TraesCS5A02G307700</italic>, <italic>TraesCS5A02G307600</italic>) linked to PH. The second marker is associated with <italic>TraesCS4D02G283000</italic>, a MYB transcription factor, playing important roles in plant development, including internode elongation and stem architecture. Although relatively few studies have directly linked MYB transcription factors to stem development, available evidence suggests their involvement in regulating stem height and strength. For instance, overexpression of <italic>TaMYB18s</italic> led to a pronounced leaf rolling phenotype, a phenotype also related to drought stress (<xref ref-type="bibr" rid="B42">Kadioglu and Terzi, 2007</xref>), and a significant reduction in plant height in wheat (<xref ref-type="bibr" rid="B114">Zhang et&#xa0;al., 2016</xref>). 
Similarly, <italic>GmGAMYB</italic> has been shown to promote stem elongation via the gibberellin signaling pathway in soybean (<xref ref-type="bibr" rid="B108">Yang et&#xa0;al., 2021a</xref>), while <italic>OsMPH1</italic> acts as a positive regulator of stem height in rice (<xref ref-type="bibr" rid="B116">Zhang et&#xa0;al., 2017</xref>). These findings support the potential involvement of MYB-related genes in regulating plant height across different species. On the other hand, by applying EN, most MTAs for TKW overlapped with markers that were found with traditional methods, while also finding two unique markers. One of them, S1A_540136972, is located near the gene <italic>TraesCS1A02G357800</italic>, which is involved in the regulation of flower development (GO:0009909). This gene belongs to the AP2/ERF transcription factor family, previously shown to increase grain yield due to regulation of inflorescence architecture and grain number per spike in wheat and soybean (<xref ref-type="bibr" rid="B96">Wang et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B48">Li et&#xa0;al., 2023a</xref>). The increase of grain number per spike is often linked with a decrease in grain weight directly influencing TKW.</p>
<p>For PH, EN uniquely identified 14 MTAs; of these, the following five were located near candidate genes: S1B_670929735, S3A_646672662, S4A_4593612, S7A_22981667, and S7B_695381826. The first one is located near a cluster of S-type anion channel genes (SLAH2 family: <italic>TraesCS1B02G456200</italic>, <italic>TraesCS1B02G456300</italic>, <italic>TraesCS1B02G456400</italic>, and <italic>TraesCS1B02G456500</italic>), which are associated with nitrate transport (<xref ref-type="bibr" rid="B45">Kumar et&#xa0;al., 2022</xref>) and regulation of nitrate flux between root and shoot (<xref ref-type="bibr" rid="B59">Maierhofer et&#xa0;al., 2014</xref>), thereby potentially influencing overall plant growth and development, including height. The second marker of relevance is S3A_646672662, which is located near <italic>TraesCS3A02G399800</italic>, a homologue of <italic>GA20ox-D3</italic>, which catalyzes a late step in gibberellin biosynthesis and is responsible for unidimensional cell growth (<xref ref-type="bibr" rid="B74">Qin et&#xa0;al., 2013</xref>). This gene underlies classic &#x201c;Green Revolution&#x201d; height phenotypes in rice and wheat (sd1/Rht) and controls stem elongation in legumes (<xref ref-type="bibr" rid="B86">Spielmeyer et&#xa0;al., 2002</xref>). The third marker, S4A_4593612, is associated with a gene encoding ABC transporter B family member 19, which is well-characterized as a major auxin exporter in <italic>Arabidopsis</italic>. The phytohormone auxin plays a critical role in plant development, in cell growth, division, and expansion (<xref ref-type="bibr" rid="B117">Zhao et&#xa0;al., 2013</xref>). This could directly influence the plant height in wheat, making this a promising candidate gene. Furthermore, S7A_22981667 is located close to the gene <italic>GRF5-A1</italic> belonging to the GRFs, which have been mentioned already in the section above. 
Finally, S7B_695381826 is located close to <italic>TraesCS7B02G426100</italic>, which encodes an F-box domain protein, known to be relevant for various functions (<xref ref-type="bibr" rid="B75">Rameneni et&#xa0;al., 2018</xref>).</p>
<p>Collectively, these novel ML-derived MTAs enrich our understanding of the genetic architecture of TKW and PH by revealing hormone-related, nutrient-transport, and protein-turnover genes that traditional GWAS alone failed to detect. In particular, penalized models efficiently rank markers and show promise as ML-based GWAS approaches. Moreover, several of the MTAs mapped to genes involved in biotic and abiotic stress responses, pathways we did not explicitly include, but which can profoundly influence resource allocation and developmental timing, and ultimately affect grain weight (TKW) and stem elongation (PH) under field conditions.</p>
</sec>
</sec>
<sec id="s5" sec-type="conclusions">
<label>5</label>
<title>Conclusion</title>
<p>Machine Learning (ML) offers a powerful complement to traditional GWAS methodologies for plant genomics, as demonstrated in our study using a large-scale, real-world wheat dataset. While mixed linear models remain robust for correcting population structure and controlling false positives, they often miss complex, non-additive genetic signals. ML methods address this limitation not only by recapitulating key loci detected by conventional approaches, but also by uncovering novel and biologically plausible associations. These findings illustrate how tree-based and penalized regression models can detect even subtle and potentially epistatic effects that evade detection by traditional GWAS tools. Moreover, ML methods offer scalable, interpretable, and model-agnostic importance measures, rendering them especially valuable for high-dimensional plant genomic datasets. By systematically comparing traditional and ML-based GWAS tools, we provide actionable insights into their performance, usability, and biological relevance, advocating for the integration of ML into routine breeding workflows to enhance trait dissection and accelerate marker-assisted selection under complex genomic architectures. Instead of recommending one tool (e.g., the one with the most MTAs) or one method (e.g., the fastest), we have shown that a mix of methods and tools provides a more complete overview, allows a core set of MTAs to be defined, and enables tool/method-specific MTAs to be characterized, which have the potential to yield new insights.</p>
<p>However, several limitations of this study should be considered. First, the phenotypic data come from a large, publicly available breeding dataset and may contain noise from environmental variation, measurement error, and unmodeled genotype-by-environment (G&#xd7;E) interactions, potentially reducing statistical power to detect MTAs. Second, while ML models capture non-linear effects, they are prone to overfitting in high-dimensional genomic settings. Third, applying and comparing multiple GWAS models introduces an additional layer of multiple testing at the method-comparison level, which may inflate the probability of false-positive findings. To mitigate this, we emphasized overlap across methods and focused on biologically plausible MTAs supported by functional annotation. Our results also differ from <xref ref-type="bibr" rid="B41">Juliana et&#xa0;al. (2019)</xref>, likely due to methodological differences, including a lenient Bonferroni significance threshold (&#x3b1; = 0.20) and SNP filtering and imputation strategies retaining low-frequency alleles. Nevertheless, we replicated five loci reported by <xref ref-type="bibr" rid="B41">Juliana et&#xa0;al. (2019)</xref>: three for PH (S3B_757480752 via BLINK, FarmCPU, RF, XGB; S6A_484916096 via BLINK only; and S7A_403940100 via BLINK, FarmCPU, MLMM, sommer, TASSEL_MLM, RF), one for TKW (S6B_583334052 via FarmCPU), and one locus that they associated with TKW but for which we found a significant association with PH (S6A_553941375 via BLINK). In contrast, we did not replicate MTAs at the GS5-3A locus, which has been linked to increased kernel size (<xref ref-type="bibr" rid="B58">Ma et&#xa0;al., 2016</xref>), or the <italic>Tasus</italic> gene on chromosome 7B.</p>
<p>To fully harness ML&#x2019;s potential for GWAS, future research must integrate technical innovation with domain expertise. Hybrid analytical frameworks that incorporate interaction effects and biological priors, such as gene regulatory networks, functional ontologies, or chromatin accessibility data, could improve model interpretability and guide learning. Ensemble strategies combining traditional statistical GWAS, ML, and causal inference may further disentangle signals from noise, addressing missing heritability and extending applicability to complex dynamic traits and G&#xd7;E interactions through multi-environmental and longitudinal phenotyping. Furthermore, follow-up analysis of the top consensus markers and interesting ML-derived MTAs, including investigation of potential interaction effects, SHAP-based estimation of model-specific effect sizes, and subsequent fine-mapping and haplotype-level characterization, will be essential to validate these loci and to distinguish true causal variants from markers identified through correlation or linkage alone. Likewise, extending ML-GWAS to explicitly capture genotype-by-environment interactions will enable the identification of environment-specific and stable alleles, thereby strengthening selection decisions and supporting the development of resilient cultivars across variable production environments.</p>
</sec>
</body>
<back>
<sec id="s6" sec-type="data-availability">
<title>Data availability statement</title>
<p>Publicly available datasets were analyzed in this study. This data can be found here: <uri xlink:href="https://www.nature.com/articles/s41588-019-0496-6">https://www.nature.com/articles/s41588-019-0496-6</uri>.</p></sec>
<sec id="s7" sec-type="author-contributions">
<title>Author contributions</title>
<p>JM: Writing &#x2013; review &amp; editing, Writing &#x2013; original draft, Conceptualization, Investigation, Formal analysis, Methodology, Visualization, Data curation. SM: Methodology, Writing &#x2013; review &amp; editing. AB: Writing &#x2013; review &amp; editing, Methodology. AH: Methodology, Supervision, Writing &#x2013; review &amp; editing. EM: Conceptualization, Writing &#x2013; review &amp; editing, Supervision, Visualization, Project administration, Funding acquisition.</p></sec>
<sec id="s9" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>The authors declared that this work was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p></sec>
<sec id="s10" sec-type="ai-statement">
<title>Generative AI statement</title>
<p>The author(s) declared that generative AI was used in the creation of this manuscript. During the preparation of this work, the authors used ChatGPT Business (GPT-4o/5 model), LeChat Pro and DeepL to improve the language and readability, as well as for spelling check. After using these tools, the authors reviewed the output and edited the content as needed and take full responsibility for the content of the publication.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p></sec>
<sec id="s11" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p></sec>
<sec id="s12" sec-type="supplementary-material">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fpls.2025.1734247/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fpls.2025.1734247/full#supplementary-material</ext-link></p>
<supplementary-material xlink:href="Presentation1.pptx" id="SM1" mimetype="application/vnd.openxmlformats-officedocument.presentationml.presentation"/>
<supplementary-material xlink:href="Presentation2.pptx" id="SM2" mimetype="application/vnd.openxmlformats-officedocument.presentationml.presentation"/>
<supplementary-material xlink:href="Table1.xlsx" id="ST1" mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"/>
<supplementary-material xlink:href="Table2.xlsx" id="ST2" mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"/>
<supplementary-material xlink:href="Table3.xlsx" id="ST3" mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"/></sec>
<ref-list>
<title>References</title>
<ref id="B1">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Abdollahi-Arpanahi</surname> <given-names>R.</given-names></name>
<name><surname>Gianola</surname> <given-names>D.</given-names></name>
<name><surname>Pe&#xf1;agaricano</surname> <given-names>F.</given-names></name>
</person-group> (<year>2020</year>). 
<article-title>Deep learning versus parametric and ensemble methods for genomic prediction of complex phenotypes</article-title>. <source>Genet. Selection Evol.</source> <volume>52</volume>, <fpage>12</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s12711-020-00531-z</pub-id>, PMID: <pub-id pub-id-type="pmid">32093611</pub-id>
</mixed-citation>
</ref>
<ref id="B2">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Alexander</surname> <given-names>D. H.</given-names></name>
<name><surname>Lange</surname> <given-names>K.</given-names></name>
</person-group> (<year>2011</year>). 
<article-title>Stability selection for genome-wide association</article-title>. <source>Genet. Epidemiol.</source> <volume>35</volume>, <fpage>722</fpage>&#x2013;<lpage>728</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1002/gepi.20623</pub-id>, PMID: <pub-id pub-id-type="pmid">22009793</pub-id>
</mixed-citation>
</ref>
<ref id="B3">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Alves</surname> <given-names>A. A. C.</given-names></name>
<name><surname>da Costa</surname> <given-names>R. M.</given-names></name>
<name><surname>Fonseca</surname> <given-names>L. F. S.</given-names></name>
<name><surname>Carvalheiro</surname> <given-names>R.</given-names></name>
<name><surname>Ventura</surname> <given-names>R. V.</given-names></name>
<name><surname>Rosa</surname> <given-names>G. J.</given-names></name>
<etal/>
</person-group>. (<year>2022</year>). 
<article-title>A random forest-based genome-wide scan reveals fertility-related candidate genes and potential inter-chromosomal epistatic regions associated with age at first calving in Nellore cattle</article-title>. <source>Front. Genet.</source> <volume>13</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fgene.2022.834724</pub-id>, PMID: <pub-id pub-id-type="pmid">35692843</pub-id>
</mixed-citation>
</ref>
<ref id="B4">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Arloth</surname> <given-names>J.</given-names></name>
<name><surname>Eraslan</surname> <given-names>G.</given-names></name>
<name><surname>Andlauer</surname> <given-names>T. F. M.</given-names></name>
<name><surname>Martins</surname> <given-names>J.</given-names></name>
<name><surname>Iurato</surname> <given-names>S.</given-names></name>
<name><surname>K&#xfc;hnel</surname> <given-names>B.</given-names></name>
<etal/>
</person-group>. (<year>2020</year>). 
<article-title>DeepWAS: Multivariate genotype-phenotype associations by directly integrating regulatory information using deep learning</article-title>. <source>PloS Comput. Biol.</source> <volume>16</volume> (<issue>2</issue>), <elocation-id>e1007616</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1371/JOURNAL.PCBI.1007616</pub-id>, PMID: <pub-id pub-id-type="pmid">32012148</pub-id>
</mixed-citation>
</ref>
<ref id="B5">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Bazhenov</surname> <given-names>M. S.</given-names></name>
<name><surname>Chernook</surname> <given-names>A. G.</given-names></name>
<name><surname>Bespalova</surname> <given-names>L. A.</given-names></name>
<name><surname>Gritsay</surname> <given-names>T. I.</given-names></name>
<name><surname>Polevikova</surname> <given-names>N. A.</given-names></name>
<name><surname>Karlov</surname> <given-names>G. I.</given-names></name>
<etal/>
</person-group>. (<year>2021</year>). 
<article-title>Alleles of the GRF3-2A gene in wheat and their agronomic value</article-title>. <source>Int. J. Mol. Sci.</source> <volume>22</volume> (<issue>22</issue>), <elocation-id>12376</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/ijms222212376</pub-id>, PMID: <pub-id pub-id-type="pmid">34830258</pub-id>
</mixed-citation>
</ref>
<ref id="B6">
<mixed-citation publication-type="web">
<person-group person-group-type="author">
<name><surname>Becker</surname> <given-names>M.</given-names></name>
<name><surname>Lang</surname> <given-names>M.</given-names></name>
<name><surname>Richter</surname> <given-names>J.</given-names></name>
<name><surname>Bischl</surname> <given-names>B.</given-names></name>
<name><surname>Schalk</surname> <given-names>D.</given-names></name>
</person-group> (<year>2025</year>b).
<article-title>mlr3tuning: hyperparameter optimization for &#x2018;mlr3&#x2019;</article-title>. Available online at: <uri xlink:href="https://CRAN.R-project.org/package=mlr3tuning">https://CRAN.R-project.org/package=mlr3tuning</uri>.
</mixed-citation>
</ref>
<ref id="B7">
<mixed-citation publication-type="web">
<person-group person-group-type="author">
<name><surname>Becker</surname> <given-names>M.</given-names></name>
<name><surname>Lang</surname> <given-names>M.</given-names></name>
<name><surname>Schratz</surname> <given-names>P.</given-names></name>
</person-group> (<year>2025</year>a).
<article-title>mlr3misc: helper functions for &#x2018;mlr3&#x2019;</article-title>. Available online at: <uri xlink:href="https://CRAN.R-project.org/package=mlr3misc">https://CRAN.R-project.org/package=mlr3misc</uri>.
</mixed-citation>
</ref>
<ref id="B8">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Benjamini</surname> <given-names>Y.</given-names></name>
<name><surname>Hochberg</surname> <given-names>Y.</given-names></name>
</person-group> (<year>1995</year>). 
<article-title>Controlling the false discovery rate: a practical and powerful approach to multiple testing</article-title>. <source>J. R. Stat. society: Ser. B (Methodological)</source> <volume>57</volume>, <fpage>289</fpage>&#x2013;<lpage>300</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/j.2517-6161.1995.tb02031.x</pub-id>
</mixed-citation>
</ref>
<ref id="B9">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Bernard</surname> <given-names>S. M.</given-names></name>
<name><surname>M&#xf8;ller</surname> <given-names>A. L. B.</given-names></name>
<name><surname>Dionisio</surname> <given-names>G.</given-names></name>
<name><surname>Kichey</surname> <given-names>T.</given-names></name>
<name><surname>Jahn</surname> <given-names>T. P.</given-names></name>
<name><surname>Dubois</surname> <given-names>F.</given-names></name>
<etal/>
</person-group>. (<year>2008</year>). 
<article-title>Gene expression, cellular localisation and function of glutamine synthetase isozymes in wheat (Triticum aestivum L.)</article-title>. <source>Plant Mol. Biol.</source> <volume>67</volume>, <fpage>89</fpage>&#x2013;<lpage>105</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s11103-008-9303-y</pub-id>, PMID: <pub-id pub-id-type="pmid">18288574</pub-id>
</mixed-citation>
</ref>
<ref id="B10">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Bischl</surname> <given-names>B.</given-names></name>
<name><surname>Binder</surname> <given-names>M.</given-names></name>
<name><surname>Lang</surname> <given-names>M.</given-names></name>
<name><surname>Pielok</surname> <given-names>T.</given-names></name>
<name><surname>Richter</surname> <given-names>J.</given-names></name>
<name><surname>Coors</surname> <given-names>S.</given-names></name>
<etal/>
</person-group>. (<year>2023</year>). 
<article-title>Hyperparameter optimization: Foundations, algorithms, best practices, and open challenges</article-title>. <source>WIREs Data Mining and Knowledge Discovery</source> <volume>13</volume> (<issue>2</issue>), <elocation-id>e1484</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1002/widm.1484</pub-id>
</mixed-citation>
</ref>
<ref id="B11">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Biswas</surname> <given-names>S.</given-names></name>
<name><surname>Scheinfeldt</surname> <given-names>L. B.</given-names></name>
<name><surname>Akey</surname> <given-names>J. M.</given-names></name>
</person-group> (<year>2009</year>). 
<article-title>Genome-wide insights into the patterns and determinants of fine-scale population structure in humans</article-title>. <source>Am. J. Hum. Genet.</source> <volume>84</volume>, <fpage>641</fpage>&#x2013;<lpage>650</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.ajhg.2009.04.015</pub-id>, PMID: <pub-id pub-id-type="pmid">19442770</pub-id>
</mixed-citation>
</ref>
<ref id="B12">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name><surname>Bonferroni</surname> <given-names>C.</given-names></name>
</person-group> (<year>1936</year>). <source>Teoria statistica delle classi e calcolo delle probabilita</source>
<publisher-name>Pubblicazioni del R istituto superiore di scienze economiche e commerciali di firenze</publisher-name>. <volume>8</volume>, <fpage>3</fpage>&#x2013;<lpage>62</lpage>.
</mixed-citation>
</ref>
<ref id="B13">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Bradbury</surname> <given-names>P. J.</given-names></name>
<name><surname>Zhang</surname> <given-names>Z.</given-names></name>
<name><surname>Kroon</surname> <given-names>D. E.</given-names></name>
<name><surname>Casstevens</surname> <given-names>T. M.</given-names></name>
<name><surname>Ramdoss</surname> <given-names>Y.</given-names></name>
<name><surname>Buckler</surname> <given-names>E. S.</given-names></name>
</person-group> (<year>2007</year>). 
<article-title>TASSEL: Software for association mapping of complex traits in diverse samples</article-title>. <source>Bioinformatics</source> <volume>23</volume>, <fpage>2633</fpage>&#x2013;<lpage>2635</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/bioinformatics/btm308</pub-id>, PMID: <pub-id pub-id-type="pmid">17586829</pub-id>
</mixed-citation>
</ref>
<ref id="B14">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name><surname>B&#xfc;hlmann</surname> <given-names>P.</given-names></name>
<name><surname>Van De Geer</surname> <given-names>S.</given-names></name>
</person-group> (<year>2011</year>). <source>Statistics for high-dimensional data</source> (<publisher-loc>Berlin, Heidelberg</publisher-loc>: 
<publisher-name>Springer</publisher-name>).
</mixed-citation>
</ref>
<ref id="B15">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Chang</surname> <given-names>C. C.</given-names></name>
<name><surname>Chow</surname> <given-names>C. C.</given-names></name>
<name><surname>Tellier</surname> <given-names>L. C.</given-names></name>
<name><surname>Vattikuti</surname> <given-names>S.</given-names></name>
<name><surname>Purcell</surname> <given-names>S. M.</given-names></name>
<name><surname>Lee</surname> <given-names>J. J.</given-names></name>
</person-group> (<year>2015</year>). 
<article-title>Second-generation PLINK: rising to the challenge of larger and richer datasets</article-title>. <source>Gigascience</source> <volume>4</volume>, <elocation-id>s13742-015-0047-8</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s13742-015-0047-8</pub-id>, PMID: <pub-id pub-id-type="pmid">25722852</pub-id>
</mixed-citation>
</ref>
<ref id="B16">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Chang-Brahim</surname> <given-names>I.</given-names></name>
<name><surname>Koppensteiner</surname> <given-names>L. J.</given-names></name>
<name><surname>Beltrame</surname> <given-names>L.</given-names></name>
<name><surname>Bodner</surname> <given-names>G.</given-names></name>
<name><surname>Saranti</surname> <given-names>A.</given-names></name>
<name><surname>Salzinger</surname> <given-names>J.</given-names></name>
<etal/>
</person-group>. (<year>2024</year>). 
<article-title>Reviewing the essential roles of remote phenotyping, GWAS and explainable AI in practical marker-assisted selection for drought-tolerant winter wheat breeding</article-title>. <source>Front. Plant Sci.</source> <volume>15</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fpls.2024.1319938</pub-id>, PMID: <pub-id pub-id-type="pmid">38699541</pub-id>
</mixed-citation>
</ref>
<ref id="B17">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name><surname>Chen</surname> <given-names>T.</given-names></name>
<name><surname>He</surname> <given-names>T.</given-names></name>
<name><surname>Benesty</surname> <given-names>M.</given-names></name>
<name><surname>Khotilovich</surname> <given-names>V.</given-names></name>
<name><surname>Tang</surname> <given-names>Y.</given-names></name>
<name><surname>Cho</surname> <given-names>H.</given-names></name>
<etal/>
</person-group> (<year>2025</year>). <source>xgboost: eXtreme Gradient Boosting</source>. doi:&#xa0;<pub-id pub-id-type="doi">10.32614/CRAN.package.xgboost</pub-id>
</mixed-citation>
</ref>
<ref id="B18">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Cho</surname> <given-names>S.</given-names></name>
<name><surname>Kim</surname> <given-names>K.</given-names></name>
<name><surname>Kim</surname> <given-names>Y. J.</given-names></name>
<name><surname>Lee</surname> <given-names>J. K.</given-names></name>
<name><surname>Cho</surname> <given-names>Y. S.</given-names></name>
<name><surname>Lee</surname> <given-names>J. Y.</given-names></name>
<etal/>
</person-group>. (<year>2010</year>). 
<article-title>Joint identification of multiple genetic variants via elastic-net variable selection in a genome-wide association analysis</article-title>. <source>Ann. Hum. Genet.</source> <volume>74</volume>, <fpage>416</fpage>&#x2013;<lpage>428</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/j.1469-1809.2010.00597.x</pub-id>, PMID: <pub-id pub-id-type="pmid">20642809</pub-id>
</mixed-citation>
</ref>
<ref id="B19">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Cho</surname> <given-names>S.</given-names></name>
<name><surname>Kim</surname> <given-names>H.</given-names></name>
<name><surname>Oh</surname> <given-names>S.</given-names></name>
<name><surname>Kim</surname> <given-names>K.</given-names></name>
<name><surname>Park</surname> <given-names>T.</given-names></name>
</person-group> (<year>2009</year>). 
<article-title>Elastic-net regularization approaches for genome-wide association studies of rheumatoid arthritis</article-title>. <source>BMC Proc.</source> <volume>3</volume> (<supplement>Suppl 7</supplement>), <fpage>S25</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/1753-6561-3-s7-s25</pub-id>, PMID: <pub-id pub-id-type="pmid">20018015</pub-id>
</mixed-citation>
</ref>
<ref id="B20">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Covarrubias-Pazaran</surname> <given-names>G.</given-names></name>
</person-group> (<year>2016</year>). 
<article-title>Genome-assisted prediction of quantitative traits using the R package sommer</article-title>. <source>PLoS One</source> <volume>11</volume> (<issue>6</issue>), <elocation-id>e0156744</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1371/journal.pone.0156744</pub-id>, PMID: <pub-id pub-id-type="pmid">27271781</pub-id>
</mixed-citation>
</ref>
<ref id="B21">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Dagasso</surname> <given-names>G.</given-names></name>
<name><surname>Yan</surname> <given-names>Y.</given-names></name>
<name><surname>Wang</surname> <given-names>L.</given-names></name>
<name><surname>Li</surname> <given-names>L.</given-names></name>
<name><surname>Kutcher</surname> <given-names>R.</given-names></name>
<name><surname>Zhang</surname> <given-names>W.</given-names></name>
<etal/>
</person-group>. (<year>2021</year>). 
<article-title>Leveraging machine learning to advance genome-wide association studies</article-title>. <source>Int. J. Data Min. Bioinf.</source> <volume>25</volume>, <fpage>17</fpage>&#x2013;<lpage>36</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1504/IJDMB.2021.116881</pub-id>
</mixed-citation>
</ref>
<ref id="B22">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Devate</surname> <given-names>N. B.</given-names></name>
<name><surname>Krishna</surname> <given-names>H.</given-names></name>
<name><surname>Mishra</surname> <given-names>C. N.</given-names></name>
<name><surname>Manjunath</surname> <given-names>K. K.</given-names></name>
<name><surname>Sunilkumar</surname> <given-names>V. P.</given-names></name>
<name><surname>Chauhan</surname> <given-names>D.</given-names></name>
<etal/>
</person-group>. (<year>2023</year>). 
<article-title>Genetic dissection of marker trait associations for grain micro-nutrients and thousand grain weight under heat and drought stress conditions in wheat</article-title>. <source>Front. Plant Sci.</source> <volume>13</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fpls.2022.1082513</pub-id>, PMID: <pub-id pub-id-type="pmid">36726675</pub-id>
</mixed-citation>
</ref>
<ref id="B23">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Dyer</surname> <given-names>S. C.</given-names></name>
<name><surname>Austine-Orimoloye</surname> <given-names>O.</given-names></name>
<name><surname>Azov</surname> <given-names>A. G.</given-names></name>
<name><surname>Barba</surname> <given-names>M.</given-names></name>
<name><surname>Barnes</surname> <given-names>I.</given-names></name>
<name><surname>Barrera-Enriquez</surname> <given-names>V. P.</given-names></name>
<etal/>
</person-group>. (<year>2025</year>). 
<article-title>Ensembl 2025</article-title>. <source>Nucleic Acids Res.</source> <volume>53</volume>, <fpage>D948</fpage>&#x2013;<lpage>D957</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/nar/gkae1071</pub-id>, PMID: <pub-id pub-id-type="pmid">39656687</pub-id>
</mixed-citation>
</ref>
<ref id="B24">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Efron</surname> <given-names>B.</given-names></name>
<name><surname>Hastie</surname> <given-names>T.</given-names></name>
<name><surname>Johnstone</surname> <given-names>I.</given-names></name>
<name><surname>Tibshirani</surname> <given-names>R.</given-names></name>
</person-group> (<year>2004</year>). 
<article-title>Least angle regression</article-title>. <source>Ann. Stat.</source> <volume>32</volume> (<issue>2</issue>), <fpage>407</fpage>&#x2013;<lpage>499</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1214/009053604000000067</pub-id>
</mixed-citation>
</ref>
<ref id="B25">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Elgart</surname> <given-names>M.</given-names></name>
<name><surname>Lyons</surname> <given-names>G.</given-names></name>
<name><surname>Romero-Brufau</surname> <given-names>S.</given-names></name>
<name><surname>Kurniansyah</surname> <given-names>N.</given-names></name>
<name><surname>Brody</surname> <given-names>J. A.</given-names></name>
<name><surname>Guo</surname> <given-names>X.</given-names></name>
<etal/>
</person-group>. (<year>2022</year>). 
<article-title>Non-linear machine learning models incorporating SNPs and PRS improve polygenic prediction in diverse human populations</article-title>. <source>Commun. Biol.</source> <volume>5</volume>, <fpage>856</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s42003-022-03812-z</pub-id>, PMID: <pub-id pub-id-type="pmid">35995843</pub-id>
</mixed-citation>
</ref>
<ref id="B26">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Enoma</surname> <given-names>D. O.</given-names></name>
<name><surname>Bishung</surname> <given-names>J.</given-names></name>
<name><surname>Abiodun</surname> <given-names>T.</given-names></name>
<name><surname>Ogunlana</surname> <given-names>O.</given-names></name>
<name><surname>Osamor</surname> <given-names>V. C.</given-names></name>
</person-group> (<year>2022</year>). 
<article-title>Machine learning approaches to genome-wide association studies</article-title>. <source>J. King Saud Univ. - Sci.</source> <volume>34</volume>, <fpage>101847</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.jksus.2022.101847</pub-id>
</mixed-citation>
</ref>
<ref id="B27">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Figueroa</surname> <given-names>M.</given-names></name>
<name><surname>Hammond-Kosack</surname> <given-names>K. E.</given-names></name>
<name><surname>Solomon</surname> <given-names>P. S.</given-names></name>
</person-group> (<year>2018</year>). 
<article-title>A review of wheat diseases&#x2014;a field perspective</article-title>. <source>Mol. Plant Pathol.</source> <volume>19</volume>, <fpage>1523</fpage>&#x2013;<lpage>1536</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/mpp.12618</pub-id>, PMID: <pub-id pub-id-type="pmid">29045052</pub-id>
</mixed-citation>
</ref>
<ref id="B28">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Fortunato</surname> <given-names>S.</given-names></name>
<name><surname>Nigro</surname> <given-names>D.</given-names></name>
<name><surname>Lasorella</surname> <given-names>C.</given-names></name>
<name><surname>Marcotuli</surname> <given-names>I.</given-names></name>
<name><surname>Gadaleta</surname> <given-names>A.</given-names></name>
<name><surname>de Pinto</surname> <given-names>M. C.</given-names></name>
</person-group> (<year>2023</year>). 
<article-title>The role of glutamine synthetase (GS) and glutamate synthase (GOGAT) in the improvement of nitrogen use efficiency in cereals</article-title>. <source>Biomolecules</source> <volume>13</volume>, <elocation-id>1771</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/biom13121771</pub-id>, PMID: <pub-id pub-id-type="pmid">38136642</pub-id>
</mixed-citation>
</ref>
<ref id="B29">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Friedman</surname> <given-names>J.</given-names></name>
<name><surname>Hastie</surname> <given-names>T.</given-names></name>
<name><surname>Tibshirani</surname> <given-names>R.</given-names></name>
</person-group> (<year>2010</year>). 
<article-title>Regularization paths for generalized linear models via coordinate descent</article-title>. <source>J. Stat. Software</source> <volume>33</volume>, <fpage>1</fpage>&#x2013;<lpage>22</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.18637/jss.v033.i01</pub-id>, PMID: <pub-id pub-id-type="pmid">20808728</pub-id>
</mixed-citation>
</ref>
<ref id="B30">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Galesloot</surname> <given-names>T. E.</given-names></name>
<name><surname>Van Steen</surname> <given-names>K.</given-names></name>
<name><surname>Kiemeney</surname> <given-names>L. A. L. M.</given-names></name>
<name><surname>Janss</surname> <given-names>L. L.</given-names></name>
<name><surname>Vermeulen</surname> <given-names>S. H.</given-names></name>
</person-group> (<year>2014</year>). 
<article-title>A comparison of multivariate genome-wide association methods</article-title>. <source>PLoS One</source> <volume>9</volume> (<issue>4</issue>), <elocation-id>e95923</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1371/journal.pone.0095923</pub-id>, PMID: <pub-id pub-id-type="pmid">24763738</pub-id>
</mixed-citation>
</ref>
<ref id="B31">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Genc</surname> <given-names>Y.</given-names></name>
<name><surname>Taylor</surname> <given-names>J.</given-names></name>
<name><surname>Lyons</surname> <given-names>G.</given-names></name>
<name><surname>Li</surname> <given-names>Y.</given-names></name>
<name><surname>Cheong</surname> <given-names>J.</given-names></name>
<name><surname>Appelbee</surname> <given-names>M.</given-names></name>
<etal/>
</person-group>. (<year>2019</year>). 
<article-title>Bread wheat with high salinity and sodicity tolerance</article-title>. <source>Front. Plant Sci.</source> <volume>10</volume>, <fpage>1280</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fpls.2019.01280</pub-id>, PMID: <pub-id pub-id-type="pmid">31695711</pub-id>
</mixed-citation>
</ref>
<ref id="B32">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Ghose</surname> <given-names>U.</given-names></name>
<name><surname>Sproviero</surname> <given-names>W.</given-names></name>
<name><surname>Winchester</surname> <given-names>L.</given-names></name>
<name><surname>Amin</surname> <given-names>N.</given-names></name>
<name><surname>Zhu</surname> <given-names>T.</given-names></name>
<name><surname>Newby</surname> <given-names>D.</given-names></name>
<etal/>
</person-group>. (<year>2024</year>). 
<article-title>Genome-wide association neural networks identify genes linked to family history of Alzheimer&#x2019;s disease</article-title>. <source>Briefings Bioinf.</source> <volume>26</volume> (<issue>1</issue>), <elocation-id>bbae704</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/bib/bbae704</pub-id>, PMID: <pub-id pub-id-type="pmid">39775791</pub-id>
</mixed-citation>
</ref>
<ref id="B33">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Gill</surname> <given-names>M.</given-names></name>
<name><surname>Anderson</surname> <given-names>R.</given-names></name>
<name><surname>Hu</surname> <given-names>H.</given-names></name>
<name><surname>Bennamoun</surname> <given-names>M.</given-names></name>
<name><surname>Petereit</surname> <given-names>J.</given-names></name>
<name><surname>Valliyodan</surname> <given-names>B.</given-names></name>
<etal/>
</person-group>. (<year>2022</year>). 
<article-title>Machine learning models outperform deep learning models, provide interpretation and facilitate feature selection for soybean trait prediction</article-title>. <source>BMC Plant Biol.</source> <volume>22</volume>, <fpage>180</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s12870-022-03559-z</pub-id>, PMID: <pub-id pub-id-type="pmid">35395721</pub-id>
</mixed-citation>
</ref>
<ref id="B34">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Grinberg</surname> <given-names>N. F.</given-names></name>
<name><surname>Orhobor</surname> <given-names>O. I.</given-names></name>
<name><surname>King</surname> <given-names>R. D.</given-names></name>
</person-group> (<year>2020</year>). 
<article-title>An evaluation of machine-learning for predicting phenotype: studies in yeast, rice, and wheat</article-title>. <source>Mach. Learn.</source> <volume>109</volume>, <fpage>251</fpage>&#x2013;<lpage>277</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s10994-019-05848-5</pub-id>, PMID: <pub-id pub-id-type="pmid">32174648</pub-id>
</mixed-citation>
</ref>
<ref id="B35">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Guo</surname> <given-names>Y.</given-names></name>
<name><surname>Wu</surname> <given-names>C.</given-names></name>
<name><surname>Yuan</surname> <given-names>Z.</given-names></name>
<name><surname>Wang</surname> <given-names>Y.</given-names></name>
<name><surname>Liang</surname> <given-names>Z.</given-names></name>
<name><surname>Wang</surname> <given-names>Y.</given-names></name>
<etal/>
</person-group>. (<year>2021</year>). 
<article-title>Gene-based testing of interactions using XGBoost in genome-wide association studies</article-title>. <source>Front. Cell Dev. Biol.</source> <volume>9</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fcell.2021.801113</pub-id>, PMID: <pub-id pub-id-type="pmid">34977040</pub-id>
</mixed-citation>
</ref>
<ref id="B36">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Guzman</surname> <given-names>C.</given-names></name>
<name><surname>Pe&#xf1;a</surname> <given-names>R. J.</given-names></name>
<name><surname>Singh</surname> <given-names>R.</given-names></name>
<name><surname>Autrique</surname> <given-names>E.</given-names></name>
<name><surname>Dreisigacker</surname> <given-names>S.</given-names></name>
<name><surname>Crossa</surname> <given-names>J.</given-names></name>
<etal/>
</person-group>. (<year>2016</year>). 
<article-title>Wheat quality improvement at CIMMYT and the use of genomic selection on it</article-title>. <source>Applied and Translational Genomics</source> <volume>11</volume>, <fpage>3</fpage>&#x2013;<lpage>8</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.atg.2016.10.004</pub-id>, PMID: <pub-id pub-id-type="pmid">28018844</pub-id>
</mixed-citation>
</ref>
<ref id="B37">
<mixed-citation publication-type="web">
<person-group person-group-type="author">
<name><surname>Hastie</surname> <given-names>T.</given-names></name>
<name><surname>Efron</surname> <given-names>B.</given-names></name>
</person-group> (<year>2022</year>).
<article-title>lars: least angle regression, lasso and forward stagewise</article-title>. Available online at: <uri xlink:href="https://CRAN.R-project.org/package=lars">https://CRAN.R-project.org/package=lars</uri>.
</mixed-citation>
</ref>
<ref id="B38">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Hu</surname> <given-names>Z.</given-names></name>
<name><surname>He</surname> <given-names>H.</given-names></name>
<name><surname>Zhang</surname> <given-names>S.</given-names></name>
<name><surname>Sun</surname> <given-names>F.</given-names></name>
<name><surname>Xin</surname> <given-names>X.</given-names></name>
<name><surname>Wang</surname> <given-names>W.</given-names></name>
<etal/>
</person-group>. (<year>2012</year>). 
<article-title>A Kelch motif-containing serine/threonine protein phosphatase determines the large grain QTL trait in rice</article-title>. <source>J. Integr. Plant Biol.</source> <volume>54</volume>, <fpage>979</fpage>&#x2013;<lpage>990</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/jipb.12008</pub-id>, PMID: <pub-id pub-id-type="pmid">23137285</pub-id>
</mixed-citation>
</ref>
<ref id="B39">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Huang</surname> <given-names>M.</given-names></name>
<name><surname>Liu</surname> <given-names>X.</given-names></name>
<name><surname>Zhou</surname> <given-names>Y.</given-names></name>
<name><surname>Summers</surname> <given-names>R. M.</given-names></name>
<name><surname>Zhang</surname> <given-names>Z.</given-names></name>
</person-group> (<year>2019</year>). 
<article-title>BLINK: A package for the next level of genome-wide association studies with both individuals and markers in the millions</article-title>. <source>GigaScience</source> <volume>8</volume> (<issue>2</issue>), <elocation-id>giy154</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/gigascience/giy154</pub-id>, PMID: <pub-id pub-id-type="pmid">30535326</pub-id>
</mixed-citation>
</ref>
<ref id="B40">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>John</surname> <given-names>M.</given-names></name>
<name><surname>Haselbeck</surname> <given-names>F.</given-names></name>
<name><surname>Dass</surname> <given-names>R.</given-names></name>
<name><surname>Malisi</surname> <given-names>C.</given-names></name>
<name><surname>Ricca</surname> <given-names>P.</given-names></name>
<name><surname>Dreischer</surname> <given-names>C.</given-names></name>
<etal/>
</person-group>. (<year>2022</year>). 
<article-title>A comparison of classical and machine learning-based phenotype prediction methods on simulated data and three plant species</article-title>. <source>Front. Plant Sci.</source> <volume>13</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fpls.2022.932512</pub-id>, PMID: <pub-id pub-id-type="pmid">36407627</pub-id>
</mixed-citation>
</ref>
<ref id="B41">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Juliana</surname> <given-names>P.</given-names></name>
<name><surname>Poland</surname> <given-names>J.</given-names></name>
<name><surname>Huerta-Espino</surname> <given-names>J.</given-names></name>
<name><surname>Shrestha</surname> <given-names>S.</given-names></name>
<name><surname>Crossa</surname> <given-names>J.</given-names></name>
<name><surname>Crespo-Herrera</surname> <given-names>L.</given-names></name>
<etal/>
</person-group>. (<year>2019</year>). 
<article-title>Improving grain yield, stress resilience and quality of bread wheat using large-scale genomics</article-title>. <source>Nat. Genet.</source> <volume>51</volume>, <fpage>1530</fpage>&#x2013;<lpage>1539</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41588-019-0496-6</pub-id>, PMID: <pub-id pub-id-type="pmid">31548720</pub-id>
</mixed-citation>
</ref>
<ref id="B42">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Kadioglu</surname> <given-names>A.</given-names></name>
<name><surname>Terzi</surname> <given-names>R.</given-names></name>
</person-group> (<year>2007</year>). 
<article-title>A dehydration avoidance mechanism: leaf rolling</article-title>. <source>Botanical Rev.</source> <volume>73</volume>, <fpage>290</fpage>&#x2013;<lpage>302</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1663/0006-8101(2007)73[290:ADAMLR]2.0.CO;2</pub-id>
</mixed-citation>
</ref>
<ref id="B43">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Kaler</surname> <given-names>A. S.</given-names></name>
<name><surname>Gillman</surname> <given-names>J. D.</given-names></name>
<name><surname>Beissinger</surname> <given-names>T.</given-names></name>
<name><surname>Purcell</surname> <given-names>L. C.</given-names></name>
</person-group> (<year>2020</year>). 
<article-title>Comparing different statistical models and multiple testing corrections for association mapping in soybean and maize</article-title>. <source>Front. Plant Sci.</source> <volume>10</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fpls.2019.01794</pub-id>, PMID: <pub-id pub-id-type="pmid">32158452</pub-id>
</mixed-citation>
</ref>
<ref id="B44">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Korte</surname> <given-names>A.</given-names></name>
<name><surname>Farlow</surname> <given-names>A.</given-names></name>
</person-group> (<year>2013</year>). 
<article-title>The advantages and limitations of trait analysis with GWAS: A review</article-title>. <source>Plant Methods</source> <volume>9</volume>, <fpage>29</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/1746-4811-9-29</pub-id>, PMID: <pub-id pub-id-type="pmid">23876160</pub-id>
</mixed-citation>
</ref>
<ref id="B45">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Kumar</surname> <given-names>A.</given-names></name>
<name><surname>Sandhu</surname> <given-names>N.</given-names></name>
<name><surname>Kumar</surname> <given-names>P.</given-names></name>
<name><surname>Pruthi</surname> <given-names>G.</given-names></name>
<name><surname>Singh</surname> <given-names>J.</given-names></name>
<name><surname>Kaur</surname> <given-names>S.</given-names></name>
<etal/>
</person-group>. (<year>2022</year>). 
<article-title>Genome-wide identification and in silico analysis of NPF, NRT2, CLC and SLAC1/SLAH nitrate transporters in hexaploid wheat (Triticum aestivum)</article-title>. <source>Sci. Rep.</source> <volume>12</volume> (<issue>1</issue>), <fpage>11227</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41598-022-15202-w</pub-id>, PMID: <pub-id pub-id-type="pmid">35781289</pub-id>
</mixed-citation>
</ref>
<ref id="B46">
<mixed-citation publication-type="web">
<person-group person-group-type="author">
<name><surname>Lang</surname> <given-names>M.</given-names></name>
<name><surname>Au</surname> <given-names>Q.</given-names></name>
<name><surname>Coors</surname> <given-names>S.</given-names></name>
<name><surname>Schratz</surname> <given-names>P.</given-names></name>
<name><surname>Becker</surname> <given-names>M.</given-names></name>
<name><surname>Zobolas</surname> <given-names>J.</given-names></name>
</person-group> (<year>2025</year>).
<article-title>mlr3learners: recommended learners for &#x2018;mlr3&#x2019;</article-title>. Available online at: <uri xlink:href="https://CRAN.R-project.org/package=mlr3learners">https://CRAN.R-project.org/package=mlr3learners</uri>.
</mixed-citation>
</ref>
<ref id="B47">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Lang</surname> <given-names>M.</given-names></name>
<name><surname>Binder</surname> <given-names>M.</given-names></name>
<name><surname>Richter</surname> <given-names>J.</given-names></name>
<name><surname>Schratz</surname> <given-names>P.</given-names></name>
<name><surname>Pfisterer</surname> <given-names>F.</given-names></name>
<name><surname>Coors</surname> <given-names>S.</given-names></name>
<etal/>
</person-group>. (<year>2019</year>). 
<article-title>mlr3: A modern object-oriented machine learning framework in R</article-title>. <source>J. Open Source Software</source> <volume>4</volume>, <elocation-id>1903</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.21105/joss.01903</pub-id>
</mixed-citation>
</ref>
<ref id="B48">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Li</surname> <given-names>H.</given-names></name>
<name><surname>Du</surname> <given-names>H.</given-names></name>
<name><surname>Huang</surname> <given-names>Z.</given-names></name>
<name><surname>He</surname> <given-names>M.</given-names></name>
<name><surname>Kong</surname> <given-names>L.</given-names></name>
<name><surname>Fang</surname> <given-names>C.</given-names></name>
<etal/>
</person-group>. (<year>2023</year>a). 
<article-title>The AP2/ERF transcription factor TOE4b regulates photoperiodic flowering and grain yield per plant in soybean</article-title>. <source>Plant Biotechnol. J.</source> <volume>21</volume>, <fpage>1682</fpage>&#x2013;<lpage>1694</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/pbi.14069</pub-id>, PMID: <pub-id pub-id-type="pmid">37171033</pub-id>
</mixed-citation>
</ref>
<ref id="B49">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Li</surname> <given-names>Y.</given-names></name>
<name><surname>Guo</surname> <given-names>L.</given-names></name>
<name><surname>Cui</surname> <given-names>Y.</given-names></name>
<name><surname>Yan</surname> <given-names>X.</given-names></name>
<name><surname>Ouyang</surname> <given-names>J.</given-names></name>
<name><surname>Li</surname> <given-names>S.</given-names></name>
</person-group> (<year>2023</year>b). 
<article-title>Lipid transfer protein, OsLTPL18, is essential for grain weight and seed germination in rice</article-title>. <source>Gene</source> <volume>883</volume>, <elocation-id>147671</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.gene.2023.147671</pub-id>, PMID: <pub-id pub-id-type="pmid">37506985</pub-id>
</mixed-citation>
</ref>
<ref id="B50">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Li</surname> <given-names>Y.</given-names></name>
<name><surname>Wei</surname> <given-names>K.</given-names></name>
</person-group> (<year>2020</year>). 
<article-title>Comparative functional genomics analysis of cytochrome P450 gene superfamily in wheat and maize</article-title>. <source>BMC Plant Biol.</source> <volume>20</volume>, <fpage>93</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s12870-020-2288-7</pub-id>, PMID: <pub-id pub-id-type="pmid">32122306</pub-id>
</mixed-citation>
</ref>
<ref id="B51">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Li</surname> <given-names>B.</given-names></name>
<name><surname>Zhang</surname> <given-names>N.</given-names></name>
<name><surname>Wang</surname> <given-names>Y. G.</given-names></name>
<name><surname>George</surname> <given-names>A. W.</given-names></name>
<name><surname>Reverter</surname> <given-names>A.</given-names></name>
<name><surname>Li</surname> <given-names>Y.</given-names></name>
</person-group> (<year>2018</year>). 
<article-title>Genomic prediction of breeding values using a subset of SNPs identified by three machine learning methods</article-title>. <source>Front. Genet.</source> <volume>9</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fgene.2018.00237</pub-id>, PMID: <pub-id pub-id-type="pmid">30023001</pub-id>
</mixed-citation>
</ref>
<ref id="B52">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Liu</surname> <given-names>X.</given-names></name>
<name><surname>Huang</surname> <given-names>M.</given-names></name>
<name><surname>Fan</surname> <given-names>B.</given-names></name>
<name><surname>Buckler</surname> <given-names>E. S.</given-names></name>
<name><surname>Zhang</surname> <given-names>Z.</given-names></name>
</person-group> (<year>2016</year>). 
<article-title>Iterative usage of fixed and random effect models for powerful and efficient genome-wide association studies</article-title>. <source>PLoS Genet.</source> <volume>12</volume> (<issue>2</issue>), <elocation-id>e1005767</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1371/journal.pgen.1005767</pub-id>, PMID: <pub-id pub-id-type="pmid">26828793</pub-id>
</mixed-citation>
</ref>
<ref id="B53">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Liu</surname> <given-names>Y.</given-names></name>
<name><surname>Lin</surname> <given-names>Y.</given-names></name>
<name><surname>Gao</surname> <given-names>S.</given-names></name>
<name><surname>Li</surname> <given-names>Z.</given-names></name>
<name><surname>Ma</surname> <given-names>J.</given-names></name>
<name><surname>Deng</surname> <given-names>M.</given-names></name>
<etal/>
</person-group>. (<year>2017</year>). 
<article-title>A genome-wide association study of 23 agronomic traits in Chinese wheat landraces</article-title>. <source>Plant J.</source> <volume>91</volume>, <fpage>861</fpage>&#x2013;<lpage>873</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/tpj.13614</pub-id>, PMID: <pub-id pub-id-type="pmid">28628238</pub-id>
</mixed-citation>
</ref>
<ref id="B54">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Liu</surname> <given-names>Y.</given-names></name>
<name><surname>Wang</surname> <given-names>D.</given-names></name>
<name><surname>He</surname> <given-names>F.</given-names></name>
<name><surname>Wang</surname> <given-names>J.</given-names></name>
<name><surname>Joshi</surname> <given-names>T.</given-names></name>
<name><surname>Xu</surname> <given-names>D.</given-names></name>
</person-group> (<year>2019</year>). 
<article-title>Phenotype prediction and genome-wide association study using deep convolutional neural network of soybean</article-title>. <source>Front. Genet.</source> <volume>10</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fgene.2019.01091</pub-id>, PMID: <pub-id pub-id-type="pmid">31824557</pub-id>
</mixed-citation>
</ref>
<ref id="B55">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Liu</surname> <given-names>J.</given-names></name>
<name><surname>Zhi</surname> <given-names>L.</given-names></name>
<name><surname>Zhang</surname> <given-names>N.</given-names></name>
<name><surname>Zhang</surname> <given-names>W.</given-names></name>
<name><surname>Meng</surname> <given-names>D.</given-names></name>
<name><surname>Batool</surname> <given-names>A.</given-names></name>
<etal/>
</person-group>. (<year>2022</year>). 
<article-title>Transcriptomic analysis reveals the contribution of QMrl-7B to wheat root growth and development</article-title>. <source>Front. Plant Sci.</source> <volume>13</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fpls.2022.1062575</pub-id>, PMID: <pub-id pub-id-type="pmid">36457528</pub-id>
</mixed-citation>
</ref>
<ref id="B56">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>L&#xf3;pez-Cortegano</surname> <given-names>E.</given-names></name>
<name><surname>Caballero</surname> <given-names>A.</given-names></name>
</person-group> (<year>2019</year>). 
<article-title>Inferring the nature of missing heritability in human traits using data from the GWAS catalog</article-title>. <source>Genetics</source> <volume>212</volume>, <fpage>891</fpage>&#x2013;<lpage>904</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1534/genetics.119.302077</pub-id>, PMID: <pub-id pub-id-type="pmid">31123044</pub-id>
</mixed-citation>
</ref>
<ref id="B57">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Lynch</surname> <given-names>J. P.</given-names></name>
</person-group> (<year>2022</year>). 
<article-title>Harnessing root architecture to address global challenges</article-title>. <source>Plant J.</source> <volume>109</volume>, <fpage>415</fpage>&#x2013;<lpage>431</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/tpj.15560</pub-id>, PMID: <pub-id pub-id-type="pmid">34724260</pub-id>
</mixed-citation>
</ref>
<ref id="B58">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Ma</surname> <given-names>L.</given-names></name>
<name><surname>Li</surname> <given-names>T.</given-names></name>
<name><surname>Hao</surname> <given-names>C.</given-names></name>
<name><surname>Wang</surname> <given-names>Y.</given-names></name>
<name><surname>Chen</surname> <given-names>X.</given-names></name>
<name><surname>Zhang</surname> <given-names>X.</given-names></name>
</person-group> (<year>2016</year>). 
<article-title>TaGS5-3A, a grain size gene selected during wheat improvement for larger kernel and yield</article-title>. <source>Plant Biotechnol. J.</source> <volume>14</volume>, <fpage>1269</fpage>&#x2013;<lpage>1280</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/pbi.12492</pub-id>, PMID: <pub-id pub-id-type="pmid">26480952</pub-id>
</mixed-citation>
</ref>
<ref id="B59">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Maierhofer</surname> <given-names>T.</given-names></name>
<name><surname>Lind</surname> <given-names>C.</given-names></name>
<name><surname>H&#xfc;ttl</surname> <given-names>S.</given-names></name>
<name><surname>Scherzer</surname> <given-names>S.</given-names></name>
<name><surname>Papenfu&#xdf;</surname> <given-names>M.</given-names></name>
<name><surname>Simon</surname> <given-names>J.</given-names></name>
<etal/>
</person-group>. (<year>2014</year>). 
<article-title>A single-pore residue renders the Arabidopsis root anion channel SLAH2 highly nitrate selective</article-title>. <source>Plant Cell</source> <volume>26</volume>, <fpage>2554</fpage>&#x2013;<lpage>2567</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1105/tpc.114.125849</pub-id>, PMID: <pub-id pub-id-type="pmid">24938289</pub-id>
</mixed-citation>
</ref>
<ref id="B60">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Malik</surname> <given-names>P.</given-names></name>
<name><surname>Kumar</surname> <given-names>J.</given-names></name>
<name><surname>Singh</surname> <given-names>S.</given-names></name>
<name><surname>Sharma</surname> <given-names>S.</given-names></name>
<name><surname>Meher</surname> <given-names>P. K.</given-names></name>
<name><surname>Sharma</surname> <given-names>M. K.</given-names></name>
<etal/>
</person-group>. (<year>2021</year>). 
<article-title>Single-trait, multi-locus and multi-trait GWAS using four different models for yield traits in bread wheat</article-title>. <source>Mol. Breed.</source> <volume>41</volume>, <elocation-id>46</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s11032-021-01240-1</pub-id>, PMID: <pub-id pub-id-type="pmid">37309385</pub-id>
</mixed-citation>
</ref>
<ref id="B61">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Mangini</surname> <given-names>G.</given-names></name>
<name><surname>Nigro</surname> <given-names>D.</given-names></name>
<name><surname>Curci</surname> <given-names>P. L.</given-names></name>
<name><surname>Simeone</surname> <given-names>R.</given-names></name>
<name><surname>Blanco</surname> <given-names>A.</given-names></name>
</person-group> (<year>2025</year>). 
<article-title>Genome-wide association study identifies QTL and candidate genes for grain size and weight in a Triticum turgidum collection</article-title>. <source>Plant Genome</source> <volume>18</volume>, <elocation-id>e20562</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1002/tpg2.20562</pub-id>, PMID: <pub-id pub-id-type="pmid">39868635</pub-id>
</mixed-citation>
</ref>
<ref id="B62">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Manjunath</surname> <given-names>K. K.</given-names></name>
<name><surname>Krishna</surname> <given-names>H.</given-names></name>
<name><surname>Devate</surname> <given-names>N. B.</given-names></name>
<name><surname>Sunilkumar</surname> <given-names>V. P.</given-names></name>
<name><surname>Chauhan</surname> <given-names>D.</given-names></name>
<name><surname>Singh</surname> <given-names>S.</given-names></name>
<etal/>
</person-group>. (<year>2023</year>). 
<article-title>Mapping of the QTLs governing grain micronutrients and thousand kernel weight in wheat (Triticum aestivum L.) using high density SNP markers</article-title>. <source>Front. Nutr.</source> <volume>10</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fnut.2023.1105207</pub-id>, PMID: <pub-id pub-id-type="pmid">36845058</pub-id>
</mixed-citation>
</ref>
<ref id="B63">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Meinshausen</surname> <given-names>N.</given-names></name>
<name><surname>B&#xfc;hlmann</surname> <given-names>P.</given-names></name>
</person-group> (<year>2010</year>). 
<article-title>Stability selection</article-title>. <source>J. R. Stat. Soc. Ser. B: Stat. Method.</source> <volume>72</volume>, <fpage>417</fpage>&#x2013;<lpage>473</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/j.1467-9868.2010.00740.x</pub-id>
</mixed-citation>
</ref>
<ref id="B64">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Merrick</surname> <given-names>L. F.</given-names></name>
<name><surname>Burke</surname> <given-names>A. B.</given-names></name>
<name><surname>Zhang</surname> <given-names>Z.</given-names></name>
<name><surname>Carter</surname> <given-names>A. H.</given-names></name>
</person-group> (<year>2022</year>). 
<article-title>Comparison of single-trait and multi-trait genome-wide association models and inclusion of correlated traits in the dissection of the genetic architecture of a complex trait in a breeding program</article-title>. <source>Front. Plant Sci.</source> <volume>12</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fpls.2021.772907</pub-id>, PMID: <pub-id pub-id-type="pmid">35154175</pub-id>
</mixed-citation>
</ref>
<ref id="B65">
<mixed-citation publication-type="web">
<person-group person-group-type="author">
<name><surname>Meyer</surname> <given-names>D.</given-names></name>
<name><surname>Dimitriadou</surname> <given-names>E.</given-names></name>
<name><surname>Hornik</surname> <given-names>K.</given-names></name>
<name><surname>Weingessel</surname> <given-names>A.</given-names></name>
<name><surname>Leisch</surname> <given-names>F.</given-names></name>
</person-group> (<year>2024</year>).
<article-title>e1071: misc functions of the department of statistics, probability theory group, TU Wien</article-title>. Available online at: <uri xlink:href="https://CRAN.R-project.org/package=e1071">https://CRAN.R-project.org/package=e1071</uri>.
</mixed-citation>
</ref>
<ref id="B66">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Mieth</surname> <given-names>B.</given-names></name>
<name><surname>Kloft</surname> <given-names>M.</given-names></name>
<name><surname>Rodr&#xed;guez</surname> <given-names>J. A.</given-names></name>
<name><surname>Sonnenburg</surname> <given-names>S.</given-names></name>
<name><surname>Vobruba</surname> <given-names>R.</given-names></name>
<name><surname>Morcillo-Su&#xe1;rez</surname> <given-names>C.</given-names></name>
<etal/>
</person-group>. (<year>2016</year>). 
<article-title>Combining multiple hypothesis testing with machine learning increases the statistical power of genome-wide association studies</article-title>. <source>Sci. Rep.</source> <volume>6</volume>, <elocation-id>36671</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/srep36671</pub-id>, PMID: <pub-id pub-id-type="pmid">27892471</pub-id>
</mixed-citation>
</ref>
<ref id="B67">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Mieth</surname> <given-names>B.</given-names></name>
<name><surname>Rozier</surname> <given-names>A.</given-names></name>
<name><surname>Rodriguez</surname> <given-names>J. A.</given-names></name>
<name><surname>H&#xf6;hne</surname> <given-names>M. M. C.</given-names></name>
<name><surname>G&#xf6;rnitz</surname> <given-names>N.</given-names></name>
<name><surname>M&#xfc;ller</surname> <given-names>K. R.</given-names></name>
</person-group> (<year>2021</year>). 
<article-title>DeepCOMBI: Explainable artificial intelligence for the analysis and discovery in genome-wide association studies</article-title>. <source>NAR Genomics Bioinf.</source> <volume>3</volume> (<issue>3</issue>), <elocation-id>lqab065</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/nargab/lqab065</pub-id>, PMID: <pub-id pub-id-type="pmid">34296082</pub-id>
</mixed-citation>
</ref>
<ref id="B68">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Mira</surname> <given-names>J. P.</given-names></name>
<name><surname>Arenas-M</surname> <given-names>A.</given-names></name>
<name><surname>Calderini</surname> <given-names>D. F.</given-names></name>
<name><surname>Canales</surname> <given-names>J.</given-names></name>
</person-group> (<year>2023</year>). 
<article-title>Integrated transcriptome analysis identified key expansin genes associated with wheat cell wall, grain weight and yield</article-title>. <source>Plants</source> <volume>12</volume> (<issue>15</issue>), <elocation-id>2868</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/plants12152868</pub-id>, PMID: <pub-id pub-id-type="pmid">37571021</pub-id>
</mixed-citation>
</ref>
<ref id="B69">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Mizuno</surname> <given-names>N.</given-names></name>
<name><surname>Ishikawa</surname> <given-names>G.</given-names></name>
<name><surname>Kojima</surname> <given-names>H.</given-names></name>
<name><surname>Tougou</surname> <given-names>M.</given-names></name>
<name><surname>Kiribuchi-Otobe</surname> <given-names>C.</given-names></name>
<name><surname>Fujita</surname> <given-names>M.</given-names></name>
<etal/>
</person-group>. (<year>2021</year>). 
<article-title>Genetic mechanisms determining grain number distribution along the spike and their effect on yield components in wheat</article-title>. <source>Mol. Breed.</source> <volume>41</volume>, <elocation-id>62</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s11032-021-01255-8</pub-id>, PMID: <pub-id pub-id-type="pmid">37309314</pub-id>
</mixed-citation>
</ref>
<ref id="B70">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Money</surname> <given-names>D.</given-names></name>
<name><surname>Gardner</surname> <given-names>K.</given-names></name>
<name><surname>Migicovsky</surname> <given-names>Z.</given-names></name>
<name><surname>Schwaninger</surname> <given-names>H.</given-names></name>
<name><surname>Zhong</surname> <given-names>G. Y.</given-names></name>
<name><surname>Myles</surname> <given-names>S.</given-names></name>
</person-group> (<year>2015</year>). 
<article-title>LinkImpute: Fast and accurate genotype imputation for nonmodel organisms</article-title>. <source>G3: Genes Genomes Genet.</source> <volume>5</volume>, <fpage>2383</fpage>&#x2013;<lpage>2390</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1534/g3.115.021667</pub-id>, PMID: <pub-id pub-id-type="pmid">26377960</pub-id>
</mixed-citation>
</ref>
<ref id="B71">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Montesinos-L&#xf3;pez</surname> <given-names>A.</given-names></name>
<name><surname>Crespo-Herrera</surname> <given-names>L.</given-names></name>
<name><surname>Dreisigacker</surname> <given-names>S.</given-names></name>
<name><surname>Gerard</surname> <given-names>G.</given-names></name>
<name><surname>Vitale</surname> <given-names>P.</given-names></name>
<name><surname>Saint Pierre</surname> <given-names>C.</given-names></name>
<etal/>
</person-group>. (<year>2024</year>). 
<article-title>Deep learning methods improve genomic prediction of wheat breeding</article-title>. <source>Front. Plant Sci.</source> <volume>15</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fpls.2024.1324090</pub-id>, PMID: <pub-id pub-id-type="pmid">38504889</pub-id>
</mixed-citation>
</ref>
<ref id="B72">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Pang</surname> <given-names>F.</given-names></name>
<name><surname>Niu</surname> <given-names>J.</given-names></name>
<name><surname>Solanki</surname> <given-names>M. K.</given-names></name>
<name><surname>Nosheen</surname> <given-names>S.</given-names></name>
<name><surname>Liu</surname> <given-names>Z.</given-names></name>
<name><surname>Wang</surname> <given-names>Z.</given-names></name>
</person-group> (<year>2022</year>). 
<article-title>PHD-finger family genes in wheat (Triticum aestivum L.): Evolutionary conservatism, functional diversification, and active expression in abiotic stress</article-title>. <source>Front. Plant Sci.</source> <volume>13</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fpls.2022.1016831</pub-id>, PMID: <pub-id pub-id-type="pmid">36578331</pub-id>
</mixed-citation>
</ref>
<ref id="B73">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Price</surname> <given-names>A. L.</given-names></name>
<name><surname>Patterson</surname> <given-names>N. J.</given-names></name>
<name><surname>Plenge</surname> <given-names>R. M.</given-names></name>
<name><surname>Weinblatt</surname> <given-names>M. E.</given-names></name>
<name><surname>Shadick</surname> <given-names>N. A.</given-names></name>
<name><surname>Reich</surname> <given-names>D.</given-names></name>
</person-group> (<year>2006</year>). 
<article-title>Principal components analysis corrects for stratification in genome-wide association studies</article-title>. <source>Nat. Genet.</source> <volume>38</volume>, <fpage>904</fpage>&#x2013;<lpage>909</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/ng1847</pub-id>, PMID: <pub-id pub-id-type="pmid">16862161</pub-id>
</mixed-citation>
</ref>
<ref id="B74">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Qin</surname> <given-names>X.</given-names></name>
<name><surname>Liu</surname> <given-names>J. H.</given-names></name>
<name><surname>Zhao</surname> <given-names>W. S.</given-names></name>
<name><surname>Chen</surname> <given-names>X. J.</given-names></name>
<name><surname>Guo</surname> <given-names>Z. J.</given-names></name>
<name><surname>Peng</surname> <given-names>Y. L.</given-names></name>
</person-group> (<year>2013</year>). 
<article-title>Gibberellin 20-oxidase gene OsGA20ox3 regulates plant stature and disease development in rice</article-title>. <source>MPMI</source> <volume>26</volume> (<issue>2</issue>), <fpage>227</fpage>&#x2013;<lpage>239</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1094/MPMI-05-12-0138-R</pub-id>, PMID: <pub-id pub-id-type="pmid">22992000</pub-id>
</mixed-citation>
</ref>
<ref id="B75">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Rameneni</surname> <given-names>J. J.</given-names></name>
<name><surname>Dhandapani</surname> <given-names>V.</given-names></name>
<name><surname>Paul</surname> <given-names>P.</given-names></name>
<name><surname>Devaraj</surname> <given-names>S. P.</given-names></name>
<name><surname>Choi</surname> <given-names>S. R.</given-names></name>
<name><surname>Yi</surname> <given-names>S. Y.</given-names></name>
<etal/>
</person-group>. (<year>2018</year>). 
<article-title>F-box genes in Brassica rapa: genome-wide identification, structural characterization, expressional validation, and comparative analysis</article-title>. <source>Plant Mol. Biol. Rep.</source> <volume>36</volume>, <fpage>500</fpage>&#x2013;<lpage>517</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s11105-018-1083-1</pub-id>
</mixed-citation>
</ref>
<ref id="B76">
<mixed-citation publication-type="book">
<person-group person-group-type="author"><collab>R Core Team</collab>
</person-group> (<year>2025</year>). <source>R: A Language and Environment for Statistical Computing</source> (<publisher-loc>Vienna, Austria</publisher-loc>: 
<publisher-name>R Foundation for Statistical Computing</publisher-name>). Available online at: <uri xlink:href="https://www.R-project.org/">https://www.R-project.org/</uri>.
</mixed-citation>
</ref>
<ref id="B77">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Saber</surname> <given-names>M. M.</given-names></name>
<name><surname>Shapiro</surname> <given-names>B. J.</given-names></name>
</person-group> (<year>2020</year>). 
<article-title>Benchmarking bacterial genome-wide association study methods using simulated genomes and phenotypes</article-title>. <source>Microbial Genomics</source> <volume>6</volume> (<issue>3</issue>), <elocation-id>e000337</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1099/mgen.0.000337</pub-id>, PMID: <pub-id pub-id-type="pmid">32100713</pub-id>
</mixed-citation>
</ref>
<ref id="B78">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Saini</surname> <given-names>D. K.</given-names></name>
<name><surname>Chopra</surname> <given-names>Y.</given-names></name>
<name><surname>Singh</surname> <given-names>J.</given-names></name>
<name><surname>Sandhu</surname> <given-names>K. S.</given-names></name>
<name><surname>Kumar</surname> <given-names>A.</given-names></name>
<name><surname>Bazzer</surname> <given-names>S.</given-names></name>
<etal/>
</person-group>. (<year>2022</year>). 
<article-title>Comprehensive evaluation of mapping complex traits in wheat using genome-wide association studies</article-title>. <source>Mol. Breed.</source> <volume>42</volume>, <elocation-id>1</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s11032-021-01272-7</pub-id>, PMID: <pub-id pub-id-type="pmid">37309486</pub-id>
</mixed-citation>
</ref>
<ref id="B79">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Sandhu</surname> <given-names>K. S.</given-names></name>
<name><surname>Burke</surname> <given-names>A. B.</given-names></name>
<name><surname>Merrick</surname> <given-names>L. F.</given-names></name>
<name><surname>Pumphrey</surname> <given-names>M. O.</given-names></name>
<name><surname>Carter</surname> <given-names>A. H.</given-names></name>
</person-group> (<year>2024</year>). 
<article-title>Comparing performances of different statistical models and multiple threshold methods in a nested association mapping population of wheat</article-title>. <source>Front. Plant Sci.</source> <volume>15</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fpls.2024.1460353</pub-id>, PMID: <pub-id pub-id-type="pmid">39416483</pub-id>
</mixed-citation>
</ref>
<ref id="B80">
<mixed-citation publication-type="web">
<person-group person-group-type="author">
<name><surname>Schneider</surname> <given-names>L.</given-names></name>
<name><surname>Richter</surname> <given-names>J.</given-names></name>
<name><surname>Becker</surname> <given-names>M.</given-names></name>
<name><surname>Lang</surname> <given-names>M.</given-names></name>
<name><surname>Bischl</surname> <given-names>B.</given-names></name>
<name><surname>Pfisterer</surname> <given-names>F.</given-names></name>
<etal/>
</person-group>. (<year>2025</year>).
<article-title>mlr3mbo: Flexible Bayesian Optimization. R package version 0.2.9</article-title>. Available online at: <uri xlink:href="https://mlr3mbo.mlr-org.com">https://mlr3mbo.mlr-org.com</uri>.
</mixed-citation>
</ref>
<ref id="B81">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Segura</surname> <given-names>V.</given-names></name>
<name><surname>Vilhj&#xe1;lmsson</surname> <given-names>B. J.</given-names></name>
<name><surname>Platt</surname> <given-names>A.</given-names></name>
<name><surname>Korte</surname> <given-names>A.</given-names></name>
<name><surname>Seren</surname> <given-names>&#xdc;.</given-names></name>
<name><surname>Long</surname> <given-names>Q.</given-names></name>
<etal/>
</person-group>. (<year>2012</year>). 
<article-title>An efficient multi-locus mixed-model approach for genome-wide association studies in structured populations</article-title>. <source>Nat. Genet.</source> <volume>44</volume>, <fpage>825</fpage>&#x2013;<lpage>830</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/ng.2314</pub-id>, PMID: <pub-id pub-id-type="pmid">22706313</pub-id>
</mixed-citation>
</ref>
<ref id="B82">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name><surname>Sehgal</surname> <given-names>D.</given-names></name>
<name><surname>Dreisigacker</surname> <given-names>S.</given-names></name>
</person-group> (<year>2022</year>). <chapter-title>GWAS Case Studies in Wheat</chapter-title>. <source>Methods in molecular biology</source>. (
<publisher-loc>Clifton, N.J.</publisher-loc>), <volume>2481</volume>, <fpage>341</fpage>&#x2013;<lpage>351</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/978-1-0716-2237-7_19</pub-id>, PMID: <pub-id pub-id-type="pmid">35641773</pub-id>
</mixed-citation>
</ref>
<ref id="B83">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Senapati</surname> <given-names>N.</given-names></name>
<name><surname>Halford</surname> <given-names>N. G.</given-names></name>
<name><surname>Semenov</surname> <given-names>M. A.</given-names></name>
</person-group> (<year>2021</year>). 
<article-title>Vulnerability of European wheat to extreme heat and drought around flowering under future climate</article-title>. <source>Environ. Res. Lett.</source> <volume>16</volume> (<issue>2</issue>), <elocation-id>024052</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1088/1748-9326/abdcf3</pub-id>
</mixed-citation>
</ref>
<ref id="B84">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Sirsat</surname> <given-names>M. S.</given-names></name>
<name><surname>Oblessuc</surname> <given-names>P. R.</given-names></name>
<name><surname>Ramiro</surname> <given-names>R. S.</given-names></name>
</person-group> (<year>2022</year>). 
<article-title>Genomic prediction of wheat grain yield using machine learning</article-title>. <source>Agric. (Switzerland)</source> <volume>12</volume> (<issue>9</issue>), <elocation-id>1406</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/agriculture12091406</pub-id>
</mixed-citation>
</ref>
<ref id="B85">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Song</surname> <given-names>M.</given-names></name>
<name><surname>Zhang</surname> <given-names>S.</given-names></name>
<name><surname>Qiu</surname> <given-names>S.</given-names></name>
<name><surname>Qin</surname> <given-names>R.</given-names></name>
<name><surname>Zhao</surname> <given-names>C.</given-names></name>
<name><surname>Wu</surname> <given-names>Y.</given-names></name>
<etal/>
</person-group>. (<year>2025</year>). 
<article-title>GRE: A framework for significant SNP identification associated with wheat yield leveraging GWAS&#x2013;random forest joint feature selection and explainable machine learning genomic selection algorithm</article-title>. <source>Genes</source> <volume>16</volume>, <elocation-id>1125</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/genes16101125</pub-id>, PMID: <pub-id pub-id-type="pmid">41153342</pub-id>
</mixed-citation>
</ref>
<ref id="B86">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Spielmeyer</surname> <given-names>W.</given-names></name>
<name><surname>Ellis</surname> <given-names>M. H.</given-names></name>
<name><surname>Chandler</surname> <given-names>P. M.</given-names></name>
</person-group> (<year>2002</year>). 
<article-title>Semidwarf (sd-1), &#x201c;green revolution&#x201d; rice, contains a defective gibberellin 20-oxidase gene</article-title>. <source>Proc. Natl. Acad. Sci. U.S.A.</source> <volume>99</volume>, <fpage>9043</fpage>&#x2013;<lpage>9048</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1073/pnas.132266399</pub-id>, PMID: <pub-id pub-id-type="pmid">12077303</pub-id>
</mixed-citation>
</ref>
<ref id="B87">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Stephan</surname> <given-names>J.</given-names></name>
<name><surname>Stegle</surname> <given-names>O.</given-names></name>
<name><surname>Beyer</surname> <given-names>A.</given-names></name>
</person-group> (<year>2015</year>). 
<article-title>A random forest approach to capture genetic effects in the presence of population structure</article-title>. <source>Nat. Commun.</source> <volume>6</volume>, <elocation-id>7432</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/ncomms8432</pub-id>, PMID: <pub-id pub-id-type="pmid">26109276</pub-id>
</mixed-citation>
</ref>
<ref id="B88">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Sukumaran</surname> <given-names>S.</given-names></name>
<name><surname>Dreisigacker</surname> <given-names>S.</given-names></name>
<name><surname>Lopes</surname> <given-names>M.</given-names></name>
<name><surname>Chavez</surname> <given-names>P.</given-names></name>
<name><surname>Reynolds</surname> <given-names>M. P.</given-names></name>
</person-group> (<year>2015</year>). 
<article-title>Genome-wide association study for grain yield and related traits in an elite spring wheat population grown in temperate irrigated environments</article-title>. <source>Theor. Appl. Genet.</source> <volume>128</volume>, <fpage>353</fpage>&#x2013;<lpage>363</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s00122-014-2435-3</pub-id>, PMID: <pub-id pub-id-type="pmid">25490985</pub-id>
</mixed-citation>
</ref>
<ref id="B89">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Sun</surname> <given-names>J.</given-names></name>
<name><surname>Wu</surname> <given-names>Q.</given-names></name>
<name><surname>Shen</surname> <given-names>D.</given-names></name>
<name><surname>Wen</surname> <given-names>Y.</given-names></name>
<name><surname>Liu</surname> <given-names>F.</given-names></name>
<name><surname>Gao</surname> <given-names>Y.</given-names></name>
<etal/>
</person-group>. (<year>2019</year>). 
<article-title>TSLRF: Two-Stage Algorithm Based on Least Angle Regression and Random Forest in genome-wide association studies</article-title>. <source>Sci. Rep.</source> <volume>9</volume>, <elocation-id>18034</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41598-019-54519-x</pub-id>, PMID: <pub-id pub-id-type="pmid">31792302</pub-id>
</mixed-citation>
</ref>
<ref id="B90">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Szymczak</surname> <given-names>S.</given-names></name>
<name><surname>Biernacka</surname> <given-names>J. M.</given-names></name>
<name><surname>Cordell</surname> <given-names>H. J.</given-names></name>
<name><surname>Gonz&#xe1;lez-Recio</surname> <given-names>O.</given-names></name>
<name><surname>K&#xf6;nig</surname> <given-names>I. R.</given-names></name>
<name><surname>Zhang</surname> <given-names>H.</given-names></name>
<etal/>
</person-group>. (<year>2009</year>). 
<article-title>Machine learning in genome-wide association studies</article-title>. <source>Genet. Epidemiol.</source> <volume>33</volume>, <fpage>S51</fpage>&#x2013;<lpage>S57</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1002/gepi.20473</pub-id>, PMID: <pub-id pub-id-type="pmid">19924717</pub-id>
</mixed-citation>
</ref>
<ref id="B91">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Tabuchi</surname> <given-names>M.</given-names></name>
<name><surname>Sugiyama</surname> <given-names>K.</given-names></name>
<name><surname>Ishiyama</surname> <given-names>K.</given-names></name>
<name><surname>Inoue</surname> <given-names>E.</given-names></name>
<name><surname>Sato</surname> <given-names>T.</given-names></name>
<name><surname>Takahashi</surname> <given-names>H.</given-names></name>
<etal/>
</person-group>. (<year>2005</year>). 
<article-title>Severe reduction in growth rate and grain filling of rice mutants lacking OsGS1;1, a cytosolic glutamine synthetase1;1</article-title>. <source>Plant J.</source> <volume>42</volume>, <fpage>641</fpage>&#x2013;<lpage>651</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/j.1365-313X.2005.02406.x</pub-id>, PMID: <pub-id pub-id-type="pmid">15918879</pub-id>
</mixed-citation>
</ref>
<ref id="B92">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Tam</surname> <given-names>V.</given-names></name>
<name><surname>Patel</surname> <given-names>N.</given-names></name>
<name><surname>Turcotte</surname> <given-names>M.</given-names></name>
<name><surname>Boss&#xe9;</surname> <given-names>Y.</given-names></name>
<name><surname>Par&#xe9;</surname> <given-names>G.</given-names></name>
<name><surname>Meyre</surname> <given-names>D.</given-names></name>
</person-group> (<year>2019</year>). 
<article-title>Benefits and limitations of genome-wide association studies</article-title>. <source>Nat. Rev. Genet.</source> <volume>20</volume>, <fpage>467</fpage>&#x2013;<lpage>484</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41576-019-0127-1</pub-id>, PMID: <pub-id pub-id-type="pmid">31068683</pub-id>
</mixed-citation>
</ref>
<ref id="B93">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Tekeu</surname> <given-names>H.</given-names></name>
<name><surname>Jean</surname> <given-names>M.</given-names></name>
<name><surname>Ngonkeu</surname> <given-names>E. L.</given-names></name>
<name><surname>Belzile</surname> <given-names>F.</given-names></name>
</person-group> (<year>2023</year>). 
<article-title>Machine Learning-GWAS reveals the role of WSD1 gene for cuticular wax ester biosynthesis and key genomic regions controlling early maturity in bread wheat</article-title>. <source>bioRxiv</source>. doi:&#xa0;<pub-id pub-id-type="doi">10.1101/2023.11.03.565125</pub-id>
</mixed-citation>
</ref>
<ref id="B94">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>van der Knaap</surname> <given-names>E.</given-names></name>
<name><surname>Kim</surname> <given-names>J. H.</given-names></name>
<name><surname>Kende</surname> <given-names>H.</given-names></name>
</person-group> (<year>2000</year>). 
<article-title>A novel gibberellin-induced gene from rice and its potential regulatory role in stem growth</article-title>. <source>Plant Physiol.</source> <volume>122</volume>, <fpage>695</fpage>&#x2013;<lpage>704</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1104/pp.122.3.695</pub-id>, PMID: <pub-id pub-id-type="pmid">10712532</pub-id>
</mixed-citation>
</ref>
<ref id="B95">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Waldmann</surname> <given-names>P.</given-names></name>
<name><surname>M&#xe9;sz&#xe1;ros</surname> <given-names>G.</given-names></name>
<name><surname>Gredler</surname> <given-names>B.</given-names></name>
<name><surname>Fuerst</surname> <given-names>C.</given-names></name>
<name><surname>S&#xf6;lkner</surname> <given-names>J.</given-names></name>
</person-group> (<year>2013</year>). 
<article-title>Evaluation of the lasso and the elastic net in genome-wide association studies</article-title>. <source>Front. Genet.</source> <volume>4</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fgene.2013.00270</pub-id>, PMID: <pub-id pub-id-type="pmid">24363662</pub-id>
</mixed-citation>
</ref>
<ref id="B96">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Wang</surname> <given-names>Y.</given-names></name>
<name><surname>Du</surname> <given-names>F.</given-names></name>
<name><surname>Wang</surname> <given-names>J.</given-names></name>
<name><surname>Wang</surname> <given-names>K.</given-names></name>
<name><surname>Tian</surname> <given-names>C.</given-names></name>
<name><surname>Qi</surname> <given-names>X.</given-names></name>
<etal/>
</person-group>. (<year>2022</year>). 
<article-title>Improving bread wheat yield through modulating an unselected AP2/ERF gene</article-title>. <source>Nat. Plants</source> <volume>8</volume>, <fpage>930</fpage>&#x2013;<lpage>939</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41477-022-01197-9</pub-id>, PMID: <pub-id pub-id-type="pmid">35851621</pub-id>
</mixed-citation>
</ref>
<ref id="B97">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Wang</surname> <given-names>Q.</given-names></name>
<name><surname>Tian</surname> <given-names>F.</given-names></name>
<name><surname>Pan</surname> <given-names>Y.</given-names></name>
<name><surname>Buckler</surname> <given-names>E. S.</given-names></name>
<name><surname>Zhang</surname> <given-names>Z.</given-names></name>
</person-group> (<year>2014</year>). 
<article-title>A SUPER powerful method for genome wide association study</article-title>. <source>PLoS One</source> <volume>9</volume> (<issue>9</issue>), <elocation-id>e107684</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1371/journal.pone.0107684</pub-id>, PMID: <pub-id pub-id-type="pmid">25247812</pub-id>
</mixed-citation>
</ref>
<ref id="B98">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Wang</surname> <given-names>J.</given-names></name>
<name><surname>Zhang</surname> <given-names>Z.</given-names></name>
</person-group> (<year>2021</year>). 
<article-title>GAPIT version 3: boosting power and accuracy for genomic association and prediction</article-title>. <source>Genomics Proteomics Bioinf.</source> <volume>19</volume>, <fpage>629</fpage>&#x2013;<lpage>640</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.gpb.2021.08.005</pub-id>, PMID: <pub-id pub-id-type="pmid">34492338</pub-id>
</mixed-citation>
</ref>
<ref id="B99">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Wang</surname> <given-names>C.</given-names></name>
<name><surname>Zhang</surname> <given-names>D.</given-names></name>
<name><surname>Ma</surname> <given-names>Y.</given-names></name>
<name><surname>Zhao</surname> <given-names>Y.</given-names></name>
<name><surname>Liu</surname> <given-names>P.</given-names></name>
<name><surname>Li</surname> <given-names>X.</given-names></name>
</person-group> (<year>2025</year>). 
<article-title>WheatGP, a genomic prediction method based on CNN and LSTM</article-title>. <source>Briefings Bioinf.</source> <volume>26</volume>, <elocation-id>bbaf191</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/bib/bbaf191</pub-id>, PMID: <pub-id pub-id-type="pmid">40275535</pub-id>
</mixed-citation>
</ref>
<ref id="B100">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Wen</surname> <given-names>Y. J.</given-names></name>
<name><surname>Zhang</surname> <given-names>H.</given-names></name>
<name><surname>Ni</surname> <given-names>Y. L.</given-names></name>
<name><surname>Huang</surname> <given-names>B.</given-names></name>
<name><surname>Zhang</surname> <given-names>J.</given-names></name>
<name><surname>Feng</surname> <given-names>J. Y.</given-names></name>
<etal/>
</person-group>. (<year>2018</year>). 
<article-title>Methodological implementation of mixed linear models in multi-locus genome-wide association studies</article-title>. <source>Briefings Bioinf.</source> <volume>19</volume>, <fpage>700</fpage>&#x2013;<lpage>712</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/bib/bbw145</pub-id>, PMID: <pub-id pub-id-type="pmid">28158525</pub-id>
</mixed-citation>
</ref>
<ref id="B101">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Witte</surname> <given-names>J. S.</given-names></name>
</person-group> (<year>2010</year>). 
<article-title>Genome-wide association studies and beyond</article-title>. <source>Annu. Rev. Public Health</source> <volume>31</volume>, <fpage>9</fpage>&#x2013;<lpage>20</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1146/annurev.publhealth.012809.103723</pub-id>, PMID: <pub-id pub-id-type="pmid">20235850</pub-id>
</mixed-citation>
</ref>
<ref id="B102">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Wright</surname> <given-names>M. N.</given-names></name>
<name><surname>Ziegler</surname> <given-names>A.</given-names></name>
</person-group> (<year>2017</year>). 
<article-title>ranger: A fast implementation of random forests for high dimensional data in C++ and R</article-title>. <source>J. Stat. Software</source> <volume>77</volume>, <fpage>1</fpage>&#x2013;<lpage>17</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.18637/jss.v077.i01</pub-id>
</mixed-citation>
</ref>
<ref id="B103">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Wu</surname> <given-names>L.</given-names></name>
<name><surname>Zhang</surname> <given-names>D.</given-names></name>
<name><surname>Xue</surname> <given-names>M.</given-names></name>
<name><surname>Qian</surname> <given-names>J.</given-names></name>
<name><surname>He</surname> <given-names>Y.</given-names></name>
<name><surname>Wang</surname> <given-names>S.</given-names></name>
</person-group> (<year>2014</year>). 
<article-title>Overexpression of the maize GRF10, an endogenous truncated growth-regulating factor protein, leads to reduction in leaf size and plant height</article-title>. <source>J. Integr. Plant Biol.</source> <volume>56</volume>, <fpage>1053</fpage>&#x2013;<lpage>1063</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/jipb.12220</pub-id>, PMID: <pub-id pub-id-type="pmid">24854713</pub-id>
</mixed-citation>
</ref>
<ref id="B104">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Yamamuro</surname> <given-names>C.</given-names></name>
<name><surname>Ihara</surname> <given-names>Y.</given-names></name>
<name><surname>Wu</surname> <given-names>X.</given-names></name>
<name><surname>Noguchi</surname> <given-names>T.</given-names></name>
<name><surname>Fujioka</surname> <given-names>S.</given-names></name>
<name><surname>Takatsuto</surname> <given-names>S.</given-names></name>
<etal/>
</person-group>. (<year>2000</year>). 
<article-title>Loss of Function of a Rice brassinosteroid insensitive1 Homolog Prevents Internode Elongation and Bending of the Lamina Joint</article-title>. <source>Plant Cell</source> <volume>12</volume>, <fpage>1591</fpage>&#x2013;<lpage>1605</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1105/tpc.12.9.1591</pub-id>, PMID: <pub-id pub-id-type="pmid">11006334</pub-id>
</mixed-citation>
</ref>
<ref id="B105">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Yan</surname> <given-names>J.</given-names></name>
<name><surname>Xu</surname> <given-names>Y.</given-names></name>
<name><surname>Cheng</surname> <given-names>Q.</given-names></name>
<name><surname>Jiang</surname> <given-names>S.</given-names></name>
<name><surname>Wang</surname> <given-names>Q.</given-names></name>
<name><surname>Xiao</surname> <given-names>Y.</given-names></name>
<etal/>
</person-group>. (<year>2021</year>). 
<article-title>LightGBM: accelerated genomically designed crop breeding through ensemble learning</article-title>. <source>Genome Biol.</source> <volume>22</volume>, <fpage>271</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s13059-021-02492-y</pub-id>, PMID: <pub-id pub-id-type="pmid">34544450</pub-id>
</mixed-citation>
</ref>
<ref id="B106">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Yang</surname> <given-names>Z.</given-names></name>
<name><surname>Cieza</surname> <given-names>B.</given-names></name>
<name><surname>Reyes-Dumeyer</surname> <given-names>D.</given-names></name>
<name><surname>Montesinos</surname> <given-names>R.</given-names></name>
<name><surname>Soto-A&#xf1;ari</surname> <given-names>M.</given-names></name>
<name><surname>Custodio</surname> <given-names>N.</given-names></name>
<etal/>
</person-group>. (<year>2024</year>). 
<article-title>A benchmark study on current GWAS models in admixed populations</article-title>. <source>Briefings Bioinf.</source> <volume>25</volume> (<issue>1</issue>), <elocation-id>bbad437</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/bib/bbad437</pub-id>, PMID: <pub-id pub-id-type="pmid">38037235</pub-id>
</mixed-citation>
</ref>
<ref id="B107">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Yang</surname> <given-names>J.</given-names></name>
<name><surname>Lee</surname> <given-names>S. H.</given-names></name>
<name><surname>Goddard</surname> <given-names>M. E.</given-names></name>
<name><surname>Visscher</surname> <given-names>P. M.</given-names></name>
</person-group> (<year>2011</year>a). 
<article-title>GCTA: A tool for genome-wide complex trait analysis</article-title>. <source>Am. J. Hum. Genet.</source> <volume>88</volume>, <fpage>76</fpage>&#x2013;<lpage>82</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.ajhg.2010.11.011</pub-id>, PMID: <pub-id pub-id-type="pmid">21167468</pub-id>
</mixed-citation>
</ref>
<ref id="B108">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Yang</surname> <given-names>X.</given-names></name>
<name><surname>Li</surname> <given-names>X.</given-names></name>
<name><surname>Shan</surname> <given-names>J.</given-names></name>
<name><surname>Li</surname> <given-names>Y.</given-names></name>
<name><surname>Zhang</surname> <given-names>Y.</given-names></name>
<name><surname>Wang</surname> <given-names>Y.</given-names></name>
<etal/>
</person-group>. (<year>2021</year>a). 
<article-title>Overexpression of GmGAMYB accelerates the transition to flowering and increases plant height in soybean</article-title>. <source>Front. Plant Sci.</source> <volume>12</volume>, <elocation-id>667242</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fpls.2021.667242</pub-id>, PMID: <pub-id pub-id-type="pmid">34040624</pub-id>
</mixed-citation>
</ref>
<ref id="B109">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Yang</surname> <given-names>J.</given-names></name>
<name><surname>Weedon</surname> <given-names>M. N.</given-names></name>
<name><surname>Purcell</surname> <given-names>S.</given-names></name>
<name><surname>Lettre</surname> <given-names>G.</given-names></name>
<name><surname>Estrada</surname> <given-names>K.</given-names></name>
<name><surname>Willer</surname> <given-names>C. J.</given-names></name>
<etal/>
</person-group>. (<year>2011</year>b). 
<article-title>Genomic inflation factors under polygenic inheritance</article-title>. <source>Eur. J. Hum. Genet.</source> <volume>19</volume>, <fpage>807</fpage>&#x2013;<lpage>812</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/ejhg.2011.39</pub-id>, PMID: <pub-id pub-id-type="pmid">21407268</pub-id>
</mixed-citation>
</ref>
<ref id="B110">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Yang</surname> <given-names>Z.</given-names></name>
<name><surname>Yan</surname> <given-names>B.</given-names></name>
<name><surname>Dong</surname> <given-names>H.</given-names></name>
<name><surname>He</surname> <given-names>G.</given-names></name>
<name><surname>Zhou</surname> <given-names>Y.</given-names></name>
<name><surname>Sun</surname> <given-names>J.</given-names></name>
</person-group> (<year>2021</year>b). 
<article-title>BIC1 acts as a transcriptional coactivator to promote brassinosteroid signaling and plant growth</article-title>. <source>EMBO J.</source> <volume>40</volume>, <elocation-id>e104615</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.15252/embj.2020104615</pub-id>, PMID: <pub-id pub-id-type="pmid">33280146</pub-id>
</mixed-citation>
</ref>
<ref id="B111">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Yoosefzadeh-Najafabadi</surname> <given-names>M.</given-names></name>
<name><surname>Eskandari</surname> <given-names>M.</given-names></name>
<name><surname>Torabi</surname> <given-names>S.</given-names></name>
<name><surname>Torkamaneh</surname> <given-names>D.</given-names></name>
<name><surname>Tulpan</surname> <given-names>D.</given-names></name>
<name><surname>Rajcan</surname> <given-names>I.</given-names></name>
</person-group> (<year>2022</year>). 
<article-title>Machine-learning-based genome-wide association studies for uncovering QTL underlying soybean yield and its components</article-title>. <source>Int. J. Mol. Sci.</source> <volume>23</volume> (<issue>10</issue>), <fpage>5538</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/ijms23105538</pub-id>, PMID: <pub-id pub-id-type="pmid">35628351</pub-id>
</mixed-citation>
</ref>
<ref id="B112">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Yu</surname> <given-names>J.</given-names></name>
<name><surname>Pressoir</surname> <given-names>G.</given-names></name>
<name><surname>Briggs</surname> <given-names>W. H.</given-names></name>
<name><surname>Vroh Bi</surname> <given-names>I.</given-names></name>
<name><surname>Yamasaki</surname> <given-names>M.</given-names></name>
<name><surname>Doebley</surname> <given-names>J. F.</given-names></name>
<etal/>
</person-group>. (<year>2006</year>). 
<article-title>A unified mixed-model method for association mapping that accounts for multiple levels of relatedness</article-title>. <source>Nat. Genet.</source> <volume>38</volume>, <fpage>203</fpage>&#x2013;<lpage>208</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/ng1702</pub-id>, PMID: <pub-id pub-id-type="pmid">16380716</pub-id>
</mixed-citation>
</ref>
<ref id="B113">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Zang</surname> <given-names>D.</given-names></name>
<name><surname>You</surname> <given-names>W.</given-names></name>
<name><surname>Wu</surname> <given-names>Y.</given-names></name>
<name><surname>Wang</surname> <given-names>P.</given-names></name>
<name><surname>Wang</surname> <given-names>Z.</given-names></name>
<name><surname>Yang</surname> <given-names>Q.</given-names></name>
<etal/>
</person-group>. (<year>2025</year>). 
<article-title>The Class III Peroxidase gene TaPRX-2A controls grain number per spike in common wheat (Triticum aestivum L.)</article-title>. <source>Front. Plant Sci.</source> <volume>15</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fpls.2024.1501029</pub-id>, PMID: <pub-id pub-id-type="pmid">39980757</pub-id>
</mixed-citation>
</ref>
<ref id="B114">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Zhang</surname> <given-names>L.</given-names></name>
<name><surname>Dong</surname> <given-names>C.</given-names></name>
<name><surname>Zhang</surname> <given-names>Q.</given-names></name>
<name><surname>Zhao</surname> <given-names>G.</given-names></name>
<name><surname>Li</surname> <given-names>F.</given-names></name>
<name><surname>Xia</surname> <given-names>C.</given-names></name>
<etal/>
</person-group>. (<year>2016</year>). 
<article-title>The wheat MYB transcription factor TaMYB18 regulates leaf rolling in rice</article-title>. <source>Biochem. Biophys. Res. Commun.</source> <volume>481</volume>, <fpage>77</fpage>&#x2013;<lpage>83</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.bbrc.2016.11.014</pub-id>, PMID: <pub-id pub-id-type="pmid">27825968</pub-id>
</mixed-citation>
</ref>
<ref id="B115">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Zhang</surname> <given-names>Z.</given-names></name>
<name><surname>Ersoz</surname> <given-names>E.</given-names></name>
<name><surname>Lai</surname> <given-names>C. Q.</given-names></name>
<name><surname>Todhunter</surname> <given-names>R. J.</given-names></name>
<name><surname>Tiwari</surname> <given-names>H. K.</given-names></name>
<name><surname>Gore</surname> <given-names>M. A.</given-names></name>
<etal/>
</person-group>. (<year>2010</year>). 
<article-title>Mixed linear model approach adapted for genome-wide association studies</article-title>. <source>Nat. Genet.</source> <volume>42</volume>, <fpage>355</fpage>&#x2013;<lpage>360</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/ng.546</pub-id>, PMID: <pub-id pub-id-type="pmid">20208535</pub-id>
</mixed-citation>
</ref>
<ref id="B116">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Zhang</surname> <given-names>Y.</given-names></name>
<name><surname>Yu</surname> <given-names>C.</given-names></name>
<name><surname>Lin</surname> <given-names>J.</given-names></name>
<name><surname>Liu</surname> <given-names>J.</given-names></name>
<name><surname>Liu</surname> <given-names>B.</given-names></name>
<name><surname>Wang</surname> <given-names>J.</given-names></name>
<etal/>
</person-group>. (<year>2017</year>). 
<article-title>OsMPH1 regulates plant height and improves grain yield in rice</article-title>. <source>PLoS One</source> <volume>12</volume> (<issue>7</issue>), <elocation-id>e0180825</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1371/journal.pone.0180825</pub-id>, PMID: <pub-id pub-id-type="pmid">28708834</pub-id>
</mixed-citation>
</ref>
<ref id="B117">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Zhao</surname> <given-names>H.</given-names></name>
<name><surname>Liu</surname> <given-names>L.</given-names></name>
<name><surname>Mo</surname> <given-names>H.</given-names></name>
<name><surname>Qian</surname> <given-names>L.</given-names></name>
<name><surname>Cao</surname> <given-names>Y.</given-names></name>
<name><surname>Cui</surname> <given-names>S.</given-names></name>
<etal/>
</person-group>. (<year>2013</year>). 
<article-title>The ATP-binding cassette transporter ABCB19 regulates postembryonic organ separation in Arabidopsis</article-title>. <source>PLoS One</source> <volume>8</volume> (<issue>4</issue>), <elocation-id>e60809</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1371/journal.pone.0060809</pub-id>, PMID: <pub-id pub-id-type="pmid">23560110</pub-id>
</mixed-citation>
</ref>
<ref id="B118">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Zhou</surname> <given-names>X.</given-names></name>
<name><surname>Stephens</surname> <given-names>M.</given-names></name>
</person-group> (<year>2012</year>). 
<article-title>Genome-wide efficient mixed-model analysis for association studies</article-title>. <source>Nat. Genet.</source> <volume>44</volume>, <fpage>821</fpage>&#x2013;<lpage>824</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/ng.2310</pub-id>, PMID: <pub-id pub-id-type="pmid">22706312</pub-id>
</mixed-citation>
</ref>
</ref-list><glossary>
<title>Glossary</title><def-list><def-item><term>BLINK</term><def>
<p>Bayesian information and linkage disequilibrium iteratively nested keyway</p></def></def-item><def-item><term>CMLM</term><def>
<p>Compressed mixed linear model</p></def></def-item><def-item><term>EN</term><def>
<p>Elastic net</p></def></def-item><def-item><term>FarmCPU</term><def>
<p>Fixed and random model circulating probability unification</p></def></def-item><def-item><term>FDR</term><def>
<p>False discovery rate</p></def></def-item><def-item><term>GLM</term><def>
<p>Generalized linear model</p></def></def-item><def-item><term>GLMnet</term><def>
<p>Generalized linear model with elastic net regularization</p></def></def-item><def-item><term>GWAS</term><def>
<p>Genome-wide association study</p></def></def-item><def-item><term>LARS</term><def>
<p>Least angle regression</p></def></def-item><def-item><term>LD</term><def>
<p>Linkage disequilibrium</p></def></def-item><def-item><term>MAS</term><def>
<p>Marker-assisted selection</p></def></def-item><def-item><term>ML</term><def>
<p>Machine learning</p></def></def-item><def-item><term>MLM</term><def>
<p>Mixed linear model</p></def></def-item><def-item><term>MLMM</term><def>
<p>Multi-locus mixed linear model</p></def></def-item><def-item><term>MTA</term><def>
<p>Marker-trait association</p></def></def-item><def-item><term>PC</term><def>
<p>Principal component</p></def></def-item><def-item><term>PH</term><def>
<p>Plant height</p></def></def-item><def-item><term>QTL</term><def>
<p>Quantitative trait locus/loci</p></def></def-item><def-item><term>QTN</term><def>
<p>Quantitative trait nucleotide</p></def></def-item><def-item><term>RF</term><def>
<p>Random Forest</p></def></def-item><def-item><term>RMSE</term><def>
<p>Root mean square error</p></def></def-item><def-item><term>SUPER</term><def>
<p>Settlement of MLM under progressively exclusive relationship</p></def></def-item><def-item><term>SVM</term><def>
<p>Support vector machine</p></def></def-item><def-item><term>TKW</term><def>
<p>Thousand kernel weight</p></def></def-item><def-item><term>TSLRF</term><def>
<p>Two Stage algorithm based on Least angle regression and Random Forest</p></def></def-item><def-item><term>XGB</term><def>
<p>Extreme gradient boosting</p></def></def-item></def-list></glossary>
<fn-group>
<fn id="n1" fn-type="custom" custom-type="edited-by">
<p>Edited by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3092857">Chao Shen</ext-link>, Guangdong University of Petrochemical Technology, China</p></fn>
<fn id="n2" fn-type="custom" custom-type="reviewed-by">
<p>Reviewed by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1536612">Huiyuan Xu</ext-link>, Shandong Agricultural University, China</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2090523">Honor&#xe9; Tekeu</ext-link>, Institut de Recherche et de D&#xe9;veloppement en Agroenvironnement, Canada</p></fn>
</fn-group>
</back>
</article>