<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="2.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Plant Sci.</journal-id>
<journal-title>Frontiers in Plant Science</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Plant Sci.</abbrev-journal-title>
<issn pub-type="epub">1664-462X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fpls.2025.1664012</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Plant Science</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Increased genomic predictive ability in mango using GWAS-preselected variants and fixed-effect SNPs</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Munyengwa</surname>
<given-names>Norman</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="author-notes" rid="fn001">
<sup>*</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/3126785/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Wilkinson</surname>
<given-names>Melanie J.</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Ortiz-Barrientos</surname>
<given-names>Daniel</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Dillon</surname>
<given-names>Natalie L.</given-names>
</name>
<xref ref-type="aff" rid="aff5">
<sup>5</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/413433/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Webb</surname>
<given-names>Matthew</given-names>
</name>
<xref ref-type="aff" rid="aff6">
<sup>6</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/3184813/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Ali</surname>
<given-names>Asjad</given-names>
</name>
<xref ref-type="aff" rid="aff5">
<sup>5</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1208344/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Bally</surname>
<given-names>Ian S. E.</given-names>
</name>
<xref ref-type="aff" rid="aff5">
<sup>5</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2190820/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Myburg</surname>
<given-names>Alexander A.</given-names>
</name>
<xref ref-type="aff" rid="aff7">
<sup>7</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/36975/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Hardner</surname>
<given-names>Craig M.</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/654471/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>Queensland Alliance for Agriculture and Food Innovation, The University of Queensland</institution>, <addr-line>Brisbane, QLD</addr-line>,&#xa0;<country>Australia</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>School of the Environment, The University of Queensland</institution>, <addr-line>Brisbane, QLD</addr-line>,&#xa0;<country>Australia</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>Australian Research Council Centre of Excellence for Plant Success in Nature and Agriculture, The University of Queensland</institution>, <addr-line>Brisbane, QLD</addr-line>,&#xa0;<country>Australia</country>
</aff>
<aff id="aff4">
<sup>4</sup>
<institution>Australian Research Council Training Centre in Predictive Breeding for Agricultural Futures, The University of Queensland</institution>, <addr-line>Brisbane, QLD</addr-line>,&#xa0;<country>Australia</country>
</aff>
<aff id="aff5">
<sup>5</sup>
<institution>Queensland Department of Primary Industries</institution>, <addr-line>Mareeba, QLD</addr-line>,&#xa0;<country>Australia</country>
</aff>
<aff id="aff6">
<sup>6</sup>
<institution>Queensland Department of Primary Industries</institution>, <addr-line>Brisbane, QLD</addr-line>,&#xa0;<country>Australia</country>
</aff>
<aff id="aff7">
<sup>7</sup>
<institution>Department of Genetics, Stellenbosch University</institution>, <addr-line>Stellenbosch</addr-line>,&#xa0;<country>South Africa</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>Edited by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/355619/overview">Komivi Dossa</ext-link>, UMR AGAP CIRAD, France</p>
</fn>
<fn fn-type="edited-by">
<p>Reviewed by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/756420/overview">Jiban Shrestha</ext-link>, Nepal Agricultural Research Council, Nepal</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3153178/overview">Zhixu Pang</ext-link>, Shanxi Agriculture University, China</p>
</fn>
<fn fn-type="corresp" id="fn001">
<p>*Correspondence: Norman Munyengwa, <email xlink:href="mailto:n.munyengwa@uq.edu.au">n.munyengwa@uq.edu.au</email>
</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>29</day>
<month>10</month>
<year>2025</year>
</pub-date>
<pub-date pub-type="collection">
<year>2025</year>
</pub-date>
<volume>16</volume>
<elocation-id>1664012</elocation-id>
<history>
<date date-type="received">
<day>11</day>
<month>07</month>
<year>2025</year>
</date>
<date date-type="accepted">
<day>23</day>
<month>09</month>
<year>2025</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2025 Munyengwa, Wilkinson, Ortiz-Barrientos, Dillon, Webb, Ali, Bally, Myburg and Hardner.</copyright-statement>
<copyright-year>2025</copyright-year>
<copyright-holder>Munyengwa, Wilkinson, Ortiz-Barrientos, Dillon, Webb, Ali, Bally, Myburg and Hardner</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>Genomic selection (GS) using whole-genome sequencing (WGS) data has potential to improve breeding value accuracy in fruit trees, but previous studies have reported limited gains compared to high-density marker sets. Incorporating preselected variants identified through genome-wide association studies (GWAS) is a promising strategy to enhance the predictive power of WGS data. We investigated whether incorporating GWAS-preselected variants and fixed-effect markers into genomic best linear unbiased prediction (GBLUP) models improves predictive ability for fruit blush color (FBC), average fruit weight (AFW), fruit firmness (FF), and trunk circumference (TC) in mango (<italic>Mangifera indica</italic> L.). The study used 225 gene pool accessions from the Queensland Department of Primary Industries in Australia, with phenotypes collected between 1999 and 2024. Predictive ability was assessed using models that ignored or accounted for population structure using fixed principal components. Accounting for population structure led to a substantial reduction in predictive ability across all traits, suggesting that initially high predictive abilities may have been partly driven by genetic differences between subpopulations. GWAS-preselected variants improved predictive abilities compared to using all WGS data, especially when population structure was accounted for in both parental and 5-fold cross-validation. Gains under parental validation reached 0.28 for AFW (from 0.30 to 0.58) and 0.06 for FBC (from 0.44 to 0.50). In 5-fold cross-validation, gains were up to 0.16 for AFW (from 0.32 to 0.48) and 0.10 for FBC (from 0.35 to 0.45). This suggests that prioritizing markers that better capture relationships at causal loci can improve predictive ability. Fixed-effect SNPs improved predictive ability of WGS data, particularly for FBC, with increases of up to 0.18 (from 0.44 to 0.62). The combination of GWAS-preselected variants and fixed-effect markers yielded the highest improvements in predictive ability for FBC and TC. GWAS identified 5 trait-associated SNPs for FBC, 11 for AFW, and 8 for TC. These results demonstrate that leveraging GWAS-preselected variants and fixed-effect SNPs improves predictive ability, potentially enhancing breeding efficiency in fruit trees.</p>
</abstract>
<kwd-group>
<kwd>genomic prediction</kwd>
<kwd>mango</kwd>
<kwd>GWAS-preselected variants</kwd>
<kwd>genome-wide association studies</kwd>
<kwd>whole-genome sequencing</kwd>
<kwd>prediction accuracy</kwd>
<kwd>population structure</kwd>
</kwd-group>
<counts>
<fig-count count="4"/>
<table-count count="4"/>
<equation-count count="1"/>
<ref-count count="103"/>
<page-count count="21"/>
<word-count count="13300"/>
</counts>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-in-acceptance</meta-name>
<meta-value>Plant Breeding</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec id="s1" sec-type="intro">
<label>1</label>
<title>Introduction</title>    <p>Mango (<italic>Mangifera indica</italic> L.), the world&#x2019;s fifth most produced fruit crop, holds major economic value due to its global consumption and diverse applications (<xref ref-type="bibr" rid="B76">Srivastav et&#xa0;al., 2023</xref>). While global production exceeds 50 million tons, Australia contributes less than 0.2%, with an estimated 61,474 tons produced annually, 89% of which is consumed domestically (<xref ref-type="bibr" rid="B6">Bally and De Faveri, 2021</xref>; <xref ref-type="bibr" rid="B5">Bally et&#xa0;al., 2021</xref>). Genetic improvement of mango is essential to enhance productivity and to meet evolving market demands. Key breeding goals include dwarf or semi-dwarf tree architecture suitable for high-density orchards (<xref ref-type="bibr" rid="B51">Mahmud et&#xa0;al., 2023</xref>; <xref ref-type="bibr" rid="B70">Reddy et&#xa0;al., 2003</xref>), attractive skin color, and market-specific fruit weight (<xref ref-type="bibr" rid="B8">Bally et&#xa0;al., 2009</xref>). Genetic gain in conventional mango breeding is primarily constrained by lengthy breeding cycles exceeding 20 years, with juvenility alone accounting for nearly half of this duration (<xref ref-type="bibr" rid="B7">Bally and Dillon, 2018</xref>). New breeding approaches that can reduce the breeding cycle length are greatly needed to accelerate genetic gains in mango breeding programs.</p>
<p>Genomic selection (GS) has great potential to shorten breeding cycles in horticultural fruit trees by predicting genetic values (breeding or clonal) of unphenotyped individuals at the juvenile stage using statistical models trained on a training set with both genotypic and phenotypic data (<xref ref-type="bibr" rid="B54">Meuwissen et&#xa0;al., 2001</xref>). Proof of concept studies in apple (<xref ref-type="bibr" rid="B60">Muranty et&#xa0;al., 2015</xref>; <xref ref-type="bibr" rid="B72">Roth et&#xa0;al., 2020</xref>), macadamia (<xref ref-type="bibr" rid="B63">O&#x2019;Connor et&#xa0;al., 2021</xref>), and eucalyptus (<xref ref-type="bibr" rid="B78">Suontama et&#xa0;al., 2019</xref>) have demonstrated that GS can accelerate genetic gain per unit of time compared to conventional breeding by shortening the cycle length, primarily through skipping progeny testing. However, in oil palm, GS did not yield sufficient prediction accuracy for some key traits to justify skipping progeny testing (<xref ref-type="bibr" rid="B23">Cros et&#xa0;al., 2017</xref>), underscoring the importance of accurate genetic value prediction for effectively implementing GS in tree crops.</p>
<p>The genomic best linear unbiased prediction (GBLUP) model (<xref ref-type="bibr" rid="B82">VanRaden, 2008</xref>) is one of the most widely used approaches for genomic prediction due to its flexibility and computational efficiency (<xref ref-type="bibr" rid="B9">Barreto et&#xa0;al., 2024</xref>). The GBLUP model estimates breeding values of selection candidates using a genomic relationship matrix (GRM), which aims to capture relationships among individuals at quantitative trait loci (QTLs). However, it assumes that all markers contribute equally to genetic variance (<xref ref-type="bibr" rid="B53">Meuwissen and Goddard, 2010</xref>), a limitation when a few major loci account for a substantial portion of trait variation. This can lead to underestimation of the contribution of major loci to genetic variation, and consequently, reduced genetic gain from GS (<xref ref-type="bibr" rid="B11">Bernardo, 2014</xref>). To address this, several studies have incorporated key trait-associated markers as fixed or random effects in GBLUP models, resulting in improved prediction accuracy (<xref ref-type="bibr" rid="B11">Bernardo, 2014</xref>; <xref ref-type="bibr" rid="B19">Chen et&#xa0;al., 2023</xref>; <xref ref-type="bibr" rid="B33">Hardner et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B43">Kostick et&#xa0;al., 2023</xref>).</p>
<p>Whole-genome sequencing (WGS) data has been proposed to improve the accuracy of genomic prediction by capturing QTL variants directly rather than relying on the linkage disequilibrium (LD) between markers and unobserved QTLs (<xref ref-type="bibr" rid="B55">Meuwissen et&#xa0;al., 2016</xref>). However, prior research has demonstrated that to enhance genomic prediction accuracy with WGS data, predictions should utilize preselected variants based on their association with target traits, such as those identified through genome-wide association studies (GWAS) (<xref ref-type="bibr" rid="B50">Liu et&#xa0;al., 2023</xref>; <xref ref-type="bibr" rid="B68">Raymond et&#xa0;al., 2018</xref>; <xref ref-type="bibr" rid="B90">Warburton et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B91">Wei et&#xa0;al., 2023</xref>; <xref ref-type="bibr" rid="B98">Ye et&#xa0;al., 2020</xref>). This is because not all markers in WGS data are causative or in strong LD with causative mutations for the target trait (<xref ref-type="bibr" rid="B81">van Binsbergen et&#xa0;al., 2015</xref>); instead, many may introduce noise into the prediction model, ultimately reducing prediction accuracy (<xref ref-type="bibr" rid="B68">Raymond et&#xa0;al., 2018</xref>). GWAS-preselected variants from WGS data may enhance prediction accuracy in GBLUP models by enabling the construction of trait-specific GRMs that prioritize causative mutations or markers in LD with them, thereby better capturing genetic relationships at causal loci. Although GWAS-preselected variants from WGS data have shown improved prediction accuracy in livestock (<xref ref-type="bibr" rid="B38">Jang et&#xa0;al., 2023a</xref>; <xref ref-type="bibr" rid="B68">Raymond et&#xa0;al., 2018</xref>; <xref ref-type="bibr" rid="B84">Veerkamp et&#xa0;al., 2016</xref>), this approach remains largely unexplored in fruit trees, including mango.</p>
<p>Genome-wide association studies (GWAS) remain the most widely used approach for identifying trait-associated single nucleotide polymorphisms (SNPs) and prioritizing markers for genomic prediction based on their potential causal effects. However, most studies employed single-locus GWAS (SL-GWAS) models, which test markers individually and have limited detection power for polygenic traits (<xref ref-type="bibr" rid="B86">Wang et&#xa0;al., 2016</xref>). The ability to detect causal variants is further influenced by factors such as effective population size (<italic>Ne</italic>), LD structure, GWAS sample size, and the statistical model used (<xref ref-type="bibr" rid="B39">Jang et&#xa0;al., 2023b</xref>). For instance, detection power is enhanced and sample size requirements are reduced for GWAS in populations with high <italic>Ne</italic> and low LD (<xref ref-type="bibr" rid="B58">Misztal et&#xa0;al., 2021</xref>), whereas small <italic>Ne</italic> increases long-range LD and noise, reducing detection power. To date, <italic>Ne</italic> has not been estimated in mango. In addition, most genomic prediction studies using GWAS-preselected variants have relied on a single GWAS methodology for variant discovery, limiting comparison across models. This represents a key research gap. To address this, we evaluate genomic prediction performance using GWAS-preselected variants identified from three multi-locus GWAS methods: Bayesian-information and Linkage-disequilibrium Iteratively Nested Keyway (BLINK) (<xref ref-type="bibr" rid="B36">Huang et&#xa0;al., 2019</xref>), the Fixed and random model Circulating Probability Unification (FarmCPU) (<xref ref-type="bibr" rid="B49">Liu et&#xa0;al., 2016</xref>) and the Multi-loci Mixed Linear Model (MLMM) (<xref ref-type="bibr" rid="B74">Segura et&#xa0;al., 2012</xref>). We also compare these with a single-locus approach, the general linear mixed model (GLMM).</p>
<p>A key challenge in genomic prediction is population structure, defined as the presence of genetically distinct subgroups with divergent allele frequencies (<xref ref-type="bibr" rid="B37">Jacquin et&#xa0;al., 2025</xref>). If unaccounted for, population structure can bias genomic estimated breeding values (GEBVs) and inflate estimates of selection accuracy (<xref ref-type="bibr" rid="B71">Riedelsheimer et&#xa0;al., 2013</xref>; <xref ref-type="bibr" rid="B92">Werner et&#xa0;al., 2020</xref>). Addressing population structure is especially critical in perennial tree crops, where training populations often represent broad genetic diversity to minimize phenotyping demands across populations or generations, given the long breeding cycles and extended juvenile phases (<xref ref-type="bibr" rid="B12">Brault et&#xa0;al., 2022</xref>). Despite its potential to confound predictions, population structure is frequently overlooked, especially when perceived to be weak. A common strategy used to account for population structure is to include principal components (PCs) derived from principal component analysis (PCA) of the GRM as fixed-effect covariates in prediction models (<xref ref-type="bibr" rid="B34">Hayatgheibi et&#xa0;al., 2024</xref>).</p>
<p>To the best of our knowledge, there are currently no published reports of genomic prediction in mango, and the use of GWAS-preselected variants from WGS data remains largely unexplored in tree crops. This represents a significant gap in the application of GS in mango and other fruit trees. To address this, we aimed to develop and evaluate strategies for improving genomic predictive ability for key traits in mango using WGS data. Specifically, we: (i) assessed the power of GWAS using multi-locus and single-locus models, (ii) evaluated the impact of increasing marker density to WGS level on predictive ability, (iii) evaluated whether predictive ability could be increased by using GWAS-preselected variants, (iv) assessed the impact of incorporating significant GWAS loci as fixed effects in GBLUP models on predictive ability, and (v) investigated the impact of population structure on predictive ability. Together, these analyses inform strategies for optimizing genomic selection in mango.</p>
</sec>
<sec id="s2" sec-type="materials|methods">
<label>2</label>
<title>Materials and methods</title>
<sec id="s2_1">
<label>2.1</label>
<title>Germplasm and trial design</title>
<p>This study used 225 mango (<italic>Mangifera indica</italic> L.) accessions from the gene-pool collection of the Queensland Mango Breeding Program (QMBP), maintained by the Queensland Department of Primary Industries (DPI) in Australia. This collection comprises historical cultivars from 24 countries and progenies from advanced selections, capturing a broad spectrum of <italic>Mangifera indica</italic>&#x2019;s genetic diversity (<xref ref-type="bibr" rid="B94">Wilkinson et&#xa0;al., 2025</xref>). The accessions exhibit strong population structure, divided into two primary sub-populations: 33 individuals of Southeast Asian origin and 192 of Indian ancestry (<xref ref-type="bibr" rid="B95">Wilkinson et&#xa0;al., 2022</xref>). Among the 225 gene-pool accessions, 41 are used as parents for the QMBP breeding population (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Table&#xa0;1</bold>
</xref>). None of these parental accessions originated from Southeast Asia. The trees were grown at the Walkamin Research Station (WRS) and assessments of fruit quality traits and tree growth were conducted from 1999 to 2024.</p>
</sec>
<sec id="s2_2">
<label>2.2</label>
<title>Phenotypic data</title>
<sec id="s2_2_1">
<label>2.2.1</label>
<title>Trunk circumference</title>
<p>Trunk circumference (TC), an indicator of tree vigor, was measured using a tape measure positioned 10 cm above the graft union. Due to differences in planting times, the trees were assessed at different ages, resulting in unbalanced data. We used TC data for trees assessed at the ages of 9 (TC9, 200 unique accessions) and 12 (TC12, 199 unique accessions) years (total of 207 unique accessions) due to the availability of a relatively large number of individuals assessed in these years.</p>
</sec>
<sec id="s2_2_2">
<label>2.2.2</label>
<title>Fruit quality traits</title>
<p>Physiologically mature fruits were harvested from the outer tree canopy, where they were exposed to sunlight. The fruits were washed thoroughly with a detergent and treated with a fungicide dip (1.0 ml L<sup>-1</sup> Fludioxonil (230 g/L)) for five minutes at 52 &#xb0;C to control anthracnose. They were then stored in a ripening room maintained at 22 &#xb0;C until they developed a soft texture. Fruit blush color (FBC) was assessed in 220 accessions over at least two seasons, using ten ripe fruits from each accession. FBC of the ripened fruit was rated on a categorical scale, in order from least to most desirable: no blush, orange, pink, pink-red, red, and burgundy. FBC categorical data was converted to a numerical scale as: no blush or yellow = 0, orange = 1, pink = 2, pink-red = 3, red = 4, and burgundy = 5.</p>
<p>The average fruit weight (AFW) in grams (g) was calculated across 222 accessions using the weight of ten fruits at the eating ripeness stage. Fruit firmness (FF) was measured in 221 mango accessions using an analogue firmness meter. Not all accessions were assessed for the three fruit quality traits in every season due to the irregular bearing of some cultivars and differences in planting seasons, resulting in unbalanced data.</p>
</sec>
</sec>
<sec id="s2_3">
<label>2.3</label>
<title>Molecular data</title>
<p>Genomic DNA extraction, whole genome sequencing and variant calling followed the protocols outlined by <xref ref-type="bibr" rid="B94">Wilkinson et&#xa0;al. (2025)</xref>, using the same set of 225 mango gene-pool accessions utilized in this study. Briefly, genomic DNA was extracted from young mango leaf tissues using the modified cetyltrimethylammonium bromide (CTAB) method. Whole genome sequencing (WGS) was performed on all 225 accessions, with the 41 parental accessions sequenced at 40X coverage and the remaining 184 individuals at a depth of 15X. Joint SNP calling was performed using GATK4 software (<xref ref-type="bibr" rid="B65">Poplin et&#xa0;al., 2018</xref>), and trimmed paired-end reads were aligned to the <italic>M. indica</italic> &#x2018;Alphonso&#x2019; reference genome (<xref ref-type="bibr" rid="B87">Wang et&#xa0;al., 2020</xref>) to identify physical position. This resulted in a total of 44,125,383 SNPs.</p>
<p>To generate a high-quality SNP dataset, a series of quality filtering steps using VCFtools (<xref ref-type="bibr" rid="B25">Danecek et&#xa0;al., 2011</xref>) were applied. Data points with a read depth below five were set to missing, and SNPs exhibiting more than 20% missing data across the population were discarded. To ensure the inclusion of only the most reliable variants, we imposed a maximum mean read depth of 50, removed SNPs with a minor allele frequency (MAF) below 0.05, and applied a Hardy-Weinberg equilibrium <italic>p</italic>-value cut-off of 1e-6 to eliminate potential genotyping errors. Following these stringent quality control measures, 10,172,985 SNPs remained for downstream analyses. Missing markers in the final dataset were imputed using the Hidden Markov Model (HMM) implemented in Beagle 5.4 (<xref ref-type="bibr" rid="B13">Browning et&#xa0;al., 2018</xref>).</p>
</sec>
<sec id="s2_4">
<label>2.4</label>
<title>Estimation of effective population size (<inline-formula>
<mml:math display="inline" id="im1">
<mml:mrow>
<mml:msub>
<mml:mi>N</mml:mi>
<mml:mi>e</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>) and linkage disequilibrium</title>
<p>To assess genetic diversity within the QMBP gene-pool collection, we estimated recent historical <italic>N<sub>e</sub>
</italic> for the 225 accessions based on LD between pairs of markers, as implemented in GONE software (<xref ref-type="bibr" rid="B73">Santiago et&#xa0;al., 2020</xref>). This method estimates the <italic>N<sub>e</sub>
</italic> from the variance of progeny number, which is equal to the number of breeding individuals (<italic>N</italic>). To minimize downward bias in <italic>N<sub>e</sub>
</italic> estimates due to elevated LD (<xref ref-type="bibr" rid="B89">Waples et&#xa0;al., 2016</xref>), we used 815,255 independent SNPs derived by pruning the initial set of ~10 million SNPs. Pruning was performed in PLINK 2.0 (<xref ref-type="bibr" rid="B66">Purcell et&#xa0;al., 2007</xref>) by removing one SNP from each pair with a squared correlation coefficient (<italic>r</italic>&#xb2;) &gt; 0.2 within a 35-SNP sliding window. Additionally, <italic>N<sub>e</sub>
</italic> was estimated for each of the two sub-populations defined by <xref ref-type="bibr" rid="B95">Wilkinson et&#xa0;al. (2022)</xref>, as population structure can bias <italic>N<sub>e</sub>
</italic> estimates (<xref ref-type="bibr" rid="B73">Santiago et&#xa0;al., 2020</xref>). Furthermore, <italic>N<sub>e</sub>
</italic> was estimated for the parental accessions in the QMBP to evaluate whether sufficient genetic diversity exists to sustain long-term genetic gains within the breeding program. Analyses were conducted using default GONE software parameters.</p>
<p>To evaluate LD decay with physical distance among the 225 gene-pool accessions, pairwise estimates of LD were calculated using the squared correlation of allele frequencies (<italic>r&#xb2;</italic>) for all SNP pairs within 1 Mbp windows across the entire set of 10,172,985 SNPs. The distance at which <italic>r&#xb2;</italic> decayed to 0.2, commonly regarded as the minimum threshold for high genomic prediction accuracy (<xref ref-type="bibr" rid="B16">Calus et&#xa0;al., 2008</xref>), was determined separately for each chromosome using PopLDdecay (<xref ref-type="bibr" rid="B100">Zhang et&#xa0;al., 2019</xref>).</p>
</sec>
<sec id="s2_5">
<label>2.5</label>
<title>GBLUP model implementation and parameter estimation</title>
<p>Linear mixed models were fitted by residual maximum likelihood (REML), as implemented in the R package ASReml-R 4 (<xref ref-type="bibr" rid="B14">Butler et&#xa0;al., 2023</xref>), within a GBLUP framework to estimate model parameters and predict random and fixed effects for all traits. When a GRM was ill-conditioned (i.e. not positive-definite), bending was applied to allow for matrix inversion, as implemented in the ASRgenomics R package (<xref ref-type="bibr" rid="B61">Nazarian and Gezan, 2016</xref>). The linear mixed model used to predict the genomic estimated breeding values (GEBVs) of mango individuals is given in <xref ref-type="disp-formula" rid="eq1">Equation 1</xref>:</p>
<disp-formula id="eq1">
<label>(1)</label>
<mml:math display="block" id="M1">
<mml:mrow>
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mi>y</mml:mi>
</mml:mstyle>
<mml:mo>=</mml:mo>
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mi>X</mml:mi>
<mml:mi>b</mml:mi>
</mml:mstyle>
<mml:mo>+</mml:mo>
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mi>Z</mml:mi>
<mml:mi>a</mml:mi>
</mml:mstyle>
<mml:mo>+</mml:mo>
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mi>e</mml:mi>
</mml:mstyle>
</mml:mrow>
</mml:math>
</disp-formula>
<p>Where <inline-formula>
<mml:math display="inline" id="im2">
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mi>y</mml:mi>
</mml:mstyle>
</mml:math>
</inline-formula> was the vector of phenotypic measurements, <inline-formula>
<mml:math display="inline" id="im3">
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mi>X</mml:mi>
</mml:mstyle>
</mml:math>
</inline-formula> was the design matrix relating phenotypic records to the vector of fixed effects (the intercept for all traits, age of tree at assessment for trunk circumference, significant markers for models that included these as fixed effects, and the first six principal components for models that accounted for population structure) denoted by <inline-formula>
<mml:math display="inline" id="im4">
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mi>b</mml:mi>
</mml:mstyle>
</mml:math>
</inline-formula>, <inline-formula>
<mml:math display="inline" id="im5">
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mi>Z</mml:mi>
</mml:mstyle>
</mml:math>
</inline-formula> was the design matrix linking phenotypic records to the additive genomic effects of the mango accessions, <inline-formula>
<mml:math display="inline" id="im6">
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mi>a</mml:mi>
</mml:mstyle>
</mml:math>
</inline-formula> was the vector of additive genomic effects, and <inline-formula>
<mml:math display="inline" id="im7">
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mi>e</mml:mi>
</mml:mstyle>
</mml:math>
</inline-formula> represented the random residual effects. We assumed the following distributions for the four traits: <inline-formula>
<mml:math display="inline" id="im8">
<mml:mrow>
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mi>a</mml:mi>
</mml:mstyle>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>~</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mtext>N</mml:mtext>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mtext>&#x3c3;</mml:mtext>
<mml:mrow>
<mml:mtext>a</mml:mtext>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msubsup>
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mi>G</mml:mi>
</mml:mstyle>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math display="inline" id="im9">
<mml:mrow>
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mi>e</mml:mi>
</mml:mstyle>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>~</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mtext>N</mml:mtext>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mi>I</mml:mi>
</mml:mstyle>
<mml:msubsup>
<mml:mtext>&#x3c3;</mml:mtext>
<mml:mrow>
  <mml:mtext>e</mml:mtext>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, where <inline-formula>
<mml:math display="inline" id="im10">
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mi>G</mml:mi>
</mml:mstyle>
</mml:math>
</inline-formula> was an <inline-formula>
<mml:math display="inline" id="im11">
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> symmetric and positive-definite additive GRM which described the additive genomic relationships among all pairs of individuals in both the training and validation sets. The additive genomic variance explained by the set of SNPs in each analysis was denoted by <inline-formula>
<mml:math display="inline" id="im12">
<mml:mrow>
<mml:msubsup>
<mml:mtext>&#x3c3;</mml:mtext>
<mml:mrow>
<mml:mtext>a</mml:mtext>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>. The residual variance was denoted by <inline-formula>
<mml:math display="inline" id="im13">
<mml:mrow>
<mml:msubsup>
<mml:mtext>&#x3c3;</mml:mtext>
<mml:mrow>
<mml:mtext>e</mml:mtext>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>, and <inline-formula>
<mml:math display="inline" id="im14">
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mi>I</mml:mi>
</mml:mstyle>
</mml:math>
</inline-formula> was an <inline-formula>
<mml:math display="inline" id="im15">
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> identity matrix. For trunk circumference, <inline-formula>
<mml:math display="inline" id="im16">
<mml:mrow>
<mml:msubsup>
<mml:mtext>&#x3c3;</mml:mtext>
<mml:mrow>
<mml:mtext>a</mml:mtext>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msubsup>
<mml:mo>&#xa0;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> was replaced by the additive genomic-by-age-at-assessment covariance matrix, <inline-formula>
<mml:math display="inline" id="im17">
<mml:mrow>
<mml:msub>
<mml:mi>G</mml:mi>
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>A</mml:mi>
<mml:mi>g</mml:mi>
<mml:mi>e</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> and the <inline-formula>
<mml:math display="inline" id="im18">
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#xd7;</mml:mo>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> variance-covariance matrix of residual effects was modelled using a CORGH variance structure, assuming correlated heterogeneous variances among observations across the two ages of assessment (ages 9 and 12). In this case, <inline-formula>
<mml:math display="inline" id="im19">
<mml:mrow>
<mml:msubsup>
<mml:mtext>&#x3c3;</mml:mtext>
<mml:mrow>
<mml:mtext>e</mml:mtext>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> was replaced with the residual variance-covariance matrix capturing both the heterogeneous residual variances and the residual correlation between ages. The additive genomic relationship matrix (<bold>G</bold>) for each marker set was estimated using the method described by <xref ref-type="bibr" rid="B97">Yang et&#xa0;al. (2010)</xref>. Individual narrow-sense heritability (<inline-formula>
<mml:math display="inline" id="im20">
<mml:mrow>
<mml:msup>
<mml:mover accent="true">
<mml:mi>h</mml:mi>
<mml:mo>^</mml:mo>
</mml:mover>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>) for each specific trait was estimated as <inline-formula>
<mml:math display="inline" id="im21">
<mml:mrow>
<mml:msup>
<mml:mover accent="true">
<mml:mi>h</mml:mi>
<mml:mo>^</mml:mo>
</mml:mover>
<mml:mn>2</mml:mn>
</mml:msup>
<mml:mo>=</mml:mo>
<mml:msubsup>
<mml:mi>&#x3c3;</mml:mi>
<mml:mi>a</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
<mml:mo stretchy="false">/</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mi>&#x3c3;</mml:mi>
<mml:mi>a</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
<mml:mo>+</mml:mo>
<mml:msubsup>
<mml:mi>&#x3c3;</mml:mi>
<mml:mi>e</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>. The Akaike Information Criterion (AIC) was used to assess the quality of model fit.</p>
<sec id="s2_5_1">
<label>2.5.1</label>
<title>Model validation</title>
<p>Two approaches were used to validate genomic prediction models in this study. In the first cross-validation approach (parental validation), own phenotypes of the 41 gene-pool accessions that are being used as parents in the QMBP served as an independent dataset for model validation, while the remaining gene-pool accessions served as the training population. Predictive ability was estimated as the Pearson correlation between the phenotypes predicted by the linear mixed models (GEBVs) and the observed phenotypes of parental accessions, <inline-formula>
<mml:math display="inline" id="im22">
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>^</mml:mo>
</mml:mover>
<mml:mo>,</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
<p>To provide a more robust evaluation of model performance, a second validation approach involving random 5-fold cross-validation (5-fold CV) was also implemented. In this approach, the entire gene pool collection was randomly partitioned into five subsets in which each subset consisted of 20% of the accessions. For each fold, four subsets (80% of total individuals) were used for model training and the remaining fold (20% of the accessions) for model validation. Predictive ability was calculated as the Pearson correlation between the GEBVs and the observed phenotypes after each 5-fold CV run. To ensure stability and reliability of the predictive ability estimates, the 5-fold CV procedure was repeated five times. Thus, 25 correlation values were calculated for each model. For trunk circumference, only phenotypic data collected from trees aged 12 years were used for validation. The bias of predictions was calculated as the regression of phenotypes on GEBVs for individuals in the validation set.</p>
</sec>
</sec>
<sec id="s2_6">
<label>2.6</label>
<title>Linkage disequilibrium pruning of WGS data</title>
<p>To evaluate whether increasing marker density to WGS level enhances genomic predictive ability, we performed GP using the full set of available WGS markers (~10 million SNPs) and lower-density marker sets (~2 million, ~800k, ~80k, ~20k, and ~10k SNPs). These reduced marker sets were generated by pruning correlated markers based on LD thresholds. The LD pruning thresholds were chosen arbitrarily to generate a range of marker densities. LD pruning was performed using PLINK 2.0 (<xref ref-type="bibr" rid="B66">Purcell et&#xa0;al., 2007</xref>) to remove one SNP from each pair if their squared correlation (<italic>r&#xb2;</italic>) exceeded a user-defined threshold within a specified window. For example, the ~2 million SNP dataset (LD_2mil) was created by pruning one of each pair of SNPs if their <italic>r<sup>2</sup>
</italic> value exceeded 0.2 within a window size of 15 SNPs, shifting the window 10 SNPs forward and repeating the procedure. More stringent LD thresholds were applied to derive lower-density marker sets, as detailed in <xref ref-type="table" rid="T1">
<bold>Table&#xa0;1</bold>
</xref>. The final datasets &#x2013; LD_2mil (~2 million SNPs), LD_800k (~800k SNPs), LD_80k (~80k SNPs), LD_20k (~20k SNPs), and LD_10k (~10k SNPs) &#x2013; were used to assess the impact of marker density on predictive ability.</p>
<table-wrap id="T1" position="float">
<label>Table&#xa0;1</label>
<caption>
<p>Description of marker sets including whole-genome sequencing (WGS) data and LD-pruned markers.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="left">Scenario</th>
<th valign="middle" align="left">
<italic>r</italic>
<sup>2</sup>
</th>
<th valign="middle" align="left">Window size</th>
<th valign="middle" align="left">Number of SNPs</th>
<th valign="middle" align="left">
<italic>r</italic>
<sup>2</sup> between adjacent SNPs</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="left">WGS</td>
<td valign="middle" align="center">NA</td>
<td valign="middle" align="center">NA</td>
<td valign="middle" align="center">10,172,985</td>
<td valign="middle" align="center">0.33</td>
</tr>
<tr>
<td valign="middle" align="left">LD_2mil</td>
<td valign="middle" align="center">0.2</td>
<td valign="middle" align="center">15</td>
<td valign="middle" align="center">2,016,911</td>
<td valign="middle" align="center">0.11</td>
</tr>
<tr>
<td valign="middle" align="left">LD_800k</td>
<td valign="middle" align="center">0.2</td>
<td valign="middle" align="center">35</td>
<td valign="middle" align="center">815,255</td>
<td valign="middle" align="center">0.08</td>
</tr>
<tr>
<td valign="middle" align="left">LD_80k</td>
<td valign="middle" align="center">0.2</td>
<td valign="middle" align="center">1,500</td>
<td valign="middle" align="center">82,504</td>
<td valign="middle" align="center">0.03</td>
</tr>
<tr>
<td valign="middle" align="left">LD_20k</td>
<td valign="middle" align="center">0.1</td>
<td valign="middle" align="center">8,000</td>
<td valign="middle" align="center">20,523</td>
<td valign="middle" align="center">0.01</td>
</tr>
<tr>
<td valign="middle" align="left">LD_10k</td>
<td valign="middle" align="center">0.1</td>
<td valign="middle" align="center">100,000</td>
<td valign="middle" align="center">10,068</td>
<td valign="middle" align="center">0.01</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>PLINK 2.0 was used to prune one of each pair of correlated SNPs at an arbitrarily chosen LD threshold using WGS data. For example, the LD_2mil scenario was created by pruning one of each pair of SNPs if their <italic>r</italic><sup>2</sup> value exceeded 0.2 within a window size of 15 SNPs, shifting the window 10 SNPs forward and repeating the procedure.</p>
</fn>
</table-wrap-foot>
</table-wrap>
</sec>
<sec id="s2_7">
<label>2.7</label>
<title>Accounting for population structure</title>
<p>To evaluate the impact of population structure on predictive ability, the top six principal components (PCs) derived from principal component analysis (PCA) of the GRM were included as fixed effects in GBLUP models. Since LD can affect PCA (<xref ref-type="bibr" rid="B17">Campoy et&#xa0;al., 2016</xref>), we conducted PCA using a GRM constructed using a set of ~80k (LD_80k) unlinked markers derived from LD pruning of the ~10 million WGS markers. We selected the top six PCs to represent population structure based on their relative contributions to global molecular variance. Individually, these PCs accounted for between 2.5% and 10% of the molecular variance, and together they explained 33% of the total variation in the mango gene pool collection. The predictive ability of models that included fixed PCs was compared to that of models that did not include this adjustment.</p>
</sec>
<sec id="s2_8">
<label>2.8</label>
<title>Genome-wide association study</title>
<p>We performed GWAS using the LD_2mil marker set to identify trait-associated markers and establish an association-based criterion for preselecting SNPs from WGS data for use in genomic prediction. Although GWAS for the same traits and phenotypic data was conducted in the original study by <xref ref-type="bibr" rid="B94">Wilkinson et&#xa0;al. (2025)</xref>, our reanalysis aimed to enhance statistical power by leveraging a denser marker set and multi-locus GWAS methods. In this study, we evaluated three multi-locus GWAS methods: (1) the MLMM (<xref ref-type="bibr" rid="B74">Segura et&#xa0;al., 2012</xref>), (2) BLINK (<xref ref-type="bibr" rid="B36">Huang et&#xa0;al., 2019</xref>), and (3) FarmCPU (<xref ref-type="bibr" rid="B49">Liu et&#xa0;al., 2016</xref>). The MLMM employs a stepwise regression approach to iteratively incorporate the most influential markers (pseudo quantitative trait nucleotides: pseudo-QTNs) as covariates to account for population structure. The BLINK approach accounts for population structure using pseudo-QTNs selected using LD information and optimized for Bayesian information criterion (BIC), while FarmCPU employs the fixed-bin approach to select pseudo-QTNs, assuming a uniform distribution of pseudo-QTNs across the genome. All three multi-locus GWAS methods were implemented using GAPIT 3 (<xref ref-type="bibr" rid="B88">Wang and Zhang, 2021</xref>). For comparison, a single-locus GWAS was performed using a GLMM implemented in PLINK 2.0 (<xref ref-type="bibr" rid="B66">Purcell et&#xa0;al., 2007</xref>).</p>
<p>To account for population structure in GWAS analyses, both multi-locus and single-locus methods incorporated the first six PCs derived from PCA of the GRM as fixed effects, as described above. A marker was considered significant if it surpassed the Bonferroni threshold (-log10(p) = 7.61). For TC, GWAS was conducted separately for trees assessed at the ages of 9 and 12 years. For variant preselection and the identification of fixed-effect SNPs, GWAS was conducted exclusively using individuals from the training population. Individuals in the validation set were excluded to minimize the bias in GEBVs that could arise from discovering markers in the same population used for model validation.</p>
</sec>
<sec id="s2_9">
<label>2.9</label>
<title>Incorporation of GWAS results in GBLUP models</title>
<p>To evaluate whether predictive ability using WGS data can be improved by prioritizing markers based on potential LD with QTLs, we created marker subsets containing preselected variants identified using GWAS approaches described earlier. Markers were first ranked in descending order of statistical significance from GWAS (-log10(p-value)), with the most statistically significant SNPs selected first. Different densities of preselected variants were evaluated as top 1,000, 10,000, 15,000, 20,000, 30,000, 50,000, and 100,000 SNPs. Markers preselected through GWAS conducted using BLINK, FarmCPU, MLMM, and the GLMM methodologies are referred to as TOP-BLINK, TOP-FarmCPU, TOP-MLMM, and TOP-GLMM, respectively. Genomic predictive abilities from GBLUP models using additive GRMs based on preselected variants from different GWAS models and unselected marker sets (WGS data and LD-pruned data) were compared.</p>
<p>To test the hypothesis that fitting significant SNPs from GWAS as fixed effects enhances predictive ability, the additive genetic effects of significant markers identified by at least two GWAS methods, hereafter referred to as reliable SNPs, were added to GBLUP models as fixed effects. These reliable SNPs were identified using GWAS in the training population. In models incorporating fixed-effect SNPs, reliable SNPs were excluded from GRM construction, and their best linear unbiased estimates (BLUEs) were added to the GEBVs prior to model validation. The fixed-effect SNPs were added to models based on GWAS-preselected variants, WGS data, and LD-pruned marker sets.</p>
</sec>
</sec>
<sec id="s3" sec-type="results">
<label>3</label>
<title>Results</title>
<sec id="s3_1">
<label>3.1</label>
<title>Effective population size and linkage disequilibrium</title>
<p>The effective population size (<italic>N<sub>e</sub>
</italic>) varied considerably between sub-populations within the QMBP&#x2019;s mango gene-pool collection. The overall <italic>N<sub>e</sub>
</italic> for the entire gene-pool collection was estimated to be 113. Subpopulation-specific estimates revealed relatively high <italic>N<sub>e</sub>
</italic> values for non-Southeast Asian accessions (<italic>N<sub>e</sub>
</italic> = 129) and for individuals currently used as parents in the QMBP (<italic>N<sub>e</sub>
</italic> = 104). In contrast, the Southeast Asian accessions exhibited a markedly lower effective population size (<italic>N<sub>e</sub>
</italic> = 29).</p>
<p>Linkage disequilibrium (LD) decayed sharply with increasing physical distance between markers. The <italic>r<sup>2</sup>
</italic> estimates between pairs of SNPs dropped below the widely accepted critical threshold for accurate genomic prediction (<italic>r</italic>
<sup>2</sup> = 0.20) within 3.6 kb (<xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1</bold>
</xref>). The mean genome-wide <italic>r<sup>2</sup>
</italic> between adjacent SNPs across all chromosomes in the WGS dataset was 0.33. In contrast, the mean <italic>r<sup>2</sup>
</italic> values for the LD-pruned marker subsets (LD_2mil, LD_800k, LD_80k, LD_20k, and LD_10k) were substantially lower (<xref ref-type="table" rid="T1">
<bold>Table&#xa0;1</bold>
</xref>).</p>
<fig id="f1" position="float">
<label>Figure&#xa0;1</label>
<caption>
<p>Linkage disequilibrium (LD) decay in the mango gene pool. The X-axis shows the physical distance between SNPs in kilobases (kb), and the Y-axis represents the squared correlation (r&#xb2;) between allele frequencies. The dotted line marks the threshold of r&#xb2; = 0.2.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-16-1664012-g001.tif">
<alt-text content-type="machine-generated">A graph showing the decay of linkage disequilibrium (r&#xb2;) with increasing pairwise SNP distance (kb), where the y-axis represents linkage disequilibrium from 0.05 to 0.30 and the x-axis represents SNP distance from 0 to 1000 kb, with a solid green line sloping downward to indicate decreased linkage disequilibrium as distance increases and a horizontal red dashed line at 0.20 marking a reference threshold.</alt-text>
</graphic>
</fig>
</sec>
<sec id="s3_2">
<label>3.2</label>
<title>Phenotypic analysis</title>
<p>We observed substantial to relatively low phenotypic variation across the evaluated traits in the mango gene pool (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Table&#xa0;2</bold>
</xref>). The greatest variability was observed for FBC and AFW, with coefficients of variation (CV) of 88.5% and 42.4%, respectively, indicating pronounced differences in pigmentation and fruit weight among accessions. In contrast, FF showed moderate variability (CV = 28.3%), while TC at ages 9 and 12 showed relatively lower variation (CV = 21.1% and 19.3%, respectively), with mean values of 50.4 cm and 56.06 cm. The density distributions of TC (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Figure&#xa0;1</bold>
</xref>) reveal a rightward shift from age 9 to 12, reflecting overall tree growth.</p>
</sec>
<sec id="s3_3">
<label>3.3</label>
<title>Heritability</title>
<p>Estimates of narrow-sense heritability (<inline-formula>
<mml:math display="inline" id="im23">
<mml:mrow>
<mml:msup>
<mml:mover accent="true">
<mml:mi>h</mml:mi>
<mml:mo>^</mml:mo>
</mml:mover>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>) based on the full marker set (WGS data) varied widely across traits and models, revealing notably high heritabilities for FBC (<inline-formula>
<mml:math display="inline" id="im24">
<mml:mrow>
<mml:msup>
<mml:mover accent="true">
<mml:mi>h</mml:mi>
<mml:mo>^</mml:mo>
</mml:mover>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> = 0.98) and AFW (<inline-formula>
<mml:math display="inline" id="im25">
<mml:mrow>
<mml:msup>
<mml:mover accent="true">
<mml:mi>h</mml:mi>
<mml:mo>^</mml:mo>
</mml:mover>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> = 0.95), but considerably lower estimates for FF (<inline-formula>
<mml:math display="inline" id="im26">
<mml:mrow>
<mml:msup>
<mml:mover accent="true">
<mml:mi>h</mml:mi>
<mml:mo>^</mml:mo>
</mml:mover>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> = 0.26) and TC (<inline-formula>
<mml:math display="inline" id="im27">
<mml:mrow>
<mml:msup>
<mml:mover accent="true">
<mml:mi>h</mml:mi>
<mml:mo>^</mml:mo>
</mml:mover>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> = 0.33) (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Table&#xa0;3</bold>
</xref>). Marker density exerted minimal overall impact on heritability estimates; however, contrary to expectations, an increase in marker density from ~10k (LD_10k) to full WGS coverage led to a reduction in heritability estimates for TC from 0.40 to 0.33. Incorporation of the first six principal components derived from the GRM as fixed effects intended to control for population structure resulted in only subtle changes in <inline-formula>
<mml:math display="inline" id="im28">
<mml:mrow>
<mml:msup>
<mml:mover accent="true">
<mml:mi>h</mml:mi>
<mml:mo>^</mml:mo>
</mml:mover>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> across all traits (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Table&#xa0;4</bold>
</xref>).</p>
<p>Optimal model fits as indicated by lower AIC values were generally observed with intermediate to high marker densities, suggesting that an optimal balance exists between capturing genetic variation and avoiding over-parameterization. Moreover, prediction models employing GRMs constructed from GWAS-preselected variants consistently had better model fit than models based on the full WGS dataset.</p>
</sec>
<sec id="s3_4">
<label>3.4</label>
<title>Genomic prediction using WGS data</title>
<sec id="s3_4_1">
<label>3.4.1</label>
<title>Predictive ability with WGS data and effect of marker density on predictive ability</title>
<p>Genomic predictive ability varied across traits, marker density, and validation strategy (<xref ref-type="table" rid="T2">
<bold>Table&#xa0;2</bold>
</xref>, <xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Table&#xa0;7</bold>
</xref>), and generally aligned with the narrow-sense heritability estimates (<inline-formula>
<mml:math display="inline" id="im29">
<mml:mrow>
<mml:msup>
<mml:mover accent="true">
<mml:mi>h</mml:mi>
<mml:mo>^</mml:mo>
</mml:mover>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>). When considering predictions based on WGS data and baseline GBLUP models (i.e., models without population structure correction or fixed-effect SNPs), higher predictive abilities (PA) were observed for highly heritable traits and lower predictive abilities for traits with lower <inline-formula>
<mml:math display="inline" id="im30">
<mml:mrow>
<mml:msup>
<mml:mover accent="true">
<mml:mi>h</mml:mi>
<mml:mo>^</mml:mo>
</mml:mover>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>. Under the parental validation strategy, the highest predictive abilities were observed for FBC and AFW (PA = 0.67 for both traits), followed by TC (PA = 0.54), with FF showing the lowest predictive ability (PA = 0.41). A similar trend was observed in the 5-fold cross-validation (CV) strategy, where predictive abilities for AFW (0.65) and TC (0.57) were comparable to those from the parental validation (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Table&#xa0;7</bold>
</xref>). However, the predictive ability for FBC increased substantially under the 5-fold CV strategy (0.80), while that for FF decreased markedly (0.28), relative to the parental validation results.</p>
<table-wrap id="T2" position="float">
<label>Table&#xa0;2</label>
<caption>
<p>Genomic predictive abilities for fruit blush color (FBC), average fruit weight (AFW), fruit firmness (FF) and trunk circumference (TC) across different marker sets and prediction models under parental validation.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" rowspan="2" align="left">Trait</th>
<th valign="middle" rowspan="2" align="left">Scenario</th>
<th valign="middle" colspan="10" align="center">Marker set</th>
</tr>
<tr>
<th valign="middle" align="left">LD_10k</th>
<th valign="middle" align="left">LD_20k</th>
<th valign="middle" align="left">LD_80k</th>
<th valign="middle" align="left">LD_800k</th>
<th valign="middle" align="left">LD_2mil</th>
<th valign="middle" align="left">WGS</th>
<th valign="middle" align="left">TOP-BLINK</th>
<th valign="middle" align="left">TOP-FarmCPU</th>
<th valign="middle" align="left">TOP-MLMM</th>
<th valign="middle" align="left">TOP-GLMM</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" rowspan="4" align="left">FBC</td>
<td valign="middle" align="left">GBLUP</td>
<td valign="middle" align="center">0.60</td>
<td valign="middle" align="center">0.65</td>
<td valign="middle" align="center">0.65</td>
<td valign="middle" align="center">0.66</td>
<td valign="middle" align="center">0.66</td>
<td valign="middle" align="center">0.67</td>
<td valign="middle" align="center">0.70</td>
<td valign="middle" align="center">0.71</td>
<td valign="middle" align="center">0.67</td>
<td valign="middle" align="center">0.66</td>
</tr>
<tr>
<td valign="middle" align="left">GBLUP + fixed PCs</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">0.44</td>
<td valign="middle" align="center">0.45</td>
<td valign="middle" align="center">0.50</td>
<td valign="middle" align="center">0.37</td>
<td valign="middle" align="center">0.33</td>
</tr>
<tr>
<td valign="middle" align="left">GBLUP + fixed-SNPs</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">0.74</td>
<td valign="middle" align="center">0.77</td>
<td valign="middle" align="center">0.74</td>
<td valign="middle" align="center">0.68</td>
<td valign="middle" align="center">0.70</td>
</tr>
<tr>
<td valign="middle" align="left">GBLUP + fixed-SNPs + fixed PCs</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">0.62</td>
<td valign="middle" align="center">0.69</td>
<td valign="middle" align="center">0.64</td>
<td valign="middle" align="center">0.51</td>
<td valign="middle" align="center">0.48</td>
</tr>
<tr>
<td valign="middle" rowspan="4" align="left">AFW</td>
<td valign="middle" align="left">GBLUP</td>
<td valign="middle" align="center">0.59</td>
<td valign="middle" align="center">0.65</td>
<td valign="middle" align="center">0.66</td>
<td valign="middle" align="center">0.67</td>
<td valign="middle" align="center">0.67</td>
<td valign="middle" align="center">0.67</td>
<td valign="middle" align="center">0.78</td>
<td valign="middle" align="center">0.68</td>
<td valign="middle" align="center">0.70</td>
<td valign="middle" align="center">0.68</td>
</tr>
<tr>
<td valign="middle" align="left">GBLUP + fixed PCs</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">0.30</td>
<td valign="middle" align="center">0.37</td>
<td valign="middle" align="center">0.58</td>
<td valign="middle" align="center">0.48</td>
<td valign="middle" align="center">0.54</td>
</tr>
<tr>
<td valign="middle" align="left">GBLUP + fixed-SNPs</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">0.68</td>
<td valign="middle" align="center">0.78</td>
<td valign="middle" align="center">0.69</td>
<td valign="middle" align="center">0.71</td>
<td valign="middle" align="center">0.69</td>
</tr>
<tr>
<td valign="middle" align="left">GBLUP + fixed-SNPs + fixed PCs</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">0.30</td>
<td valign="middle" align="center">0.36</td>
<td valign="middle" align="center">0.59</td>
<td valign="middle" align="center">0.48</td>
<td valign="middle" align="center">0.55</td>
</tr>
<tr>
<td valign="middle" rowspan="2" align="left">FF</td>
<td valign="middle" align="left">GBLUP</td>
<td valign="middle" align="center">0.35</td>
<td valign="middle" align="center">0.38</td>
<td valign="middle" align="center">0.40</td>
<td valign="middle" align="center">0.41</td>
<td valign="middle" align="center">0.41</td>
<td valign="middle" align="center">0.41</td>
<td valign="middle" align="center">0.43</td>
<td valign="middle" align="center">0.43</td>
<td valign="middle" align="center">0.40</td>
<td valign="middle" align="center">0.45</td>
</tr>
<tr>
<td valign="middle" align="left">GBLUP + fixed PCs</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">0.30</td>
<td valign="middle" align="center">0.34</td>
<td valign="middle" align="center">0.34</td>
<td valign="middle" align="center">0.27</td>
<td valign="middle" align="center">0.35</td>
</tr>
<tr>
<td valign="middle" rowspan="4" align="left">TC</td>
<td valign="middle" align="left">GBLUP</td>
<td valign="middle" align="center">0.51</td>
<td valign="middle" align="center">0.52</td>
<td valign="middle" align="center">0.53</td>
<td valign="middle" align="center">0.54</td>
<td valign="middle" align="center">0.54</td>
<td valign="middle" align="center">0.54</td>
<td valign="middle" align="center">0.59</td>
<td valign="middle" align="center">0.59</td>
<td valign="middle" align="center">0.54</td>
<td valign="middle" align="center">0.58</td>
</tr>
<tr>
<td valign="middle" align="left">GBLUP + fixed PCs</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">0.57</td>
<td valign="middle" align="center">0.61</td>
<td valign="middle" align="center">0.61</td>
<td valign="middle" align="center">0.55</td>
<td valign="middle" align="center">0.60</td>
</tr>
<tr>
<td valign="middle" align="left">GBLUP + fixed-SNPs</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">0.58</td>
<td valign="middle" align="center">0.64</td>
<td valign="middle" align="center">0.64</td>
<td valign="middle" align="center">0.61</td>
<td valign="middle" align="center">0.64</td>
</tr>
<tr>
<td valign="middle" align="left">GBLUP + fixed-SNPs + fixed PCs</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">0.61</td>
<td valign="middle" align="center">0.66</td>
<td valign="middle" align="center">0.66</td>
<td valign="middle" align="center">0.62</td>
<td valign="middle" align="center">0.65</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Marker sets include: whole-genome sequence (WGS) data, LD-pruned SNP sets (LD_2mil to LD_10k), and the optimum density of GWAS-preselected variants (TOP-BLINK, TOP-FarmCPU, TOP-MLMM, TOP-GLMM) for each GWAS-method-by-trait combination. Prediction models include: (1) base GBLUP (without population structure control or fixed-effect SNPs), (2) GBLUP with a fixed-effect SNP (GBLUP + fixed-SNPs), (3) GBLUP with the top six principal components as fixed effects (GBLUP + fixed PCs), and (4) GBLUP with both a fixed-effect SNP and fixed PCs (GBLUP + fixed-SNPs + fixed PCs).</p>
</fn>
</table-wrap-foot>
</table-wrap>
<p>Results from the evaluation of marker density effects under the parental validation strategy using baseline GBLUP models revealed that predictive ability varied with density. Predictive ability ranged from 0.60 to 0.67 for FBC, 0.59 to 0.67 for AFW, 0.35 to 0.41 for FF, and 0.51 to 0.54 for TC (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Table&#xa0;5</bold>
</xref>). Across all traits, predictive ability generally increased with marker density but plateaued beyond LD_20k (~20,000 SNPs), indicating little gain at higher SNP densities. Models incorporating a GRM estimated from the lowest-density marker set (LD_10k) exhibited substantially lower predictive ability compared to those using higher-density marker sets (LD_20k to WGS), which showed only marginal variation in predictive ability among themselves. For TC, predictive ability was relatively stable across marker densities, with a maximum difference of just 0.03 between LD_10k and WGS. Under the 5-fold CV, differences in predictive ability across marker densities were minimal for all traits (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Table&#xa0;7</bold>
</xref>).</p>
</sec>
<sec id="s3_4_2">
<label>3.4.2</label>
<title>Effect of population structure on predictive ability</title>
<p>Incorporating the top six principal components (PCs) as fixed effects to account for population structure resulted in substantial reductions in predictive ability for most traits (<xref ref-type="table" rid="T2">
<bold>Table&#xa0;2</bold>
</xref>). The decrease in predictive ability ranged from 0 to over 100% depending on marker set and validation approach employed, highlighting the dominant influence of population structure on genomic prediction within this gene-pool for these traits. Under parental validation, predictive ability based on WGS data decreased from 0.67 to 0.44 for FBC, from 0.67 to 0.30 for AFW, and from 0.41 to 0.30 for FF when population structure was accounted for (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Table&#xa0;6</bold>
</xref>). The exception was TC, where a slight increase in predictive ability was observed, rising from 0.54 to 0.57. Notably, while population structure correction reduced predictive ability for FBC, this decline was substantially mitigated when the FBC-associated SNP on chromosome 15 was fitted as a fixed effect in GBLUP models. When only the top six PCs were included as fixed effects, predictive ability for FBC dropped by 34%. However, when both the first six PCs and the most significant GWAS-identified SNP were jointly fitted as fixed effects, the reduction in predictive ability was mitigated to just 7%.</p>
<p>Similarly, results from 5-fold cross-validation revealed a marked decline in predictive ability after correcting for population structure (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Table&#xa0;8</bold>
</xref>). However, unlike in the parental validation strategy where the predictive ability for TC remained stable despite population structure correction, the predictive ability in the 5-fold cross-validation declined sharply, dropping from 0.57 to 0.45 when using WGS data.</p>
</sec>
</sec>
<sec id="s3_5">
<label>3.5</label>
<title>Genome-wide association studies</title>
<p>Utilizing three multi-locus GWAS approaches and one single-locus GWAS method on ~2 million SNPs, we identified 24 unique associations across three traits (<xref ref-type="table" rid="T3">
<bold>Table&#xa0;3</bold>
</xref>): fruit blush color (FBC, n = 5; <xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Figure&#xa0;2</bold>
</xref>), average fruit weight (AFW, n = 11; <xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Figure&#xa0;3</bold>
</xref>), and trunk circumference (TC, n = 8; <xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Figure&#xa0;4</bold>
</xref>). Notably, the FBC-associated SNPs on chromosome 15 identified by the GLMM were in very strong LD with each other (mean <italic>r<sup>2</sup>
</italic> = 0.94), forming a distinct peak. FarmCPU identified the most trait-associated SNPs among the four GWAS methods evaluated, identifying 20 significant associations, followed by BLINK (7), and the MLMM (2). In contrast, the GLMM only detected one association. For TC, all significant marker-trait associations were detected in trees assessed at 9 years of age, whereas no significant SNPs were identified in trees assessed at 12 years of age. The comparison of SNP positions with the annotated &#x2018;Alphonso&#x2019; genome suggested that some SNPs were associated with regions containing putative loci for FBC, AFW, and TC previously identified in mango and other tree species (<xref ref-type="table" rid="T4">
<bold>Table&#xa0;4</bold>
</xref>).</p>
<table-wrap id="T3" position="float">
<label>Table&#xa0;3</label>
<caption>
<p>Significant marker-trait associations for average fruit weight (AFW), fruit blush color (FBC), and trunk circumference (TC).</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="left">Trait</th>
<th valign="middle" align="left">Marker name</th>
<th valign="middle" align="left">Chr</th>
<th valign="middle" align="left">Pos (bp)</th>
<th valign="middle" align="left">P-value</th>
<th valign="middle" align="left">MAF</th>
<th valign="middle" align="left">GWAS method</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" rowspan="11" align="left">AFW</td>
<td valign="middle" align="left">NC_058139.1_14599216</td>
<td valign="middle" align="left">3</td>
<td valign="middle" align="left">14599216</td>
<td valign="middle" align="left">2.19e-10</td>
<td valign="middle" align="left">0.08</td>
<td valign="middle" align="left">BLINK</td>
</tr>
<tr>
<td valign="middle" align="left">NC_058143.1_71757</td>
<td valign="middle" align="left">7</td>
<td valign="middle" align="left">71757</td>
<td valign="middle" align="left">7.70e-10</td>
<td valign="middle" align="left">0.30</td>
<td valign="middle" align="left">BLINK</td>
</tr>
<tr>
<td valign="middle" align="left">NC_058151.1_3295704</td>
<td valign="middle" align="left">15</td>
<td valign="middle" align="left">3295704</td>
<td valign="middle" align="left">1.42e-10</td>
<td valign="middle" align="left">0.17</td>
<td valign="middle" align="left">BLINK</td>
</tr>
<tr>
<td valign="middle" align="left">NC_058153.1_7169193</td>
<td valign="middle" align="left">17</td>
<td valign="middle" align="left">7169193</td>
<td valign="middle" align="left">2.12e-20</td>
<td valign="middle" align="left">0.13</td>
<td valign="middle" align="left">BLINK, FarmCPU</td>
</tr>
<tr>
<td valign="middle" align="left">NC_058156.1_9929325</td>
<td valign="middle" align="left">20</td>
<td valign="middle" align="left">9929325</td>
<td valign="middle" align="left">1.33e-10</td>
<td valign="middle" align="left">0.37</td>
<td valign="middle" align="left">BLINK</td>
</tr>
<tr>
<td valign="middle" align="left">NC_058138.1_17016987</td>
<td valign="middle" align="left">2</td>
<td valign="middle" align="left">17016987</td>
<td valign="middle" align="left">1.78e-10</td>
<td valign="middle" align="left">0.49</td>
<td valign="middle" align="left">FarmCPU</td>
</tr>
<tr>
<td valign="middle" align="left">NC_058146.1_6357250</td>
<td valign="middle" align="left">10</td>
<td valign="middle" align="left">6357250</td>
<td valign="middle" align="left">8.97e-09</td>
<td valign="middle" align="left">0.09</td>
<td valign="middle" align="left">FarmCPU</td>
</tr>
<tr>
<td valign="middle" align="left">NC_058149.1_10041368</td>
<td valign="middle" align="left">13</td>
<td valign="middle" align="left">10041368</td>
<td valign="middle" align="left">1.99e-13</td>
<td valign="middle" align="left">0.38</td>
<td valign="middle" align="left">FarmCPU</td>
</tr>
<tr>
<td valign="middle" align="left">NC_058151.1_14147173</td>
<td valign="middle" align="left">15</td>
<td valign="middle" align="left">14147173</td>
<td valign="middle" align="left">1.91e-11</td>
<td valign="middle" align="left">0.11</td>
<td valign="middle" align="left">FarmCPU</td>
</tr>
<tr>
<td valign="middle" align="left">NC_058153.1_2108313</td>
<td valign="middle" align="left">17</td>
<td valign="middle" align="left">2108313</td>
<td valign="middle" align="left">2.10e-09</td>
<td valign="middle" align="left">0.44</td>
<td valign="middle" align="left">FarmCPU</td>
</tr>
<tr>
<td valign="middle" align="left">NC_058156.1_9195450</td>
<td valign="middle" align="left">20</td>
<td valign="middle" align="left">9195450</td>
<td valign="middle" align="left">1.21e-10</td>
<td valign="middle" align="left">0.27</td>
<td valign="middle" align="left">FarmCPU</td>
</tr>
<tr>
<td valign="middle" rowspan="6" align="left">FBC</td>
<td valign="middle" align="left">NC_058151.1_10729807</td>
<td valign="middle" align="left">15</td>
<td valign="middle" align="left">10729807</td>
<td valign="middle" align="left">3.32e-22</td>
<td valign="middle" align="left">0.35</td>
<td valign="middle" align="left">BLINK, FarmCPU, GLMM</td>
</tr>
<tr>
<td valign="middle" align="left">NC_058140.1_17796415</td>
<td valign="middle" align="left">4</td>
<td valign="middle" align="left">17796415</td>
<td valign="middle" align="left">1.92e-20</td>
<td valign="middle" align="left">0.06</td>
<td valign="middle" align="left">FarmCPU</td>
</tr>
<tr>
<td valign="middle" align="left">NC_058143.1_10454361</td>
<td valign="middle" align="left">7</td>
<td valign="middle" align="left">10454361</td>
<td valign="middle" align="left">2.28e-09</td>
<td valign="middle" align="left">0.46</td>
<td valign="middle" align="left">FarmCPU</td>
</tr>
<tr>
<td valign="middle" align="left">NC_058143.1_15901033</td>
<td valign="middle" align="left">7</td>
<td valign="middle" align="left">15901033</td>
<td valign="middle" align="left">1.87e-12</td>
<td valign="middle" align="left">0.47</td>
<td valign="middle" align="left">FarmCPU</td>
</tr>
<tr>
<td valign="middle" align="left">NC_058148.1_6029563</td>
<td valign="middle" align="left">12</td>
<td valign="middle" align="left">6029563</td>
<td valign="middle" align="left">8.04e-10</td>
<td valign="middle" align="left">0.18</td>
<td valign="middle" align="left">FarmCPU</td>
</tr>
<tr>
<td valign="middle" align="left">NC_058151.1_10744410</td>
<td valign="middle" align="left">15</td>
<td valign="middle" align="left">10744410</td>
<td valign="middle" align="left">4.12e-11</td>
<td valign="middle" align="left">0.36</td>
<td valign="middle" align="left">MLMM</td>
</tr>
<tr>
<td valign="middle" rowspan="8" align="left">TC</td>
<td valign="middle" align="left">NC_058143.1_14357156</td>
<td valign="middle" align="left">7</td>
<td valign="middle" align="left">14357156</td>
<td valign="middle" align="left">2.23e-14</td>
<td valign="middle" align="left">0.35</td>
<td valign="middle" align="left">BLINK, FarmCPU, MLMM</td>
</tr>
<tr>
<td valign="middle" align="left">NC_058137.1_13654943</td>
<td valign="middle" align="left">1</td>
<td valign="middle" align="left">13654943</td>
<td valign="middle" align="left">5.37e-09</td>
<td valign="middle" align="left">0.09</td>
<td valign="middle" align="left">FarmCPU</td>
</tr>
<tr>
<td valign="middle" align="left">NC_058138.1_3215561</td>
<td valign="middle" align="left">2</td>
<td valign="middle" align="left">3215561</td>
<td valign="middle" align="left">1.50e-09</td>
<td valign="middle" align="left">0.36</td>
<td valign="middle" align="left">FarmCPU</td>
</tr>
<tr>
<td valign="middle" align="left">NC_058138.1_9666205</td>
<td valign="middle" align="left">2</td>
<td valign="middle" align="left">9666205</td>
<td valign="middle" align="left">2.04e-08</td>
<td valign="middle" align="left">0.07</td>
<td valign="middle" align="left">FarmCPU</td>
</tr>
<tr>
<td valign="middle" align="left">NC_058139.1_20457585</td>
<td valign="middle" align="left">3</td>
<td valign="middle" align="left">20457585</td>
<td valign="middle" align="left">7.01e-11</td>
<td valign="middle" align="left">0.16</td>
<td valign="middle" align="left">FarmCPU</td>
</tr>
<tr>
<td valign="middle" align="left">NC_058148.1_14363648</td>
<td valign="middle" align="left">12</td>
<td valign="middle" align="left">14363648</td>
<td valign="middle" align="left">1.66e-10</td>
<td valign="middle" align="left">0.19</td>
<td valign="middle" align="left">FarmCPU</td>
</tr>
<tr>
<td valign="middle" align="left">NC_058149.1_5010618</td>
<td valign="middle" align="left">13</td>
<td valign="middle" align="left">5010618</td>
<td valign="middle" align="left">7.26e-10</td>
<td valign="middle" align="left">0.16</td>
<td valign="middle" align="left">FarmCPU</td>
</tr>
<tr>
<td valign="middle" align="left">NC_058154.1_7179850</td>
<td valign="middle" align="left">18</td>
<td valign="middle" align="left">7179850</td>
<td valign="middle" align="left">2.44e-10</td>
<td valign="middle" align="left">0.12</td>
<td valign="middle" align="left">FarmCPU</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Table legend: The table displays trait, marker name, chromosome (Chr), position (Pos) in base pairs, GWAS-derived p-value, minor allele frequency (MAF) of the trait-associated SNP, and GWAS Method.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<table-wrap id="T4" position="float">
<label>Table&#xa0;4</label>
<caption>
<p>Candidate genes identified near significant SNP markers associated with fruit blush color (FBC), average fruit weight (AFW), and trunk circumference (TC) in mango.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="left">Trait</th>
<th valign="middle" align="left">Chr</th>
<th valign="middle" align="left">MAF</th>
<th valign="middle" align="left">Distance from SNP (kb)</th>
<th valign="middle" align="left">Candidate gene</th>
<th valign="middle" align="left">Functional role</th>
<th valign="middle" align="left">Reference</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="left">FBC</td>
<td valign="middle" align="left">15</td>
<td valign="middle" align="left">0.35</td>
<td valign="middle" align="left">0.52 kb</td>
<td valign="middle" align="left">MYB114-like transcription factor</td>
<td valign="middle" align="left">Fruit coloration</td>
<td valign="middle" align="left">(<xref ref-type="bibr" rid="B41">Kanzaki et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B64">Plunkett et&#xa0;al., 2019</xref>)</td>
</tr>
<tr>
<td valign="middle" align="left">AFW</td>
<td valign="middle" align="left">2</td>
<td valign="middle" align="left">0.49</td>
<td valign="middle" align="left">158 kb</td>
<td valign="middle" align="left">Cell division control protein</td>
<td valign="middle" align="left">Fruit size</td>
<td valign="middle" align="left">(<xref ref-type="bibr" rid="B27">Devoghalaere et&#xa0;al., 2012</xref>; <xref ref-type="bibr" rid="B42">Karim et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B101">Zhang et&#xa0;al., 2006</xref>)</td>
</tr>
<tr>
<td valign="middle" align="left">AFW</td>
<td valign="middle" align="left">7</td>
<td valign="middle" align="left">0.30</td>
<td valign="middle" align="left">110 kb</td>
<td valign="middle" align="left">Two cell division control proteins</td>
<td valign="middle" align="left">Fruit size</td>
<td valign="middle" align="left">(<xref ref-type="bibr" rid="B27">Devoghalaere et&#xa0;al., 2012</xref>; <xref ref-type="bibr" rid="B42">Karim et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B101">Zhang et&#xa0;al., 2006</xref>)</td>
</tr>
<tr>
<td valign="middle" align="left">AFW</td>
<td valign="middle" align="left">13</td>
<td valign="middle" align="left">0.38</td>
<td valign="middle" align="left">12 kb</td>
<td valign="middle" align="left">Two auxin response factors</td>
<td valign="middle" align="left">Fruit size</td>
<td valign="middle" align="left">(<xref ref-type="bibr" rid="B27">Devoghalaere et&#xa0;al., 2012</xref>)</td>
</tr>
<tr>
<td valign="middle" align="left">AFW</td>
<td valign="middle" align="left">13</td>
<td valign="middle" align="left">0.38</td>
<td valign="middle" align="left">26 kb</td>
<td valign="middle" align="left">Ethylene-responsive transcription factor</td>
<td valign="middle" align="left">Fruit size</td>
<td valign="middle" align="left">(<xref ref-type="bibr" rid="B5">Bally et&#xa0;al., 2021</xref>)</td>
</tr>
<tr>
<td valign="middle" align="left">AFW</td>
<td valign="middle" align="left">15</td>
<td valign="middle" align="left">0.17</td>
<td valign="middle" align="left">33 kb</td>
<td valign="middle" align="left">GDSL esterase/lipase</td>
<td valign="middle" align="left">Fruit size</td>
<td valign="middle" align="left">(<xref ref-type="bibr" rid="B5">Bally et&#xa0;al., 2021</xref>)</td>
</tr>
<tr>
<td valign="middle" align="left">AFW</td>
<td valign="middle" align="left">15</td>
<td valign="middle" align="left">0.17</td>
<td valign="middle" align="left">160 kb</td>
<td valign="middle" align="left">Cell number regulator</td>
<td valign="middle" align="left">Fruit size</td>
<td valign="middle" align="left">(<xref ref-type="bibr" rid="B27">Devoghalaere et&#xa0;al., 2012</xref>)</td>
</tr>
<tr>
<td valign="middle" align="left">TC</td>
<td valign="middle" align="left">2</td>
<td valign="middle" align="left">0.07</td>
<td valign="middle" align="left">21 kb</td>
<td valign="middle" align="left">Growth regulating factor gene</td>
<td valign="middle" align="left">Tree trunk diameter</td>
<td valign="middle" align="left">(<xref ref-type="bibr" rid="B96">Wu et&#xa0;al., 2021</xref>)</td>
</tr>
<tr>
<td valign="middle" align="left">TC</td>
<td valign="middle" align="left">2</td>
<td valign="middle" align="left">0.36</td>
<td valign="middle" align="left">6 kb and 16 kb</td>
<td valign="middle" align="left">Two auxin efflux carrier genes</td>
<td valign="middle" align="left">Tree growth</td>
<td valign="middle" align="left">(<xref ref-type="bibr" rid="B67">Qi et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B99">Zhang et&#xa0;al., 2015</xref>)</td>
</tr>
<tr>
<td valign="middle" align="left">TC</td>
<td valign="middle" align="left">7</td>
<td valign="middle" align="left">0.35</td>
<td valign="middle" align="left">68 kb</td>
<td valign="middle" align="left">GATA transcription factor</td>
<td valign="middle" align="left">Tree growth</td>
<td valign="middle" align="left">(<xref ref-type="bibr" rid="B3">An et&#xa0;al., 2014</xref>)</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Candidate genes were identified based on alignment with the annotated &#x2018;Alphonso&#x2019; reference genome. The table lists the associated trait, chromosome (Chr), minor allele frequency (MAF) of the trait-associated SNP, distance between the SNP and candidate gene, the candidate gene or transcription factor, its functional role, and supporting references where the gene or transcription factor&#x2019;s role has previously been reported.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<sec id="s3_5_1">
<label>3.5.1</label>
<title>Genotype and GEBVs relationship</title>
<p>Reliable trait-associated SNPs (identified by at least two GWAS methods) showed clear effects on phenotypic variation, as revealed by GEBVs for the three genotypic classes: homozygous reference, heterozygous, and homozygous alternate allele (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Figure&#xa0;5</bold>
</xref>&#x2013;<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Figure&#xa0;7</bold>
</xref>). For FBC, the SNP on chromosome 15 (G/A) showed that cultivars with the GG genotype (e.g., &#x2018;Ah Ha!&#x2019;, &#x2018;Tommy Atkins&#x2019;, and &#x2018;Irwin&#x2019;) had significantly higher FBC ratings (<italic>p</italic> &lt; 0.0005, mean GEBV = 2.0) compared to those carrying the A allele either in homozygous form (mean GEBV = 1.2; e.g., &#x2018;Dashehari&#x2019;, &#x2018;Mallika&#x2019;, and &#x2018;Arumanis A&#x2019;) or heterozygous form (mean GEBV = 1.0; e.g., &#x2018;Maha Chanook&#x2019;, &#x2018;Alphonso&#x2019;, and &#x2018;Carabao Pep&#x2019;). For AFW, the SNP on chromosome 17 (A/G) revealed that cultivars with the A allele in homozygous form had significantly lower fruit weight (<italic>p</italic> &lt; 0.0005; mean GEBV = 322.0 g) than the heterozygous cultivars (mean GEBV = 415.2 g). For TC, the SNP on chromosome 7 (T/A) revealed that AA genotypes (e.g., &#x2018;Manjeera&#x2019;, &#x2018;Lippens&#x2019;) had significantly lower trunk circumference (<italic>p</italic> &lt; 0.0005; mean GEBV = 45.5 cm) compared to cultivars carrying the T allele either in homozygous form (mean GEBV = 56.5 cm) or heterozygous form (mean GEBV = 52.4 cm). Notably, heterozygous (T/A) genotypes also had significantly smaller trunk circumference (<italic>p</italic> &lt; 0.0005) than homozygous TT genotypes.</p>
</sec>
</sec>
<sec id="s3_6">
<label>3.6</label>
<title>Incorporation of GWAS results in GBLUP models</title>
<sec id="s3_6_1">
<label>3.6.1</label>
<title>Preselected variants from GWAS increased predictive ability</title>
<p>Models incorporating a GRM derived from variants preselected based on the highest ranked probability of effect as estimated using GWAS improved predictive ability across all traits, with improvements of up to 93% under parental validation (<xref ref-type="table" rid="T2">
<bold>Table&#xa0;2</bold>
</xref>). The magnitude of these improvements varied depending on the trait, density of GWAS-preselected variants, GWAS method applied, and whether population structure was accounted for. When using base models (i.e., models without population structure correction or fixed-effect SNPs) under the parental validation strategy, preselecting variants based on GWAS showed an advantage depending on the GWAS method used to identify variants, particularly for AFW and to a lesser extent for FBC, FF, and TC (<xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2A</bold>
</xref>). The predictive ability for AFW was markedly higher when using 20,000 TOP-BLINK GWAS-preselected variants, reaching 0.78, compared to 0.67 using the complete WGS dataset. In contrast, improvements in predictive ability for other traits were more modest, increasing from 0.67 to 0.71 for FBC using 100,000 SNPs from the TOP-FarmCPU set, from 0.54 to 0.59 for TC using either 20,000 or 50,000 SNPs from the TOP-BLINK or TOP-FarmCPU set, and from 0.41 to 0.45 for FF using 1,000 SNPs from the TOP-GLMM set. However, under 5-fold cross-validation using models that did not account for population structure, GWAS-based SNP preselection did not lead to improvements in predictive ability across any of the traits (<xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3A</bold>
</xref>, <xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Table&#xa0;7</bold>
</xref>).</p>
<fig id="f2" position="float">
<label>Figure&#xa0;2</label>
<caption>
<p>Predictive ability of breeding population parent phenotypes: <bold>(A)</bold> without accounting for population structure and <bold>(B)</bold> while accounting for population structure, using models without fixed-effect SNPs. Bars represent predictive abilities across marker sets: WGS data and GWAS-preselected variants (TOP-BLINK, TOP-FarmCPU, TOP-MLMM, TOP-GLMM). Notably, under scenario A, predictive ability for AFW increased from 0.67 to 0.78 when 20,000 TOP-BLINK SNPs were used instead of WGS data.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-16-1664012-g002.tif">
<alt-text content-type="machine-generated">Bar charts comparing predictive abilities across four traits&#x2014;fruit blush color, average fruit weight, fruit firmness, and trunk circumference&#x2014;each containing bars for five marker sets: WGS (red), TOP-BLINK (green), TOP-FarmCPU (yellow), TOP-MLMM (blue), and TOP-GLMM (orange), with predictive ability values shown above each bar ranging from 0.40 to 0.78 in models without population structure correction (A) and from 0.27 to 0.61 in models with population structure correction (B).</alt-text>
</graphic>
</fig>
<fig id="f3" position="float">
<label>Figure&#xa0;3</label>
<caption>
<p>Predictive ability of gene pool individuals under 5-fold cross-validation: <bold>(A)</bold> without accounting for population structure and <bold>(B)</bold> with population structure correction, using models without fixed-effect SNPs. Bars represent predictive abilities across different marker sets, including WGS data (WGS) and GWAS-preselected variants (TOP-BLINK, TOP-FarmCPU, TOP-MLMM, TOP-GLMM). Notably, under scenario B, predictive ability for AFW increased from 0.32 to 0.48 when 1,000 TOP-FarmCPU SNPs were used instead of WGS data.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-16-1664012-g003.tif">
<alt-text content-type="machine-generated">Bar charts comparing the predictive ability of different models for four traits&#x2014;fruit blush color, average fruit weight, fruit firmness, and trunk circumference&#x2014;where chart (A) shows results for WGS, TOP-BLINK, TOP-FarmCPU, TOP-MLMM, and TOP-GLMM in models ignoring population structure with predictive abilities ranging from 0.28 to 0.81, and chart (B) presents corresponding results when population structure is accounted for, generally showing lower predictive abilities from 0.08 to 0.50; marker sets are color-coded as WGS (red), TOP-BLINK (green), TOP-FarmCPU (yellow), TOP-MLMM (blue), and TOP-GLMM (orange).</alt-text>
</graphic>
</fig>
<p>The increases in predictive ability observed with GWAS-preselected variants relative to WGS data were much larger when population structure was accounted for (<xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2B</bold>
</xref>). Under the parental validation strategy, in GBLUP models adjusted for population structure, predictive ability for AFW improved by 93%, increasing from 0.30 to 0.58 when 15,000 variants from the TOP-FarmCPU set were used instead of WGS data. Improvements in predictive ability were also observed for FBC and FF, rising from 0.44 to 0.50 using either 50,000 or 100,000 TOP-FarmCPU SNPs for FBC, and from 0.30 to 0.35 using the top 1,000 SNPs selected by GLMM for FF. In contrast, there was little variation in predictive ability for TC between models that included GWAS-preselected variants with or without adjustment for population structure.</p>
<p>A comparable pattern was observed under the 5-fold cross-validation strategy in GBLUP models that included population structure correction (<xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3B</bold>
</xref>). Specifically, predictive ability increased by up to 29% for FBC (from 0.35 to 0.45), 50% for AFW (from 0.32 to 0.48), and 150% for FF (from 0.08 to 0.20), while TC showed a modest improvement of 11% (from 0.45 to 0.50). The highest predictive abilities under 5-fold cross-validation were achieved using 10,000 SNPs from TOP-GLMM for FBC, 1,000 SNPs from TOP-MLMM for AFW, 1,000 SNPs from all GWAS methods for FF, and 10,000 or more SNPs from either TOP-FarmCPU or TOP-GLMM for TC (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Table&#xa0;8</bold>
</xref>). Notably, for FF and TC, the predictive abilities obtained using GWAS-preselected variants were comparable to those achieved using LD-pruned marker sets (LD_10k to LD_2mil).</p>
<p>Predictive abilities using GWAS-preselected variants showed substantial variation depending on marker density and validation strategy, with no consistent trend across traits (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Tables&#xa0;5</bold>
</xref>, <xref ref-type="supplementary-material" rid="SM1">
<bold>7</bold>
</xref>). Under the parental validation strategy in models ignoring population structure, the highest predictive abilities were achieved using 20,000 SNPs from BLINK for AFW, 100,000 SNPs from FarmCPU for FBC, 1,000 SNPs from GLMM for FF, and either 20,000 or 50,000 SNPs from BLINK or FarmCPU for TC. In contrast, under 5-fold cross validation, predictive abilities remained relatively stable across different marker densities (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Table&#xa0;7</bold>
</xref>). Differences in maximum predictive ability between GWAS models were generally small (&lt; 0.03), except for FBC and AFW in models that accounted for population structure (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Table&#xa0;8</bold>
</xref>). In these cases, the highest predictive abilities were achieved using 10,000 and 1,000 SNPs from TOP-GLMM and TOP-MLMM, respectively. All subsequent results are based on the parental validation strategy using both the full WGS dataset and the optimal set of GWAS-preselected variants for each trait.</p>
</sec>
<sec id="s3_6_2">
<label>3.6.2</label>
<title>Fixed-effect SNPs increased predictive ability for fruit blush color and trunk circumference</title>
<p>The impact of incorporating reliable markers as fixed effects on predictive ability varied depending on the trait, marker set, and whether population structure was accounted for (<xref ref-type="fig" rid="f4">
<bold>Figure&#xa0;4</bold>
</xref>, <xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Table&#xa0;9</bold>
</xref>, <xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Table&#xa0;10</bold>
</xref>). Our findings indicate that incorporating a reliable SNP as a fixed effect in prediction models markedly improved predictive ability for FBC and TC, with gains of up to 0.26 and 0.07, respectively.</p>
<fig id="f4" position="float">
<label>Figure&#xa0;4</label>
<caption>
<p>Predictive ability of breeding population parent phenotypes: <bold>(A)</bold> without population structure control and including fixed-effect SNPs, and <bold>(B)</bold> with population structure correction and fixed-effect SNPs. Bars represent predictive abilities across marker sets: WGS data (WGS), WGS with fixed-effect SNP (WGS + fixedSNP), and GWAS-preselected variants with fixed-effect SNP (TOP-BLINK + fixedSNP, TOP-FarmCPU + fixedSNP, TOP-MLMM + fixedSNP, TOP-GLMM + fixedSNP). Notably, under scenario B, predictive ability for FBC increased from 0.44 to 0.69 when using 50,000 TOP-BLINK SNPs with fixed-effect SNP instead of using WGS data alone.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-16-1664012-g004.tif">
<alt-text content-type="machine-generated">Bar charts showing the predictive ability of six models for three traits&#x2014;fruit blush color, average fruit weight, and trunk circumference&#x2014;where the upper chart (A) displays higher predictive values than the lower chart (B); models are color-coded as red (WGS), black (WGS + fixedSNP), green (TOP-BLINK + fixedSNP), yellow (TOP-FarmCPU + fixedSNP), blue (TOP-MLMM + fixedSNP), and orange (TOP-GLMM + fixedSNP), with predictive ability values ranging from 0.54 to 0.78 in models ignoring population structure (A) and from 0.30 to 0.69 in models accounting for population structure (B).</alt-text>
</graphic>
</fig>
<p>For FBC, incorporating the reliable trait-associated SNP on chromosome 15 as a fixed effect in GBLUP models without accounting for population structure resulted in an improvement in predictive ability ranging from 0.01 to 0.07 compared to models without the fixed-effect SNP. Notably, predictive ability increased from 0.67 to 0.74 with WGS data and from 0.70 to 0.77 using 100,000 TOP-BLINK markers when the FBC-reliable marker was included as a fixed effect. Strikingly, under population structure correction, the enhancement in predictive ability due to the inclusion of the FBC-reliable marker as a fixed effect was even more pronounced, with gains ranging from 0.12 to 0.26. In these population structure corrected models, predictive ability increased from 0.44 to 0.62 with WGS data, from 0.45 to 0.69 using 50,000 TOP-BLINK markers, from 0.50 to 0.62 using either 50,000 or 100,000 TOP-FarmCPU markers, from 0.37 to 0.51 using 15,000 TOP-MLMM markers, and from 0.33 to 0.48 using 10,000 TOP-GLMM markers. Further analysis using ~ 2 million SNPs showed that this FBC-associated SNP accounted for 36% of the genetic variance (results not shown).</p>
<p>Incorporating the reliable TC-associated SNP on chromosome 7 as a fixed effect in GBLUP models also improved predictive ability, with gains of up to 0.07 in models without population structure control, and up to 0.06 when population structure was accounted for. For example, predictive ability increased from 0.54 to 0.58 with WGS data, from 0.59 to 0.64 using 20,000 or 50,000 SNPs from either BLINK or FarmCPU, and from 0.54 to 0.61 with 20,000 TOP-MLMM markers when the reliable TC-associated SNP was included as a fixed effect in models without population structure correction. A similar pattern was observed when population structure was accounted for, with the predictive ability for WGS data increasing from 0.57 to 0.61, from 0.61 to 0.66 using 50,000 SNPs from either BLINK or FarmCPU, and from 0.55 to 0.62 using 15,000 TOP-MLMM markers. In contrast, for AFW, adding fixed-effect markers to the prediction models did not improve predictive ability.</p>
</sec>
<sec id="s3_6_3">
<label>3.6.3</label>
<title>Improved prediction via combined use of GWAS-preselected variants and fixed-effect SNPs</title>
<p>Combining GWAS-preselected variants with fixed-effect SNPs substantially improved predictive ability for FBC and TC compared to models using WGS data alone or GWAS-preselected variants alone. The highest predictive abilities for these traits were achieved using this integrated approach both with and without population structure correction (<xref ref-type="table" rid="T2">
<bold>Table&#xa0;2</bold>
</xref>; <xref ref-type="fig" rid="f4">
<bold>Figure&#xa0;4</bold>
</xref>). For example, substituting WGS data with TOP-BLINK markers improved predictive ability for FBC from 0.67 to 0.70 (<xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2A</bold>
</xref>). Incorporating the FBC-associated reliable SNP on chromosome 15 as a fixed effect increased predictive ability with WGS data from 0.67 to 0.74 (a 0.07 increase). Notably, combining 100,000 GWAS-preselected variants from BLINK with the fixed-effect SNP yielded a substantial improvement, boosting predictive ability by 0.10 (from 0.67 with WGS data to 0.77 using a combination of GWAS-preselected variants and the fixed-effect SNP). A similar trend was observed when population structure was accounted for, with the highest predictive ability (0.69) achieved by incorporating the FBC-reliable SNP as a fixed effect in a GBLUP model based on a GRM derived from 50,000 TOP-BLINK markers. This predictive ability represents a substantial improvement, exceeding that of WGS data alone by 0.25 and that of TOP-BLINK markers alone by 0.24, and surpassing WGS data with a fixed-effect SNP by 0.07.</p>
<p>A similar trend was observed for TC, where the highest predictive abilities were achieved by integrating GWAS-preselected variants with fixed-effect SNPs in a single GBLUP model. Specifically, including 20,000 or 50,000 GWAS-preselected variants from TOP-FarmCPU or TOP-BLINK alongside fixed-effect SNPs improved predictive ability by 0.1, increasing from 0.54 with WGS data to 0.64. This enhancement in predictive ability surpasses the gains of 0.06 and 0.05 obtained when using either WGS data plus fixed-effect SNPs or GWAS-preselected variants alone. Notably, a comparable pattern emerged in GBLUP models that accounted for population structure, with predictive abilities remaining nearly identical to those observed in models without population structure correction.</p>
<p>When considering the optimal marker density for GWAS-preselected variants under parental validation, defined as the density yielding the highest predictive ability, TOP-BLINK and TOP-FarmCPU both achieved the highest predictive abilities in eight of the fourteen trait-by-scenario combinations (four traits and four scenarios [GBLUP, GBLUP + fixed SNP, GBLUP + fixed PCs, and GBLUP + fixed PCs + fixed SNP]). TOP-GLMM produced the highest predictive ability in three combinations, while MLMM did not result in the highest predictive ability in any of the scenarios. In contrast, the performance of GWAS models under 5-fold cross-validation was comparable across all traits when population structure was ignored. However, under models that accounted for population structure, GWAS-preselected variants identified using the MLMM and GLMM yielded the highest predictive ability for AFW and FBC, respectively.</p>
</sec>
</sec>
</sec>
<sec id="s4" sec-type="discussion">
<label>4</label>
<title>Discussion</title>
<sec id="s4_1">
<label>4.1</label>
<title>Effective population size and linkage disequilibrium</title>
<p>Our results indicate that estimates of effective population size (<italic>Ne</italic>) in the QMBP gene-pool collection (<italic>Ne</italic> = 129, excluding accessions from Southeast Asia) and the parental population (<italic>Ne</italic> = 104) are well above the recommended minimum of 50 required to minimize short-term inbreeding (<xref ref-type="bibr" rid="B21">Clarke et&#xa0;al., 2024</xref>). These large estimates of <italic>Ne</italic> indicate a high number of independently segregating chromosome segments, suggesting that high density marker sets are needed to ensure marker-QTL LD for accurate genomic prediction (<xref ref-type="bibr" rid="B30">Grattapaglia, 2014</xref>). The estimates of <italic>Ne</italic> in both the gene-pool collection and parental population suggest that these populations maintain sufficient genetic diversity to sustain long-term genetic gains (<xref ref-type="bibr" rid="B93">White et&#xa0;al., 2007</xref>) in the QMBP.</p>
<p>Mango is an outcrossing and highly heterozygous species (<xref ref-type="bibr" rid="B95">Wilkinson et&#xa0;al., 2022</xref>) and thus would be expected to have rapid LD decay (<xref ref-type="bibr" rid="B85">Vos et&#xa0;al., 2017</xref>). The rapid LD decay observed in our study likely reflects the substantial genetic diversity within the gene-pool collection (<xref ref-type="bibr" rid="B95">Wilkinson et&#xa0;al., 2022</xref>), in agreement with the high <italic>Ne</italic> estimates. Specifically, LD decay of <italic>r<sup>2</sup>
</italic> = 0.2 (the commonly considered minimum LD threshold for accurate genomic prediction) occurred at 3.6 kb in our study using WGS data. This is comparable to estimates in other outcrossing species like <italic>Eucalyptus</italic> (4 kb; <xref ref-type="bibr" rid="B15">Butler et&#xa0;al., 2022</xref>) and <italic>Populus</italic> (3&#x2013;6 kb; <xref ref-type="bibr" rid="B75">Slavov et&#xa0;al., 2012</xref>) but longer than that reported in a diverse historical apple population (0.1 kb; <xref ref-type="bibr" rid="B56">Migicovsky et&#xa0;al., 2016</xref>). The rapid LD decay observed in this study should increase the resolution of GWAS by allowing for accurate identification of causal variants. This improvement stems from the presence of short haplotype blocks, which mitigate the confounding effects of strong LD between causal mutations and numerous non-causal loci, thereby reducing the noise-to-signal ratio and improving GWAS resolution (<xref ref-type="bibr" rid="B39">Jang et&#xa0;al., 2023b</xref>). The mean <italic>r<sup>2</sup>
</italic> between adjacent WGS SNPs in our study (0.33) is comparable to values reported in apple (0.32; <xref ref-type="bibr" rid="B44">Kumar et&#xa0;al., 2012</xref>) and pear (0.33; <xref ref-type="bibr" rid="B57">Minamikawa et&#xa0;al., 2018</xref>), indicating a strong potential for implementing genomic selection in mango.</p>
</sec>
<sec id="s4_2">
<label>4.2</label>
<title>Genome-wide association studies</title>
<sec id="s4_2_1">
<label>4.2.1</label>
<title>Fruit blush color</title>
<p>This study identified five distinct and statistically significant associations for FBC (<xref ref-type="table" rid="T3">
<bold>Table&#xa0;3</bold>
</xref>, <xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Figure&#xa0;2</bold>
</xref>). Notably, a MYB114 transcription factor was located just 0.5 kb from a key FBC-associated marker on chromosome 15, consistently identified by three different GWAS methods. MYB transcription factors are widely reported to regulate fruit skin color in multiple fruit tree species, including mango (<xref ref-type="bibr" rid="B41">Kanzaki et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B94">Wilkinson et&#xa0;al., 2025</xref>), apple (<xref ref-type="bibr" rid="B64">Plunkett et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B77">Sun et&#xa0;al., 2021</xref>), pear (<xref ref-type="bibr" rid="B22">Cong et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B102">Zhang et&#xa0;al., 2021</xref>) and kiwifruit (<xref ref-type="bibr" rid="B1">Ampomah-Dwamena et&#xa0;al., 2019</xref>). These genes are central regulators of the anthocyanin biosynthesis pathway, which plays a critical role in pigmentation of fruit peels (<xref ref-type="bibr" rid="B29">Gao et&#xa0;al., 2021</xref>). The well-established role of anthocyanin accumulation in contributing to red skin coloration in fruits is consistent with previous findings in mango. <xref ref-type="bibr" rid="B87">Wang et&#xa0;al. (2020)</xref> demonstrated that anthocyanin biosynthesis genes were significantly upregulated in the peel of red-skinned mango cultivars compared to yellow- or green-skinned types. Additionally, <xref ref-type="bibr" rid="B41">Kanzaki et&#xa0;al. (2020)</xref> reported that exposure to light stimulus increased the expression of <italic>MiMYB1</italic> and <italic>MiMYB4</italic> transcription factors in reddened mango fruit, further highlighting the involvement of MYB transcription factors and light exposure in regulating peel coloration.</p>
</sec>
<sec id="s4_2_2">
<label>4.2.2</label>
<title>Fruit weight</title>
<p>This study identified 11 novel SNPs significantly associated with AFW (<xref ref-type="table" rid="T3">
<bold>Table&#xa0;3</bold>
</xref>, <xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Figure&#xa0;3</bold>
</xref>), a key trait influencing consumer appeal and market value, and therefore a major target for improvement in mango breeding programs (<xref ref-type="bibr" rid="B5">Bally et&#xa0;al., 2021</xref>). Our results support the role of hormone-mediated cell division in determining fruit weight, consistent with findings in other horticultural fruit tree species (<xref ref-type="bibr" rid="B42">Karim et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B46">Li et&#xa0;al., 2024</xref>; <xref ref-type="bibr" rid="B101">Zhang et&#xa0;al., 2006</xref>). Notably, two auxin response factors were identified within 12 kb of an AFW-associated SNP on chromosome 13, suggesting a likely regulatory role of auxin signaling in fruit weight variation. Auxin response factors have previously been implicated in apple fruit weight variation through modulation of cell division and expansion (<xref ref-type="bibr" rid="B27">Devoghalaere et&#xa0;al., 2012</xref>). Additionally, an AFW-associated SNP on chromosome 7 was located ~110 kb from two cell division control proteins, reinforcing the mechanistic link between cell division during mango fruit development and fruit size. Similar associations have been reported in sweet cherry (<italic>Prunus avium</italic> L.), where a fruit size QTL was closely linked to a gene governing cell number, underscoring the conserved nature of these genetic mechanisms across species (<xref ref-type="bibr" rid="B26">De Franceschi et&#xa0;al., 2013</xref>).</p>
</sec>
<sec id="s4_2_3">
<label>4.2.3</label>
<title>Trunk circumference</title>
<p>We identified eight unique marker-trait associations for TC across seven chromosomes (<xref ref-type="table" rid="T3">
<bold>Table&#xa0;3</bold>
</xref>, <xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Figure&#xa0;4</bold>
</xref>). Our analyses identified a GATA transcription factor located within 70 kb of a TC-associated SNP on chromosome 7. GATA transcription factors have been reported to regulate tree growth in <italic>Populus</italic> (<xref ref-type="bibr" rid="B4">An et&#xa0;al., 2020</xref>). BLAST analysis revealed that the GATA transcription factor identified in our study shares 86% sequence similarity with the one reported in <italic>Populus</italic> (<xref ref-type="bibr" rid="B4">An et&#xa0;al., 2020</xref>), suggesting similar regulatory mechanisms in mango tree growth. GATA transcription factors are known to modulate the expression of auxin efflux carrier genes, facilitating the basipetal movement of auxins to the roots (<xref ref-type="bibr" rid="B4">An et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B3">An et&#xa0;al., 2014</xref>). In our study, two auxin efflux carrier genes were located just 6 kb and 16 kb from TC-associated SNPs on chromosome 2, further supporting the potential regulatory role of auxin transport in mango tree growth.</p>
<p>Prior studies strongly support a model in which plant dwarfism results from reduced expression of PIN genes (auxin efflux carriers) in stem bark tissues, leading to impaired auxin transport to the roots. This disruption limits root growth and cytokinin biosynthesis, ultimately constraining shoot development (<xref ref-type="bibr" rid="B2">An et&#xa0;al., 2017</xref>; <xref ref-type="bibr" rid="B48">Li et&#xa0;al., 2018</xref>). These mechanisms align with previous studies in apple, where use of dwarfing inter-stock (M9) led to decreased expression of auxin efflux carrier genes in stem bark tissues, suppressing the basipetal movement of auxins and leading to reduced root and shoot development (<xref ref-type="bibr" rid="B99">Zhang et&#xa0;al., 2015</xref>). Similar findings in pear demonstrated significantly higher expression levels of the <italic>PcPIN-L</italic> auxin efflux carrier gene in standard-size trees compared to dwarf types (<xref ref-type="bibr" rid="B67">Qi et&#xa0;al., 2020</xref>), further reinforcing the role of auxin transport in tree growth regulation.</p>
<p>In addition to the GATA transcription factor and auxin efflux carrier proteins, we identified a growth-regulating factor gene located approximately 21 kb from a TC-associated SNP on chromosome 2. This, together with the proximity of auxin efflux carrier genes and the GATA transcription factor to TC-associated SNPs, suggests the involvement of a coordinated regulatory network governing tree growth in mango. The trait-associated markers identified for FBC, AFW and TC in this study represent a valuable resource for marker-assisted breeding in mango, pending validation in independent populations.</p>
</sec>
<sec id="s4_2_4">
<label>4.2.4</label>
<title>Multi-locus GWAS are powerful at detecting trait-associated SNPs</title>
<p>Our findings underscore the superior statistical power of multi-locus GWAS methods compared to single-locus approaches. Consistent with previous studies (<xref ref-type="bibr" rid="B18">Cebeci et&#xa0;al., 2023</xref>; <xref ref-type="bibr" rid="B36">Huang et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B57">Minamikawa et&#xa0;al., 2018</xref>), multi-locus GWAS methods, particularly BLINK and FarmCPU, identified more significant marker-trait associations than the single-locus GWAS approach (GLMM). The increased power of multi-locus GWAS stems from their ability to account for LD between SNPs (as in BLINK) while simultaneously testing multiple markers, enhancing the detection of small-effect loci associated with a trait (<xref ref-type="bibr" rid="B74">Segura et&#xa0;al., 2012</xref>; <xref ref-type="bibr" rid="B86">Wang et&#xa0;al., 2016</xref>). BLINK and FarmCPU, which use multi-locus strategies and iterative inclusion of pseudo-QTNs, tend to capture both large- and small-effect loci more robustly, especially in polygenic traits. In contrast, MLMM&#x2019;s stepwise regression approach appears to underperform, likely due to over-adjustment for population structure and the inherent sensitivity of its sequential covariate inclusion, which may mask genuine signals.</p>
<p>Our results, particularly for AFW where BLINK and FarmCPU identified more significant marker-trait associations, highlight the value of this multi-method GWAS strategy. These findings are consistent with reports by <xref ref-type="bibr" rid="B57">Minamikawa et&#xa0;al. (2018)</xref> and <xref ref-type="bibr" rid="B45">Kumar et&#xa0;al. (2019)</xref>, who identified a higher number of trait-associated SNPs in pears (<italic>Pyrus pyrifolia</italic>) by employing multiple GWAS methods rather than relying on a single approach. Integrating results across multiple GWAS methods is a powerful strategy to identify additional marker-trait associations as no single method is optimal for all traits. Moreover, loci detected by the different methods do not completely overlap (<xref ref-type="bibr" rid="B103">Zhou et&#xa0;al., 2023</xref>). The use of a combination of complementary GWAS methods not only strengthens statistical robustness but also increases confidence in associations consistently detected across analyses, making these associations strong candidates for marker development and functional validation.</p>
</sec>
</sec>
<sec id="s4_3">
<label>4.3</label>
<title>Genomic prediction</title>
<sec id="s4_3_1">
<label>4.3.1</label>
<title>Simply increasing marker density to WGS level does not increase predictive ability</title>
<p>In our study, we observed that increasing marker density beyond a certain threshold, even up to WGS level, did not yield further improvements in predictive ability (<xref ref-type="table" rid="T2">
<bold>Table&#xa0;2</bold>
</xref>). These findings are consistent with previous studies (<xref ref-type="bibr" rid="B10">Bedhane et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B59">Moghaddar et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B68">Raymond et&#xa0;al., 2018b</xref>; <xref ref-type="bibr" rid="B69">Raymond et&#xa0;al., 2018c</xref>; <xref ref-type="bibr" rid="B81">van Binsbergen et&#xa0;al., 2015</xref>; <xref ref-type="bibr" rid="B83">VanRaden et&#xa0;al., 2017</xref>) that also found little or no improvement in prediction accuracy when using WGS variants compared to lower-density or high-density SNP chips. A plausible explanation is that WGS data include many variants that are not in strong LD with the causative loci (<xref ref-type="bibr" rid="B81">van Binsbergen et&#xa0;al., 2015</xref>). These non-informative markers may not capture the QTL effects or accurately reflect genetic relationships at causal loci, potentially undermining the performance of GP models through over-shrinkage of QTL effects. This phenomenon likely reflects the balance between capturing true causal variation and overfitting to random, non-informative variation. Our analyses using different LD-pruned subsets (e.g., LD_2mil, LD_800k, etc.) indicated that predictive ability tended to plateau or even decline when the number of markers exceeded an optimal threshold. This threshold is inherently linked to the underlying LD structure and genetic architecture of the trait in question.</p>
</sec>
<sec id="s4_3_2">
<label>4.3.2</label>
<title>Marker preselection could enhance genomic predictive ability</title>
<p>This study showed that GRMs constructed using GWAS-preselected variants resulted in higher predictive abilities across the four studied traits compared to GRMs built using all WGS variants (<xref ref-type="table" rid="T2">
<bold>Table&#xa0;2</bold>
</xref>, <xref ref-type="fig" rid="f2">
<bold>Figures&#xa0;2</bold>
</xref>, <xref ref-type="fig" rid="f3">
<bold>3</bold>
</xref>). These findings highlight that preselecting WGS markers likely to be in LD with causal mutations, while excluding those that do not capture genetic relationships at causal loci, can improve genomic predictive ability. Thus, it appears that including markers not in LD with causative mutations in GRM construction may cause the realized genetic relationships to diverge from true relationships at causal loci, thereby reducing the performance of GBLUP models. However, when markers preselected for their potential causal effects are used, the GRM is dominated by SNPs in high LD with QTL for the target trait. Thus, the trait-specific GRM may better capture the genetic relationships among individuals at unobserved causal loci, potentially enhancing the accuracy of genomic predictions. Our results are consistent with those of <xref ref-type="bibr" rid="B79">Tan and Ingvarsson (2022)</xref> who showed that when the top 1% of markers from GWAS are selected, the accuracy of genomic predictions can be increased significantly. <xref ref-type="bibr" rid="B19">Chen et&#xa0;al. (2023)</xref> also showed that performing GP using a GRM built using 100 preselected markers resulted in improved prediction accuracies compared to models based on all markers.</p>
<p>While our results clearly demonstrate that the integration of GWAS-preselected variants improves predictive ability, we acknowledge that validation confined to a single, relatively small dataset may limit the external applicability and generalizability of our findings. Such internal validation alone does not adequately account for potential biases introduced by population-specific genetic structure or unique environmental factors. Although we employed a 5-fold cross-validation strategy to strengthen robustness of our model assessment, external validation in large, independent datasets such as a full-sib population remains essential. Such validation would verify whether the observed improvement in predictive performance genuinely reflects enhanced capture of causal genetic variation.</p>
</sec>
<sec id="s4_3_3">
<label>4.3.3</label>
<title>Fixed-effect SNPs improve predictive ability</title>
<p>While the use of GWAS-preselected variants increased genomic predictive ability in our analyses, this approach still suffers from the assumption of the GBLUP model that all markers contribute an equal and individually small proportion of the total genetic variance (<xref ref-type="bibr" rid="B53">Meuwissen and Goddard, 2010</xref>). However, increasing evidence supports the hypothesis that SNPs in high LD with causal mutations explain more genetic variance than those in low LD (<xref ref-type="bibr" rid="B52">Meuwissen et&#xa0;al., 2024</xref>). Incorporating fixed-effect SNPs into GBLUP models appeared to improve predictive ability for both FBC and TC, likely by capturing variation associated with major QTLs (<xref ref-type="fig" rid="f4">
<bold>Figure&#xa0;4</bold>
</xref>). This strategy enabled us to account explicitly for the effects of markers with large estimated effects, potentially helping to separate their contribution from those assumed under the infinitesimal model. While these results suggest benefits from including such markers, it remains important to recognize that the identified SNPs may not represent true causal variants, and further validation in an independent population such as a full-sib family would be needed to confirm their functional significance. The differentiation between large- and small-effect QTLs appears to better model the true genetic architecture of traits, leading to more accurate prediction models. This is especially true when markers in LD with major genes are treated as fixed effects (<xref ref-type="bibr" rid="B47">Li et&#xa0;al., 2019</xref>). Our findings are consistent with prior studies. For example, <xref ref-type="bibr" rid="B43">Kostick et&#xa0;al. (2023)</xref> demonstrated a substantial improvement in the predictive ability of &#x2018;percent red overcolor&#x2019; in apple, which increased from 0.33 to 0.80 upon inclusion of a fixed-effect SNP at a fruit color locus. Similarly, <xref ref-type="bibr" rid="B62">Nsibi et&#xa0;al. (2020)</xref> reported a 25.8% increase in prediction accuracy for apricot (<italic>Prunus armeniaca</italic>) fruit color (hue angle) after incorporating two major QTLs as fixed effects.</p>
<p>Critically, the effectiveness of using fixed-effect SNPs relies on their LD with a QTL, as reported by <xref ref-type="bibr" rid="B47">Li et&#xa0;al. (2019)</xref>. In this study, the fixed-effect SNPs that enhanced predictive abilities were consistently identified by three GWAS methods (reliable SNPs), strengthening the evidence that these SNPs are likely in LD with underlying QTLs.</p>
</sec>
<sec id="s4_3_4">
<label>4.3.4</label>
<title>Combining preselected variants and fixed-effect SNPs further enhances predictive ability</title>
<p>In our study, we demonstrated that while the utilization of GWAS-preselected variants or fixed-effect SNPs can enhance predictive ability, further improvements can be achieved through the integration of preselected variants with fixed-effect SNPs (<xref ref-type="table" rid="T2">
<bold>Table&#xa0;2</bold>
</xref>, <xref ref-type="fig" rid="f4">
<bold>Figure&#xa0;4</bold>
</xref>). Traditional GBLUP models employing a single GRM constructed from GWAS-preselected variants do not fully capitalize on the predictive potential of large-effect SNPs due to the inherent assumptions of the infinitesimal model, which overly constrains their contribution to the total genetic variance. By contrast, our approach, combining preselected variants and fixed-effect SNPs, benefits from more accurate estimation of genomic relationships at causative loci. If all markers explain the same proportion of the total genetic variance, as is the assumption of the infinitesimal model, there would be no notable reduction in heritability when significant SNPs from GWAS are fitted as fixed effects in GBLUP models. However, our analyses demonstrated a notable reduction in both heritability and the additive genetic variance attributed to the anonymous markers when the fixed-effect SNP for FBC was included in GBLUP models, suggesting that a substantial portion of the additive genetic variance was explained by this SNP, potentially due to its LD with the causative mutation. For mango breeding, fixed SNPs associated with FBC and TC provide particularly strong gains in predictive ability and should be prioritized for marker-assisted prediction pipelines.</p>
<p>While our findings demonstrate that integrating GWAS-preselected variants with fixed-effect SNPs can enhance genomic predictive ability, several limitations warrant discussion. First, the relatively modest training population size used in our study may limit statistical power to detect small-effect loci and increase the risk of overfitting, raising concerns about the external validity of this approach. Additionally, the specific population structure of our study may not fully represent the broader genetic landscape of mango germplasm, potentially affecting the transferability of our findings to more diverse populations. If between-subpopulation genetic variance differs across populations, the benefits of marker preselection and fixed-effect SNP integration may not be universally applicable. Future studies should validate these results in larger, independent datasets and assess the approach&#x2019;s robustness across different genetic backgrounds to ensure broader applicability.</p>
<p>Several inconsistencies in predictive ability across varying densities of GWAS-preselected SNPs and different GWAS models highlight the practical challenges of selecting an appropriate GWAS method for variant preselection and determining the optimal number of SNPs to include. Such inconsistencies have important downstream implications, as the choice of GWAS method and preselected variants directly influences the construction of the GRM and the inclusion of fixed-effect SNPs in prediction models, ultimately affecting prediction accuracy. To address these inconsistencies and leverage the complementary strengths of individual GWAS methods, an ensemble-based approach that aggregates summary statistics from multiple GWAS models may offer a more robust solution. Such an approach could combine p-values, effect sizes, or marker rankings to prioritize SNPs that are consistently identified across methods, thereby balancing both sensitivity and specificity. Although ensemble GWAS has primarily been applied to the identification of causative variants (<xref ref-type="bibr" rid="B103">Zhou et&#xa0;al., 2023</xref>), its potential for SNP preselection in genomic prediction remains untapped. Meanwhile, ensemble genomic prediction models, which aggregate predictions from multiple methods, have demonstrated improved accuracy in maize (<xref ref-type="bibr" rid="B80">Tomura et&#xa0;al., 2025</xref>), common bean (<xref ref-type="bibr" rid="B20">Chiaravallotti et&#xa0;al., 2025</xref>), and across cattle, wheat, and human datasets (<xref ref-type="bibr" rid="B31">Gu et&#xa0;al., 2024</xref>), underscoring the potential of model integration at various stages of the genomic prediction pipeline. While ensemble GWAS remains underexplored, a practical strategy for breeders is to prioritize markers consistently identified across multiple GWAS methods and benchmark the resulting models through cross-validation. This ensures that selected SNPs are both reproducible and practically useful in applied breeding programs.</p>
</sec>
<sec id="s4_3_5">
<label>4.3.5</label>
<title>Multi-locus GWAS are powerful approaches for variant preselection</title>
<p>Our findings demonstrate that the predictive ability of models based on GWAS-preselected variants varies depending on the GWAS methodology employed. The superior performance of BLINK and FarmCPU compared to MLMM and the GLMM indicates their greater power in ranking markers based on LD with QTLs, thereby enabling the selection of more informative SNPs for genomic prediction. Beyond detecting a higher number of trait-associated SNPs than the MLMM and GLMM, these methods likely provide a more refined prioritization of markers with strong trait relevance. This superior performance can be attributed to their ability to effectively eliminate confounding effects between testing markers and both population structure (Q) and kinship (K) by dividing the multi-locus linear mixed model (MLMM) into components using either a fixed-effects model (FEM) and a random effects model (REM, pseudo-QTNs) in FarmCPU, or a fixed-effects model (FEM, for selecting pseudo-QTNs) and Bayesian Information Criterion (BIC) in BLINK (<xref ref-type="bibr" rid="B36">Huang et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B49">Liu et&#xa0;al., 2016</xref>). The use of pseudo-QTNs selected using REM in FarmCPU and FEM in BLINK as covariates effectively controls false positives while retaining power to detect true associations. These features likely increase the probability of detecting SNPs that surpass the Bonferroni threshold as well as prioritizing biologically informative variants for use in genomic prediction.</p>
<p>The observation that, in some cases, differences in predictive ability across GWAS methods and varying densities of preselected SNPs were minimal suggests possible redundancy among SNP sets, shared association signals across GWAS methods, or the inherently polygenic architecture of the traits. One possible explanation is that methods such as BLINK and FarmCPU initially fit a general linear model (GLM), and when no significant associations are detected, they may default to reporting GLM results (Zhiwu Zhang, personal communication). This can result in overlapping sets of preselected SNPs across methods, which may explain the similar or comparable predictive abilities observed among BLINK, FarmCPU, and the GLMM for fruit firmness and trunk circumference under parental validation. A second contributing factor to the minor differences in predictive ability may be the presence of shared association signals across GWAS methods, where overlapping SNPs are selected due to consistently low p-values, suggesting potential relevance to the trait despite not reaching strict statistical significance. A third contributing factor is marker redundancy, which may occur even when the sets of GWAS-preselected variants differ, if the SNPs are in LD and tag the same underlying QTLs. As a result, different sets of preselected SNPs may contribute similar genetic information to the prediction model, resulting in minimal variation in predictive ability. These modest differences are also consistent with the polygenic architecture of fruit quality traits and tree growth, where predictive ability is distributed across many loci rather than being driven by a few large-effect variants (<xref ref-type="bibr" rid="B28">Dong et&#xa0;al., 2024</xref>; <xref ref-type="bibr" rid="B76">Srivastav et&#xa0;al., 2023</xref>).</p>
</sec>
<sec id="s4_3_6">
<label>4.3.6</label>
<title>Accounting for population structure reduces predictive ability in mango gene-pool</title>
<p>Our analysis revealed a marked decline in predictive ability when population structure was accounted for in prediction models (<xref ref-type="fig" rid="f2">
<bold>Figures&#xa0;2</bold>
</xref>, <xref ref-type="fig" rid="f3">
<bold>3</bold>
</xref>), a pattern consistent with that reported by <xref ref-type="bibr" rid="B32">Guo et&#xa0;al. (2014)</xref> for wheat and rice. Our findings indicate that, for these traits in the gene-pool population, a considerable portion of predictive ability is derived from across sub-population genetic variance (i.e. the model&#x2019;s ability to classify individuals into their respective sub-populations), rather than solely from within sub-population genetic variance (i.e. predictive ability attributable to LD between markers and QTLs). This result is consistent with the observations of <xref ref-type="bibr" rid="B24">Daetwyler et&#xa0;al. (2012)</xref>, who reported a decline in GEBV accuracy when population structure was accounted for and argued that the reduced accuracy reflects the predictive power attributable to LD between markers and QTLs.</p>
<p>The relatively larger gains in predictive ability with GWAS-preselected variants when population structure was accounted for, compared to models without control for population structure, likely reflect the greater contribution of LD information once the confounding effects of population structure are minimized. A previous study in the Australian mango breeding population found that TC, FBC and fruit blush intensity are strongly associated with population structure (<xref ref-type="bibr" rid="B95">Wilkinson et&#xa0;al., 2022</xref>). To avoid spurious associations, separating trait-associated loci from loci associated with ancestry is particularly important in this population. Because population structure was already accounted for during GWAS (through inclusion of PCs as fixed effects), the preselected variants are more likely to tag causative QTLs or be in meaningful LD with them, rather than merely reflecting population stratification. In contrast, WGS data contain many markers that may not be in LD with causative loci but can still contribute to predictive ability by capturing population structure. When population structure is explicitly controlled for in the prediction model, these markers provide little useful genetic signal and may introduce noise, leading to a sharper decline in predictive ability compared to models using trait-informative GWAS-preselected variants.</p>
<p>While our findings demonstrate a marked decline in predictive ability after accounting for population structure using fixed PCs, this sharp reduction may reflect over-correction for population structure arising from double-counting population structure effects (<xref ref-type="bibr" rid="B35">Hong et&#xa0;al., 2025</xref>). As argued by <xref ref-type="bibr" rid="B40">Janss et&#xa0;al. (2012)</xref>, incorporating fixed PCs derived from the same GRM used in the random component of the model can redundantly adjust for population structure, thereby diminishing predictive ability by removing genuine genetic signals alongside confounding effects. Future studies should evaluate methods that address this issue, such as the reparameterized GBLUP model of <xref ref-type="bibr" rid="B40">Janss et&#xa0;al. (2012)</xref>, which enables natural partitioning of across-subpopulation genetic variance due to population structure and within-subpopulation genetic variance that is of primary interest to breeders. <xref ref-type="bibr" rid="B35">Hong et&#xa0;al. (2025)</xref> advocated for accounting for population structure using PCs as random effects to avoid the over-correction that may occur when PCs are fitted as fixed effects in GBLUP models. However, in our study, fitting PCs as fixed effects provided conservative estimates of predictive ability, which are likely more transferable to homogeneous breeding populations or across-population predictions.</p>
</sec>
</sec>
</sec>
<sec id="s5" sec-type="conclusions">
<label>5</label>
<title>Conclusion</title>
<p>Preselecting SNPs from WGS data based on their estimated effects on target traits enhanced predictive ability in mango, particularly when population structure was accounted for. In contrast, limited improvements were observed when population structure was ignored, likely due to inflated prediction estimates. Integrating GWAS-preselected variants with fixed-effect SNPs yielded superior predictive performance, especially for FBC, across models both accounting for and ignoring population structure. This combined approach outperformed models based solely on WGS data, WGS plus fixed-effect SNPs, or GWAS-preselected variants alone. These findings underscore the value of strategic SNP selection and model refinement using prior biological knowledge to maximize the utility of WGS data in genomic prediction. While our results demonstrate the potential of leveraging GWAS-preselected variants, further validation in larger, more homogeneous datasets, particularly those reflecting practical breeding scenarios such as across-population or across-generation predictions, is recommended to assess robustness and broader applicability. The sharp decline in predictive ability after accounting for population structure highlights its dominant influence in this mango gene pool, emphasizing the need to account for this factor in genetic analysis to distinguish true LD-driven associations from spurious signals arising from subpopulation differences. The identification of several markers associated with key fruit quality traits and tree vigor provides a valuable resource for future marker-assisted selection and functional genomics research in mango. To ensure their reliability and practical utility in breeding programs, these markers should be further validated under realistic breeding scenarios, such as selection within full-sib families. 
Overall, this research contributes to the optimization of genomic selection strategies in fruit tree breeding programs, offering a promising pathway to accelerate genetic gain in long-lived species where conventional breeding remains time-consuming and resource-intensive. Once validated in practical breeding populations, the use of GWAS-preselected variants in genomic prediction could enable earlier and more accurate selection, thereby reducing breeding cycle length and accelerating cultivar development in mango.</p>
</sec>
</body>
<back>
<sec id="s6" sec-type="data-availability">
<title>Data availability statement</title>
<p>All data analyzed in this study was previously published by <xref ref-type="bibr" rid="B94">Wilkinson et&#xa0;al. (2025)</xref>. The whole genome assemblies and annotations for Irwin, Kensington Pride and M. laurina are submitted to the Genome Warehouse under Bioproject nos. PRJCA020898, PRJCA029779, and PRJCA029972, respectively. Raw sequencing reads have been submitted to NCBI under BioProject nos. PRJNA1148201 (Kensington Pride and M. laurina), PRJNA1034099 (Irwin) and PRJNA1175065 (225&#x2009;M. indica).</p>
</sec>
<sec id="s7" sec-type="author-contributions">
<title>Author contributions</title>
<p>NM: Conceptualization, Formal analysis, Investigation, Methodology, Writing &#x2013; original draft, Writing &#x2013; review &amp; editing. MJW: Methodology, Writing &#x2013; review &amp; editing. DO-B: Conceptualization, Supervision, Writing &#x2013; review &amp; editing. NLD: Conceptualization, Supervision, Writing &#x2013; review &amp; editing. MW: Methodology, Software, Writing &#x2013; review &amp; editing. AA: Methodology, Writing &#x2013; review &amp; editing. ISEB: Methodology, Writing &#x2013; review &amp; editing. AAM: Supervision, Writing &#x2013; review &amp; editing. CMH: Conceptualization, Methodology, Supervision, Writing &#x2013; review &amp; editing.</p>
</sec>
<sec id="s8" sec-type="funding-information">
<title>Funding</title>
<p>The author(s) declare financial support was received for the research and/or publication of this article. This project was funded by the Hort Frontiers Advanced Production Systems Fund (National Tree Genomics Program, AS17000) as part of the Hort Frontiers strategic partnership initiative developed by Hort Innovation, with co-investment from the Queensland Government and contributions from the Australian Government.</p>
</sec>
<ack>
<title>Acknowledgments</title>
<p>This research was carried out as part of the National Tree Genomics Program &#x2013; Phenotype Prediction project (AS17000) which was funded by the Hort Frontiers Advanced Production Systems as part of the Hort Frontiers strategic partnership initiative developed by Hort Innovation, with co-investment from The University of Queensland, Queensland Government, and contributions from the Australian Government. Norman Munyengwa received a PhD scholarship from the University of Queensland.</p>
</ack>
<sec id="s9" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="s10" sec-type="ai-statement">
<title>Generative AI statement</title>
<p>The author(s) declare that Generative AI was used in the creation of this manuscript. Generative AI was used to solve analysis issues such as writing code.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If&#xa0;you identify any issues, please contact us.</p>
</sec>
<sec id="s11" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors&#xa0;and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec id="s12" sec-type="supplementary-material">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fpls.2025.1664012/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fpls.2025.1664012/full#supplementary-material</ext-link>
</p>
<supplementary-material xlink:href="DataSheet1.docx" id="SM1" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document"/>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ampomah-Dwamena</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Thrimawithana</surname> <given-names>A. H.</given-names>
</name>
<name>
<surname>Dejnoprat</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Lewis</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Espley</surname> <given-names>R. V.</given-names>
</name>
<name>
<surname>Allan</surname> <given-names>A. C.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>A kiwifruit (Actinidia deliciosa) R2R3-MYB transcription factor modulates chlorophyll and carotenoid accumulation</article-title>. <source>New Phytol.</source> <volume>221</volume>, <fpage>309</fpage>&#x2013;<lpage>325</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/nph.15362</pub-id>, PMID: <pub-id pub-id-type="pmid">30067292</pub-id></citation></ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>An</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>H.</given-names>
</name>
<name>
<surname>You</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Shu</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>X.</given-names>
</name>
<etal/>
</person-group>. (<year>2017</year>). <article-title>Molecular cloning and functional characterization of MdPIN1 in apple</article-title>. <source>J. Integr. Agric.</source> <volume>16</volume>, <fpage>1103</fpage>&#x2013;<lpage>1111</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/S2095-3119(16)61554-X</pub-id>
</citation></ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>An</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Han</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Tang</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Xia</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Yin</surname> <given-names>W.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Poplar GATA transcription factor PdGNC is capable of regulating chloroplast ultrastructure, photosynthesis, and vegetative growth in Arabidopsis under varying nitrogen levels</article-title>. <source>Plant Cell Tiss Organ Cult</source> <volume>119</volume>, <fpage>313</fpage>&#x2013;<lpage>327</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s11240-014-0536-y</pub-id>
</citation></ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>An</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Zhou</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Han</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Shen</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>C.</given-names>
</name>
<etal/>
</person-group>. (<year>2020</year>). <article-title>The GATA transcription factor GNC plays an important role in photosynthesis and growth in poplar</article-title>. <source>J. Exp. Bot.</source> <volume>71</volume>, <fpage>1969</fpage>&#x2013;<lpage>1984</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/jxb/erz564</pub-id>, PMID: <pub-id pub-id-type="pmid">31872214</pub-id></citation></ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bally</surname> <given-names>I. S. E.</given-names>
</name>
<name>
<surname>Bombarely</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Chambers</surname> <given-names>A. H.</given-names>
</name>
<name>
<surname>Cohen</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Dillon</surname> <given-names>N. L.</given-names>
</name>
<name>
<surname>Innes</surname> <given-names>D. J.</given-names>
</name>
<etal/>
</person-group>. (<year>2021</year>). <article-title>The &#x2018;Tommy Atkins&#x2019; mango genome reveals candidate genes for fruit quality</article-title>. <source>BMC Plant Biol.</source> <volume>21</volume>, <fpage>108</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s12870-021-02858-1</pub-id>, PMID: <pub-id pub-id-type="pmid">33618672</pub-id></citation></ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bally</surname> <given-names>I. S. E.</given-names>
</name>
<name>
<surname>De Faveri</surname> <given-names>J.</given-names>
</name>
</person-group>. (<year>2021</year>). <article-title>Genetic analysis of multiple fruit quality traits in mango across sites and years</article-title>. <source>Euphytica</source>, <volume>217</volume>, <elocation-id>44</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s10681-020-02750-3</pub-id>
</citation></ref>
<ref id="B7">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Bally</surname> <given-names>I. S. E.</given-names>
</name>
<name>
<surname>Dillon</surname> <given-names>N. L.</given-names>
</name>
</person-group> (<year>2018</year>). &#x201c;<article-title>Mango (Mangifera indica L.) breeding</article-title>,&#x201d; in <source>Advances in plant breeding strategies: fruits</source>. Eds. <person-group person-group-type="editor">
<name>
<surname>Al-Khayri</surname> <given-names>J. M.</given-names>
</name>
<name>
<surname>Jain</surname> <given-names>S. M.</given-names>
</name>
<name>
<surname>Johnson</surname> <given-names>D. V.</given-names>
</name>
</person-group> (<publisher-name>Springer International Publishing</publisher-name>, <publisher-loc>Cham</publisher-loc>), <fpage>811</fpage>&#x2013;<lpage>896</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/978-3-319-91944-7_20</pub-id>
</citation></ref>
<ref id="B8">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Bally</surname> <given-names>I. S. E.</given-names>
</name>
<name>
<surname>Lu</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Johnson</surname> <given-names>P. R.</given-names>
</name>
</person-group> (<year>2009</year>). &#x201c;<article-title>Mango breeding</article-title>,&#x201d; in <source>Breeding plantation tree crops: tropical species</source>. Eds. <person-group person-group-type="editor">
<name>
<surname>Jain</surname> <given-names>S. M.</given-names>
</name>
<name>
<surname>Priyadarshan</surname> <given-names>P. M.</given-names>
</name>
</person-group> (<publisher-name>Springer</publisher-name>, <publisher-loc>New York, NY</publisher-loc>), <fpage>51</fpage>&#x2013;<lpage>82</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/978-0-387-71201-7_2</pub-id>
</citation></ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Barreto</surname> <given-names>C. A. V.</given-names>
</name>
<name>
<surname>das Gra&#xe7;as Dias</surname> <given-names>K. O.</given-names>
</name>
<name>
<surname>de Sousa</surname> <given-names>I. C.</given-names>
</name>
<name>
<surname>Azevedo</surname> <given-names>C. F.</given-names>
</name>
<name>
<surname>Nascimento</surname> <given-names>A. C. C.</given-names>
</name>
<name>
<surname>Guimar&#xe3;es</surname> <given-names>L. J. M.</given-names>
</name>
<etal/>
</person-group>. (<year>2024</year>). <article-title>Genomic prediction in multi-environment trials in maize using statistical and machine learning methods</article-title>. <source>Sci. Rep.</source> <volume>14</volume>, <fpage>1062</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41598-024-51792-3</pub-id>, PMID: <pub-id pub-id-type="pmid">38212638</pub-id></citation></ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bedhane</surname> <given-names>M.</given-names>
</name>
<name>
<surname>van der Werf</surname> <given-names>J.</given-names>
</name>
<name>
<surname>de las Heras-Saldana</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Lim</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Park</surname> <given-names>B.</given-names>
</name>
<etal/>
</person-group>. (<year>2021</year>). <article-title>The accuracy of genomic prediction for meat quality traits in Hanwoo cattle when using genotypes from different SNP densities and preselected variants from imputed whole genome sequence</article-title>. <source>Anim. Prod. Sci.</source> <volume>62</volume>, <fpage>21</fpage>&#x2013;<lpage>28</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1071/AN20659</pub-id>
</citation></ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bernardo</surname> <given-names>R.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Genomewide selection when major genes are known</article-title>. <source>Crop Sci</source> <volume>54</volume>, <fpage>68</fpage>&#x2013;<lpage>75</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.2135/cropsci2013.05.0315</pub-id>
</citation></ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Brault</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Segura</surname> <given-names>V.</given-names>
</name>
<name>
<surname>This</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Le Cunff</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Flutre</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Fran&#xe7;ois</surname> <given-names>P.</given-names>
</name>
<etal/>
</person-group>. (<year>2022</year>). <article-title>Across-population genomic prediction in grapevine opens up promising prospects for breeding</article-title>. <source>Horticulture Res.</source> <volume>9</volume>, <elocation-id>uhac041</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/hr/uhac041</pub-id>, PMID: <pub-id pub-id-type="pmid">35184162</pub-id></citation></ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Browning</surname> <given-names>B. L.</given-names>
</name>
<name>
<surname>Zhou</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Browning</surname> <given-names>S. R.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>A one-penny imputed genome from next-generation reference panels</article-title>. <source>Am. J. Hum. Genet.</source> <volume>103</volume>, <fpage>338</fpage>&#x2013;<lpage>348</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.ajhg.2018.07.015</pub-id>, PMID: <pub-id pub-id-type="pmid">30100085</pub-id></citation></ref>
<ref id="B14">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Butler</surname> <given-names>D. G.</given-names>
</name>
<name>
<surname>Cullis</surname> <given-names>B. R.</given-names>
</name>
<name>
<surname>Gilmour</surname> <given-names>A. R.</given-names>
</name>
<name>
<surname>Gogel</surname> <given-names>B. G.</given-names>
</name>
<name>
<surname>Thompson</surname> <given-names>R.</given-names>
</name>
</person-group> (<year>2023</year>). <source>ASReml-R reference manual version 4.2</source>. <publisher-loc>Hemel Hempstead, UK</publisher-loc>: <publisher-name>VSN International Ltd</publisher-name>.</citation></ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Butler</surname> <given-names>J. B.</given-names>
</name>
<name>
<surname>Freeman</surname> <given-names>J. S.</given-names>
</name>
<name>
<surname>Potts</surname> <given-names>B. M.</given-names>
</name>
<name>
<surname>Vaillancourt</surname> <given-names>R. E.</given-names>
</name>
<name>
<surname>Kahrood</surname> <given-names>H. V.</given-names>
</name>
<name>
<surname>Ades</surname> <given-names>P. K.</given-names>
</name>
<etal/>
</person-group>. (<year>2022</year>). <article-title>Patterns of genomic diversity and linkage disequilibrium across the disjunct range of the Australian forest tree Eucalyptus globulus</article-title>. <source>Tree Genet. Genomes</source> <volume>18</volume>, <fpage>28</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s11295-022-01558-7</pub-id>
</citation></ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Calus</surname> <given-names>M. P. L.</given-names>
</name>
<name>
<surname>Meuwissen</surname> <given-names>T. H. E.</given-names>
</name>
<name>
<surname>de Roos</surname> <given-names>A. P. W.</given-names>
</name>
<name>
<surname>Veerkamp</surname> <given-names>R. F.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>Accuracy of genomic selection using different methods to define haplotypes</article-title>. <source>Genetics</source> <volume>178</volume>, <fpage>553</fpage>&#x2013;<lpage>561</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1534/genetics.107.080838</pub-id>, PMID: <pub-id pub-id-type="pmid">18202394</pub-id></citation></ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Campoy</surname> <given-names>J. A.</given-names>
</name>
<name>
<surname>Lerigoleur-Balsemin</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Christmann</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Beauvieux</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Girollet</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Quero-Garc&#xed;a</surname> <given-names>J.</given-names>
</name>
<etal/>
</person-group>. (<year>2016</year>). <article-title>Genetic diversity, linkage disequilibrium, population structure and construction of a core collection of Prunus avium L. landraces and bred cultivars</article-title>. <source>BMC Plant Biol.</source> <volume>16</volume>, <fpage>49</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s12870-016-0712-9</pub-id>, PMID: <pub-id pub-id-type="pmid">26912051</pub-id></citation></ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cebeci</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Bayraktar</surname> <given-names>M.</given-names>
</name>
<name>
<surname>G&#xf6;k&#xe7;e</surname> <given-names>G.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Comparison of the statistical methods for genome-wide association studies on simulated quantitative traits of domesticated goats (Capra hircus L.)</article-title>. <source>Small Ruminant Res.</source> <volume>227</volume>, <elocation-id>107053</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.smallrumres.2023.107053</pub-id>
</citation></ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname> <given-names>Z.-Q.</given-names>
</name>
<name>
<surname>Klingberg</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Hallingb&#xe4;ck</surname> <given-names>H. R.</given-names>
</name>
<name>
<surname>Wu</surname> <given-names>H. X.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Preselection of QTL markers enhances accuracy of genomic selection in Norway spruce</article-title>. <source>BMC Genomics</source> <volume>24</volume>, <fpage>147</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s12864-023-09250-3</pub-id>, PMID: <pub-id pub-id-type="pmid">36973641</pub-id></citation></ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chiaravallotti</surname> <given-names>I.</given-names>
</name>
<name>
<surname>Pauptit</surname> <given-names>O.</given-names>
</name>
<name>
<surname>Hoyos-Villegas</surname> <given-names>V.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>Environment ensemble models for genomic prediction in common bean (Phaseolus vulgaris L.)</article-title>. <source>Plant Genome</source> <volume>18</volume>, <elocation-id>e70057</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1002/tpg2.70057</pub-id>, PMID: <pub-id pub-id-type="pmid">40501200</pub-id></citation></ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Clarke</surname> <given-names>S. H.</given-names>
</name>
<name>
<surname>Lawrence</surname> <given-names>E. R.</given-names>
</name>
<name>
<surname>Matte</surname> <given-names>J.-M.</given-names>
</name>
<name>
<surname>Gallagher</surname> <given-names>B. K.</given-names>
</name>
<name>
<surname>Salisbury</surname> <given-names>S. J.</given-names>
</name>
<name>
<surname>Michaelides</surname> <given-names>S. N.</given-names>
</name>
<etal/>
</person-group>. (<year>2024</year>). <article-title>Global assessment of effective population sizes: Consistent taxonomic differences in meeting the 50/500 rule</article-title>. <source>Mol. Ecol.</source> <volume>33</volume>, <elocation-id>e17353</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/mec.17353</pub-id>, PMID: <pub-id pub-id-type="pmid">38613250</pub-id></citation></ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cong</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Qu</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Sha</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Ma</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>M.</given-names>
</name>
<etal/>
</person-group>. (<year>2021</year>). <article-title>PbWRKY75 promotes anthocyanin synthesis by activating PbDFR, PbUFGT, and PbMYB10b in pear</article-title>. <source>Physiologia Plantarum</source> <volume>173</volume>, <fpage>1841</fpage>&#x2013;<lpage>1849</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/ppl.13525</pub-id>, PMID: <pub-id pub-id-type="pmid">34418106</pub-id></citation></ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cros</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Bocs</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Riou</surname> <given-names>V.</given-names>
</name>
<name>
<surname>Ortega-Abboud</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Tisn&#xe9;</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Argout</surname> <given-names>X.</given-names>
</name>
<etal/>
</person-group>. (<year>2017</year>). <article-title>Genomic preselection with genotyping-by-sequencing increases performance of commercial oil palm hybrid crosses</article-title>. <source>BMC Genomics</source> <volume>18</volume>, <fpage>839</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s12864-017-4179-3</pub-id>, PMID: <pub-id pub-id-type="pmid">29096603</pub-id></citation></ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Daetwyler</surname> <given-names>H. D.</given-names>
</name>
<name>
<surname>Kemper</surname> <given-names>K. E.</given-names>
</name>
<name>
<surname>van der Werf</surname> <given-names>J. H. J.</given-names>
</name>
<name>
<surname>Hayes</surname> <given-names>B. J.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Components of the accuracy of genomic prediction in a multi-breed sheep population</article-title>. <source>J. Anim. Sci.</source> <volume>90</volume>, <fpage>3375</fpage>&#x2013;<lpage>3384</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.2527/jas.2011-4557</pub-id>, PMID: <pub-id pub-id-type="pmid">23038744</pub-id></citation></ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Danecek</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Auton</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Abecasis</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Albers</surname> <given-names>C. A.</given-names>
</name>
<name>
<surname>Banks</surname> <given-names>E.</given-names>
</name>
<name>
<surname>DePristo</surname> <given-names>M. A.</given-names>
</name>
<etal/>
</person-group>. (<year>2011</year>). <collab>1000 Genomes Project Analysis Group</collab> <article-title>The variant call format and VCFtools</article-title>. <source>Bioinformatics</source> <volume>27</volume>, <fpage>2156</fpage>&#x2013;<lpage>2158</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/bioinformatics/btr330</pub-id>, PMID: <pub-id pub-id-type="pmid">21653522</pub-id></citation></ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>De Franceschi</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Stegmeir</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Cabrera</surname> <given-names>A.</given-names>
</name>
<name>
<surname>van der Knaap</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Rosyara</surname> <given-names>U. R.</given-names>
</name>
<name>
<surname>Sebolt</surname> <given-names>A. M.</given-names>
</name>
<etal/>
</person-group>. (<year>2013</year>). <article-title>Cell number regulator genes in Prunus provide candidate genes for the control of fruit size in sweet and sour cherry</article-title>. <source>Mol. Breed.</source> <volume>32</volume>, <fpage>311</fpage>&#x2013;<lpage>326</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s11032-013-9872-6</pub-id>, PMID: <pub-id pub-id-type="pmid">23976873</pub-id></citation></ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Devoghalaere</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Doucen</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Guitton</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Keeling</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Payne</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Ling</surname> <given-names>T. J.</given-names>
</name>
<etal/>
</person-group>. (<year>2012</year>). <article-title>A genomics approach to understanding the role of auxin in apple (Malus x domestica) fruit size control</article-title>. <source>BMC Plant Biol.</source> <volume>12</volume>, <elocation-id>7</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/1471-2229-12-7</pub-id>, PMID: <pub-id pub-id-type="pmid">22243694</pub-id></citation></ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dong</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Xie</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Sun</surname> <given-names>X.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Genomic dissection of additive and non-additive genetic effects and genomic prediction in an open-pollinated family test of Japanese larch</article-title>. <source>BMC Genomics</source> <volume>25</volume>, <elocation-id>11</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s12864-023-09891-4</pub-id>, PMID: <pub-id pub-id-type="pmid">38166605</pub-id></citation></ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gao</surname> <given-names>H.-N.</given-names>
</name>
<name>
<surname>Jiang</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Cui</surname> <given-names>J.-Y.</given-names>
</name>
<name>
<surname>You</surname> <given-names>C.-X.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>Y.-Y.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Review: The effects of hormones and environmental factors on anthocyanin biosynthesis in apple</article-title>. <source>Plant Sci.</source> <volume>312</volume>, <elocation-id>111024</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.plantsci.2021.111024</pub-id>, PMID: <pub-id pub-id-type="pmid">34620429</pub-id></citation></ref>
<ref id="B30">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Grattapaglia</surname> <given-names>D.</given-names>
</name>
</person-group> (<year>2014</year>). &#x201c;<article-title>Breeding forest trees by genomic selection: current progress and the way forward</article-title>,&#x201d; in <source>Genomics of plant genetic resources: volume 1. Managing, sequencing and mining genetic resources</source>. Eds. <person-group person-group-type="editor">
<name>
<surname>Tuberosa</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Graner</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Frison</surname> <given-names>E.</given-names>
</name>
</person-group> (<publisher-name>Springer Netherlands</publisher-name>, <publisher-loc>Dordrecht</publisher-loc>), <fpage>651</fpage>&#x2013;<lpage>682</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/978-94-007-7572-5_26</pub-id>
</citation></ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gu</surname> <given-names>L.-L.</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>R.-Q.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>Z.-Y.</given-names>
</name>
<name>
<surname>Jiang</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Fang</surname> <given-names>M.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Ensemble learning for integrative prediction of genetic values with genomic variants</article-title>. <source>BMC Bioinf.</source> <volume>25</volume>, <fpage>120</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s12859-024-05720-x</pub-id>, PMID: <pub-id pub-id-type="pmid">38515026</pub-id></citation></ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Guo</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Tucker</surname> <given-names>D. M.</given-names>
</name>
<name>
<surname>Basten</surname> <given-names>C. J.</given-names>
</name>
<name>
<surname>Gandhi</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Ersoz</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Guo</surname> <given-names>B.</given-names>
</name>
<etal/>
</person-group>. (<year>2014</year>). <article-title>The impact of population structure on genomic prediction in stratified populations</article-title>. <source>Theor. Appl. Genet.</source> <volume>127</volume>, <fpage>749</fpage>&#x2013;<lpage>762</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s00122-013-2255-x</pub-id>, PMID: <pub-id pub-id-type="pmid">24452438</pub-id></citation></ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hardner</surname> <given-names>C. M.</given-names>
</name>
<name>
<surname>Fikere</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Gasic</surname> <given-names>K.</given-names>
</name>
<name>
<surname>da Silva Linge</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Worthington</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Byrne</surname> <given-names>D.</given-names>
</name>
<etal/>
</person-group>. (<year>2022</year>). <article-title>Multi-environment genomic prediction for soluble solids content in peach (Prunus persica)</article-title>. <source>Front. Plant Sci.</source> <volume>13</volume>, <elocation-id>960449</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fpls.2022.960449</pub-id>, PMID: <pub-id pub-id-type="pmid">36275520</pub-id></citation></ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hayatgheibi</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Hallingb&#xe4;ck</surname> <given-names>H. R.</given-names>
</name>
<name>
<surname>Lundqvist</surname> <given-names>S.-O.</given-names>
</name>
<name>
<surname>Grahn</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Scheepers</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Nordstr&#xf6;m</surname> <given-names>P.</given-names>
</name>
<etal/>
</person-group>. (<year>2024</year>). <article-title>Implications of accounting for marker-based population structure in the quantitative genetic evaluation of genetic parameters related to growth and wood properties in Norway spruce</article-title>. <source>BMC Genomic Data</source> <volume>25</volume>, <fpage>60</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s12863-024-01241-x</pub-id>, PMID: <pub-id pub-id-type="pmid">38877416</pub-id></citation></ref>
<ref id="B35">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hong</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Chung</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Dinh</surname> <given-names>P. T. N.</given-names>
</name>
<name>
<surname>Kim</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Maeng</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Choi</surname> <given-names>Y.</given-names>
</name>
<etal/>
</person-group>. (<year>2025</year>). <article-title>Effect of breed composition in genomic prediction using crossbred pig reference population</article-title>. <source>J. Anim. Sci. Technol.</source> <volume>67</volume>, <fpage>56</fpage>&#x2013;<lpage>68</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.5187/jast.2025.e2</pub-id>, PMID: <pub-id pub-id-type="pmid">39974785</pub-id></citation></ref>
<ref id="B36">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Huang</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Zhou</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Summers</surname> <given-names>R. M.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>Z.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>BLINK: a package for the next level of genome-wide association studies with both individuals and markers in the millions</article-title>. <source>GigaScience</source> <volume>8</volume>, <elocation-id>giy154</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/gigascience/giy154</pub-id>, PMID: <pub-id pub-id-type="pmid">30535326</pub-id></citation></ref>
<ref id="B37">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jacquin</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Guerra</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Lewandowski</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Patocchi</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Rymenants</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Durel</surname> <given-names>C.-E.</given-names>
</name>
<etal/>
</person-group>. (<year>2025</year>). <article-title>WISER: an innovative and efficient method for correcting population structure in omics-based selection and association studies</article-title>. <source>bioRxiv</source>. doi:&#xa0;<pub-id pub-id-type="doi">10.1101/2025.02.07.637171</pub-id>
</citation></ref>
<ref id="B38">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jang</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Ros-Freixedes</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Hickey</surname> <given-names>J. M.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>C.-Y.</given-names>
</name>
<name>
<surname>Holl</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Herring</surname> <given-names>W. O.</given-names>
</name>
<etal/>
</person-group>. (<year>2023</year>a). <article-title>Using pre-selected variants from large-scale whole-genome sequence data for single-step genomic predictions in pigs</article-title>. <source>Genet. Sel. Evol.</source> <volume>55</volume>, <fpage>55</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s12711-023-00831-0</pub-id>, PMID: <pub-id pub-id-type="pmid">37495982</pub-id></citation></ref>
<ref id="B39">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jang</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Tsuruta</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Leite</surname> <given-names>N. G.</given-names>
</name>
<name>
<surname>Misztal</surname> <given-names>I.</given-names>
</name>
<name>
<surname>Lourenco</surname> <given-names>D.</given-names>
</name>
</person-group> (<year>2023</year>b). <article-title>Dimensionality of genomic information and its impact on genome-wide associations and variant selection for genomic prediction: a simulation study</article-title>. <source>Genet. Sel. Evol.</source> <volume>55</volume>, <fpage>49</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s12711-023-00823-0</pub-id>, PMID: <pub-id pub-id-type="pmid">37460964</pub-id></citation></ref>
<ref id="B40">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Janss</surname> <given-names>L.</given-names>
</name>
<name>
<surname>de los Campos</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Sheehan</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Sorensen</surname> <given-names>D.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Inferences from genomic models in stratified populations</article-title>. <source>Genetics</source> <volume>192</volume>, <fpage>693</fpage>&#x2013;<lpage>704</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1534/genetics.112.141143</pub-id>, PMID: <pub-id pub-id-type="pmid">22813891</pub-id></citation></ref>
<ref id="B41">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kanzaki</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Ichihi</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Tanaka</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Fujishige</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Koeda</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Shimizu</surname> <given-names>K.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>The R2R3-MYB transcription factor <italic>MiMYB1</italic> regulates light dependent red coloration of &#x2018;Irwin&#x2019; mango fruit skin</article-title>. <source>Scientia Hortic.</source> <volume>272</volume>, <elocation-id>109567</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.scienta.2020.109567</pub-id>
</citation></ref>
<ref id="B42">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Karim</surname> <given-names>S. K. A.</given-names>
</name>
<name>
<surname>Allan</surname> <given-names>A. C.</given-names>
</name>
<name>
<surname>Schaffer</surname> <given-names>R. J.</given-names>
</name>
<name>
<surname>David</surname> <given-names>K. M.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Cell division controls final fruit size in three apple (Malus x domestica) cultivars</article-title>. <source>Horticulturae</source> <volume>8</volume>, <elocation-id>657</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/horticulturae8070657</pub-id>
</citation></ref>
<ref id="B43">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kostick</surname> <given-names>S. A.</given-names>
</name>
<name>
<surname>Bernardo</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Luby</surname> <given-names>J. J.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Genomewide selection for fruit quality traits in apple: breeding insights gained from prediction and postdiction</article-title>. <source>Hortic. Res.</source> <volume>10</volume>, <elocation-id>uhad088</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/hr/uhad088</pub-id>, PMID: <pub-id pub-id-type="pmid">37334180</pub-id></citation></ref>
<ref id="B44">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kumar</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Chagn&#xe9;</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Bink</surname> <given-names>M. C. A. M.</given-names>
</name>
<name>
<surname>Volz</surname> <given-names>R. K.</given-names>
</name>
<name>
<surname>Whitworth</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Carlisle</surname> <given-names>C.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Genomic selection for fruit quality traits in apple (Malus&#xd7;domestica Borkh.)</article-title>. <source>PLoS One</source> <volume>7</volume>, <elocation-id>e36674</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1371/journal.pone.0036674</pub-id>, PMID: <pub-id pub-id-type="pmid">22574211</pub-id></citation></ref>
<ref id="B45">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kumar</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Kirk</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Deng</surname> <given-names>C. H.</given-names>
</name>
<name>
<surname>Shirtliff</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Wiedow</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Qin</surname> <given-names>M.</given-names>
</name>
<etal/>
</person-group>. (<year>2019</year>). <article-title>Marker-trait associations and genomic predictions of interspecific pear (Pyrus) fruit characteristics</article-title>. <source>Sci. Rep.</source> <volume>9</volume>, <elocation-id>9072</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41598-019-45618-w</pub-id>, PMID: <pub-id pub-id-type="pmid">31227781</pub-id></citation></ref>
<ref id="B46">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname> <given-names>B.-J.</given-names>
</name>
<name>
<surname>Bao</surname> <given-names>R.-X.</given-names>
</name>
<name>
<surname>Shi</surname> <given-names>Y.-N.</given-names>
</name>
<name>
<surname>Grierson</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>K.-S.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Auxin response factors: important keys for understanding regulatory mechanisms of fleshy fruit development and ripening</article-title>. <source>Hortic. Res.</source> <volume>11</volume>, <elocation-id>uhae209</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/hr/uhae209</pub-id>, PMID: <pub-id pub-id-type="pmid">39372288</pub-id></citation></ref>
<ref id="B47">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Gu</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Lyle</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>J.</given-names>
</name>
<etal/>
</person-group>. (<year>2019</year>). <article-title>Enhancing genomic selection by fitting large-effect SNPs as fixed effects and a genotype-by-environment effect using a maize BC<sub>1</sub>F<sub>3:4</sub> population</article-title>. <source>PLoS One</source> <volume>14</volume>, <elocation-id>e0223898</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1371/journal.pone.0223898</pub-id>, PMID: <pub-id pub-id-type="pmid">31622400</pub-id></citation></ref>
<ref id="B48">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Zhao</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Peng</surname> <given-names>Z.</given-names>
</name>
<etal/>
</person-group>. (<year>2018</year>). <article-title>Enhancing auxin accumulation in maize root tips improves root growth and dwarfs plant height</article-title>. <source>Plant Biotechnol. J.</source> <volume>16</volume>, <fpage>86</fpage>&#x2013;<lpage>99</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/pbi.12751</pub-id>, PMID: <pub-id pub-id-type="pmid">28499064</pub-id></citation></ref>
<ref id="B49">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Huang</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Fan</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Buckler</surname> <given-names>E. S.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>Z.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Iterative usage of fixed and random effect models for powerful and efficient genome-wide association studies</article-title>. <source>PLoS Genet.</source> <volume>12</volume>, <elocation-id>e1005767</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1371/journal.pgen.1005767</pub-id>, PMID: <pub-id pub-id-type="pmid">26828793</pub-id></citation></ref>
<ref id="B50">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Zhou</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Yao</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Zhan</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Fan</surname> <given-names>Z.</given-names>
</name>
<etal/>
</person-group>. (<year>2023</year>). <article-title>Increased accuracy of genomic prediction using preselected SNPs from GWAS with imputed whole-genome sequence data in pigs</article-title>. <source>Animals (Basel)</source> <volume>13</volume>, <elocation-id>3871</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/ani13243871</pub-id>, PMID: <pub-id pub-id-type="pmid">38136908</pub-id></citation></ref>
<ref id="B51">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mahmud</surname> <given-names>K. P.</given-names>
</name>
<name>
<surname>Ibell</surname> <given-names>P. T.</given-names>
</name>
<name>
<surname>Wright</surname> <given-names>C. L.</given-names>
</name>
<name>
<surname>Monks</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Bally</surname> <given-names>I.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>High-density espalier trained mangoes make better use of light</article-title>. <source>Agronomy</source> <volume>13</volume>, <elocation-id>2557</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/agronomy13102557</pub-id>
</citation></ref>
<ref id="B52">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Meuwissen</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Eikje</surname> <given-names>L. S.</given-names>
</name>
<name>
<surname>Gjuvsland</surname> <given-names>A. B.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>GWABLUP: genome-wide association assisted best linear unbiased prediction of genetic values</article-title>. <source>Genet. Sel. Evol.</source> <volume>56</volume>, <fpage>17</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s12711-024-00881-y</pub-id>, PMID: <pub-id pub-id-type="pmid">38429665</pub-id></citation></ref>
<ref id="B53">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Meuwissen</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Goddard</surname> <given-names>M.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>Accurate prediction of genetic values for complex traits by whole-genome resequencing</article-title>. <source>Genetics</source> <volume>185</volume>, <fpage>623</fpage>&#x2013;<lpage>631</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1534/genetics.110.116590</pub-id>, PMID: <pub-id pub-id-type="pmid">20308278</pub-id></citation></ref>
<ref id="B54">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Meuwissen</surname> <given-names>T. H.</given-names>
</name>
<name>
<surname>Hayes</surname> <given-names>B. J.</given-names>
</name>
<name>
<surname>Goddard</surname> <given-names>M. E.</given-names>
</name>
</person-group> (<year>2001</year>). <article-title>Prediction of total genetic value using genome-wide dense marker maps</article-title>. <source>Genetics</source> <volume>157</volume>, <fpage>1819</fpage>&#x2013;<lpage>1829</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/genetics/157.4.1819</pub-id>, PMID: <pub-id pub-id-type="pmid">11290733</pub-id></citation></ref>
<ref id="B55">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Meuwissen</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Hayes</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Goddard</surname> <given-names>M.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Genomic selection: A paradigm shift in animal breeding</article-title>. <source>Anim. Front.</source> <volume>6</volume>, <fpage>6</fpage>&#x2013;<lpage>14</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.2527/af.2016-0002</pub-id>
</citation></ref>
<ref id="B56">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Migicovsky</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Gardner</surname> <given-names>K. M.</given-names>
</name>
<name>
<surname>Money</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Sawler</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Bloom</surname> <given-names>J. S.</given-names>
</name>
<name>
<surname>Moffett</surname> <given-names>P.</given-names>
</name>
<etal/>
</person-group>. (<year>2016</year>). <article-title>Genome to phenome mapping in apple using historical data</article-title>. <source>Plant Genome</source> <volume>9</volume>, <elocation-id>plantgenome2015.11.0113</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3835/plantgenome2015.11.0113</pub-id>, PMID: <pub-id pub-id-type="pmid">27898813</pub-id></citation></ref>
<ref id="B57">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Minamikawa</surname> <given-names>M. F.</given-names>
</name>
<name>
<surname>Takada</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Terakami</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Saito</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Onogi</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Kajiya-Kanegae</surname> <given-names>H.</given-names>
</name>
<etal/>
</person-group>. (<year>2018</year>). <article-title>Genome-wide association study and genomic prediction using parental and breeding populations of Japanese pear (Pyrus pyrifolia Nakai)</article-title>. <source>Sci. Rep.</source> <volume>8</volume>, <fpage>11994</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41598-018-30154-w</pub-id>, PMID: <pub-id pub-id-type="pmid">30097588</pub-id></citation></ref>
<ref id="B58">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Misztal</surname> <given-names>I.</given-names>
</name>
<name>
<surname>Pocrnic</surname> <given-names>I.</given-names>
</name>
<name>
<surname>Lourenco</surname> <given-names>D.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Factors influencing accuracy of genomic selection with sequence information</article-title>. <source>J. Anim. Sci.</source> <volume>99</volume>, <fpage>20</fpage>&#x2013;<lpage>21</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/jas/skab235.034</pub-id>
</citation></ref>
<ref id="B59">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Moghaddar</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Khansefid</surname> <given-names>M.</given-names>
</name>
<name>
<surname>van der Werf</surname> <given-names>J. H. J.</given-names>
</name>
<name>
<surname>Bolormaa</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Duijvesteijn</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Clark</surname> <given-names>S. A.</given-names>
</name>
<etal/>
</person-group>. (<year>2019</year>). <article-title>Genomic prediction based on selected variants from imputed whole-genome sequence data in Australian sheep populations</article-title>. <source>Genet. Sel. Evol.</source> <volume>51</volume>, <fpage>1</fpage>&#x2013;<lpage>14</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s12711-019-0514-2</pub-id>, PMID: <pub-id pub-id-type="pmid">31805849</pub-id></citation></ref>
<ref id="B60">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Muranty</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Troggio</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Sadok</surname> <given-names>I. B.</given-names>
</name>
<name>
<surname>Rifa&#xef;</surname> <given-names>M. A.</given-names>
</name>
<name>
<surname>Auwerkerken</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Banchi</surname> <given-names>E.</given-names>
</name>
<etal/>
</person-group>. (<year>2015</year>). <article-title>Accuracy and responses of genomic selection on key traits in apple breeding</article-title>. <source>Hortic. Res.</source> <volume>2</volume>, <fpage>1</fpage>&#x2013;<lpage>12</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/hortres.2015.60</pub-id>, PMID: <pub-id pub-id-type="pmid">26744627</pub-id></citation></ref>
<ref id="B61">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Nazarian</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Gezan</surname> <given-names>S. A.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>GenoMatrix: A software package for pedigree-based and genomic prediction analyses on complex traits</article-title>. <source>J. Hered.</source> <volume>107</volume>, <fpage>372</fpage>&#x2013;<lpage>379</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/jhered/esw020</pub-id>, PMID: <pub-id pub-id-type="pmid">27025440</pub-id></citation></ref>
<ref id="B62">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Nsibi</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Gouble</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Bureau</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Flutre</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Sauvage</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Audergon</surname> <given-names>J.-M.</given-names>
</name>
<etal/>
</person-group>. (<year>2020</year>). <article-title>Adoption and optimization of genomic selection to sustain breeding for apricot fruit quality</article-title>. <source>G3 Genes|Genomes|Genetics</source> <volume>10</volume>, <fpage>4513</fpage>&#x2013;<lpage>4529</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1534/g3.120.401452</pub-id>, PMID: <pub-id pub-id-type="pmid">33067307</pub-id></citation></ref>
<ref id="B63">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>O&#x2019;Connor</surname> <given-names>K. M.</given-names>
</name>
<name>
<surname>Hayes</surname> <given-names>B. J.</given-names>
</name>
<name>
<surname>Hardner</surname> <given-names>C. M.</given-names>
</name>
<name>
<surname>Alam</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Henry</surname> <given-names>R. J.</given-names>
</name>
<name>
<surname>Topp</surname> <given-names>B. L.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Genomic selection and genetic gain for nut yield in an Australian macadamia breeding population</article-title>. <source>BMC Genomics</source> <volume>22</volume>, <fpage>370</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s12864-021-07694-z</pub-id>, PMID: <pub-id pub-id-type="pmid">34016055</pub-id></citation></ref>
<ref id="B64">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Plunkett</surname> <given-names>B. J.</given-names>
</name>
<name>
<surname>Henry-Kirk</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Friend</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Diack</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Helbig</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Mouhu</surname> <given-names>K.</given-names>
</name>
<etal/>
</person-group>. (<year>2019</year>). <article-title>Apple B-box factors regulate light-responsive anthocyanin biosynthesis genes</article-title>. <source>Sci. Rep.</source> <volume>9</volume>, <fpage>17762</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41598-019-54166-2</pub-id>, PMID: <pub-id pub-id-type="pmid">31780719</pub-id></citation></ref>
<ref id="B65">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Poplin</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Ruano-Rubio</surname> <given-names>V.</given-names>
</name>
<name>
<surname>DePristo</surname> <given-names>M. A.</given-names>
</name>
<name>
<surname>Fennell</surname> <given-names>T. J.</given-names>
</name>
<name>
<surname>Carneiro</surname> <given-names>M. O.</given-names>
</name>
<name>
<surname>Van der Auwera</surname> <given-names>G. A.</given-names>
</name>
<etal/>
</person-group>. (<year>2018</year>). <source>Scaling accurate genetic variant discovery to tens of thousands of samples. bioRxiv</source>. doi:&#xa0;<pub-id pub-id-type="doi">10.1101/201178</pub-id>
</citation></ref>
<ref id="B66">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Purcell</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Neale</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Todd-Brown</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Thomas</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Ferreira</surname> <given-names>M. A. R.</given-names>
</name>
<name>
<surname>Bender</surname> <given-names>D.</given-names>
</name>
<etal/>
</person-group>. (<year>2007</year>). <article-title>PLINK: A tool set for whole-genome association and population-based linkage analyses</article-title>. <source>Am. J. Hum. Genet.</source> <volume>81</volume>, <fpage>559</fpage>&#x2013;<lpage>575</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1086/519795</pub-id>, PMID: <pub-id pub-id-type="pmid">17701901</pub-id></citation></ref>
<ref id="B67">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Qi</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>J.</given-names>
</name>
<etal/>
</person-group>. (<year>2020</year>). <article-title>Characterization of the auxin efflux transporter PIN proteins in pear</article-title>. <source>Plants</source> <volume>9</volume>, <elocation-id>349</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/plants9030349</pub-id>, PMID: <pub-id pub-id-type="pmid">32164258</pub-id></citation></ref>
<ref id="B68">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Raymond</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Bouwman</surname> <given-names>A. C.</given-names>
</name>
<name>
<surname>Schrooten</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Houwing-Duistermaat</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Veerkamp</surname> <given-names>R. F.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Utility of whole-genome sequence data for across-breed genomic prediction</article-title>. <source>Genet. Sel. Evol.</source> <volume>50</volume>, <elocation-id>27</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s12711-018-0396-8</pub-id>, PMID: <pub-id pub-id-type="pmid">29776327</pub-id></citation></ref>
<ref id="B69">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Raymond</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Bouwman</surname> <given-names>A. C.</given-names>
</name>
<name>
<surname>Wientjes</surname> <given-names>Y. C. J.</given-names>
</name>
<name>
<surname>Schrooten</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Houwing-Duistermaat</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Veerkamp</surname> <given-names>R. F.</given-names>
</name>
</person-group> (<year>2018</year>c). <article-title>Genomic prediction for numerically small breeds, using models with pre-selected and differentially weighted markers</article-title>. <source>Genet. Sel. Evol.</source> <volume>50</volume>, <fpage>49</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s12711-018-0419-5</pub-id>, PMID: <pub-id pub-id-type="pmid">30314431</pub-id></citation></ref>
<ref id="B70">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Reddy</surname> <given-names>Y. T. N.</given-names>
</name>
<name>
<surname>Kurian</surname> <given-names>R. M.</given-names>
</name>
<name>
<surname>Ramachander</surname> <given-names>P. R.</given-names>
</name>
<name>
<surname>Singh</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Kohli</surname> <given-names>R. R.</given-names>
</name>
</person-group> (<year>2003</year>). <article-title>Long-term effects of rootstocks on growth and fruit yielding patterns of &#x2018;Alphonso&#x2019; mango (Mangifera indica L.)</article-title>. <source>Scientia Hortic.</source> <volume>97</volume>, <fpage>95</fpage>&#x2013;<lpage>108</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/S0304-4238(02)00025-0</pub-id>
</citation></ref>
<ref id="B71">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Riedelsheimer</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Endelman</surname> <given-names>J. B.</given-names>
</name>
<name>
<surname>Stange</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Sorrells</surname> <given-names>M. E.</given-names>
</name>
<name>
<surname>Jannink</surname> <given-names>J.-L.</given-names>
</name>
<name>
<surname>Melchinger</surname> <given-names>A. E.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Genomic predictability of interconnected biparental maize populations</article-title>. <source>Genetics</source> <volume>194</volume>, <fpage>493</fpage>&#x2013;<lpage>503</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1534/genetics.113.150227</pub-id>, PMID: <pub-id pub-id-type="pmid">23535384</pub-id></citation></ref>
<ref id="B72">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Roth</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Muranty</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Di Guardo</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Guerra</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Patocchi</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Costa</surname> <given-names>F.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Genomic prediction of fruit texture and training population optimization towards the application of genomic selection in apple</article-title>. <source>Hortic. Res.</source> <volume>7</volume>, <fpage>1</fpage>&#x2013;<lpage>14</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41438-020-00370-5</pub-id>, PMID: <pub-id pub-id-type="pmid">32922820</pub-id></citation></ref>
<ref id="B73">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Santiago</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Novo</surname> <given-names>I.</given-names>
</name>
<name>
<surname>Pardi&#xf1;as</surname> <given-names>A. F.</given-names>
</name>
<name>
<surname>Saura</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Caballero</surname> <given-names>A.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Recent demographic history inferred by high-resolution analysis of linkage disequilibrium</article-title>. <source>Mol. Biol. Evol.</source> <volume>37</volume>, <fpage>3642</fpage>&#x2013;<lpage>3653</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/molbev/msaa169</pub-id>, PMID: <pub-id pub-id-type="pmid">32642779</pub-id></citation></ref>
<ref id="B74">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Segura</surname> <given-names>V.</given-names>
</name>
<name>
<surname>Vilhj&#xe1;lmsson</surname> <given-names>B. J.</given-names>
</name>
<name>
<surname>Platt</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Korte</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Seren</surname> <given-names>&#xdc;.</given-names>
</name>
<name>
<surname>Long</surname> <given-names>Q.</given-names>
</name>
<etal/>
</person-group>. (<year>2012</year>). <article-title>An efficient multi-locus mixed-model approach for genome-wide association studies in structured populations</article-title>. <source>Nat. Genet.</source> <volume>44</volume>, <fpage>825</fpage>&#x2013;<lpage>830</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/ng.2314</pub-id>, PMID: <pub-id pub-id-type="pmid">22706313</pub-id></citation></ref>
<ref id="B75">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Slavov</surname> <given-names>G. T.</given-names>
</name>
<name>
<surname>DiFazio</surname> <given-names>S. P.</given-names>
</name>
<name>
<surname>Martin</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Schackwitz</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Muchero</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Rodgers-Melnick</surname> <given-names>E.</given-names>
</name>
<etal/>
</person-group>. (<year>2012</year>). <article-title>Genome resequencing reveals multiscale geographic structure and extensive linkage disequilibrium in the forest tree Populus trichocarpa</article-title>. <source>New Phytol.</source> <volume>196</volume>, <fpage>713</fpage>&#x2013;<lpage>725</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/j.1469-8137.2012.04258.x</pub-id>, PMID: <pub-id pub-id-type="pmid">22861491</pub-id></citation></ref>
<ref id="B76">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Srivastav</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Radadiya</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Ramachandra</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Jayaswal</surname> <given-names>P. K.</given-names>
</name>
<name>
<surname>Singh</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Singh</surname> <given-names>S.</given-names>
</name>
<etal/>
</person-group>. (<year>2023</year>). <article-title>High resolution mapping of QTLs for fruit color and firmness in Amrapali/Sensation mango hybrids</article-title>. <source>Front. Plant Sci.</source> <volume>14</volume>, <elocation-id>1135285</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fpls.2023.1135285</pub-id>, PMID: <pub-id pub-id-type="pmid">37351213</pub-id></citation></ref>
<ref id="B77">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sun</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Yu</surname> <given-names>X.</given-names>
</name>
<etal/>
</person-group>. (<year>2021</year>). <article-title>The R2R3-type MYB transcription factor MdMYB90-like is responsible for the enhanced skin color of an apple bud sport mutant</article-title>. <source>Hortic. Res.</source> <volume>8</volume>, <fpage>1</fpage>&#x2013;<lpage>16</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41438-021-00590-3</pub-id>, PMID: <pub-id pub-id-type="pmid">34193856</pub-id></citation></ref>
<ref id="B78">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Suontama</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Kl&#xe1;p&#x161;t&#x11b;</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Telfer</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Graham</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Stovold</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Low</surname> <given-names>C.</given-names>
</name>
<etal/>
</person-group>. (<year>2019</year>). <article-title>Efficiency of genomic prediction across two Eucalyptus nitens seed orchards with different selection histories</article-title>. <source>Heredity</source> <volume>122</volume>, <fpage>370</fpage>&#x2013;<lpage>379</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41437-018-0119-5</pub-id>, PMID: <pub-id pub-id-type="pmid">29980794</pub-id></citation></ref>
<ref id="B79">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tan</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Ingvarsson</surname> <given-names>P. K.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Integrating genome-wide association mapping of additive and dominance genetic effects to improve genomic prediction accuracy in Eucalyptus</article-title>. <source>Plant Genome</source> <volume>15</volume>, <elocation-id>e20208</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1002/tpg2.20208</pub-id>, PMID: <pub-id pub-id-type="pmid">35441826</pub-id></citation></ref>
<ref id="B80">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tomura</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Wilkinson</surname> <given-names>M. J.</given-names>
</name>
<name>
<surname>Cooper</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Powell</surname> <given-names>O.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>Improved genomic prediction performance with ensembles of diverse models</article-title>. <source>G3 Genes|Genomes|Genetics</source> <volume>15</volume>, <elocation-id>jkaf048</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/g3journal/jkaf048</pub-id>, PMID: <pub-id pub-id-type="pmid">40037571</pub-id></citation></ref>
<ref id="B81">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>van Binsbergen</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Calus</surname> <given-names>M. P. L.</given-names>
</name>
<name>
<surname>Bink</surname> <given-names>M. C. A. M.</given-names>
</name>
<name>
<surname>van Eeuwijk</surname> <given-names>F. A.</given-names>
</name>
<name>
<surname>Schrooten</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Veerkamp</surname> <given-names>R. F.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Genomic prediction using imputed whole-genome sequence data in Holstein Friesian cattle</article-title>. <source>Genet. Sel. Evol.</source> <volume>47</volume>, <fpage>1</fpage>&#x2013;<lpage>13</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s12711-015-0149-x</pub-id>, PMID: <pub-id pub-id-type="pmid">26381777</pub-id></citation></ref>
<ref id="B82">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>VanRaden</surname> <given-names>P. M.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>Efficient methods to compute genomic predictions</article-title>. <source>J. Dairy Sci.</source> <volume>91</volume>, <fpage>4414</fpage>&#x2013;<lpage>4423</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3168/jds.2007-0980</pub-id>, PMID: <pub-id pub-id-type="pmid">18946147</pub-id></citation></ref>
<ref id="B83">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>VanRaden</surname> <given-names>P. M.</given-names>
</name>
<name>
<surname>Tooker</surname> <given-names>M. E.</given-names>
</name>
<name>
<surname>O&#x2019;Connell</surname> <given-names>J. R.</given-names>
</name>
<name>
<surname>Cole</surname> <given-names>J. B.</given-names>
</name>
<name>
<surname>Bickhart</surname> <given-names>D. M.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Selecting sequence variants to improve genomic predictions for dairy cattle</article-title>. <source>Genet. Sel. Evol.</source> <volume>49</volume>, <fpage>1</fpage>&#x2013;<lpage>12</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s12711-017-0307-4</pub-id>, PMID: <pub-id pub-id-type="pmid">28270096</pub-id></citation></ref>
<ref id="B84">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Veerkamp</surname> <given-names>R. F.</given-names>
</name>
<name>
<surname>Bouwman</surname> <given-names>A. C.</given-names>
</name>
<name>
<surname>Schrooten</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Calus</surname> <given-names>M. P. L.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Genomic prediction using preselected DNA variants from a GWAS with whole-genome sequence data in Holstein&#x2013;Friesian cattle</article-title>. <source>Genet. Sel. Evol.</source> <volume>48</volume>, <fpage>95</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s12711-016-0274-1</pub-id>, PMID: <pub-id pub-id-type="pmid">27905878</pub-id></citation></ref>
<ref id="B85">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Vos</surname> <given-names>P. G.</given-names>
</name>
<name>
<surname>Paulo</surname> <given-names>M. J.</given-names>
</name>
<name>
<surname>Voorrips</surname> <given-names>R. E.</given-names>
</name>
<name>
<surname>Visser</surname> <given-names>R. G. F.</given-names>
</name>
<name>
<surname>van Eck</surname> <given-names>H. J.</given-names>
</name>
<name>
<surname>van Eeuwijk</surname> <given-names>F. A.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Evaluation of LD decay and various LD-decay estimators in simulated and SNP-array data of tetraploid potato</article-title>. <source>Theor. Appl. Genet.</source> <volume>130</volume>, <fpage>123</fpage>&#x2013;<lpage>135</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s00122-016-2798-8</pub-id>, PMID: <pub-id pub-id-type="pmid">27699464</pub-id></citation></ref>
<ref id="B86">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname> <given-names>S.-B.</given-names>
</name>
<name>
<surname>Feng</surname> <given-names>J.-Y.</given-names>
</name>
<name>
<surname>Ren</surname> <given-names>W.-L.</given-names>
</name>
<name>
<surname>Huang</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Zhou</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Wen</surname> <given-names>Y.-J.</given-names>
</name>
<etal/>
</person-group>. (<year>2016</year>). <article-title>Improving power and accuracy of genome-wide association studies via a multi-locus mixed linear model methodology</article-title>. <source>Sci. Rep.</source> <volume>6</volume>, <elocation-id>19444</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/srep19444</pub-id>, PMID: <pub-id pub-id-type="pmid">26787347</pub-id></citation></ref>
<ref id="B87">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Luo</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Huang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Gao</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Zhu</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Dang</surname> <given-names>Z.</given-names>
</name>
<etal/>
</person-group>. (<year>2020</year>). <article-title>The genome evolution and domestication of tropical fruit mango</article-title>. <source>Genome Biol.</source> <volume>21</volume>, <fpage>60</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s13059-020-01959-8</pub-id>, PMID: <pub-id pub-id-type="pmid">32143734</pub-id></citation></ref>
<ref id="B88">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>Z.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>GAPIT version 3: boosting power and accuracy for genomic association and prediction</article-title>. <source>Genomics Proteomics Bioinf.</source> <volume>19</volume>, <fpage>629</fpage>&#x2013;<lpage>640</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.gpb.2021.08.005</pub-id>, PMID: <pub-id pub-id-type="pmid">34492338</pub-id></citation></ref>
<ref id="B89">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Waples</surname> <given-names>R. K.</given-names>
</name>
<name>
<surname>Larson</surname> <given-names>W. A.</given-names>
</name>
<name>
<surname>Waples</surname> <given-names>R. S.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Estimating contemporary effective population size in non-model species using linkage disequilibrium across thousands of loci</article-title>. <source>Heredity</source> <volume>117</volume>, <fpage>233</fpage>&#x2013;<lpage>240</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/hdy.2016.60</pub-id>, PMID: <pub-id pub-id-type="pmid">27553452</pub-id></citation></ref>
<ref id="B90">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Warburton</surname> <given-names>C. L.</given-names>
</name>
<name>
<surname>Engle</surname> <given-names>B. N.</given-names>
</name>
<name>
<surname>Ross</surname> <given-names>E. M.</given-names>
</name>
<name>
<surname>Costilla</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Moore</surname> <given-names>S. S.</given-names>
</name>
<name>
<surname>Corbet</surname> <given-names>N. J.</given-names>
</name>
<etal/>
</person-group>. (<year>2020</year>). <article-title>Use of whole-genome sequence data and novel genomic selection strategies to improve selection for age at puberty in tropically-adapted beef heifers</article-title>. <source>Genet. Sel. Evol.</source> <volume>52</volume>, <fpage>28</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s12711-020-00547-5</pub-id>, PMID: <pub-id pub-id-type="pmid">32460805</pub-id></citation></ref>
<ref id="B91">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wei</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Chang</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Ren</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Cai</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Zhou</surname> <given-names>T.</given-names>
</name>
<etal/>
</person-group>. (<year>2023</year>). <article-title>Preselecting variants from large-scale genome-wide association study meta-analyses increases the genomic prediction accuracy of growth and carcass traits in large white pigs</article-title>. <source>Animals (Basel)</source> <volume>13</volume>, <elocation-id>3746</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/ani13243746</pub-id>, PMID: <pub-id pub-id-type="pmid">38136785</pub-id></citation></ref>
<ref id="B92">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Werner</surname> <given-names>C. R.</given-names>
</name>
<name>
<surname>Gaynor</surname> <given-names>R. C.</given-names>
</name>
<name>
<surname>Gorjanc</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Hickey</surname> <given-names>J. M.</given-names>
</name>
<name>
<surname>Kox</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Abbadi</surname> <given-names>A.</given-names>
</name>
<etal/>
</person-group>. (<year>2020</year>). <article-title>How population structure impacts genomic selection accuracy in cross-validation: implications for practical breeding</article-title>. <source>Front. Plant Sci.</source> <volume>11</volume>, <elocation-id>592977</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fpls.2020.592977</pub-id>, PMID: <pub-id pub-id-type="pmid">33391305</pub-id></citation></ref>
<ref id="B93">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>White</surname> <given-names>T. L.</given-names>
</name>
<name>
<surname>Adams</surname> <given-names>W. T.</given-names>
</name>
<name>
<surname>Neale</surname> <given-names>D. B.</given-names>
</name>
</person-group> (<year>2007</year>). <source>Forest genetics</source> (<publisher-loc>Wallingford</publisher-loc>: <publisher-name>CABI</publisher-name>).</citation></ref>
<ref id="B94">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wilkinson</surname> <given-names>M. J.</given-names>
</name>
<name>
<surname>McLay</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Kainer</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Elphinstone</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Dillon</surname> <given-names>N. L.</given-names>
</name>
<name>
<surname>Webb</surname> <given-names>M.</given-names>
</name>
<etal/>
</person-group>. (<year>2025</year>). <article-title>Centromeres are hotspots for chromosomal inversions and breeding traits in mango</article-title>. <source>New Phytol.</source> <volume>245</volume>, <fpage>899</fpage>&#x2013;<lpage>913</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/nph.20252</pub-id>, PMID: <pub-id pub-id-type="pmid">39548673</pub-id></citation></ref>
<ref id="B95">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wilkinson</surname> <given-names>M. J.</given-names>
</name>
<name>
<surname>Yamashita</surname> <given-names>R.</given-names>
</name>
<name>
<surname>James</surname> <given-names>M. E.</given-names>
</name>
<name>
<surname>Bally</surname> <given-names>I. S. E.</given-names>
</name>
<name>
<surname>Dillon</surname> <given-names>N. L.</given-names>
</name>
<name>
<surname>Ali</surname> <given-names>A.</given-names>
</name>
<etal/>
</person-group>. (<year>2022</year>). <article-title>The influence of genetic structure on phenotypic diversity in the Australian mango (Mangifera indica) gene pool</article-title>. <source>Sci. Rep.</source> <volume>12</volume>, <fpage>20614</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41598-022-24800-7</pub-id>, PMID: <pub-id pub-id-type="pmid">36450793</pub-id></citation></ref>
<ref id="B96">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wu</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>Q.</given-names>
</name>
<name>
<surname>Lv</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Du</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>W.</given-names>
</name>
<etal/>
</person-group>. (<year>2021</year>). <article-title>Growth-regulating factor 5 (GRF5)-mediated gene regulatory network promotes leaf growth and expansion in poplar</article-title>. <source>New Phytol.</source> <volume>230</volume>, <fpage>612</fpage>&#x2013;<lpage>628</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/nph.17179</pub-id>, PMID: <pub-id pub-id-type="pmid">33423287</pub-id></citation></ref>
<ref id="B97">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Benyamin</surname> <given-names>B.</given-names>
</name>
<name>
<surname>McEvoy</surname> <given-names>B. P.</given-names>
</name>
<name>
<surname>Gordon</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Henders</surname> <given-names>A. K.</given-names>
</name>
<name>
<surname>Nyholt</surname> <given-names>D. R.</given-names>
</name>
<etal/>
</person-group>. (<year>2010</year>). <article-title>Common SNPs explain a large proportion of heritability for human height</article-title>. <source>Nat. Genet.</source> <volume>42</volume>, <fpage>565</fpage>&#x2013;<lpage>569</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/ng.608</pub-id>, PMID: <pub-id pub-id-type="pmid">20562875</pub-id></citation></ref>
<ref id="B98">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ye</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Song</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Ding</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Pre-selecting markers based on fixation index scores improved the power of genomic evaluations in a combined Yorkshire pig population</article-title>. <source>Animal</source> <volume>14</volume>, <fpage>1555</fpage>&#x2013;<lpage>1564</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1017/S1751731120000506</pub-id>, PMID: <pub-id pub-id-type="pmid">32209149</pub-id></citation></ref>
<ref id="B99">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>H.</given-names>
</name>
<name>
<surname>An</surname> <given-names>H. S.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>X. Z.</given-names>
</name>
<name>
<surname>Han</surname> <given-names>Z. H.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Low expression of PIN gene family members is involved in triggering the dwarfing effect in M9 interstem but not in M9 rootstock apple trees</article-title>. <source>Acta Physiol. Plant.</source> <volume>37</volume>, <fpage>104</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s11738-015-1851-6</pub-id>
</citation></ref>
<ref id="B100">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Dong</surname> <given-names>S.-S.</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>J.-Y.</given-names>
</name>
<name>
<surname>He</surname> <given-names>W.-M.</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>T.-L.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>PopLDdecay: a fast and effective tool for linkage disequilibrium decay analysis based on variant call format files</article-title>. <source>Bioinformatics</source> <volume>35</volume>, <fpage>1786</fpage>&#x2013;<lpage>1788</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/bioinformatics/bty875</pub-id>, PMID: <pub-id pub-id-type="pmid">30321304</pub-id></citation></ref>
<ref id="B101">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Tanabe</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Tamura</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Yoshida</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Matsumoto</surname> <given-names>K.</given-names>
</name>
</person-group> (<year>2006</year>). <article-title>The impact of cell division and cell enlargement on the evolution of fruit size in Pyrus pyrifolia</article-title>. <source>Ann. Bot.</source> <volume>98</volume>, <fpage>537</fpage>&#x2013;<lpage>543</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/aob/mcl144</pub-id>, PMID: <pub-id pub-id-type="pmid">16845135</pub-id></citation></ref>
<ref id="B102">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>M.-Y.</given-names>
</name>
<name>
<surname>Xue</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Hu</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Xue</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>R.</given-names>
</name>
<etal/>
</person-group>. (<year>2021</year>). <article-title>Genome-wide association studies provide insights into the genetic determination of fruit traits of pear</article-title>. <source>Nat. Commun.</source> <volume>12</volume>, <fpage>1144</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41467-021-21378-y</pub-id>, PMID: <pub-id pub-id-type="pmid">33602909</pub-id></citation></ref>
<ref id="B103">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhou</surname> <given-names>G.-L.</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>F.-J.</given-names>
</name>
<name>
<surname>Qiao</surname> <given-names>J.-K.</given-names>
</name>
<name>
<surname>Che</surname> <given-names>Z.-X.</given-names>
</name>
<name>
<surname>Xiang</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>X.-L.</given-names>
</name>
<etal/>
</person-group>. (<year>2023</year>). <article-title>E-GWAS: an ensemble-like GWAS strategy that provides effective control over false positive rates without decreasing true positives</article-title>. <source>Genet. Sel. Evol.</source> <volume>55</volume>, <fpage>46</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s12711-023-00820-3</pub-id>, PMID: <pub-id pub-id-type="pmid">37407918</pub-id></citation></ref>
</ref-list>
</back>
</article>