<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="2.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Plant Sci.</journal-id>
<journal-title>Frontiers in Plant Science</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Plant Sci.</abbrev-journal-title>
<issn pub-type="epub">1664-462X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fpls.2024.1373318</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Plant Science</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Enhancing genomic prediction with Stacking Ensemble Learning in Arabica Coffee</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Nascimento</surname>
<given-names>Moyses</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="author-notes" rid="fn001">
<sup>*</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/823352"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Nascimento</surname>
<given-names>Ana Carolina Campana</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2522494"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Azevedo</surname>
<given-names>Camila Ferreira</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/824178"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Oliveira</surname>
<given-names>Antonio Carlos Baiao de</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Caixeta</surname>
<given-names>Eveline Teixeira</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/599197"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Jarquin</surname>
<given-names>Diego</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="author-notes" rid="fn001">
<sup>*</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1824842"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>Laboratory of Intelligence Computational and Statistical Learning (LICAE), Department of Statistics, Federal University of Vi&#xe7;osa</institution>, <addr-line>Vi&#xe7;osa</addr-line>, <country>Brazil</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Agronomy Department, University of Florida</institution>, <addr-line>Gainesville, FL</addr-line>, <country>United States</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>Embrapa Coffee, Brazilian Agricultural Research Corporation (Embrapa)</institution>, <addr-line>Bras&#xed;lia</addr-line>, <country>Brazil</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>Edited by: Andr&#xe9;s J. Cort&#xe9;s, Colombian Corporation for Agricultural Research (AGROSAVIA), Colombia</p>
</fn>
<fn fn-type="edited-by">
<p>Reviewed by: Germano Costa-Neto, Syngenta, United States</p>
<p>Zitong Li, Commonwealth Scientific and Industrial Research Organisation (CSIRO), Australia</p>
</fn>
<fn fn-type="corresp" id="fn001">
<p>*Correspondence: Moyses Nascimento, <email xlink:href="mailto:moysesnascim@ufv.br">moysesnascim@ufv.br</email>; Diego Jarquin, <email xlink:href="mailto:jhernandezjarqui@ufl.edu">jhernandezjarqui@ufl.edu</email>
</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>17</day>
<month>07</month>
<year>2024</year>
</pub-date>
<pub-date pub-type="collection">
<year>2024</year>
</pub-date>
<volume>15</volume>
<elocation-id>1373318</elocation-id>
<history>
<date date-type="received">
<day>19</day>
<month>01</month>
<year>2024</year>
</date>
<date date-type="accepted">
<day>12</day>
<month>06</month>
<year>2024</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2024 Nascimento, Nascimento, Azevedo, Oliveira, Caixeta and Jarquin</copyright-statement>
<copyright-year>2024</copyright-year>
<copyright-holder>Nascimento, Nascimento, Azevedo, Oliveira, Caixeta and Jarquin</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>Coffee Breeding programs have traditionally relied on observing plant characteristics over years, a slow and costly process. Genomic selection (GS) offers a DNA-based alternative for faster selection of superior cultivars. Stacking Ensemble Learning (SEL) combines multiple models for potentially even more accurate selection. This study explores SEL potential in coffee breeding, aiming to improve prediction accuracy for important traits [yield (YL), total number of fruits (NF), leaf miner infestation (LM), and cercosporiosis incidence (Cer)] in Coffea Arabica. We analyzed data from 195 individuals genotyped for 21,211 single-nucleotide polymorphism (SNP) markers. To comprehensively assess model performance, we employed a cross-validation (CV) scheme. Genomic Best Linear Unbiased Prediction (GBLUP), multivariate adaptive regression splines (MARS), Quantile Random Forest (QRF), and Random Forest (RF) served as base learners. For the meta-learner within the SEL framework, various options were explored, including Ridge Regression, RF, GBLUP, and Single Average. The SEL method was able to improve the predictive ability (PA) of important traits in Coffea Arabica. SEL presented higher PA compared with those obtained for all base learner methods. The gains in PA in relation to GBLUP were 87.44% (the ratio between the PA obtained from the best Stacking model and the GBLUP), 37.83%, 199.82%, and 14.59% for YL, NF, LM and Cer, respectively. Overall, SEL presents a promising approach for GS. By combining predictions from multiple models, SEL can potentially enhance the PA of GS for complex traits.</p>
</abstract>
<kwd-group>
<kwd>statistical and machine learning</kwd>
<kwd>prediction accuracy</kwd>
<kwd>plant breeding</kwd>
<kwd>ensemble methods</kwd>
<kwd>GBLUP</kwd>
</kwd-group>
<counts>
<fig-count count="7"/>
<table-count count="1"/>
<equation-count count="9"/>
<ref-count count="62"/>
<page-count count="14"/>
<word-count count="7507"/>
</counts>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-in-acceptance</meta-name>
<meta-value>Plant Breeding</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec id="s1" sec-type="intro">
<label>1</label>
<title>Introduction</title>
<p>Coffee is one of the most globally consumed beverages, presenting importance in terms of its potential health, socioeconomic, and economic effects (<xref ref-type="bibr" rid="B51">Porto et&#xa0;al., 2019</xref>). These effects drive breeding programs worldwide to develop high-yielding, adaptable cultivars delivering superior bean quality (<xref ref-type="bibr" rid="B6">Barbosa et&#xa0;al., 2019</xref>). However, traditional selection methods based on phenotypic observations of the plants or their family history (pedigree) are expensive and time-consuming, especially for perennial crops such as coffee.</p>
<p>An alternative approach denoted genomic selection (GS) has been used as a successful tool in genetic improvement (<xref ref-type="bibr" rid="B41">Meuwissen et&#xa0;al., 2001</xref>). GS helps increase genetic gain per generation by allowing for earlier selection through improved prediction of the potential of individual plants (<xref ref-type="bibr" rid="B15">Daetwyler et&#xa0;al., 2013</xref>; <xref ref-type="bibr" rid="B48">Nascimento et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B58">Voss-Fels et&#xa0;al., 2019</xref>). In the field of coffee breeding, GS has been utilized with the dual aim of accelerating genetic gain through early selection and improving prediction accuracy (<xref ref-type="bibr" rid="B55">Sousa et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B4">Alkimim et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B56">Sousa et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B13">Coelho de Sousa et&#xa0;al., 2022</xref>).</p>
<p>Among several prediction models, Genomic Best Linear Unbiased Prediction (GBLUP) is the most widely used approach for genomic prediction due to its advantages (<xref ref-type="bibr" rid="B62">Zhang et&#xa0;al., 2021</xref>). Compared to other parametric methods, GBLUP allows narrow-sense heritability to be accurately estimated (<xref ref-type="bibr" rid="B33">Li et&#xa0;al., 2019</xref>) and presents higher computational efficiency (<xref ref-type="bibr" rid="B25">Hernandez et&#xa0;al., 2020</xref>). GBLUP modeling is also flexible. It can be modified to incorporate additional genetic information beyond the typical single-nucleotide polymorphism (SNP) markers. Specifically, this modeling allows accounting for non-additive genetic effects, environmental factors, and even genotype-by-environment interactions, enriching the analysis and potentially improving prediction accuracy (<xref ref-type="bibr" rid="B27">Jarqu&#xed;n et&#xa0;al., 2014</xref>).</p>
<p>In the Artificial Intelligence Era, the interest in semi- and non-parametric methods for GS is increasing (<xref ref-type="bibr" rid="B31">Larkin et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B13">Coelho de Sousa et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B53">Seyum et&#xa0;al., 2022</xref>). These approaches, such as Artificial Neural Networks and Decision Trees, do not require prior assumptions about the relationships between inputs (SNP markers) and the output (phenotypic observations), allowing great flexibility to handle complex non-additive effects, such as dominance and epistasis (<xref ref-type="bibr" rid="B37">McKinney et&#xa0;al., 2006</xref>; <xref ref-type="bibr" rid="B1">Abdollahi-Arpanahi et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B13">Coelho de Sousa et&#xa0;al., 2022</xref>). In general, despite their potential, these approaches do not outperform the traditional parametric methods (e.g., GBLUP, Bayesian Alphabet - <xref ref-type="bibr" rid="B22">Gianola et&#xa0;al., 2009</xref>) used to predict the genetic merit of individuals (<xref ref-type="bibr" rid="B34">Liang et&#xa0;al., 2021</xref>).</p>
<p>Aiming to enhance predictive ability (PA), Ensemble Learning (EL) combines predictions from multiple models (base learners) into a single prediction (meta-learner) (<xref ref-type="bibr" rid="B40">Mendes-Moreira et&#xa0;al., 2012</xref>; <xref ref-type="bibr" rid="B20">Ganaie et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B42">Mienye and Sun, 2022</xref>). This approach leverages the strengths of diverse models to potentially generate more robust results compared to relying on a single learner (<xref ref-type="bibr" rid="B34">Liang et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B28">Kalule et&#xa0;al., 2023</xref>). In the context of GS, EL has found application through methods such as Random Forest (RF) and Bagging (Bag) (<xref ref-type="bibr" rid="B60">Xu et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B1">Abdollahi-Arpanahi et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B56">Sousa et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B14">Costa et&#xa0;al., 2022</xref>). These methods, categorized as Homogeneous Learning (HL), utilize a single framework to produce a single prediction value. Conversely, the Stacking Ensemble Learning (SEL) approach combines predictions from diverse methods, potentially outperforming HL (<xref ref-type="bibr" rid="B40">Mendes-Moreira et&#xa0;al., 2012</xref>). SEL has seen success in GS, improving PA in Chinese Simmental cattle, Dutch cattle, and pine (<xref ref-type="bibr" rid="B34">Liang et&#xa0;al., 2021</xref>), achieving higher accuracy than GBLUP for most evaluated traits.</p>
<p>Despite being interesting, the EL approach raises some issues that need to be considered. First, since the same individuals are used to fit the model(s) in the EL approach, correlation is expected to exist between the predictions derived from the different methods. This well-known statistical problem is referred to as multicollinearity (<xref ref-type="bibr" rid="B47">Montgomery et&#xa0;al., 2021</xref>) and causes high variability of the estimated effects. The second issue is related to which dataset should be used to fit the meta-learner. One option is to directly use the predicted values derived from the base learners. In this case, the simple mean or some regression model that accounts for the multicollinearity problem (e.g., Ridge Regression) can be implemented to make up a single prediction. An alternative option also could consider combining the predicted values with the genomic covariates (i.e., SNP markers|predicted values from the base learners) with the previous training data as new inputs. In this regard, in addition to the multicollinearity, the curse of dimensionality is another issue to consider mainly because these are covariables of different type.</p>
<p>
<xref ref-type="bibr" rid="B34">Liang et&#xa0;al. (2021)</xref> used the predicted values derived from a multiple regression model as the meta-learner. These authors obtained good results, improving PA compared to the conventional genomic prediction models on three different datasets. However, the use of an expanded training dataset augmented by SNP markers could be beneficial to further enhance the PA, and it emerges as an interesting approach. In this case, a model that addresses both multicollinearity and dimensionality problems should be used. One possible solution is to use a two-kernel GBLUP model as the meta-learner model. Another approach to evaluate is to consider only the predicted values provided by the best base learner models.</p>
<p>To date, no research has applied SEL to improve the prediction accuracy of important traits in coffee cultivars. This approach presents potential for coffee breeding, as it has been shown to outperform standard methods in other applications (<xref ref-type="bibr" rid="B34">Liang et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B44">Mohammed and Kora, 2023</xref>). By combining the strengths of multiple prediction models, SEL could lead to more reliable and accurate identification of valuable genetic traits in coffee plants, accelerating the development of superior coffee varieties.</p>
<p>In light of the mentioned points, the objective of this study was to use and evaluate the SEL to improve PA of important traits in Coffea Arabica. For that, the GBLUP, multivariate adaptive regression splines (MARS), Quantile Random Forest (QRF), and RF models were used as the base learners. Several approaches were considered as the meta-learner to construct the SEL framework. Specifically, the expanded- and non-expanded datasets were used for training. In addition, models that account for multicollinearity (Ridge Regression) and multicollinearity and dimensionality jointly (GBLUP) were also implemented.</p>
</sec>
<sec id="s2" sec-type="materials|methods">
<label>2</label>
<title>Materials and methods</title>
<sec id="s2_1">
<label>2.1</label>
<title>Phenotypic and genotypic data</title>
<p>The data were collected from the C. arabica breeding program, which is a joint partnership among the Agricultural Research Company of Minas Gerais (EPAMIG), the Federal University of Vi&#xe7;osa (UFV), and the Brazilian Agricultural Research Corporation (EMBRAPA). An experimental area is maintained at the Department of Phytopathology&#x2014;UFV (20&#xb0;44&#x2032;25&#x201d; S, 42&#xb0;50&#x2032;52&#x201d; W). The database is composed of 13 progenies derived from crosses between three parents of the Catua&#xed; cultivar and three parents of the H&#xed;brido de Timor (HdT). Fifteen genotypes of the abovementioned progeny set (totaling 195 individuals) were genotyped for 21,211 SNP markers by Rapid Genomics, located in Gainesville, Florida, USA. Information about the probe design and SNP identification is detailed in <xref ref-type="bibr" rid="B54">Sousa et&#xa0;al. (2017)</xref>. The SNP marker set is widely distributed in the genome and in all coffee chromosomes, being useful for accurate studies on diversity and population structure, as well as selection and genomic association in C. arabica (<xref ref-type="bibr" rid="B54">Sousa et&#xa0;al., 2017</xref>, <xref ref-type="bibr" rid="B55">Sousa et&#xa0;al., 2019</xref>). The SNP quality control was carried out considering genotypic call rate and minor allele frequency equal to or greater than 90% and smaller than 5%, respectively. In this study a pre-selected set of 5,970 markers that did not reduce the PA of Arabica Coffee compared to the original set of SNP markers in a previous study was used (<xref ref-type="bibr" rid="B5">Arcanjo et&#xa0;al., 2024</xref>).</p>
<p>The genotypes were planted on February 11, 2011, using a spacing of 3.0&#xa0;m between rows and 0.7&#xa0;m between plants following an augmented (check varieties) blocks experimental design. Nutritional management was carried out following the requirements of the crop. The phenotypic evaluations were performed in 2014, 2015, and 2016.&#xa0;A total of four traits were scored, two associated with the productivity, yield (YL&#x2014;liters of fresh cherries harvested per plant) and total number of fruits (NF) &#x2014;and two more associated with disease resistance&#x2014;leaf miner infestation (LM) and cercosporiosis incidence (Cer) in Coffea Arabica. The incidence of cercosporiosis and leaf miner was evaluated using a score scale ranging from 1 to 5, in which 1 corresponded to genotypes without symptoms and 5 referred to highly susceptible genotypes. A comprehensive description of how the evaluations of each trait were performed can be found in <xref ref-type="bibr" rid="B55">Sousa et&#xa0;al. (2019)</xref>.</p>
</sec>
<sec id="s2_2">
<label>2.2</label>
<title>Phenotypic data analysis</title>
<p>The phenotypic data for YL, NF, LM, and Cer were analyzed according to the following statistical model</p>
<disp-formula>
<mml:math display="block" id="M1">
<mml:mrow>
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mi>y</mml:mi>
</mml:mstyle>
<mml:mo>=</mml:mo>
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mi>X</mml:mi>
<mml:mi>u</mml:mi>
</mml:mstyle>
<mml:mo>+</mml:mo>
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mi>Z</mml:mi>
<mml:mi>g</mml:mi>
</mml:mstyle>
<mml:mo>+</mml:mo>
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mi>W</mml:mi>
<mml:mi>p</mml:mi>
</mml:mstyle>
<mml:mo>+</mml:mo>
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mi>V</mml:mi>
<mml:mi>r</mml:mi>
</mml:mstyle>
<mml:mo>+</mml:mo>
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mi>T</mml:mi>
<mml:mi>b</mml:mi>
</mml:mstyle>
<mml:mo>+</mml:mo>
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mi>R</mml:mi>
<mml:mi>i</mml:mi>
</mml:mstyle>
<mml:mo>+</mml:mo>
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mi>e</mml:mi>
</mml:mstyle>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math display="inline" id="im1">
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mi>y</mml:mi>
</mml:mstyle>
</mml:math>
</inline-formula> represents the vector of observed phenotypes; <inline-formula>
<mml:math display="inline" id="im2">
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mi>u</mml:mi>
</mml:mstyle>
</mml:math>
</inline-formula> is the vector referring to the general mean in each evaluation year; <inline-formula>
<mml:math display="inline" id="im3">
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mi>g</mml:mi>
</mml:mstyle>
</mml:math>
</inline-formula> is the vector of genetic random effects corresponding to the progeny such that <inline-formula>
<mml:math display="inline" id="im4">
<mml:mrow>
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mi>g</mml:mi>
</mml:mstyle>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mo>&#x223c;</mml:mo>
<mml:mtext>N</mml:mtext>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mn mathvariant="bold">0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mtext mathvariant="bold">I</mml:mtext>
<mml:msubsup>
<mml:mtext>&#x3c3;</mml:mtext>
<mml:mi>g</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>; <inline-formula>
<mml:math display="inline" id="im5">
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mi>p</mml:mi>
</mml:mstyle>
</mml:math>
</inline-formula> is the random permanent environmental effect <inline-formula>
<mml:math display="inline" id="im6">
<mml:mrow>
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mi>p</mml:mi>
</mml:mstyle>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mo>&#x223c;</mml:mo>
<mml:mtext>N</mml:mtext>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mn mathvariant="bold">0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mtext mathvariant="bold">I</mml:mtext>
<mml:msubsup>
<mml:mtext>&#x3c3;</mml:mtext>
<mml:mi>p</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>; <inline-formula>
<mml:math display="inline" id="im7">
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mi>r</mml:mi>
</mml:mstyle>
</mml:math>
</inline-formula> is the population random effect <inline-formula>
<mml:math display="inline" id="im8">
<mml:mrow>
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mi>r</mml:mi>
</mml:mstyle>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mo>&#x223c;</mml:mo>
<mml:mtext>N</mml:mtext>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mn mathvariant="bold">0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mtext mathvariant="bold">I</mml:mtext>
<mml:msubsup>
<mml:mtext>&#x3c3;</mml:mtext>
<mml:mi>r</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>; <inline-formula>
<mml:math display="inline" id="im9">
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mi>b</mml:mi>
</mml:mstyle>
</mml:math>
</inline-formula> is the plot random effect <inline-formula>
<mml:math display="inline" id="im10">
<mml:mrow>
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mi>b</mml:mi>
</mml:mstyle>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mo>&#x223c;</mml:mo>
<mml:mtext>N</mml:mtext>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mn mathvariant="bold">0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mtext mathvariant="bold">I</mml:mtext>
<mml:msubsup>
<mml:mtext>&#x3c3;</mml:mtext>
<mml:mi>b</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>; <bold>i</bold> corresponds to the random effect of the interaction between progenies and the years <inline-formula>
<mml:math display="inline" id="im11">
<mml:mrow>
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mi>i</mml:mi>
</mml:mstyle>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mo>&#x223c;</mml:mo>
<mml:mtext>N</mml:mtext>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mn mathvariant="bold">0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mtext mathvariant="bold">I</mml:mtext>
<mml:msubsup>
<mml:mtext>&#x3c3;</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>; and <bold>e</bold> is the experimental error assumed to be Independent and Identically Distributed (IID) outcomes from a normal density such that <inline-formula>
<mml:math display="inline" id="im12">
<mml:mrow>
<mml:mo>&#xa0;</mml:mo>
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mi>e</mml:mi>
</mml:mstyle>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mo>&#x223c;</mml:mo>
<mml:mtext>N</mml:mtext>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mn mathvariant="bold">0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mtext mathvariant="bold">I</mml:mtext>
<mml:msubsup>
<mml:mtext>&#x3c3;</mml:mtext>
<mml:mi>e</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>. The genetic parameters, heritability and correlation, were also estimated for the evaluated traits. The individual heritability was estimated by <inline-formula>
<mml:math display="inline" id="im13">
<mml:mrow>
<mml:msup>
<mml:mi>h</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msubsup>
<mml:mtext>&#x3c3;</mml:mtext>
<mml:mi>g</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:mrow>
<mml:mrow>
<mml:msubsup>
<mml:mtext>&#x3c3;</mml:mtext>
<mml:mi>g</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
<mml:mo>+</mml:mo>
<mml:msubsup>
<mml:mtext>&#x3c3;</mml:mtext>
<mml:mi>p</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
<mml:mo>+</mml:mo>
<mml:msubsup>
<mml:mtext>&#x3c3;</mml:mtext>
<mml:mi>r</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
<mml:mo>+</mml:mo>
<mml:msubsup>
<mml:mtext>&#x3c3;</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
<mml:mo>+</mml:mo>
<mml:msubsup>
<mml:mtext>&#x3c3;</mml:mtext>
<mml:mi>e</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#xa0;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>. In addition, the adjusted phenotypes (<bold>y</bold>*, corrected BLUPs) for the year, plot, and year &#xd7; progenies interaction effects were calculated and used for GS. The analyses were carried out using Selegen-REML/BLUP <italic>software</italic> (<xref ref-type="bibr" rid="B16">de Resende, 2016</xref>).</p>
</sec>
<sec id="s2_3">
<label>2.3</label>
<title>Individual genomic prediction</title>
<sec id="s2_3_1">
<label>2.3.1</label>
<title>GBLUP</title>
<p>The parameterization of the Genomic prediction G-BLUP model can be defined as follows</p>
<disp-formula>
<mml:math display="block" id="M2">
<mml:mrow>
<mml:msup>
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mi>y</mml:mi>
</mml:mstyle>
<mml:mo>*</mml:mo>
</mml:msup>
<mml:mo>=</mml:mo>
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mi>X</mml:mi>
<mml:mi>b</mml:mi>
</mml:mstyle>
<mml:mo>+</mml:mo>
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mi>Z</mml:mi>
<mml:mi>u</mml:mi>
</mml:mstyle>
<mml:mo>+</mml:mo>
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mi>e</mml:mi>
</mml:mstyle>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math display="inline" id="im14">
<mml:mrow>
<mml:msup>
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mi>y</mml:mi>
</mml:mstyle>
<mml:mo>*</mml:mo>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> is the vector of adjusted phenotypic observations as previously detailed; <inline-formula>
<mml:math display="inline" id="im15">
<mml:mrow>
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mi>b</mml:mi>
</mml:mstyle>
<mml:mtext>&#xa0;</mml:mtext>
</mml:mrow>
</mml:math>
</inline-formula> is the vector of means; <bold>X</bold> is the incidence matrix corresponding to the fixed effects; <inline-formula>
<mml:math display="inline" id="im16">
<mml:mrow>
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mi>u</mml:mi>
</mml:mstyle>
<mml:mtext>&#xa0;</mml:mtext>
</mml:mrow>
</mml:math>
</inline-formula> is the vector of individual additive genomic effects such that <inline-formula>
<mml:math display="inline" id="im17">
<mml:mrow>
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mi>u</mml:mi>
</mml:mstyle>
<mml:mo>&#x223c;</mml:mo>
<mml:mtext>N</mml:mtext>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mi>G</mml:mi>
</mml:mstyle>
<mml:msubsup>
<mml:mtext>&#x3c3;</mml:mtext>
<mml:mi>g</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mtext>&#xa0;</mml:mtext>
</mml:mrow>
</mml:math>
</inline-formula> where <inline-formula>
<mml:math display="inline" id="im18">
<mml:mrow>
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mi>G</mml:mi>
</mml:mstyle>
<mml:mtext>&#xa0;</mml:mtext>
</mml:mrow>
</mml:math>
</inline-formula> is the kinship matrix describing genomic similarities between pairs of individuals, <inline-formula>
<mml:math display="inline" id="im19">
<mml:mrow>
<mml:msubsup>
<mml:mtext>&#x3c3;</mml:mtext>
<mml:mi>g</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> is the additive genetic variance, <bold>Z</bold> is the incidence matrix that connect phenotypes with genotypes; <inline-formula>
<mml:math display="inline" id="im20">
<mml:mrow>
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mi>e</mml:mi>
</mml:mstyle>
<mml:mtext>&#xa0;</mml:mtext>
</mml:mrow>
</mml:math>
</inline-formula> is the random error vector with <inline-formula>
<mml:math display="inline" id="im21">
<mml:mrow>
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mi>e</mml:mi>
</mml:mstyle>
<mml:mo>&#x223c;</mml:mo>
<mml:mtext>N</mml:mtext>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mn mathvariant="bold">0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mi>I</mml:mi>
</mml:mstyle>
<mml:msubsup>
<mml:mtext>&#x3c3;</mml:mtext>
<mml:mi>e</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mtext>&#xa0;</mml:mtext>
</mml:mrow>
</mml:math>
</inline-formula> where <inline-formula>
<mml:math display="inline" id="im22">
<mml:mrow>
<mml:msubsup>
<mml:mtext>&#x3c3;</mml:mtext>
<mml:mi>e</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> is the residual variance. The additive genomic kinship matrix <bold>G</bold> was obtained as described by <xref ref-type="bibr" rid="B57">VanRaden (2008)</xref>
</p>
<disp-formula>
<mml:math display="block" id="M3">
<mml:mrow>
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mi>G</mml:mi>
</mml:mstyle>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msup>
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mi>W</mml:mi>
</mml:mstyle>
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mi>T</mml:mi>
</mml:mstyle>
</mml:msup>
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mi>W</mml:mi>
</mml:mstyle>
</mml:mrow>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mtext>i</mml:mtext>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mtext>n</mml:mtext>
</mml:msubsup>
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:msub>
<mml:mtext>p</mml:mtext>
<mml:mtext>i</mml:mtext>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mtext>p</mml:mtext>
<mml:mtext>i</mml:mtext>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mstyle>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where, <inline-formula>
<mml:math display="inline" id="im23">
<mml:mrow>
<mml:mstyle mathvariant="bold" mathsize="normal">
<mml:mi>W</mml:mi>
</mml:mstyle>
<mml:mtext>&#xa0;</mml:mtext>
</mml:mrow>
</mml:math>
</inline-formula> is the centered (by columns) matrix of SNPs, which specifies the marker genotypes for each individual as 0, 1 or 2; <inline-formula>
<mml:math display="inline" id="im24">
<mml:mrow>
<mml:msub>
<mml:mtext>p</mml:mtext>
<mml:mtext>i</mml:mtext>
</mml:msub>
<mml:mtext>&#xa0;</mml:mtext>
</mml:mrow>
</mml:math>
</inline-formula> is the frequency of the second allele at the locus, that is,</p>
<disp-formula>
<mml:math display="block" id="M4">
<mml:mrow>
<mml:msub>
<mml:mtext>W</mml:mtext>
<mml:mrow>
<mml:mtext>ij</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mrow>
<mml:mo>{</mml:mo>
<mml:mrow>
<mml:mtable>
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>2</mml:mn>
<mml:msub>
<mml:mtext>p</mml:mtext>
<mml:mrow>
<mml:mtext>j&#xa0;</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mtext>if</mml:mtext>
<mml:mo>&#xa0;</mml:mo>
<mml:msub>
<mml:mtext>M</mml:mtext>
<mml:mrow>
<mml:mtext>ij</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mtext>AA</mml:mtext>
<mml:mo>&#xa0;</mml:mo>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>2</mml:mn>
<mml:msub>
<mml:mtext>p</mml:mtext>
<mml:mtext>j</mml:mtext>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mtext>if</mml:mtext>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:msub>
<mml:mtext>M</mml:mtext>
<mml:mrow>
<mml:mtext>ij</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mtext>Aa</mml:mtext>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>2</mml:mn>
<mml:msub>
<mml:mtext>p</mml:mtext>
<mml:mtext>j</mml:mtext>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mtext>if</mml:mtext>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:msub>
<mml:mtext>M</mml:mtext>
<mml:mrow>
<mml:mtext>ij</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mtext>aa</mml:mtext>
</mml:mrow>
</mml:mtable>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
<p>The <italic>BGLR</italic> function of the BGLR package (<xref ref-type="bibr" rid="B50">P&#xe9;rez and de los Campos, 2014</xref>) in R software (<xref ref-type="bibr" rid="B52">R Core Team, 2022</xref>) was used to fit the GBLUP model.</p>
</sec>
<sec id="s2_3_2">
<label>2.3.2</label>
<title>Decision tree</title>
<p>The decision tree structure in this case is built using a regression tree algorithm. The objective is to create regions (R<sub>1</sub>, R<sub>2</sub>,&#x2026;, R<sub>M</sub>) that minimize the difference between the predicted values and the adjusted observed values. This difference is measured by the Residual Sum of Squares (RSS). To achieve this, the algorithm performs a recursive binary splitting process. At each step, it considers all available features (<inline-formula>
<mml:math display="inline" id="im25">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> markers) and all possible split points (cutoff values) within each feature. The split that results in the lowest RSS for the resulting child nodes is chosen. This process continues recursively until a stopping criterion is met, such as reaching a minimum number of data points in a region. Mathematically, the two disjoint regions can be defined by (<xref ref-type="bibr" rid="B24">Hastie et&#xa0;al., 2009</xref>)</p>
<disp-formula>
<mml:math display="block" id="M6">
<mml:mrow>
<mml:msub>
<mml:mtext>R</mml:mtext>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mtext>j</mml:mtext>
<mml:mo>,</mml:mo>
<mml:mtext>s</mml:mtext>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>=</mml:mo>
<mml:mrow>
<mml:mo>{</mml:mo>
<mml:mrow>
<mml:mtext>X</mml:mtext>
<mml:mo>|</mml:mo>
<mml:msub>
<mml:mtext>X</mml:mtext>
<mml:mtext>j</mml:mtext>
</mml:msub>
<mml:mo>&lt;</mml:mo>
<mml:mtext>s</mml:mtext>
</mml:mrow>
<mml:mo>}</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mtext>&#xa0;and&#xa0;</mml:mtext>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mtext>R</mml:mtext>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mtext>j</mml:mtext>
<mml:mo>,</mml:mo>
<mml:mtext>s</mml:mtext>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>=</mml:mo>
<mml:mrow>
<mml:mo>{</mml:mo>
<mml:mrow>
<mml:mtext>X</mml:mtext>
<mml:mo>|</mml:mo>
<mml:msub>
<mml:mtext>X</mml:mtext>
<mml:mtext>j</mml:mtext>
</mml:msub>
<mml:mo>&#x2265;</mml:mo>
<mml:mtext>s</mml:mtext>
</mml:mrow>
<mml:mo>}</mml:mo>
</mml:mrow>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
<p>and the goal is to minimize:</p>
<disp-formula>
<mml:math display="block" id="M7">
<mml:mrow>
<mml:munder>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mtext>i</mml:mtext>
<mml:mo>:</mml:mo>
<mml:msub>
<mml:mtext>x</mml:mtext>
<mml:mtext>i</mml:mtext>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msub>
<mml:mtext>R</mml:mtext>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mtext>j</mml:mtext>
<mml:mo>,</mml:mo>
<mml:mtext>s</mml:mtext>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:munder>
<mml:msup>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mtext>y</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mo>*</mml:mo>
</mml:msubsup>
<mml:mo>&#x2212;</mml:mo>
<mml:msubsup>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>^</mml:mo>
</mml:mover>
<mml:mrow>
<mml:msub>
<mml:mtext>R</mml:mtext>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
<mml:mo>*</mml:mo>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
<mml:mo>+</mml:mo>
<mml:munder>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mtext>i</mml:mtext>
<mml:mo>:</mml:mo>
<mml:msub>
<mml:mtext>x</mml:mtext>
<mml:mtext>i</mml:mtext>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msub>
<mml:mtext>R</mml:mtext>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mtext>j</mml:mtext>
<mml:mo>,</mml:mo>
<mml:mtext>s</mml:mtext>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:munder>
<mml:msup>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mtext>y</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mo>*</mml:mo>
</mml:msubsup>
<mml:mo>&#x2212;</mml:mo>
<mml:msubsup>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>^</mml:mo>
</mml:mover>
<mml:mrow>
<mml:msub>
<mml:mtext>R</mml:mtext>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
<mml:mo>*</mml:mo>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math display="inline" id="im27">
<mml:mrow>
<mml:msubsup>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>^</mml:mo>
</mml:mover>
<mml:mrow>
<mml:msub>
<mml:mtext>R</mml:mtext>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
<mml:mo>*</mml:mo>
</mml:msubsup>
<mml:mo>&#xa0;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> is the average of the adjusted phenotypic values of the training observations belonging to the region <inline-formula>
<mml:math display="inline" id="im28">
<mml:mrow>
<mml:msub>
<mml:mtext>R</mml:mtext>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mtext>j</mml:mtext>
<mml:mo>,</mml:mo>
<mml:mtext>&#xa0;s</mml:mtext>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>=</mml:mo>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mrow>
<mml:mo>{</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mtext>X|X</mml:mtext>
</mml:mrow>
<mml:mtext>j</mml:mtext>
</mml:msub>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mo>&lt;</mml:mo>
<mml:mtext>&#xa0;s</mml:mtext>
</mml:mrow>
<mml:mo>}</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula>
<mml:math display="inline" id="im29">
<mml:mrow>
<mml:msubsup>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>^</mml:mo>
</mml:mover>
<mml:mrow>
<mml:msub>
<mml:mtext>R</mml:mtext>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
<mml:mo>*</mml:mo>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> is the average of the adjusted phenotypic values of the training observations belonging to the region <inline-formula>
<mml:math display="inline" id="im30">
<mml:mrow>
<mml:msub>
<mml:mtext>R</mml:mtext>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mtext>j</mml:mtext>
<mml:mo>,</mml:mo>
<mml:mtext>&#xa0;s</mml:mtext>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>=</mml:mo>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mrow>
<mml:mo>{</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mtext>X|X</mml:mtext>
</mml:mrow>
<mml:mtext>j</mml:mtext>
</mml:msub>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mo>&#x2265;</mml:mo>
<mml:mtext>&#xa0;s</mml:mtext>
</mml:mrow>
<mml:mo>}</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mtext>&#xa0;and&#xa0;</mml:mtext>
<mml:mrow>
<mml:msubsup>
<mml:mtext>y</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mo>*</mml:mo>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> is the true value of each individual.</p>
</sec>
<sec id="s2_3_3">
<label>2.3.3</label>
<title>Random Forest</title>
<p>To construct a RF, it is necessary to create several datasets by resampling (bootstrapping) from the training set. After that, the bootstrap samples are used to build multiple trees considering a subset of predictors (markers) randomly selected (<xref ref-type="bibr" rid="B24">Hastie et&#xa0;al., 2009</xref>). Usually, for a continuous response, the number of predictors used to find the best split at each node is a subset that is chosen by <inline-formula>
<mml:math display="inline" id="im32">
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mo>=</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mfrac>
<mml:mi>v</mml:mi>
<mml:mn>3</mml:mn>
</mml:mfrac>
</mml:mrow>
</mml:math>
</inline-formula>, with <inline-formula>
<mml:math display="inline" id="im33">
<mml:mi>v</mml:mi>
</mml:math>
</inline-formula> being the total number of predictors. Also, usually, the number of trees for the RF is set to 500. For the RF, the trees grow to their maximum size without pruning, and the prediction is done by averaging the trees. The function <italic>randomForest</italic> in randomForest R-package (<xref ref-type="bibr" rid="B35">Liaw and Wiener, 2002</xref>) was used to implement the RF method.</p>
</sec>
<sec id="s2_3_4">
<label>2.3.4</label>
<title>Quantile Random Forest</title>
<p>For the construction of the QRF, the same as for RF, it is necessary to obtain T regression trees generated from bootstrap samples considering subsets of the markers under study (<xref ref-type="bibr" rid="B24">Hastie et&#xa0;al., 2009</xref>). Then, for the <italic>t</italic>
<sup>th</sup> generated tree (<inline-formula>
<mml:math display="inline" id="im34">
<mml:mrow>
<mml:msub>
<mml:mtext>T</mml:mtext>
<mml:mtext>t</mml:mtext>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>), the conditional distribution is obtained by weighting the observed values of the studied traits. Specifically, given an observation, <inline-formula>
<mml:math display="inline" id="im35">
<mml:mrow>
<mml:mtext>X&#xa0;</mml:mtext>
<mml:mo>=</mml:mo>
<mml:mtext>x</mml:mtext>
</mml:mrow>
</mml:math>
</inline-formula>, it is defined for each terminal node (adjusted tree leaf), <inline-formula>
<mml:math display="inline" id="im36">
<mml:mrow>
<mml:mtext>F</mml:mtext>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mtext>x</mml:mtext>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mtext>T</mml:mtext>
<mml:mrow>
<mml:mtext>tf</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mtext>&#xa0;</mml:mtext>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, the following weighting factor: <inline-formula>
<mml:math display="inline" id="im37">
<mml:mrow>
<mml:msub>
<mml:mtext>w</mml:mtext>
<mml:mtext>i</mml:mtext>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mtext>x</mml:mtext>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mtext>T</mml:mtext>
<mml:mrow>
<mml:mtext>tf</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mtext>&#xa0;</mml:mtext>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>=</mml:mo>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mtext>I</mml:mtext>
<mml:mrow>
<mml:mrow>
<mml:mo>{</mml:mo>
<mml:mrow>
<mml:mtext>x</mml:mtext>
<mml:mo>&#x2208;</mml:mo>
<mml:mtext>F</mml:mtext>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mtext>x</mml:mtext>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mtext>T</mml:mtext>
<mml:mrow>
<mml:mtext>tf</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mtext>&#xa0;</mml:mtext>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mtext>&#xa0;</mml:mtext>
</mml:mrow>
<mml:mo>}</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:mo>#</mml:mo>
<mml:mrow>
<mml:mo>{</mml:mo>
<mml:mrow>
<mml:mtext>m</mml:mtext>
<mml:mo>:</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mtext>&#xa0;X</mml:mtext>
</mml:mrow>
<mml:mtext>m</mml:mtext>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:mtext>F</mml:mtext>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mtext>x</mml:mtext>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mtext>T</mml:mtext>
<mml:mrow>
<mml:mtext>tf</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mtext>&#xa0;</mml:mtext>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo>}</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mfrac>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> with <inline-formula>
<mml:math display="inline" id="im38">
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mtext>i</mml:mtext>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mtext>n</mml:mtext>
</mml:msubsup>
<mml:mrow>
<mml:msub>
<mml:mtext>w</mml:mtext>
<mml:mtext>i</mml:mtext>
</mml:msub>
</mml:mrow>
</mml:mstyle>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mtext>x</mml:mtext>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mtext>T</mml:mtext>
<mml:mrow>
<mml:mtext>tf</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mtext>&#xa0;</mml:mtext>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula>
<mml:math display="inline" id="im39">
<mml:mrow>
<mml:msub>
<mml:mtext>I</mml:mtext>
<mml:mrow>
<mml:mrow>
<mml:mo>{</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mtext>X</mml:mtext>
<mml:mtext>i</mml:mtext>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:mtext>F</mml:mtext>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mtext>x</mml:mtext>
<mml:mtext>i</mml:mtext>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mtext>T</mml:mtext>
<mml:mrow>
<mml:mtext>tf</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mtext>&#xa0;</mml:mtext>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mtext>&#xa0;</mml:mtext>
</mml:mrow>
<mml:mo>}</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is an indicator variable stating that the observed value (<inline-formula>
<mml:math display="inline" id="im40">
<mml:mrow>
<mml:mtext>X&#xa0;</mml:mtext>
<mml:mo>=</mml:mo>
<mml:mtext>x</mml:mtext>
</mml:mrow>
</mml:math>
</inline-formula>) belongs to <italic>f</italic> <sup>th</sup> leaf and <inline-formula>
<mml:math display="inline" id="im41">
<mml:mrow>
<mml:mo>#</mml:mo>
<mml:mrow>
<mml:mo>{</mml:mo>
<mml:mrow>
<mml:mtext>m</mml:mtext>
<mml:mo>:</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mtext>&#xa0;X</mml:mtext>
</mml:mrow>
<mml:mtext>m</mml:mtext>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:mtext>F</mml:mtext>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mtext>x</mml:mtext>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mtext>T</mml:mtext>
<mml:mrow>
<mml:mtext>tf</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mtext>&#xa0;</mml:mtext>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo>}</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> represents the number of observations on the <italic>f</italic> <sup>th</sup> leaf.</p>
<p>The prediction of a tree <inline-formula>
<mml:math display="inline" id="im42">
<mml:mrow>
<mml:msub>
<mml:mtext>T</mml:mtext>
<mml:mtext>t</mml:mtext>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, according to <xref ref-type="bibr" rid="B39">Meinshausen (2006)</xref>, for a new point, <inline-formula>
<mml:math display="inline" id="im43">
<mml:mrow>
<mml:mtext>X&#xa0;</mml:mtext>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mtext>x</mml:mtext>
<mml:mrow>
<mml:mtext>new</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> is given by the weighted average of the observations <inline-formula>
<mml:math display="inline" id="im44">
<mml:mrow>
<mml:msub>
<mml:mtext>Y</mml:mtext>
<mml:mtext>i</mml:mtext>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, that is, <inline-formula>
<mml:math display="inline" id="im45">
<mml:mrow>
<mml:mover accent="true">
<mml:mtext>&#x3bc;</mml:mtext>
<mml:mo>^</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mtext>x</mml:mtext>
<mml:mrow>
<mml:mtext>new</mml:mtext>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>=</mml:mo>
<mml:mstyle displaystyle="true">
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mtext>i</mml:mtext>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mtext>n</mml:mtext>
</mml:msubsup>
<mml:mrow>
<mml:msub>
<mml:mtext>w</mml:mtext>
<mml:mtext>i</mml:mtext>
</mml:msub>
</mml:mrow>
</mml:mstyle>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mtext>x</mml:mtext>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mtext>T</mml:mtext>
<mml:mrow>
<mml:mtext>tf</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mtext>&#xa0;</mml:mtext>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:msub>
<mml:mtext>Y</mml:mtext>
<mml:mtext>i</mml:mtext>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>. In this way, the prediction for a given observation, <inline-formula>
<mml:math display="inline" id="im46">
<mml:mrow>
<mml:mtext>X&#xa0;</mml:mtext>
<mml:mo>=</mml:mo>
<mml:mtext>x</mml:mtext>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> after the construction of T trees is given by <inline-formula>
<mml:math display="inline" id="im47">
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mtext>&#x3bc;</mml:mtext>
<mml:mo>^</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mtext>RF</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mtext>x</mml:mtext>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>=</mml:mo>
<mml:mstyle displaystyle="true">
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mtext>i</mml:mtext>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mtext>n</mml:mtext>
</mml:msubsup>
<mml:mrow>
<mml:msub>
<mml:mtext>w</mml:mtext>
<mml:mtext>i</mml:mtext>
</mml:msub>
</mml:mrow>
</mml:mstyle>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mtext>x</mml:mtext>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:msub>
<mml:mtext>Y</mml:mtext>
<mml:mtext>i</mml:mtext>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> where <inline-formula>
<mml:math display="inline" id="im48">
<mml:mrow>
<mml:msub>
<mml:mtext>w</mml:mtext>
<mml:mtext>i</mml:mtext>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mtext>x</mml:mtext>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mtext>T</mml:mtext>
</mml:mfrac>
<mml:mstyle displaystyle="true">
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mtext>t</mml:mtext>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mtext>T</mml:mtext>
</mml:msubsup>
<mml:mrow>
<mml:msub>
<mml:mtext>w</mml:mtext>
<mml:mtext>i</mml:mtext>
</mml:msub>
</mml:mrow>
</mml:mstyle>
<mml:mrow>
<mml:mtext>&#xa0;&#xa0;</mml:mtext>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mtext>x</mml:mtext>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mtext>T</mml:mtext>
<mml:mrow>
<mml:mtext>tf</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mtext>&#xa0;</mml:mtext>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>. Taking into consideration that the estimated cumulative distribution function is given by <inline-formula>
<mml:math display="inline" id="im49">
<mml:mrow>
<mml:mover accent="true">
<mml:mtext>F</mml:mtext>
<mml:mo>^</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mtext>y</mml:mtext>
<mml:mo>|</mml:mo>
<mml:mtext>X</mml:mtext>
<mml:mo>=</mml:mo>
<mml:mtext>x</mml:mtext>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>=</mml:mo>
<mml:mstyle displaystyle="true">
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mtext>i</mml:mtext>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mtext>n</mml:mtext>
</mml:msubsup>
<mml:mrow>
<mml:msub>
<mml:mtext>w</mml:mtext>
<mml:mtext>i</mml:mtext>
</mml:msub>
</mml:mrow>
</mml:mstyle>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mtext>x</mml:mtext>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:msub>
<mml:mtext>I</mml:mtext>
<mml:mrow>
<mml:mrow>
<mml:mo>{</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mtext>Y</mml:mtext>
<mml:mtext>i</mml:mtext>
</mml:msub>
<mml:mo>&#x2264;</mml:mo>
<mml:mtext>y</mml:mtext>
</mml:mrow>
<mml:mo>}</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, where <inline-formula>
<mml:math display="inline" id="im50">
<mml:mrow>
<mml:msub>
<mml:mtext>I</mml:mtext>
<mml:mrow>
<mml:mrow>
<mml:mo>{</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mtext>Y</mml:mtext>
<mml:mtext>i</mml:mtext>
</mml:msub>
<mml:mo>&#x2264;</mml:mo>
<mml:mtext>y</mml:mtext>
</mml:mrow>
<mml:mo>}</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is an indicator function, the predicted value for the <inline-formula>
<mml:math display="inline" id="im51">
<mml:mrow>
<mml:msup>
<mml:mtext>&#x3c4;</mml:mtext>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mi>h</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> quantile is given by <inline-formula>
<mml:math display="inline" id="im52">
<mml:mrow>
<mml:msub>
<mml:mtext>Q</mml:mtext>
<mml:mtext>&#x3c4;</mml:mtext>
</mml:msub>
<mml:mo stretchy="false">(</mml:mo>
<mml:mtext>x</mml:mtext>
<mml:mo stretchy="false">)</mml:mo>
<mml:mo>=</mml:mo>
<mml:mi>inf</mml:mi>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mo stretchy="false">{</mml:mo>
<mml:mrow>
<mml:mtext>y</mml:mtext>
<mml:mo>:</mml:mo>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mover accent="true">
<mml:mtext>F</mml:mtext>
<mml:mo>^</mml:mo>
</mml:mover>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mtext>y</mml:mtext>
<mml:mo>|</mml:mo>
<mml:mtext>X</mml:mtext>
<mml:mo>=</mml:mo>
<mml:mtext>x</mml:mtext>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mo>&#x2265;</mml:mo>
<mml:mtext>&#xa0;&#x3c4;&#xa0;</mml:mtext>
</mml:mrow>
<mml:mo stretchy="false">}</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>, for any <inline-formula>
<mml:math display="inline" id="im53">
<mml:mrow>
<mml:mtext>&#x3c4;</mml:mtext>
<mml:mo>,</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mn>0</mml:mn>
<mml:mo>&lt;</mml:mo>
<mml:mtext>&#x3c4;</mml:mtext>
<mml:mo>&lt;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>&#xa0;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
<p>The main difference between QRF and RF is that, for each node in each tree, the RF maintains only the average of the observations that fall into that node and discards any other information. Conversely, the QRF maintains the value of all node observations (not just the average) and evaluates the conditional distribution based on this information (<xref ref-type="bibr" rid="B39">Meinshausen, 2006</xref>). This study evaluated nine Quantile Random Forest (QRF) models for various quantile levels. The quantile parameter (&#x3c4;) ranged from 0.1 to 0.9 in increments of 0.1. Therefore, the models were named QRF0.1, QRF0.2,&#x2026;, QRF0.9, reflecting the specific quantile they aimed to predict. The function <italic>quantregForest</italic> in quantregForest R-package (<xref ref-type="bibr" rid="B38">Meinshausen, 2017</xref>) was used to implement the QRF methods.</p>
</sec>
<sec id="s2_3_5">
<label>2.3.5</label>
<title>Multivariate adaptive regression splines</title>
<p>MARS (<xref ref-type="bibr" rid="B19">Friedman, 1991</xref>) forms reflected pairs of base functions (BF) for each input (marker) <inline-formula>
<mml:math display="inline" id="im54">
<mml:mrow>
<mml:msub>
<mml:mtext>X</mml:mtext>
<mml:mtext>j</mml:mtext>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, with nodes at each observed value <inline-formula>
<mml:math display="inline" id="im55">
<mml:mrow>
<mml:msub>
<mml:mtext>x</mml:mtext>
<mml:mrow>
<mml:mtext>ij</mml:mtext>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> of that input. The model building strategy is like a progressive linear regression, but instead of using the original inputs, it implements base functions from the set <inline-formula>
<mml:math display="inline" id="im56">
<mml:mrow>
<mml:mtext>C</mml:mtext>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mrow>
<mml:mo>{</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mtext>X</mml:mtext>
<mml:mtext>j</mml:mtext>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mtext>t</mml:mtext>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo>+</mml:mo>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mtext>&#xa0;&#xa0;</mml:mtext>
<mml:msub>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mtext>t</mml:mtext>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mtext>X</mml:mtext>
<mml:mtext>j</mml:mtext>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo>+</mml:mo>
</mml:msub>
</mml:mrow>
<mml:mo>}</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mtext>&#xa0;t&#xa0;</mml:mtext>
<mml:mo>&#x2208;</mml:mo>
<mml:mrow>
<mml:mo>{</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mtext>x</mml:mtext>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mtext>j</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mtext>&#xa0;&#xa0;&#xa0;x</mml:mtext>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:mtext>j</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mtext>&#xa0;&#xa0;</mml:mtext>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mtext>&#xa0;&#xa0;&#xa0;x</mml:mtext>
</mml:mrow>
<mml:mrow>
<mml:mtext>Nj</mml:mtext>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo>}</mml:mo>
</mml:mrow>
<mml:mtext>&#xa0;j</mml:mtext>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>2</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:mtext>&#xa0;p&#xa0;</mml:mtext>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and/or its products. The MARS model, which is a linear combination of the BF and/or their interactions, is given by (<xref ref-type="bibr" rid="B24">Hastie et&#xa0;al., 2009</xref>):</p>
<disp-formula>
<mml:math display="block" id="M8">
<mml:mrow>
<mml:mtext>f</mml:mtext>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mtext>X</mml:mtext>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mtext>&#xa0;&#x3b2;</mml:mtext>
</mml:mrow>
<mml:mn>0</mml:mn>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>M</mml:mi>
</mml:munderover>
<mml:mrow>
<mml:msub>
<mml:mtext>&#x3b2;</mml:mtext>
<mml:mtext>m</mml:mtext>
</mml:msub>
<mml:msub>
<mml:mtext>h</mml:mtext>
<mml:mtext>m</mml:mtext>
</mml:msub>
</mml:mrow>
</mml:mstyle>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mtext>X</mml:mtext>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math display="inline" id="im57">
<mml:mrow>
<mml:msub>
<mml:mtext>&#x3b2;</mml:mtext>
<mml:mn>0</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the regression constant, <inline-formula>
<mml:math display="inline" id="im58">
<mml:mrow>
<mml:msub>
<mml:mtext>&#x3b2;</mml:mtext>
<mml:mtext>m</mml:mtext>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> with <italic>m</italic> = 1, 2,&#x2026;, <italic>M</italic>, are the regression coefficients, and <inline-formula>
<mml:math display="inline" id="im59">
<mml:mrow>
<mml:msub>
<mml:mtext>h</mml:mtext>
<mml:mi>m</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mtext>X</mml:mtext>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> is a function in <inline-formula>
<mml:math display="inline" id="im60">
<mml:mtext>C</mml:mtext>
</mml:math>
</inline-formula>, or a product of two or more functions.</p>
<p>The estimation process of the parameters <inline-formula>
<mml:math display="inline" id="im61">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b2;</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math display="inline" id="im62">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b2;</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is based on the minimization of the residual sum of squares. First, the forward phase starts on the training data, building the model initially with only the constant function <inline-formula>
<mml:math display="inline" id="im63">
<mml:mrow>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>X</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>, and all functions in the <inline-formula>
<mml:math display="inline" id="im64">
<mml:mtext>C</mml:mtext>
</mml:math>
</inline-formula> set are candidate functions. At each subsequent step, the basis function pair that produces the maximum reduction in training error is added. Considering a model with M basis functions, the next pair to be added to the model is</p>
<disp-formula>
<mml:math display="block" id="M9">
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mtext>&#x3b2;</mml:mtext>
<mml:mo>^</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mtext>M</mml:mtext>
<mml:mo>+</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mtext>h</mml:mtext>
<mml:mtext>l</mml:mtext>
</mml:msub>
<mml:mfenced>
<mml:mtext>X</mml:mtext>
</mml:mfenced>
<mml:msub>
<mml:mrow>
<mml:mfenced>
<mml:mrow>
<mml:msub>
<mml:mtext>X</mml:mtext>
<mml:mtext>j</mml:mtext>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mtext>t</mml:mtext>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>+</mml:mo>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mover accent="true">
<mml:mtext>&#x3b2;</mml:mtext>
<mml:mo>^</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mtext>M</mml:mtext>
<mml:mo>+</mml:mo>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mtext>h</mml:mtext>
<mml:mtext>l</mml:mtext>
</mml:msub>
<mml:mfenced>
<mml:mtext>X</mml:mtext>
</mml:mfenced>
<mml:msub>
<mml:mrow>
<mml:mfenced>
<mml:mrow>
<mml:mtext>t</mml:mtext>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mtext>X</mml:mtext>
<mml:mtext>j</mml:mtext>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>+</mml:mo>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mtext>&#xa0;h</mml:mtext>
</mml:mrow>
<mml:mtext>l</mml:mtext>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:mtext>M</mml:mtext>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math display="inline" id="im65">
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mtext>&#x3b2;</mml:mtext>
<mml:mo>^</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mtext>M</mml:mtext>
<mml:mo>+</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math display="inline" id="im66">
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mtext>&#x3b2;</mml:mtext>
<mml:mo>^</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mtext>M</mml:mtext>
<mml:mo>+</mml:mo>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> are coefficients estimated by the least square method (<xref ref-type="bibr" rid="B24">Hastie et&#xa0;al., 2009</xref>), together with all other <inline-formula>
<mml:math display="inline" id="im67">
<mml:mrow>
<mml:mtext>M</mml:mtext>
<mml:mo>+</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> coefficients in the model. This process of adding BF continues until the model reaches a predetermined maximum number, often leading to a purposefully overparametrized model (<xref ref-type="bibr" rid="B61">Zhang and Goh, 2016</xref>). The backward phase improves the model by removing the least significant terms until finding the best sub model. The model subsets are compared using the generalized cross-validation (GCV) method. The GCV is evaluated with the root-mean-square residual error divided by a penalty that depends on the complexity of the model (<xref ref-type="bibr" rid="B61">Zhang and Goh, 2016</xref>) and it is calculated as</p>
<disp-formula>
<mml:math display="block" id="M10">
<mml:mrow>
<mml:mtext>GCV</mml:mtext>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mtext>&#x3bb;</mml:mtext>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mtext>N</mml:mtext>
</mml:mfrac>
<mml:mstyle displaystyle="true">
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mtext>i</mml:mtext>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mtext>N</mml:mtext>
</mml:msubsup>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mtext>y</mml:mtext>
<mml:mtext>i</mml:mtext>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mover accent="true">
<mml:mtext>f</mml:mtext>
<mml:mo>^</mml:mo>
</mml:mover>
<mml:mtext>&#x3bb;</mml:mtext>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mtext>x</mml:mtext>
<mml:mtext>i</mml:mtext>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:mstyle>
</mml:mrow>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mrow>
<mml:mo>[</mml:mo>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mtext>C</mml:mtext>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mtext>M</mml:mtext>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mtext>N</mml:mtext>
</mml:mfrac>
</mml:mrow>
<mml:mo>]</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
<p>(<xref ref-type="bibr" rid="B24">Hastie et&#xa0;al., 2009</xref>) where <inline-formula>
<mml:math display="inline" id="im68">
<mml:mtext>M</mml:mtext>
</mml:math>
</inline-formula> is the effective number of model parameters, <inline-formula>
<mml:math display="inline" id="im69">
<mml:mrow>
<mml:mtext>C</mml:mtext>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mtext>M</mml:mtext>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> is a cost function for each basis function included in the developed submodel, which by default adopts the value of 3, N is the number of datasets used in CV and <inline-formula>
<mml:math display="inline" id="im70">
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mtext>f</mml:mtext>
<mml:mo>^</mml:mo>
</mml:mover>
<mml:mtext>&#x3bb;</mml:mtext>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mtext>x</mml:mtext>
<mml:mtext>i</mml:mtext>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> denotes the predicted MARS values. This study employed three Multivariate Adaptive Regression Spline (MARS) models with varying degrees of interaction (1, 2, and 3). The MARS 1 model represents an additive model, meaning it captures only the linear effects of markers. In contrast, MARS 2 and MARS 3 allow for the inclusion of second and third-order interactions, respectively, enabling them to capture more complex non-additive relationships between markers. The function <italic>earth</italic> in the <italic>earth</italic> R-package (<xref ref-type="bibr" rid="B43">Milborrow, 2017</xref>) was used to implement MARS models.</p>
</sec>
</sec>
<sec id="s2_4">
<label>2.4</label>
<title>Stacking Ensemble Learning for genomic prediction</title>
<p>This study explores the SEL approach for improving the accuracy of genomic prediction models. SEL leverages predictions from multiple individual models (base learners, Level 0) and combines them using a meta-learner model (Level 1) to generate a final, potentially more accurate prediction. The base learners used in this study were GBLUP, different degrees of MARS (1, 2, 3), multiple QRFs (0.1 to 0.9), and RF. Their predictions, referred to as Genomic Estimated Breeding Values from Base Learners (GEBV-BL), formed the core metadata for the meta-learner. In practice, it is necessary to prepare a dataset with both the observable characteristics (phenotype) and the genetic information (SNP markers) of individuals. Then, diverse machine learning models (base learners) are trained on these data to make initial predictions. These predictions from the base learners become the new features for a final model, the meta-learner. Finally, the meta-learner is trained using these base learner predictions as input and the original phenotype data as the target variable. In our work, four different combinations of metadata were explored: (i) GEBV-BL, only predictions from the base learners (standard approach); (ii) GEBV-BL+SNP, predictions combined with the original Single Nucleotide Polymorphism (SNP) markers (larger input dataset); (iii) GEBV-BL-Best; and (iv) GEBV-BL-Best + SNP, similar to the previous cases, but only predictions from high-performing base learners (those exceeding the average predictive accuracy) were included. For GEBV-BL and GEBV-BL-Best datasets, five meta-learner methods were evaluated: Simple Mean (SSM); Weighted Regression (SWR); Regression (SR); Ridge Regression (SRR); Random Forest (SRF). For GEBV-BL + SNP and GEBV-BL-Best + SNP datasets, which included SNP markers, a two-kernel GBLUP model (S2KGBLUP) was additionally employed as the meta-learner. The SEL scheme for genomic prediction is illustrated in the <xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1</bold>
</xref>.</p>
<fig id="f1" position="float">
<label>Figure&#xa0;1</label>
<caption>
<p>The stacking ensemble learning framework for genomic prediction from original data to the base learners, creating metadata for the meta-learner. Base-Learner (Level 0) is composed of the GBLUP, MARS (1&#xb0;, 2&#xb0;, and 3&#xb0;), QRF considering nine quantiles (from 0.1 to 0.9, in steps of 0.1) and RF model. Four different meta-data were obtained: (i) GEBV-BL, only predictions from the base learners (standard approach); (ii) GEBV-BL+SNP, predictions combined with the original Single Nucleotide Polymorphism (SNP) markers (larger input dataset); (iii) GEBV-BL-Best; and (iv) GEBV-BL-Best + SNP, similar to the previous cases, but only predictions from high-performing base learners (those exceeding the average predictive accuracy). Meta-Learners: for GEBV-BL and GEBV-BL-Best metadata, six meta-learner methods were evaluated. Simple Mean (SSM), Weighted Regression (SWR), Regression (SR), Ridge Regression (SRR), Random Forest (SRF), and for GEBV-BL + SNP and GEBV-BL-Best + SNP datasets, which included SNP markers, a two-kernel GBLUP model (S2KGBLUP).</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-15-1373318-g001.tif"/>
</fig>
</sec>
<sec id="s2_5">
<label>2.5</label>
<title>Cross-validation</title>
<p>The PA of the models used as base-learners and the entire SEL process considered a CV scheme that was implemented as follows. First, the complete dataset under study was randomly divided into two sets (training and testing). The training set was composed of 70% of the individuals while the remaining 30% was assigned to the testing or validation set. The training set was used to calibrate the base-learners and the SEL for predicting the GEBVs of the individuals in the testing set. This procedure was repeated 10 times. Then, for each approach, the average PA across replicates was computed. The PA was computed as the Pearson correlation between predicted GEBV and the adjusted phenotype values. The standard error (SE) was also computed. In addition, the mean square error (MSE) between the observed and predicted values was calculated. Finally, the agreement coefficient was used to compute the percentage of individuals with a performance above the 90<sup>th</sup> percentile in fields given the top 10% of the GEBVs obtained with the different genomic prediction approaches.</p>
</sec>
</sec>
<sec id="s3" sec-type="results">
<label>3</label>
<title>Results</title>
<sec id="s3_1">
<label>3.1</label>
<title>Phenotypic data analysis</title>
<p>The across environments mean (<inline-formula>
<mml:math display="inline" id="im71">
<mml:mrow>
<mml:mover accent="true">
<mml:mtext>X</mml:mtext>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> and standard deviation (SD) of the evaluated traits are summarized in <xref ref-type="table" rid="T1">
<bold>Table&#xa0;1</bold>
</xref>.</p>
<table-wrap id="T1" position="float">
<label>Table&#xa0;1</label>
<caption>
<p>Across environments phenotypic mean (<inline-formula>
<mml:math display="inline" id="im72">
<mml:mover accent="true">
<mml:mtext>X</mml:mtext>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
</mml:math>
</inline-formula>) and standard deviation (SD) for yield (YL), total number of fruits (NF), leaf miner infestation (LM), and cercosporiosis incidence (Cer) of a coffea arabica L. population composed of 195 individuals observed in years 2014, 2015, and 2016 in Vi&#xe7;osa, Brazil.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="bottom" align="center">Trait</th>
<th valign="bottom" align="center">
<inline-formula>
<mml:math display="inline" id="im73">
<mml:mover accent="true">
<mml:mi>X</mml:mi>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
</mml:math>
</inline-formula>
</th>
<th valign="bottom" align="center">
<inline-formula>
<mml:math display="inline" id="im74">
<mml:mrow>
<mml:mtext>SD</mml:mtext>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="bottom" align="center">YL</td>
<td valign="bottom" align="center">5.16</td>
<td valign="bottom" align="center">3.84</td>
</tr>
<tr>
<td valign="bottom" align="center">NF</td>
<td valign="bottom" align="center">2.32</td>
<td valign="bottom" align="center">0.55</td>
</tr>
<tr>
<td valign="bottom" align="center">LM</td>
<td valign="bottom" align="center">2.05</td>
<td valign="bottom" align="center">0.69</td>
</tr>
<tr>
<td valign="bottom" align="center">Cer</td>
<td valign="bottom" align="center">2.48</td>
<td valign="bottom" align="center">0.70</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>The estimates of the heritability (proportion of phenotypic variability explained by the genetic component) for YL (0.30), NF (0.49), LM (0.30), and Cer (0.38) were moderate. The Spearman&#x2019;s correlation (lower triangle) between the adjusted phenotypic values of each pair of traits were positive and presented low to moderate values varying from 0.02 to 0.52. The higher and the lower correlation values were observed between YL and NF (0.52) and between NF and Cer (0.02, not statistically significant), respectively (<xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2</bold>
</xref>). The correlation between YL and LM, Cer and NF and LM, Cer were not statistically significant (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Figure S1</bold>
</xref>).</p>
<fig id="f2" position="float">
<label>Figure&#xa0;2</label>
<caption>
<p>Predictive ability (PA) for yield (YL), total number of fruits (NF), leaf miner infestation (LM), and cercosporiosis incidence (Cer) measured in an Arabica coffee population composed of 195 individuals using a holdout validation scheme repeated 10 times. The fitted models used as base learners are: Genomic Best Linear Unbiased Predictor (GBLUP); Multivariate Adaptive Regression Splines with degrees equal to 1, 2, and 3 (MARS 1, MARS 2 and MARS 3); Quantile Random Forest evaluated at nine quantiles [(&#x3c4;): 0.1 to 0.9, every 0.1] &#x2013; (QRF 0.1, &#x2026;, QRF 0.9), and Random Forest (RF).</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-15-1373318-g002.tif"/>
</fig>
</sec>
<sec id="s3_2">
<label>3.2</label>
<title>Comparison between the base learners</title>
<p>Overall, none of the evaluated base learner methods outperformed the predictive performance of the others for all the evaluated traits. The estimated predictive abilities (PA) and corresponding standard deviations for the four traits (YL, NF, LM, and Cer) ranged from &#x2212;0.01 (0.01) to 0.24 (0.01) and are presented in <xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3</bold>
</xref>. Specifically, for YL, NF, LM, and Cer, the highest PA values were 0.15 (0.01), 0.24 (0.01), 0.15 (0.01) and 0.24 (0.02), and these were obtained with MARS2 and QRF0.3, QRF0.7 and GBLUP methods, respectively (<xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2</bold>
</xref>).</p>
<fig id="f3" position="float">
<label>Figure&#xa0;3</label>
<caption>
<p>Predictive ability (PA) for yield (YL), total number of fruits (NF), leaf miner infestation (LM), and cercosporiosis incidence (Cer) measured in an Arabica coffee population composed of 195 individuals using a holdout validation scheme repeated 10 times. The fitted models used as meta-learners are: Stacking Simple Mean (SSM), Stacking Weighted Regression (SWR), Stacking Regression (SR), Stacking Ridge Regression (SRR), and the Stacking two-kernel GBLUP model (S2KGBLUP). The models named as best (SSMBest, SWRBest, SRBest, SRRBest, S2KGBLUP, and RFBest) used in the fitting only the results provided by those methods that presented predictive ability higher than the mean in Level 0.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-15-1373318-g003.tif"/>
</fig>
<p>GBLUP presented lower values of MSE (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Table S1</bold>
</xref>). Specifically, the MSE were equal to 16.05 (1.01), 2,329.00 (368.94), 0.21 (0.02), and 0.57 (0.07) for YL, NF, LM, and Cer, respectively.</p>
<p>The extreme QRF models QRF0.1, and QRF0.9 returned the highest MSE values across all the evaluated traits (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Table S1</bold>
</xref>). In general, the MSE decreased as the fitted quantile model was approaching to the median model (QRF0.5).</p>
</sec>
<sec id="s3_3">
<label>3.3</label>
<title>Comparison between the Stacking Ensemble Learning approaches and GBLUP</title>
<p>The estimates of the PA obtained with the SEL models and the traditional genomic prediction method GBLUP model are shown in <xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3</bold>
</xref>. The results of the GBLUP model were used as a benchmark since, as mentioned above, it is the most convenient and widely used implementation in genomic prediction.</p>
<p>The estimated PA ranged from 0.05 (0.01) to 0.32 (0.02) (<xref ref-type="fig" rid="f4">
<bold>Figure&#xa0;4</bold>
</xref>). For YL, NF, LM, and Cer, the highest PA values were 0.20 (0.01), 0.32 (0.01), 0.19 (0.01), and 0.27 (0.01), respectively. These results were obtained by implementing SMBest method, which corresponds to the simple mean considering GEBV-BL-Best metadata. The &#x201c;best&#x201d; fitted model was SMBest, and it outperformed the PA of the GBLUP model by 87.44% (the ratio between the PA obtained from SMBest model fit and the GBLUP), 37.83%, 199.82%, and 14.59% for YL, NF, LM, and Cer, respectively (<xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3</bold>
</xref>).</p>
<fig id="f4" position="float">
<label>Figure&#xa0;4</label>
<caption>
<p>Spearman&#x2019;s correlation between the genomic estimated breeding values [GEBV] (lower diagonal matrix) and the concordance coefficient between the top 10% of the selected individuals (upper triangular matrix) considering all the different fitted models including the GBLUP model and the meta-learners for yield (YL). The fitted models used as meta-learners are: Stacking Simple Mean (SSM), Stacking Weighted Regression (SWR), Stacking Regression (SR), Stacking Ridge Regression (SRR), and the Stacking two-kernel GBLUP model (S2KGBLUP). The models named as best (SSMBest, SWRBest, SRBest, SRRBest, S2KGBLUP, and RFBest) used in the fitting only the results provided by those methods that presented predictive ability higher than the mean in Level 0.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-15-1373318-g004.tif"/>
</fig>
<p>Regarding the different data sets used as input in the SEL approach, combining the predicted values obtained from base learners (GEBV-BL) with training data used to fitting the models, did not improve PA of these methods (<xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3</bold>
</xref>). Additionally, the results considering only the predicted values provided from those base learners with PA higher than mean of all base learner (GEBV-BL-Best) as input in the Level 1, returned the highest results (<xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3</bold>
</xref>).</p>
<p>For the four traits, the GBLUP model presented the lowest MSE values (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Table S2</bold>
</xref>). As expected, since the Ridge Regression model (SRR) depends on a regularization parameter it presented a significant higher MSE (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Table S2</bold>
</xref>). For this model, the MSE values were equal to 69.28 (9.18), 8390.68 (518.49), 6.65 (0.80), and 3.45 (0.36) for YL, NF, LM, and Cer, respectively.</p>
<p>The Spearman&#x2019;s correlation between the GEBVs obtained with the different prediction models, including the baseline GBLUP model and all the SEL models, presented positive values and these vary from low 0.25 to high 0.97 across the evaluated traits (<xref ref-type="fig" rid="f4">
<bold>Figures&#xa0;4</bold>
</xref>&#x2013;<xref ref-type="fig" rid="f7">
<bold>7</bold>
</xref>, lower triangular matrix). Low values of the Spearman&#x2019;s correlation were observed between the GBLUP and the other SEL methods and these were 0.25, 0.33, 0.28, and 0.30 for YL, NF, LM, and Cer, respectively (<xref ref-type="fig" rid="f4">
<bold>Figures&#xa0;4</bold>
</xref>&#x2013;<xref ref-type="fig" rid="f7">
<bold>7</bold>
</xref>, lower triangle). On the other hand, the highest correlation value (0.97) was observed between the GEBVs obtained by the Stacking Regression (SR) and the Stacking Ridge Regression (SRR) for LM (<xref ref-type="fig" rid="f6">
<bold>Figure&#xa0;6</bold>
</xref>).</p>
<fig id="f5" position="float">
<label>Figure&#xa0;5</label>
<caption>
<p>Spearman&#x2019;s correlation between the genomic estimated breeding values [GEBV] (lower diagonal matrix) and the concordance coefficient between the top 10% of the selected individuals (upper triangular matrix) considering all the different fitted models including the GBLUP model and the meta-learners for total number of fruits (NF). The fitted models used as meta-learners are Stacking Simple Mean (SSM), Stacking Weighted Regression (SWR), Stacking Regression (SR), Stacking Ridge Regression (SRR), and the Stacking two-kernel GBLUP model (S2KGBLUP). The models named as best (SSMBest, SWRBest, SRBest, SRRBest, S2KGBLUP, and RFBest) used in the fitting only the results provided by those methods that presented predictive ability higher than the mean in Level 0.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-15-1373318-g005.tif"/>
</fig>
<fig id="f6" position="float">
<label>Figure&#xa0;6</label>
<caption>
<p>Spearman&#x2019;s correlation between the genomic estimated breeding values [GEBV] (lower diagonal matrix) and the concordance coefficient between the top 10% of the selected individuals (upper triangular matrix) considering all the different fitted models including the GBLUP model and the meta-learners for leaf miner infestation (LM). The fitted models used as meta-learners are: Stacking Simple Mean (SSM), Stacking Weighted Regression (SWR), Stacking Regression (SR), Stacking Ridge Regression (SRR), and the Stacking two-kernel GBLUP model (S2KGBLUP). The models named as best (SSMBest, SWRBest, SRBest, SRRBest, S2KGBLUP, and RFBest) used in the fitting only the results provided by those methods that presented predictive ability higher than the mean in Level 0.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-15-1373318-g006.tif"/>
</fig>
<fig id="f7" position="float">
<label>Figure&#xa0;7</label>
<caption>
<p>Spearman&#x2019;s correlation between the genomic estimated breeding values [GEBV] (lower diagonal matrix) and the concordance coefficient between the top 10% of the selected individuals (upper triangular matrix) considering all the different fitted models including the GBLUP model and the meta-learners for cercosporiosis incidence (Cer). The fitted models used as meta-learners are: Stacking Simple Mean (SSM), Stacking Weighted Regression (SWR), Stacking Regression (SR), Stacking Ridge Regression (SRR), and the Stacking two-kernel GBLUP model (S2KGBLUP). The models named as best (SSMBest, SWRBest, SRBest, SRRBest, S2KGBLUP, and RFBest) used in the fitting only the results provided by those methods that presented predictive ability higher than the mean in Level 0.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-15-1373318-g007.tif"/>
</fig>
<p>For each prediction method, the predicted values were ordered based on rankings then the percentage of common individuals in the top 10% between pairs of methods was computed. Overall, the GBLUP presented lower agreement with the SEL evaluated approaches (<xref ref-type="fig" rid="f4">
<bold>Figures&#xa0;4</bold>
</xref>&#x2013;<xref ref-type="fig" rid="f7">
<bold>7</bold>
</xref>, upper triangle). For instance, the agreement coefficient between the GBLUP and the SWR, SR, SRR, and S2KGBLUP methods presented values varying from 0.31 to 0.48 for all evaluated traits (<xref ref-type="fig" rid="f4">
<bold>Figures&#xa0;4</bold>
</xref>&#x2013;<xref ref-type="fig" rid="f7">
<bold>7</bold>
</xref>, upper triangle).</p>
<p>Regarding the different data sets used as input in the SEL approach, the highest Spearman&#x2019;s correlations and agreements were observed between those methods that used the same kind of metadata as input in the fitting. Overall, considering these two measures, the methods were grouped into three groups (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Figures S6</bold>
</xref>-<xref ref-type="supplementary-material" rid="SM1">
<bold>S13</bold>
</xref>). In general, the GBLUP was allocated into a single group. The only exception was for cercosporiosis (Cer) considering the agreement measure. In this case, the GBLUP was allocated together with those methods that consider only the predicted values provided by those base learners with PA higher than the mean of all base learners (GEBV-BL-Best) as input in the SEL approaches (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Figure S12</bold>
</xref>).</p>
</sec>
</sec>
<sec id="s4" sec-type="discussion">
<label>4</label>
<title>Discussion</title>
<p>In this study, we used the SEL approach to improve PA of four important traits in Coffea Arabica. Two of these traits are associated with the productivity (YL and NF) and the remaining two with disease resistance (LM and Cer). The population under study is composed of 195 genotypes of Coffea Arabica genotyped for 5,970 SNP markers. We compared the PA of different approaches used in the Level 1 of the SEL to the results obtained with the base learners [GBLUP, MARS considering degrees equal to 1, 2, and 3, QRF considering nine quantiles (from 0.1 to 0.9, every 0.1) and RF]. Since the GBLUP is the most implemented prediction model (<xref ref-type="bibr" rid="B62">Zhang et&#xa0;al., 2021</xref>), its results were used as a benchmark. The PA of the different approaches was assessed using a CV scheme repeated 10 times. The Spearman&#x2019;s correlation and the agreement (based on the top 10%) coefficients between the GEBV values of the different models were also estimated. The genetic parameters were also estimated for the evaluated traits (YL, NF, LM, and Cer).</p>
<p>The heritability estimates for YL (0.55), NF (0.49), LM (0.30), and Cer (0.38) were consistent with those reported in the literature for this species and the same traits. Specifically, the heritability estimates varied between (0.1&#x2013;0.74) [<xref ref-type="bibr" rid="B4">Alkimim et&#xa0;al. (2020)</xref> and <xref ref-type="bibr" rid="B2">Alemayehu, 2019</xref>], (0.30&#x2013;0.55) [<xref ref-type="bibr" rid="B23">Gokavi et&#xa0;al., 2023</xref> and <xref ref-type="bibr" rid="B59">Weldemichael et&#xa0;al., 2017</xref>], (0.30&#x2013;0.51) [<xref ref-type="bibr" rid="B12">Chrigui et&#xa0;al., 2020</xref> and <xref ref-type="bibr" rid="B17">Ferr&#xe3;o et&#xa0;al., 2023</xref>], and (0.09&#x2013;0.61) [<xref ref-type="bibr" rid="B3">Alkimim et&#xa0;al., 2021</xref> and <xref ref-type="bibr" rid="B17">Ferr&#xe3;o et&#xa0;al., 2023</xref>] for the YL, NF, LM, and Cer, respectively. Although the Pearson correlations between YL and the disease resistance traits were not statistically significant, a significant and positive genetic correlation (0.52) was obtained between YL and NF.</p>
<p>The machine learning methods as base learners have been already used in genomic prediction (<xref ref-type="bibr" rid="B36">Long et&#xa0;al., 2011</xref>; <xref ref-type="bibr" rid="B32">Lenz et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B45">Montesinos-L&#xf3;pez et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B13">Coelho de Sousa et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B14">Costa et&#xa0;al., 2022</xref>). However, generally these methods do not outperform significantly the traditional genomic prediction approach based on parametric models such as GBLUP and Bayesian Alphabet (<xref ref-type="bibr" rid="B34">Liang et&#xa0;al., 2021</xref>).</p>
<p>
<xref ref-type="bibr" rid="B34">Liang et&#xa0;al. (2021)</xref> used the SEL for improving PA in three real datasets on average by 7.70%, compared to GBLUP. The SEL uses predicted values from different machine learning implementations to obtain a single prediction value. These authors integrated/combined the results of three machine learning implementations (Support Vector Machine, Kernel Ridge Regression and Elastic Net) to compute the GEBVs.</p>
<p>In contrast to <xref ref-type="bibr" rid="B34">Liang et&#xa0;al. (2021)</xref>, in our study, the GBLUP approach was used as one of the base learner methods for the SEL too. The GBLUP was considered since it is widely used for genome prediction (<xref ref-type="bibr" rid="B62">Zhang et&#xa0;al., 2021</xref>) due to its reduced computational demand and simplicity (<xref ref-type="bibr" rid="B25">Hernandez et&#xa0;al., 2020</xref>) compared to the other parametric methods (e.g., Bayesian Alphabet, <xref ref-type="bibr" rid="B22">Gianola et&#xa0;al., 2009</xref>). The MARS that allows automatically selecting and modeling nonlinear relationships and interaction effects of the predictor variables was also considered as base-learner method (<xref ref-type="bibr" rid="B14">Costa et&#xa0;al., 2022</xref>). In addition, the RF (<xref ref-type="bibr" rid="B26">James et&#xa0;al., 2023</xref>) and the QRF (<xref ref-type="bibr" rid="B39">Meinshausen, 2006</xref>) were also set as base learner methods. Specifically, the RF is a machine learning approach used to increase the predictive power and reduce the variance of the predicted values by averaging uncorrelated quantities (<xref ref-type="bibr" rid="B26">James et&#xa0;al., 2023</xref>). The QRF combines interesting characteristics from RF and Quantile Regression (QR) approaches. QR (<xref ref-type="bibr" rid="B30">Koenker and Bassett, 1978</xref>) allows fitting all the portions of the probability distribution of the trait, enabling a more complete picture of the conditional distribution than a single estimate of the center (<xref ref-type="bibr" rid="B10">Briollais and Durrieu, 2014</xref>; <xref ref-type="bibr" rid="B48">Nascimento et&#xa0;al., 2019</xref>).</p>
<p>Overall, for each evaluated trait (YL, NF, LM, and Cer), a different model presented the highest PA. These results show that there is not a single approach that outperforms the others in the evaluated data sets. Also, it could be the case that a model performs better than the others in a given dataset while another model performs better in a similar dataset (<xref ref-type="bibr" rid="B26">James et&#xa0;al., 2023</xref>). For example, the MARS models with 2&#xb0; and 3&#xb0; (models with interactions) presented higher PA for YL. These results are in line with those obtained by <xref ref-type="bibr" rid="B13">Coelho de Sousa et&#xa0;al. (2022)</xref>. Using artificial neural networks to predict the genetic merit of genotypes of <italic>Coffea canephora</italic>, these authors showed a larger dominance marker effect for YL when compared to the GBLUP additive dominant model based on additive marker effects.</p>
<p>Another interesting result was obtained when modeling with the QRF, since the distributions of the adjusted phenotypic values for YL and NF presented a higher concentration in the first quantiles (0.1&#x2013;0.3) (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Figure S2</bold>
</xref>) and positive skewness (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Figure S3</bold>
</xref>). For YL, the best models were the QRF0.1 and QRF0.2, and QRF0.3 for NF (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Figure S2</bold>
</xref>). The distribution of the adjusted LM values presented three major portions (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Figure S4</bold>
</xref>). The QRF modeling was able to distinguish these three different groups (<xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2</bold>
</xref>). Finally, since the distribution of adjusted Cer phenotypic values did not present a specific pattern to highlight (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Figure S5</bold>
</xref>), all of the QRF models presented similar PA (<xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2</bold>
</xref>). A similar trend was shown by <xref ref-type="bibr" rid="B49">Nascimento et&#xa0;al. (2017)</xref>. These authors found that the Quantile Regression approach outperforms the traditional genomic prediction methods for non-normally distributed traits.</p>
<p>An interesting approach to address the non-normality assumption is using multiple models to conduct the predictions, and then combine the predicted values to make up a single prediction through the SEL approach. In general, the SEL outperforms the methods based on base learners only (<xref ref-type="bibr" rid="B34">Liang et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B29">Kandel et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B28">Kalule et&#xa0;al., 2023</xref>). In our study, the SEL approach outperformed all base learner methodologies (<xref ref-type="fig" rid="f2">
<bold>Figures&#xa0;2</bold>
</xref>, <xref ref-type="fig" rid="f3">
<bold>3</bold>
</xref>). However, it is important to emphasize that these results were observed for those SEL models that used only the predicted values provided by the base learners with PA higher than the mean of all the base learners. Specifically, the Stacking Mean Best (SMB) presented the highest PA for all of the evaluated traits. The average of the predictions from several fitted models has been successfully implemented with Bagging and RF approaches (<xref ref-type="bibr" rid="B8">Breiman, 1996</xref> and <xref ref-type="bibr" rid="B9">Breiman, 2001</xref>). The SEL approach allows using several models to combine the predicted values, for example, XGBoost (<xref ref-type="bibr" rid="B21">Ghasemieh et&#xa0;al., 2023</xref>), Penalized methods (<xref ref-type="bibr" rid="B28">Kalule et&#xa0;al., 2023</xref>), and Linear Regression (<xref ref-type="bibr" rid="B34">Liang et&#xa0;al., 2021</xref>). Similar to the single model approach, the performance of the different SEL implementations can vary from one data set to another, with none of them outperforming the others in all data sets. Thus, as recommended by these authors, it is important to evaluate several models as meta-learners as well.</p>
<p>Regarding the MSE, as expected, the penalized models showed larger values compared to the other evaluated methods. By design, these methods induce bias aiming to reduce the variance of the estimations (<xref ref-type="bibr" rid="B47">Montgomery et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B11">Chan et&#xa0;al., 2022</xref>). However, this cannot guarantee an increase in PA compared to other methods. The SMB, which returned the best results in terms of PA, also presented large values for the MSE. This can be a consequence of SMB-SEL using predicted values derived from base learners that return large MSE values (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Table S1</bold>
</xref>).</p>
<p>Overall, the SEL models presented moderate to high Spearman&#x2019;s correlation between them (<xref ref-type="fig" rid="f4">
<bold>Figures&#xa0;4</bold>
</xref>&#x2013;<xref ref-type="fig" rid="f6">
<bold>6</bold>
</xref>). On the other hand, these were low to moderate between SEL approaches and the GBLUP model. Additionally, among the 10% of genotypes with the highest GEBVs for YL, NF, LM, and Cer, the agreement coefficient between the SEL and GBLUP models showed values varying from moderate to high, suggesting differences in the classifications obtained with these methods. In general, the cluster analysis of these results showed that the methods can be grouped into three distinct groups (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Figures S6</bold>
</xref>-<xref ref-type="supplementary-material" rid="SM1">
<bold>S13</bold>
</xref>) with the GBLUP forming a group by itself.</p>
<p>Altogether, these results show that the use of SEL to predict the individual genetic merit of four important traits in Arabica Coffee is worth investigating. The SEL approach showed higher estimates of PA compared with all evaluated base learner methods, in particular the traditional GBLUP method. In practice, SEL&#x2019;s ability to combine methods with diverse characteristics facilitates a more comprehensive exploration of the relationships between variables, leading to more accurate breeding selection. This approach considers a wider range of factors and reduces the reliance on any single model&#x2019;s limitations. However, evaluating phenotypes across multiple environments can pose challenges for SEL. Unlike GBLUP, which presents higher computational efficiency (<xref ref-type="bibr" rid="B25">Hernandez et&#xa0;al., 2020</xref>), many base learners in SEL methods are based on machine learning, requiring significant computation time in certain scenarios. Studies have explored the use of single machine learning methods for multi-environment trials (METs). For example, <xref ref-type="bibr" rid="B7">Barreto et&#xa0;al. (2024)</xref> applied machine learning to predict hybrid performance in METs and achieved similar PA compared to GBLUP with non-additive effects. As highlighted by <xref ref-type="bibr" rid="B46">Montesinos L&#xf3;pez et&#xa0;al. (2022)</xref> in their study using RF for METs, training any machine learning model can be computationally demanding, especially when the training data sets are very large. The hyperparameter tuning for individual base learners within a SEL framework is a well-established approach to enhance model performance. However, it is important to acknowledge that SEL ensembles can achieve strong results even with default base learner parameters (<xref ref-type="bibr" rid="B18">Friedel et&#xa0;al., 2023</xref>). 
This aligns perfectly with the core principle of ensemble learning, that is, leveraging predictions from multiple models can outperform any single model. In our study, the high dimensionality of the data presented significant computational challenges for hyperparameter tuning. Additionally, the observed superiority of the SEL approach compared to traditional methods suggested that tuning might not be as critical for achieving good results.</p>
</sec>
<sec id="s5" sec-type="conclusions">
<label>5</label>
<title>Conclusion</title>
<p>The SEL method was able to predict the PA of important traits (YL, NF, leaf miner infestation, cercosporiosis resistance) in Coffea Arabica. In addition, SEL presented higher PA compared with those obtained for all base learner methods (GBLUP, MARS considering degrees equal to 1, 2, and 3, QRF considering nine quantiles, from 0.1 to 0.9, every 0.1, and RF).</p>
</sec>
<sec id="s6" sec-type="data-availability">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Material</bold>
</xref>. Further inquiries can be directed to the corresponding authors.</p>
</sec>
<sec id="s7" sec-type="author-contributions">
<title>Author contributions</title>
<p>MN: Writing &#x2013; review &amp; editing, Writing &#x2013; original draft, Validation, Software, Methodology, Investigation, Formal analysis, Conceptualization. AN: Writing &#x2013; review &amp; editing, Software, Methodology, Investigation. CA: Writing &#x2013; review &amp; editing, Methodology. AO: Writing &#x2013; review &amp; editing, Data curation. EC: Writing &#x2013; review &amp; editing, Investigation, Data curation. DJ: Writing &#x2013; review &amp; editing, Methodology, Investigation.</p>
</sec>
</body>
<back>
<sec id="s8" sec-type="funding-information">
<title>Funding</title>
<p>The author(s) declare financial support was received for the research, authorship, and/or publication of this article. This work was supported by the Brazilian Coffee Research and Development Consortium (Cons&#xf3;rcio Brasileiro de Pesquisa e Desenvolvimento do Caf&#xe9;-CBP&amp;D/Caf&#xe9;), by the Foundation for Research Support of the state of Minas Gerais (FAPEMIG, APQ-01638&#x2013;18), by the National Council of Scientific and Technological Development (CNPq, 408833/2023&#x2013;8), and by the National Institutes of Science and Technology of Coffee (INCT/Caf&#xe9;). MN and CA are supported by scientific productivity fellowships (310755/2023&#x2013;9 and 306772/2020&#x2013;5, respectively) from the Brazilian Council for Scientific and Technological Development (CNPq).</p>
</sec>
<sec id="s9" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>Authors AB and EC were employed by the company Embrapa Coffee, Brazilian Agricultural Research Corporation (Embrapa).</p>
<p>The remaining authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
<p>The author(s) declared that they were an editorial board member of Frontiers, at the time of submission. This had no impact on the peer review process and the final decision.</p>
</sec>
<sec id="s10" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec id="s11" sec-type="supplementary-material">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fpls.2024.1373318/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fpls.2024.1373318/full#supplementary-material</ext-link>
</p>
<supplementary-material xlink:href="DataSheet_1.docx" id="SM1" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document"/>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Abdollahi-Arpanahi</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Gianola</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Pe&#xf1;agaricano</surname> <given-names>F.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Deep learning versus parametric and ensemble methods for genomic prediction of complex phenotypes</article-title>. <source>Genet. Selection Evol.</source> <volume>52</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s12711-020-00531-z</pub-id>
</citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Alemayehu</surname> <given-names>D.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Estimation of genetic component and heritability for quantitative traits in amaro coffee (Coffea Arabica L.) landrace at Awada, Southern Ethiopia</article-title>. <source>Int. J. Res. Stud. Science Eng. Technology.</source> <volume>6</volume>, <fpage>1</fpage>&#x2013;<lpage>9</lpage>.</citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Alkimim</surname> <given-names>E. R.</given-names>
</name>
<name>
<surname>Caixeta</surname> <given-names>E. T.</given-names>
</name>
<name>
<surname>Sousa</surname> <given-names>T. V.</given-names>
</name>
<name>
<surname>Gois</surname> <given-names>I. B.</given-names>
</name>
<name>
<surname>Lopes da Silva</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Sakiyama</surname> <given-names>N. S.</given-names>
</name>
<etal/>
</person-group>. (<year>2021</year>). <article-title>Designing the best breeding strategy for Coffea Canephora: Genetic Evaluation of pure and hybrid individuals aiming to select for productivity and disease resistance traits</article-title>. <source>PLoS One</source> <volume>16</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.1371/journal.pone.0260997</pub-id>
</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Alkimim</surname> <given-names>E. R.</given-names>
</name>
<name>
<surname>Caixeta</surname> <given-names>E. T.</given-names>
</name>
<name>
<surname>Sousa</surname> <given-names>T. V.</given-names>
</name>
<name>
<surname>Resende</surname> <given-names>M. D.</given-names>
</name>
<name>
<surname>da Silva</surname> <given-names>F. L.</given-names>
</name>
<name>
<surname>Sakiyama</surname> <given-names>N. S.</given-names>
</name>
<etal/>
</person-group>. (<year>2020</year>). <article-title>Selective efficiency of genome-wide selection in Coffea canephora breeding</article-title>. <source>Tree Genet. Genomes</source> <volume>16</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s11295-020-01433-3</pub-id>
</citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Arcanjo</surname> <given-names>E. S.</given-names>
</name>
<name>
<surname>Nascimento</surname> <given-names>A. C. C.</given-names>
</name>
<name>
<surname>Nascimento</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Azevedo</surname> <given-names>C. F.</given-names>
</name>
<name>
<surname>Caixeta</surname> <given-names>E. T.</given-names>
</name>
<name>
<surname>Oliveira</surname> <given-names>A. C. C.</given-names>
</name>
<etal/>
</person-group>. (<year>2024</year>). <article-title>Low-density marker panels for genomic prediction in Coffea arabica L. Acta Scientiarum</article-title>. <source>Agronomy</source>.</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Barbosa</surname> <given-names>I. D. P.</given-names>
</name>
<name>
<surname>da Costa</surname> <given-names>W. G.</given-names>
</name>
<name>
<surname>Nascimento</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Cruz</surname> <given-names>C. D.</given-names>
</name>
<name>
<surname>de Oliveira</surname> <given-names>A. C.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Recommendation of Coffea arabica genotypes by factor analysis</article-title>. <source>Euphytica</source> <volume>215</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s10681-019-2499-x</pub-id>
</citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Barreto</surname> <given-names>C. A. V.</given-names>
</name>
<name>
<surname>das Gra&#xe7;as Dias</surname> <given-names>K. O.</given-names>
</name>
<name>
<surname>de Sousa</surname> <given-names>I. C.</given-names>
</name>
<name>
<surname>Azevedo</surname> <given-names>C. F.</given-names>
</name>
<name>
<surname>Nascimento</surname> <given-names>A. C. C.</given-names>
</name>
<name>
<surname>Guimar&#xe3;es</surname> <given-names>L. J. M.</given-names>
</name>
<etal/>
</person-group>. (<year>2024</year>). <article-title>Genomic prediction in multi-environment trials in maize using statistical and machine learning methods</article-title>. <source>Sci. Rep.</source> <volume>14</volume>, <fpage>1062</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41598-024-51792-3</pub-id>
</citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Breiman</surname> <given-names>L.</given-names>
</name>
</person-group> (<year>1996</year>). <article-title>Bagging predictors</article-title>. <source>Mach. Learn.</source> <volume>24</volume>, <fpage>123</fpage>&#x2013;<lpage>140</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/BF00058655</pub-id>
</citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Breiman</surname> <given-names>L.</given-names>
</name>
</person-group> (<year>2001</year>). <article-title>Random forests</article-title>. <source>Mach. Learn.</source> <volume>45</volume>, <fpage>5</fpage>&#x2013;<lpage>32</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1023/A:1010933404324</pub-id>
</citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Briollais</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Durrieu</surname> <given-names>G.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Application of quantile regression to recent genetic and -omic studies</article-title>. <source>Hum. Genet.</source> <volume>133</volume>, <fpage>951</fpage>&#x2013;<lpage>966</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s00439-014-1440-6</pub-id>
</citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chan</surname> <given-names>J. Y.-L.</given-names>
</name>
<name>
<surname>Leow</surname> <given-names>S. M.</given-names>
</name>
<name>
<surname>Bea</surname> <given-names>K. T.</given-names>
</name>
<name>
<surname>Cheng</surname> <given-names>W. K.</given-names>
</name>
<name>
<surname>Phoong</surname> <given-names>S. W.</given-names>
</name>
<name>
<surname>Hong</surname> <given-names>Z.-W.</given-names>
</name>
<etal/>
</person-group>. (<year>2022</year>). <article-title>Mitigating the multicollinearity problem and its Machine Learning Approach: A Review</article-title>. <source>Mathematics</source> <volume>10</volume>, <elocation-id>1283</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/math10081283</pub-id>
</citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chrigui</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Sari</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Sari</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Eker</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Cengiz</surname> <given-names>M. F.</given-names>
</name>
<name>
<surname>Ikten</surname> <given-names>C.</given-names>
</name>
<etal/>
</person-group>. (<year>2020</year>). <article-title>Introgression of resistance to Leafminer (Liriomyza Cicerina Rondani) from Cicer reticulatum Ladiz. to C. Arietinum L. and relationships between potential biochemical selection criteria</article-title>. <source>Agronomy</source> <volume>11</volume>, <elocation-id>57</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/agronomy11010057</pub-id>
</citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Coelho de Sousa</surname> <given-names>I.</given-names>
</name>
<name>
<surname>Nascimento</surname> <given-names>M.</given-names>
</name>
<name>
<surname>de Castro Sant&#x2019;anna</surname> <given-names>I.</given-names>
</name>
<name>
<surname>Teixeira Caixeta</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Ferreira Azevedo</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Dami&#xe3;o Cruz</surname> <given-names>C.</given-names>
</name>
<etal/>
</person-group>. (<year>2022</year>). <article-title>Marker effects and heritability estimates using additive-dominance genomic architectures via artificial neural networks in Coffea canephora</article-title>. <source>PLoS One</source> <volume>17</volume>, <elocation-id>e0262055</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1371/journal.pone.0262055</pub-id>
</citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Costa</surname> <given-names>W. G.</given-names>
</name>
<name>
<surname>Celeri</surname> <given-names>M. O.</given-names>
</name>
<name>
<surname>Barbosa</surname> <given-names>I. P.</given-names>
</name>
<name>
<surname>Silva</surname> <given-names>G. N.</given-names>
</name>
<name>
<surname>Azevedo</surname> <given-names>C. F.</given-names>
</name>
<name>
<surname>Oliveira</surname> <given-names>A. B.</given-names>
</name>
<etal/>
</person-group>. (<year>2022</year>). <article-title>Genomic prediction through machine learning and neural networks for traits with epistasis</article-title>. <source>Comput. Struct. Biotechnol. J.</source> <volume>20</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.csbj.2022.09.029</pub-id>
</citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Daetwyler</surname> <given-names>H. D.</given-names>
</name>
<name>
<surname>Calus</surname> <given-names>M. P.</given-names>
</name>
<name>
<surname>Pong-Wong</surname> <given-names>R.</given-names>
</name>
<name>
<surname>de los Campos</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Hickey</surname> <given-names>J. M.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Genomic prediction in animals and plants: Simulation of data, validation, reporting, and benchmarking</article-title>. <source>Genetics</source> <volume>193</volume>, <fpage>347</fpage>&#x2013;<lpage>365</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1534/genetics.112.147983</pub-id>
</citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>de Resende</surname> <given-names>M. D. V.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Software Selegen-REML/BLUP: a useful tool for plant breeding</article-title>. <source>Crop Breed. Appl. Biotechnol.</source> <volume>16</volume>, <fpage>330</fpage>&#x2013;<lpage>339</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1590/1984-70332016v16n4a49</pub-id>
</citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ferr&#xe3;o</surname> <given-names>M. A.</given-names>
</name>
<name>
<surname>da Fonseca</surname> <given-names>A. F.</given-names>
</name>
<name>
<surname>Volpi</surname> <given-names>P. S.</given-names>
</name>
<name>
<surname>de Souza</surname> <given-names>L. C.</given-names>
</name>
<name>
<surname>Com&#xe9;rio</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Filho</surname> <given-names>A. C.</given-names>
</name>
<etal/>
</person-group>. (<year>2023</year>). <article-title>Genomic-assisted breeding for climate-Smart Coffee</article-title>. <source>Plant Genome.</source> <volume>17</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.1002/tpg2.20321</pub-id>
</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Friedel</surname> <given-names>M. J.</given-names>
</name>
<name>
<surname>Stewart</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Lu</surname> <given-names>X. F.</given-names>
</name>
<name>
<surname>Stevenson</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Manly</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Dyer</surname> <given-names>T.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>A simple stacked ensemble machine learning model to predict naturalized catchment hydrology and allocation status</article-title>. <source>arXiv preprint</source>. arXiv:2312.01649.</citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Friedman</surname> <given-names>J. H.</given-names>
</name>
</person-group> (<year>1991</year>). <article-title>Multivariate adaptive regression splines</article-title>. <source>Ann. Stat</source> <volume>19</volume>, <fpage>1</fpage>&#x2013;<lpage>67</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1214/aos/1176347963</pub-id>
</citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ganaie</surname> <given-names>M. A.</given-names>
</name>
<name>
<surname>Hu</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Malik</surname> <given-names>A. K.</given-names>
</name>
<name>
<surname>Tanveer</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Suganthan</surname> <given-names>P. N.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Ensemble deep learning: A review</article-title>. <source>Eng. Appl. Artif. Intell.</source> <volume>115</volume>, <elocation-id>105151</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.engappai.2022.105151</pub-id>
</citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ghasemieh</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Lloyed</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Bahrami</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Vajar</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Kashef</surname> <given-names>R.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>A novel machine learning model with stacking ensemble learner for predicting emergency readmission of heart-disease patients</article-title>. <source>Decision Analytics J.</source> <volume>7</volume>, <elocation-id>100242</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.dajour.2023.100242</pub-id>
</citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gianola</surname> <given-names>D.</given-names>
</name>
<name>
<surname>de los Campos</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Hill</surname> <given-names>W. G.</given-names>
</name>
<name>
<surname>Manfredi</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Fernando</surname> <given-names>R.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>Additive genetic variability and the Bayesian alphabet</article-title>. <source>Genetics</source> <volume>183</volume>, <fpage>347</fpage>&#x2013;<lpage>363</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1534/genetics.109.103952</pub-id>
</citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gokavi</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Gangadharappa</surname> <given-names>P. M.</given-names>
</name>
<name>
<surname>Sathish</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Nishani</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Hiremath</surname> <given-names>J. S.</given-names>
</name>
<name>
<surname>Koulagi</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Genetic variability, heritability and genetic advance for quantitative traits of Arabica coffee (<italic>Coffea Arabica</italic> L.) genotypes</article-title>. <source>Plant Genet. Resources: Characterization Utilization</source> <volume>21</volume>, <fpage>260</fpage>&#x2013;<lpage>268</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1017/S1479262123000680</pub-id>
</citation>
</ref>
<ref id="B24">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Hastie</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Tibshirani</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Friedman</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>2009</year>). <source>The elements of statistical learning: data mining, inference, and prediction</source>. <edition>2ed</edition> (<publisher-loc>New York</publisher-loc>: <publisher-name>Springer</publisher-name>), <fpage>p. 745</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/978-0-387-84858-7</pub-id>
</citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hernandez</surname> <given-names>C. O.</given-names>
</name>
<name>
<surname>Wyatt</surname> <given-names>L. E.</given-names>
</name>
<name>
<surname>Mazourek</surname> <given-names>M. R.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Genomic prediction and selection for fruit traits in winter squash</article-title>. <source>G3 Genes|Genomes|Genetics</source> <volume>10</volume>, <fpage>3601</fpage>&#x2013;<lpage>3610</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1534/g3.120.401215</pub-id>
</citation>
</ref>
<ref id="B26">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>James</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Witten</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Hastie</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Tibshirani</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Taylor</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>2023</year>). <source>An introduction to statistical learning with applications in Python.</source> (<publisher-loc>Cham, Switzerland</publisher-loc>: <publisher-name>Springer International Publishing</publisher-name>).</citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jarqu&#xed;n</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Crossa</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Lacaze</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Du Cheyron</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Daucourt</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Lorgeou</surname> <given-names>J.</given-names>
</name>
<etal/>
</person-group>. (<year>2014</year>). <article-title>A reaction norm model for genomic selection using high-dimensional genomic and environmental data</article-title>. <source>Appl. Genet.</source> <volume>127</volume>, <fpage>595</fpage>&#x2013;<lpage>607</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s00122-013-2243-1</pub-id>
</citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kalule</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Abderrahmane</surname> <given-names>H. A.</given-names>
</name>
<name>
<surname>Alameri</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Sassi</surname> <given-names>M.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Stacked Ensemble Machine Learning for porosity and absolute permeability prediction of carbonate rock plugs</article-title>. <source>Sci. Rep.</source> <volume>13</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41598-023-36096-2</pub-id>
</citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kandel</surname> <given-names>I.</given-names>
</name>
<name>
<surname>Castelli</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Popovi&#x10d;</surname> <given-names>A.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Comparing stacking ensemble techniques to improve musculoskeletal fracture image classification</article-title>. <source>J. Imaging</source> <volume>7</volume>, <elocation-id>100</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/jimaging7060100</pub-id>
</citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Koenker</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Bassett</surname> <given-names>G.</given-names>
</name>
</person-group> (<year>1978</year>). <article-title>Regression quantiles</article-title>. <source>Econometrica</source> <volume>46</volume>, <fpage>33</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.2307/1913643</pub-id>
</citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Larkin</surname> <given-names>D. L.</given-names>
</name>
<name>
<surname>Lozada</surname> <given-names>D. N.</given-names>
</name>
<name>
<surname>Mason</surname> <given-names>R. E.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Genomic selection&#x2014;considerations for successful implementation in wheat breeding programs</article-title>. <source>Agronomy</source> <volume>9</volume>, <fpage>1</fpage>&#x2013;<lpage>18</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/agronomy9090479</pub-id>
</citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lenz</surname> <given-names>P. R.</given-names>
</name>
<name>
<surname>Nadeau</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Mottet</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Perron</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Isabel</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Beaulieu</surname> <given-names>J.</given-names>
</name>
<etal/>
</person-group>. (<year>2019</year>). <article-title>Multi-trait genomic selection for Weevil Resistance, growth, and wood quality in Norway spruce</article-title>. <source>Evolutionary Appl.</source> <volume>13</volume>, <fpage>76</fpage>&#x2013;<lpage>94</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/eva.12823</pub-id>
</citation>
</ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Gao</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Martini</surname> <given-names>J. W. R.</given-names>
</name>
<name>
<surname>Simianer</surname> <given-names>H.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Integrating gene expression data into genomic prediction</article-title>. <source>Front. Genet.</source> <volume>10</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fgene.2019.00126</pub-id>
</citation>
</ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liang</surname> <given-names>M</given-names>
</name>
<name>
<surname>Chang</surname> <given-names>T</given-names>
</name>
<name>
<surname>An</surname> <given-names>B</given-names>
</name>
<name>
<surname>Duan</surname> <given-names>X</given-names>
</name>
<name>
<surname>Du</surname> <given-names>L</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>X</given-names>
</name>
<etal/>
</person-group>. (<year>2021</year>). <article-title>Stacking ensemble learning framework for genomic prediction</article-title>. <source>Front. Genet.</source> <volume>12</volume>, <elocation-id>600040</elocation-id>. doi: <pub-id pub-id-type="doi">10.3389/fgene.2021.600040</pub-id>
</citation>
</ref>
<ref id="B35">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liaw</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Wiener</surname> <given-names>M.</given-names>
</name>
</person-group> (<year>2002</year>). <article-title>Classification and regression by randomForest</article-title>. <source>R J</source> <volume>2</volume>, <fpage>18</fpage>&#x2013;<lpage>22</lpage>.</citation>
</ref>
<ref id="B36">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Long</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Gianola</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Rosa</surname> <given-names>G. J.</given-names>
</name>
<name>
<surname>Weigel</surname> <given-names>K. A.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>Application of support vector regression to genome-assisted prediction of quantitative traits</article-title>. <source>Theor. Appl. Genet.</source> <volume>123</volume>, <fpage>1065</fpage>&#x2013;<lpage>1074</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s00122-011-1648-y</pub-id>
</citation>
</ref>
<ref id="B37">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>McKinney</surname> <given-names>B. A.</given-names>
</name>
<name>
<surname>Reif</surname> <given-names>D. M.</given-names>
</name>
<name>
<surname>Ritchie</surname> <given-names>M. D.</given-names>
</name>
<name>
<surname>Moore</surname> <given-names>J. H.</given-names>
</name>
</person-group> (<year>2006</year>). <article-title>Machine learning for detecting gene-gene interactions</article-title>. <source>Appl. Bioinformatics</source> <volume>5</volume>, <fpage>77</fpage>&#x2013;<lpage>88</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.2165/00822942-200605020-00002</pub-id>
</citation>
</ref>
<ref id="B38">
<citation citation-type="web">
<person-group person-group-type="author">
<name>
<surname>Meinshausen</surname> <given-names>N.</given-names>
</name>
</person-group> (<year>2017</year>). <source>quantregForest: Quantile regression forests - The Comprehensive R Archive Network</source>. Available online at: <uri xlink:href="https://cran.r-project.org/web/packages/quantregForest/quantregForest.pdf">https://cran.r-project.org/web/packages/quantregForest/quantregForest.pdf</uri> (Accessed <access-date>January 17, 2023</access-date>).</citation>
</ref>
<ref id="B39">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Meinshausen</surname> <given-names>N.</given-names>
</name>
</person-group> (<year>2006</year>). <article-title>Quantile regression forests</article-title>. <source>J. Mach. Learn. Res.</source> <volume>7</volume>, <fpage>983</fpage>&#x2013;<lpage>999</lpage>.</citation>
</ref>
<ref id="B40">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mendes-Moreira</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Soares</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Jorge</surname> <given-names>A. M.</given-names>
</name>
<name>
<surname>Sousa</surname> <given-names>J. F.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Ensemble approaches for regression</article-title>. <source>ACM Computing Surveys</source> <volume>45</volume>, <fpage>1</fpage>&#x2013;<lpage>40</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1145/2379776.2379786</pub-id>
</citation>
</ref>
<ref id="B41">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Meuwissen</surname> <given-names>T. H. E.</given-names>
</name>
<name>
<surname>Hayes</surname> <given-names>B. J.</given-names>
</name>
<name>
<surname>Goddard</surname> <given-names>M.</given-names>
</name>
</person-group> (<year>2001</year>). <article-title>Prediction of total genetic value using genome-wide dense marker maps</article-title>. <source>Genetics</source> <volume>157</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/genetics/157.4.1819</pub-id>
</citation>
</ref>
<ref id="B42">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mienye</surname> <given-names>I. D.</given-names>
</name>
<name>
<surname>Sun</surname> <given-names>Y.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>A survey of ensemble learning: Concepts, algorithms, applications, and prospects</article-title>. <source>IEEE Access</source> <volume>10</volume>, <fpage>99129</fpage>&#x2013;<lpage>99149</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/ACCESS.2022.3207287</pub-id>
</citation>
</ref>
<ref id="B43">
<citation citation-type="web">
<person-group person-group-type="author">
<name>
<surname>Milborrow</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>earth: multivariate adaptive regression splines</article-title>. In: <source>R package version 4.6.3</source>. Available online at: <uri xlink:href="https://CRAN.R-project.org/package=earth">https://CRAN.R-project.org/package=earth</uri> (Accessed <access-date>7 June 2023</access-date>).</citation>
</ref>
<ref id="B44">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mohammed</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Kora</surname> <given-names>R.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>A comprehensive review on ensemble deep learning: opportunities and challenges</article-title>. <source>J. King Saud Univ-Comput Inf Sci.</source> <volume>35</volume>, <fpage>757</fpage>&#x2013;<lpage>774</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.jksuci.2023.01.014</pub-id>
</citation>
</ref>
<ref id="B45">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Montesinos-L&#xf3;pez</surname> <given-names>O. A.</given-names>
</name>
<name>
<surname>Mart&#xed;n-Vallejo</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Crossa</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Gianola</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Hern&#xe1;ndez-Su&#xe1;rez</surname> <given-names>C. M.</given-names>
</name>
<name>
<surname>Montesinos-L&#xf3;pez</surname> <given-names>A.</given-names>
</name>
<etal/>
</person-group>. (<year>2019</year>). <article-title>A benchmarking between deep learning, support Vector Machine and bayesian threshold best linear unbiased prediction for predicting ordinal traits in plant breeding</article-title>. <source>G3 Genes|Genomes|Genetics</source> <volume>9</volume>, <fpage>601</fpage>&#x2013;<lpage>618</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1534/g3.118.200998</pub-id>
</citation>
</ref>
<ref id="B46">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Montesinos L&#xf3;pez</surname> <given-names>O. A.</given-names>
</name>
<name>
<surname>Montesinos L&#xf3;pez</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Crossa</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>2022</year>). &#x201c;<article-title>Random forest for genomic prediction</article-title>,&#x201d; in <source>Multivariate statistical machine learning methods for genomic prediction</source>. Eds. <person-group person-group-type="editor">
<name>
<surname>Montesinos L&#xf3;pez</surname> <given-names>O. A.</given-names>
</name>
<name>
<surname>Montesinos L&#xf3;pez</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Crossa</surname> <given-names>J.</given-names>
</name>
</person-group> (<publisher-name>Springer</publisher-name>, <publisher-loc>Cham</publisher-loc>), <fpage>pp 633</fpage>&#x2013;<lpage>pp 681</lpage>.</citation>
</ref>
<ref id="B47">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Montgomery</surname> <given-names>D. C.</given-names>
</name>
<name>
<surname>Peck</surname> <given-names>E. A.</given-names>
</name>
<name>
<surname>Vining</surname> <given-names>G. G.</given-names>
</name>
</person-group> (<year>2021</year>). <source>Introduction to linear regression analysis.</source> (<publisher-loc>Hoboken, New Jersey</publisher-loc>: <publisher-name>John Wiley &amp; Sons, Inc</publisher-name>).</citation>
</ref>
<ref id="B48">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Nascimento</surname> <given-names>A. C.</given-names>
</name>
<name>
<surname>Nascimento</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Azevedo</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Silva</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Barili</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Vale</surname> <given-names>N.</given-names>
</name>
<etal/>
</person-group>. (<year>2019</year>). <article-title>Quantile regression applied to genome-enabled prediction of traits related to flowering time in the common bean</article-title>. <source>Agronomy</source> <volume>9</volume>, <elocation-id>796</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/agronomy9120796</pub-id>
</citation>
</ref>
<ref id="B49">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Nascimento</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Silva</surname> <given-names>E. F. F.</given-names>
</name>
<name>
<surname>de Resende</surname> <given-names>M. D. V.</given-names>
</name>
<name>
<surname>Cruz</surname> <given-names>C. D.</given-names>
</name>
<name>
<surname>Nascimento</surname> <given-names>A. C. C.</given-names>
</name>
<name>
<surname>Viana</surname> <given-names>J. M. S.</given-names>
</name>
<etal/>
</person-group>. (<year>2017</year>). <article-title>Regularized quantile regression applied to genome-enabled prediction of quantitative traits</article-title>. <source>Genet. Mol. Res.</source> <volume>16</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.4238/gmr16019538</pub-id>
</citation>
</ref>
<ref id="B50">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>P&#xe9;rez</surname> <given-names>P.</given-names>
</name>
<name>
<surname>de los Campos</surname> <given-names>G.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Genome-wide regression and prediction with the BGLR statistical package</article-title>. <source>Genetics.</source> <volume>198</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.1534/genetics.114.164442</pub-id>
</citation>
</ref>
<ref id="B51">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Porto</surname> <given-names>A. C. V.</given-names>
</name>
<name>
<surname>Freitas-Silva</surname> <given-names>O.</given-names>
</name>
<name>
<surname>de Souza</surname> <given-names>E. F.</given-names>
</name>
<name>
<surname>Gottschalk</surname> <given-names>L. M. F.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Effect of asparaginase enzyme in the reduction of asparagine in green coffee</article-title>. <source>Beverages</source> <volume>5</volume>, <elocation-id>32</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/beverages5020032</pub-id>
</citation>
</ref>
<ref id="B52">
<citation citation-type="book">
<person-group person-group-type="author">
<collab>R Core Team</collab>
</person-group> (<year>2022</year>). <source>R: A Language and Environment for Statistical Computing</source> (<publisher-loc>Vienna</publisher-loc>: <publisher-name>R Foundation for Statistical Computing</publisher-name>).</citation>
</ref>
<ref id="B53">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Seyum</surname> <given-names>E. G.</given-names>
</name>
<name>
<surname>Bille</surname> <given-names>N. H.</given-names>
</name>
<name>
<surname>Abtew</surname> <given-names>W. G.</given-names>
</name>
<name>
<surname>Munyengwa</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Bell</surname> <given-names>J. M.</given-names>
</name>
<name>
<surname>Cros</surname> <given-names>D.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Genomic selection in tropical perennial crops and plantation trees: A review</article-title>. <source>Mol. Breed.</source> <volume>42</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s11032-022-01326-4</pub-id>
</citation>
</ref>
<ref id="B54">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sousa</surname> <given-names>T. V.</given-names>
</name>
<name>
<surname>Caixeta</surname> <given-names>E. T.</given-names>
</name>
<name>
<surname>Alkimim</surname> <given-names>E. R.</given-names>
</name>
<name>
<surname>Oliveira</surname> <given-names>A. C.</given-names>
</name>
<name>
<surname>B de Pereira</surname> <given-names>A.A.</given-names>
</name>
<name>
<surname>Sakiyama</surname> <given-names>N. S.</given-names>
</name>
<etal/>
</person-group>. (<year>2017</year>). <article-title>Population structure and genetic diversity of coffee progenies derived from Catua&#xed; and H&#xed;brido de Timor revealed by genome-wide SNP marker</article-title>. <source>Tree Genet. Genomes</source> <volume>13</volume>, <fpage>124</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s11295-017-1208-y</pub-id>
</citation>
</ref>
<ref id="B55">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sousa</surname> <given-names>T. V.</given-names>
</name>
<name>
<surname>Caixeta</surname> <given-names>E. T.</given-names>
</name>
<name>
<surname>Alkimim</surname> <given-names>E. R.</given-names>
</name>
<name>
<surname>Oliveira</surname> <given-names>A. C. B.</given-names>
</name>
<name>
<surname>Pereira</surname> <given-names>A. A.</given-names>
</name>
<name>
<surname>Sakiyama</surname> <given-names>N. S.</given-names>
</name>
<etal/>
</person-group>. (<year>2019</year>). <article-title>Early selection enabled by the implementation of genomic selection in <italic>Coffea arabica</italic> breeding</article-title>. <source>Front. Plant Sci.</source> <volume>9</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fpls.2018.01934</pub-id>
</citation>
</ref>
<ref id="B56">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sousa</surname> <given-names>I. C.</given-names>
</name>
<name>
<surname>Nascimento</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Silva</surname> <given-names>G. N.</given-names>
</name>
<name>
<surname>Nascimento</surname> <given-names>A. C. C.</given-names>
</name>
<name>
<surname>Cruz</surname> <given-names>C. D.</given-names>
</name>
<name>
<surname>Silva</surname> <given-names>F. F.</given-names>
</name>
<etal/>
</person-group>. (<year>2021</year>). <article-title>Genomic prediction of leaf rust resistance to Arabica coffee using machine learning algorithms</article-title>. <source>Scientia Agricola</source> <volume>78</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.1590/1678-992x-2020-0021</pub-id>
</citation>
</ref>
<ref id="B57">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>VanRaden</surname> <given-names>P. M.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>Efficient methods to compute genomic predictions</article-title>. <source>J. Dairy Sci.</source> <volume>91</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.3168/jds.2007-0980</pub-id>
</citation>
</ref>
<ref id="B58">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Voss-Fels</surname> <given-names>K. P.</given-names>
</name>
<name>
<surname>Cooper</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Hayes</surname> <given-names>B. J.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Accelerating crop genetic gains with genomic selection</article-title>. <source>Theor. Appl. Genet.</source> <volume>132</volume>, <fpage>669</fpage>&#x2013;<lpage>686</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s00122-018-3270-8</pub-id>
</citation>
</ref>
<ref id="B59">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Weldemichael</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Alamerew</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Kufa</surname> <given-names>T.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Genetic variability, heritability and genetic advance for quantitative traits in coffee (<italic>Coffea arabica</italic> L.) accessions in Ethiopia</article-title>. <source>Afr. J. Agricul. Res.</source> <volume>12</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.5897/AJAR2016.12059</pub-id>
</citation>
</ref>
<ref id="B60">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xu</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Fu</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Huang</surname> <given-names>C.</given-names>
</name>
<etal/>
</person-group>. (<year>2019</year>). <article-title>Enhancing genetic gain through genomic selection: From livestock to plants</article-title>. <source>Plant Commun.</source> <volume>16</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.xplc.2019.100005</pub-id>
</citation>
</ref>
<ref id="B61">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Goh</surname> <given-names>A. T. C.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Multivariate adaptive regression splines and neural network models for prediction of pile drivability</article-title>. <source>Geosci. Front.</source> <volume>7</volume>, <fpage>45</fpage>&#x2013;<lpage>52</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.gsf.2014.10.003</pub-id>
</citation>
</ref>
<ref id="B62">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Reif</surname> <given-names>J. C.</given-names>
</name>
<name>
<surname>Jiang</surname> <given-names>Y.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>On the use of GBLUP and its extension for GWAS with additive and epistatic effects</article-title>. <source>G3 Genes|Genomes|Genetics</source> <volume>11</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/g3journal/jkab122</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>