<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.3" xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Genet.</journal-id>
<journal-title>Frontiers in Genetics</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Genet.</abbrev-journal-title>
<issn pub-type="epub">1664-8021</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1267704</article-id>
<article-id pub-id-type="doi">10.3389/fgene.2023.1267704</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Genetics</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Bayesian linear mixed model with multiple random effects for family-based genetic studies</article-title>
<alt-title alt-title-type="left-running-head">Hai et al.</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fgene.2023.1267704">10.3389/fgene.2023.1267704</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" equal-contrib="yes">
<name>
<surname>Hai</surname>
<given-names>Yang</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="author-notes" rid="fn001">
<sup>&#x2020;</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author" equal-contrib="yes">
<name>
<surname>Zhao</surname>
<given-names>Wenxuan</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="author-notes" rid="fn001">
<sup>&#x2020;</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2111260/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Meng</surname>
<given-names>Qingyu</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2419675/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Liu</surname>
<given-names>Long</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1159222/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Wen</surname>
<given-names>Yalu</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/311671/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>Department of Statistics</institution>, <institution>University of Auckland</institution>, <addr-line>Auckland</addr-line>, <country>New Zealand</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Department of Health Statistics</institution>, <institution>School of Public Health</institution>, <institution>Shanxi Medical University</institution>, <addr-line>Taiyuan</addr-line>, <country>China</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/35031/overview">Tao Wang</ext-link>, Medical College of Wisconsin, United States</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1757695/overview">Song Zhai</ext-link>, Merck, United States</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/34164/overview">Zhaoxia Yu</ext-link>, University of California, Irvine, United States</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Long Liu, <email>biostat-ll@sxmu.edu.cn</email>; Yalu Wen, <email>y.wen@auckland.ac.nz</email>
</corresp>
<fn fn-type="equal" id="fn001">
<label>
<sup>&#x2020;</sup>
</label>
<p>These authors have contributed equally to this work</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>19</day>
<month>10</month>
<year>2023</year>
</pub-date>
<pub-date pub-type="collection">
<year>2023</year>
</pub-date>
<volume>14</volume>
<elocation-id>1267704</elocation-id>
<history>
<date date-type="received">
<day>26</day>
<month>07</month>
<year>2023</year>
</date>
<date date-type="accepted">
<day>25</day>
<month>09</month>
<year>2023</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2023 Hai, Zhao, Meng, Liu and Wen.</copyright-statement>
<copyright-year>2023</copyright-year>
<copyright-holder>Hai, Zhao, Meng, Liu and Wen</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>
<bold>Motivation:</bold> Family-based study design is one of the popular designs used in genetic research, and the whole-genome sequencing data obtained from family-based studies offer many unique features for risk prediction studies. They can not only provide a more comprehensive view of many complex diseases, but also utilize information in the design to further improve the prediction accuracy. While promising, existing analytical methods often ignore the information embedded in the study design and overlook the predictive effects of rare variants, leading to a prediction model with sub-optimal performance.</p>
<p>
<bold>Results:</bold> We proposed a Bayesian linear mixed model for the prediction analysis of sequencing data obtained from family-based studies. Our method can not only capture predictive effects from both common and rare variants, but also easily accommodate various disease model assumptions. It uses information embedded in the study design to form surrogates, where the predictive effects from unmeasured/unknown genetic and environmental risk factors can be modelled. Through extensive simulation studies and the analysis of sequencing data obtained from the Michigan State University Twin Registry study, we have demonstrated that the proposed method outperforms commonly adopted techniques.</p>
<p>
<bold>Availability:</bold> R package is available at <ext-link ext-link-type="uri" xlink:href="https://github.com/yhai943/FBLMM">https://github.com/yhai943/FBLMM</ext-link>.</p>
</abstract>
<kwd-group>
<kwd>Bayesian linear mixed model</kwd>
<kwd>family-based genetic study</kwd>
<kwd>rare variants</kwd>
<kwd>unknown genetic factors</kwd>
<kwd>common environmental risk factors</kwd>
</kwd-group>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Statistical Genetics and Methodology</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec id="s1">
<title>Introduction</title>
<p>Family-based study (e.g., twin study) is one of the most popular designs used in genetic research, and it offers many unique features for risk prediction studies. For example, the relatedness among family members helps capture the predictive effects from unmeasured/unknown polygenic and shared environmental factors, and thus contributes additional information, beyond the measured data, for risk prediction studies (<xref ref-type="bibr" rid="B31">Ruderfer et al., 2010</xref>). Despite these advantages, few statistical methods are available for risk prediction research using family-based designs. The existing methods usually build risk prediction models based on genetic effects that are estimated with familial correlations adjusted for. For example, <xref ref-type="bibr" rid="B27">Meigs et al. (2008)</xref> developed a risk prediction model for family-based genetic studies, where the genotypic risk score is determined without considering the information in families. <xref ref-type="bibr" rid="B31">Ruderfer et al. (2010)</xref> presented a family-based liability threshold model and illustrated it in the analyses of Crohn&#x2019;s disease. Although these methods have contributed to the advances of family-based risk prediction, they can lead to less accurate models when unmeasured genetic and/or shared environmental factors contribute significantly to disease risk. Moreover, the recent whole genome sequencing studies have demonstrated that rare variants can play a significant role in many common complex diseases, such as obesity, coronary heart disease, and drug addiction (<xref ref-type="bibr" rid="B32">Ramachandrappa et al., 2013</xref>; <xref ref-type="bibr" rid="B30">Peloso et al., 2014</xref>; <xref ref-type="bibr" rid="B38">Wang et al., 2014</xref>). Family-based design can enhance the chance of capturing the predictive effects from rare variants as they tend to be aggregated within family. 
However, existing prediction models do not utilize the design information and they simply extend models designed for population-based studies by adjusting correlations within the data. Therefore, it remains challenging for them to capture the predictive effects from rare variants, primarily due to their low minor allele frequencies (<xref ref-type="bibr" rid="B28">Mihaescu et al., 2013</xref>).</p>
<p>It has long been recognized that family history alone can greatly facilitate disease risk prediction. For many complex diseases (e.g., cardiovascular diseases and type II diabetes), individuals with a positive family history are usually classified as the population at high risk (<xref ref-type="bibr" rid="B36">Valdez et al., 2007</xref>; <xref ref-type="bibr" rid="B25">Marateb et al., 2018</xref>). Family history can be viewed as a surrogate that reflects the contributions of many known/unknown risk factors accumulated within a family. Evidence has shown that familial effects account for a significant amount of disease variability. For example, <xref ref-type="bibr" rid="B4">Chen et al. (2007)</xref> have shown that 33% of the variance of spherical equivalent can be attributed to childhood environmental effects. Furthermore, genetic variants can account for a substantial proportion of heritability for human traits (<xref ref-type="bibr" rid="B6">Couillard et al., 2001</xref>; <xref ref-type="bibr" rid="B9">Dirani et al., 2006</xref>). For example, genetic factors can explain as much as 87% of the variation in the susceptibility to asthma in twins with positive family history (<xref ref-type="bibr" rid="B19">Laitinen et al., 1998</xref>). <xref ref-type="bibr" rid="B22">Lichtenstein et al. (2009)</xref> found that genetic heritability for bipolar disorder and schizophrenia was 59% and 64%, respectively. The familial aggregation for many complex diseases is mainly due to the relatedness in genetic and environmental factors among family members, which carry important information and can be used to further improve prediction accuracy. However, most existing analytical methods are developed by simply extending those models designed for population-based studies, where family correlations are first adjusted. 
For example, <xref ref-type="bibr" rid="B27">Meigs et al. (2008)</xref> built a risk prediction model for family-based genetic study, where the relatedness among family members is adjusted using a generalized estimating equation model. Although these methods are statistically valid and could capture the predictive effects from those measured known risk factors, they are not capable of using family information as surrogates to account for unmeasured predictors (e.g., shared environmental risk factors).</p>
<p>Population-based whole-genome sequencing studies have shown that rare variants are associated with many complex human diseases (<xref ref-type="bibr" rid="B8">Dickson et al., 2010</xref>; <xref ref-type="bibr" rid="B14">Helgadottir et al., 2016</xref>), and they have great potential in explaining the missing heritability (<xref ref-type="bibr" rid="B7">Cruceanu et al., 2013</xref>). For example, a recent study has reported that rare variants in renal salt handling genes have contributed to variation of blood pressure (<xref ref-type="bibr" rid="B18">Ji et al., 2008</xref>). <xref ref-type="bibr" rid="B35">Stefansson et al. (2008)</xref> found that rare variants are associated with schizophrenia and autism. <xref ref-type="bibr" rid="B17">Ionita-Laza and Ottman (2011)</xref> showed that four rare variants in the <italic>IFIH1</italic> gene can lower the risk of type 1 diabetes. Recent developments in prediction research have also shed light on the importance of rare variants in building an accurate prediction model. For example, the risk prediction model for coronary artery disease in European and South Asian populations was built with rare variants incorporated, and it yields improved predictive accuracy (<xref ref-type="bibr" rid="B20">Lali et al., 2020</xref>). Despite their importance, few methods designed for family-based studies have considered the contributions of rare variants in disease risk prediction. Recently, we developed a Bayesian linear mixed model with multiple random effects (denoted as BLMM) to predict disease risk for population-based studies, where both common and rare variants have been explicitly considered (<xref ref-type="bibr" rid="B13">Hai and Wen, 2020</xref>). We have shown that the BLMM can capture the predictive effects from rare variants and is robust against various disease models. Though promising, it was developed for population-based studies, and thus cannot make use of the information embedded in the family-based study design.</p>
<p>To address these limitations, we proposed a family-based Bayesian linear mixed model with multiple random effects (denoted as FBLMM) for the prediction analysis on sequencing data obtained from family-based genetic studies. The proposed FBLMM uses the correlations among family members to construct surrogates for unmeasured risk predictors, and it can account for the predictive effects from both common and rare variants. In the following sections, we first presented the details of the proposed model, and then conducted extensive simulation studies to evaluate its performance. Finally, we illustrated its application using the whole-genome exome data from Michigan State University Twin Registry study (<xref ref-type="bibr" rid="B2">Burt and Klump, 2012</xref>).</p>
</sec>
<sec sec-type="materials|methods" id="s2">
<title>Materials and methods</title>
<p>The proposed FBLMM is built on an idea similar to that of the BLMM presented in <xref ref-type="bibr" rid="B13">Hai and Wen (2020)</xref>, where we assume genetic similarities can lead to phenotypic similarities. Fundamentally different from existing methods that adjust for family correlations, we utilize the information embedded in the family-based study design to further improve the prediction accuracy. Given <inline-formula id="inf1">
<mml:math id="m1">
<mml:mrow>
<mml:mi>M</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> genetic regions that can be defined using various criteria (e.g., gene and pathway), we form the FBLMM model as:<disp-formula id="e1">
<mml:math id="m2">
<mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mi mathvariant="bold">Y</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi mathvariant="bold">X</mml:mi>
<mml:mi mathvariant="bold">&#x3b2;</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>M</mml:mi>
</mml:munderover>
</mml:mstyle>
<mml:msub>
<mml:mi mathvariant="bold">g</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:mi mathvariant="bold">f</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold">&#x3f5;</mml:mi>
<mml:mi>n</mml:mi>
</mml:msub>
<mml:mo>&#x2001;</mml:mo>
<mml:mtext>with</mml:mtext>
<mml:mo>&#x2001;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold">&#x3f5;</mml:mi>
<mml:mi>n</mml:mi>
</mml:msub>
<mml:mo>&#x223c;</mml:mo>
<mml:mi mathvariant="script">N</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold">I</mml:mi>
<mml:msubsup>
<mml:mi>&#x3c3;</mml:mi>
<mml:mi>&#x3f5;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(1)</label>
</disp-formula>where <inline-formula id="inf2">
<mml:math id="m3">
<mml:mrow>
<mml:mi mathvariant="bold">Y</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the outcome; <inline-formula id="inf3">
<mml:math id="m4">
<mml:mrow>
<mml:mi mathvariant="bold">X</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the genotypes for all common variants; and <inline-formula id="inf4">
<mml:math id="m5">
<mml:mrow>
<mml:mi mathvariant="bold">&#x3b2;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is their corresponding effect. <inline-formula id="inf5">
<mml:math id="m6">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold">g</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the cumulative predictive effect from all measured predictors, including rare variants, on region <inline-formula id="inf6">
<mml:math id="m7">
<mml:mrow>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>. <inline-formula id="inf7">
<mml:math id="m8">
<mml:mrow>
<mml:mi mathvariant="bold">f</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the familial effect due to shared environmental factors and genetic relatedness, and <inline-formula id="inf8">
<mml:math id="m9">
<mml:mrow>
<mml:mi mathvariant="bold">I</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is an <inline-formula id="inf9">
<mml:math id="m10">
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> identity matrix.</p>
<p>Similar to existing sparsity regression models (<xref ref-type="bibr" rid="B3">Carvalho et al., 2008</xref>; <xref ref-type="bibr" rid="B44">Zhou, Carbonetto, and Stephens, 2013</xref>), the <inline-formula id="inf10">
<mml:math id="m11">
<mml:mrow>
<mml:mi mathvariant="bold">X</mml:mi>
<mml:mi mathvariant="bold">&#x3b2;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is designed to capture the predictive effects from isolated markers. To tease out the impact of noise, we followed the same procedure in (<xref ref-type="bibr" rid="B13">Hai and Wen, 2020</xref>), instead of using the spike and slab prior that can lead to an underestimation of posterior variances for <inline-formula id="inf11">
<mml:math id="m12">
<mml:mrow>
<mml:mi mathvariant="bold">&#x3b2;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> (Carbonetto et al., 2012). We re-parameterized <inline-formula id="inf12">
<mml:math id="m13">
<mml:mrow>
<mml:mi mathvariant="bold">X</mml:mi>
<mml:mi mathvariant="bold">&#x3b2;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> as <inline-formula id="inf13">
<mml:math id="m14">
<mml:mrow>
<mml:mi mathvariant="bold">X</mml:mi>
<mml:mi mathvariant="bold">&#x393;</mml:mi>
<mml:mi mathvariant="bold">&#x3b2;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, where <inline-formula id="inf14">
<mml:math id="m15">
<mml:mrow>
<mml:mi mathvariant="bold">&#x393;</mml:mi>
<mml:mo>&#x3d;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> diag(<inline-formula id="inf15">
<mml:math id="m16">
<mml:mrow>
<mml:mi mathvariant="bold">&#x3b3;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>) and <inline-formula id="inf16">
<mml:math id="m17">
<mml:mrow>
<mml:mi mathvariant="bold">&#x3b3;</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b3;</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>&#x3b3;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>.</mml:mo>
<mml:mo>.</mml:mo>
<mml:mo>.</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>&#x3b3;</mml:mi>
<mml:mi>p</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mi>T</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> is a vector of binary variables indicating whether each genetic variant is predictive. We used the Bernoulli Gaussian distribution as the priors for <inline-formula id="inf17">
<mml:math id="m18">
<mml:mrow>
<mml:mi mathvariant="bold">&#x3b2;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf18">
<mml:math id="m19">
<mml:mrow>
<mml:mi mathvariant="bold">&#x3b3;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> (i.e., <inline-formula id="inf19">
<mml:math id="m20">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b2;</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:mo>&#x223c;</mml:mo>
<mml:mi mathvariant="script">N</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mi>&#x3c3;</mml:mi>
<mml:mi>&#x3b2;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf20">
<mml:math id="m21">
<mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b3;</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:mo>&#x223c;</mml:mo>
<mml:mtext>Bernoulli</mml:mtext>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b8;</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0.1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>), and this allows us to obtain an unbiased estimate of the posterior variance of <inline-formula id="inf21">
<mml:math id="m22">
<mml:mrow>
<mml:mi mathvariant="bold">&#x3b2;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> as well as achieving variable selection for <inline-formula id="inf22">
<mml:math id="m23">
<mml:mrow>
<mml:mi mathvariant="bold">&#x3b2;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> (<xref ref-type="bibr" rid="B44">Zhou, Carbonetto, and Stephens, 2013</xref>; <xref ref-type="bibr" rid="B10">Fernandes et al., 2017</xref>).</p>
<p>Similar to linear mixed models that assume the infinitesimal effects (<xref ref-type="bibr" rid="B37">VanRaden, 2008</xref>), the cumulative predictive effects from common and rare variants for region <inline-formula id="inf23">
<mml:math id="m24">
<mml:mrow>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> are modeled via <inline-formula id="inf24">
<mml:math id="m25">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold">g</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, where we set a multivariate normal prior for each region-based cumulative predictive effect as<disp-formula id="e2">
<mml:math id="m26">
<mml:mrow>
<mml:mtable columnalign="right">
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold">g</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
<mml:mo>&#x7c;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold">K</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mtd>
<mml:mtd>
<mml:mrow>
<mml:mo>&#x223c;</mml:mo>
<mml:mi mathvariant="script">N</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold">K</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
<mml:msubsup>
<mml:mi>&#x3c3;</mml:mi>
<mml:mi>m</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2001;</mml:mo>
<mml:mi>m</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>.</mml:mo>
<mml:mo>.</mml:mo>
<mml:mo>.</mml:mo>
<mml:mo>,</mml:mo>
<mml:mi>M</mml:mi>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:msubsup>
<mml:mi>&#x3c3;</mml:mi>
<mml:mi>m</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:mtd>
<mml:mtd>
<mml:mrow>
<mml:mo>&#x223c;</mml:mo>
<mml:mi>I</mml:mi>
<mml:mi>G</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>.</mml:mo>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:math>
<label>(2)</label>
</disp-formula>
<inline-formula id="inf25">
<mml:math id="m27">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold">K</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the genetic similarity for region <inline-formula id="inf26">
<mml:math id="m28">
<mml:mrow>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> and it is defined as <inline-formula id="inf27">
<mml:math id="m29">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold">K</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold">G</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
<mml:msub>
<mml:mi mathvariant="bold">W</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
<mml:msubsup>
<mml:mi mathvariant="bold">G</mml:mi>
<mml:mi>m</mml:mi>
<mml:mi>T</mml:mi>
</mml:msubsup>
<mml:mo>/</mml:mo>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, where <inline-formula id="inf28">
<mml:math id="m30">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold">G</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the genotype matrix for region <inline-formula id="inf29">
<mml:math id="m31">
<mml:mrow>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf30">
<mml:math id="m32">
<mml:mrow>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the number of genetic markers in the region. <inline-formula id="inf31">
<mml:math id="m33">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold">W</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>.</mml:mo>
<mml:mo>.</mml:mo>
<mml:mo>.</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mi>T</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> contains the pre-specified weights used to capture the contribution of rare variants. Similar to the existing literature (<xref ref-type="bibr" rid="B41">Wu et al., 2011</xref>; <xref ref-type="bibr" rid="B21">Lee et al., 2012</xref>), we define the weighted sum statistic (denoted as WSS) type of weights as <inline-formula id="inf32">
<mml:math id="m34">
<mml:mrow>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>A</mml:mi>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>M</mml:mi>
<mml:mi>A</mml:mi>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</inline-formula>, where <inline-formula id="inf33">
<mml:math id="m35">
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>A</mml:mi>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the minor allele frequency for the <inline-formula id="inf34">
<mml:math id="m36">
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>th variant. The hyper-parameters <inline-formula id="inf35">
<mml:math id="m37">
<mml:mrow>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf36">
<mml:math id="m38">
<mml:mrow>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> are set to 0.1 for all regions. To expedite the computation, we re-parameterized the cumulative predictive effects part with the spike and slab prior as<disp-formula id="e3">
<mml:math id="m39">
<mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mi mathvariant="bold">Y</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi mathvariant="bold">X</mml:mi>
<mml:mrow>
<mml:mi mathvariant="bold">&#x393;</mml:mi>
<mml:mi mathvariant="bold">&#x3b2;</mml:mi>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>M</mml:mi>
</mml:munderover>
</mml:mstyle>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold">Z</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
<mml:msub>
<mml:mi>r</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
<mml:msub>
<mml:mi mathvariant="bold">U</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:mi mathvariant="bold">f</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi mathvariant="bold">&#x3f5;</mml:mi>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(3)</label>
</disp-formula>where <inline-formula id="inf37">
<mml:math id="m40">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold">U</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
<mml:mo>&#x223c;</mml:mo>
<mml:mi mathvariant="script">N</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold">I</mml:mi>
<mml:msubsup>
<mml:mi>&#x3c3;</mml:mi>
<mml:mi>m</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf38">
<mml:math id="m41">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold">Z</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold">Q</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
<mml:msubsup>
<mml:mi mathvariant="bold">&#x39b;</mml:mi>
<mml:mi>m</mml:mi>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:mfrac>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf39">
<mml:math id="m42">
<mml:mrow>
<mml:mi>E</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>r</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>&#x2118;</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>r</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>. The re-parameterization facilitates the selection of predictive regions (i.e., <inline-formula id="inf40">
<mml:math id="m43">
<mml:mrow>
<mml:msub>
<mml:mi>r</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> indicates that the region is predictive), and the details of its derivation can be found in Appendix A.</p>
<p>Mounting evidence suggests that there is familial aggregation for many complex traits (<xref ref-type="bibr" rid="B19">Laitinen et al., 1998</xref>; <xref ref-type="bibr" rid="B6">Couillard et al., 2001</xref>; <xref ref-type="bibr" rid="B9">Dirani et al., 2006</xref>; <xref ref-type="bibr" rid="B22">Lichtenstein et al., 2009</xref>), and the relatedness in genetic and environmental factors among family members is thought to be the main reason for this aggregation. Therefore, we split the familial effect <inline-formula id="inf41">
<mml:math id="m44">
<mml:mrow>
<mml:mi mathvariant="bold">f</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> into predictive effects due to genetic correlation (denoted as <inline-formula id="inf42">
<mml:math id="m45">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold">g</mml:mi>
<mml:mi>f</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>) and shared environmental factors (denoted as <inline-formula id="inf43">
<mml:math id="m46">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold">e</mml:mi>
<mml:mi>f</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>). Model 3 can be written as<disp-formula id="e4">
<mml:math id="m47">
<mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mi mathvariant="bold">Y</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi mathvariant="bold">X</mml:mi>
<mml:mrow>
<mml:mi mathvariant="bold">&#x393;</mml:mi>
<mml:mi mathvariant="bold">&#x3b2;</mml:mi>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>M</mml:mi>
</mml:munderover>
</mml:mstyle>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold">Z</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
<mml:msub>
<mml:mi>r</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
<mml:msub>
<mml:mi mathvariant="bold">U</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold">g</mml:mi>
<mml:mi>f</mml:mi>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold">e</mml:mi>
<mml:mi>f</mml:mi>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold">&#x3f5;</mml:mi>
<mml:mi>n</mml:mi>
</mml:msub>
<mml:mo>.</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(4)</label>
</disp-formula>
</p>
<p>We set the prior for <inline-formula id="inf44">
<mml:math id="m48">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold">g</mml:mi>
<mml:mi>f</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> as<disp-formula id="e5">
<mml:math id="m49">
<mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mtable columnalign="right">
<mml:mtr>
<mml:mtd>
<mml:msub>
<mml:mi mathvariant="bold">g</mml:mi>
<mml:mi>f</mml:mi>
</mml:msub>
</mml:mtd>
<mml:mtd>
<mml:mrow>
<mml:mo>&#x223c;</mml:mo>
<mml:mi mathvariant="script">N</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mi mathvariant="bold">K</mml:mi>
<mml:msub>
<mml:mi>g</mml:mi>
<mml:mi>f</mml:mi>
</mml:msub>
</mml:msup>
<mml:msubsup>
<mml:mi>&#x3c3;</mml:mi>
<mml:msub>
<mml:mi>g</mml:mi>
<mml:mi>f</mml:mi>
</mml:msub>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:msubsup>
<mml:mi>&#x3c3;</mml:mi>
<mml:msub>
<mml:mi>g</mml:mi>
<mml:mi>f</mml:mi>
</mml:msub>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:mtd>
<mml:mtd>
<mml:mrow>
<mml:mo>&#x223c;</mml:mo>
<mml:mi>I</mml:mi>
<mml:mi>G</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mi>f</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mi>f</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(5)</label>
</disp-formula>where <inline-formula id="inf45">
<mml:math id="m50">
<mml:mrow>
<mml:msup>
<mml:mi mathvariant="bold">K</mml:mi>
<mml:msub>
<mml:mi>g</mml:mi>
<mml:mi>f</mml:mi>
</mml:msub>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> is the theoretical kinship coefficient matrix. The <inline-formula id="inf46">
<mml:math id="m51">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold">g</mml:mi>
<mml:mi>f</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> uses the genetic correlation between family members to improve the prediction accuracy, and it can be viewed as a surrogate for those predictive but unmeasured genetic variants. To account for the impact of environmental factors, we assume all family members share the same environment (e.g., diet) and set <inline-formula id="inf47">
<mml:math id="m52">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold">e</mml:mi>
<mml:mi>f</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> as<disp-formula id="e6">
<mml:math id="m53">
<mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mtable columnalign="right">
<mml:mtr>
<mml:mtd>
<mml:msub>
<mml:mi mathvariant="bold">e</mml:mi>
<mml:mi>f</mml:mi>
</mml:msub>
</mml:mtd>
<mml:mtd>
<mml:mrow>
<mml:mo>&#x223c;</mml:mo>
<mml:mi mathvariant="script">N</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mi mathvariant="bold">K</mml:mi>
<mml:msub>
<mml:mi>e</mml:mi>
<mml:mi>f</mml:mi>
</mml:msub>
</mml:msup>
<mml:msubsup>
<mml:mi>&#x3c3;</mml:mi>
<mml:msub>
<mml:mi>e</mml:mi>
<mml:mi>f</mml:mi>
</mml:msub>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(6)</label>
</disp-formula>where <inline-formula id="inf48">
<mml:math id="m54">
<mml:mrow>
<mml:msup>
<mml:mi mathvariant="bold">K</mml:mi>
<mml:msub>
<mml:mi>e</mml:mi>
<mml:mi>f</mml:mi>
</mml:msub>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> is a block diagonal matrix with each block being a matrix with all elements equal to 1. The <inline-formula id="inf49">
<mml:math id="m55">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold">e</mml:mi>
<mml:mi>f</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is designed to capture the predictive effects from shared environmental factors, and it can also be viewed as a surrogate for those unmeasured environmental predictors shared by family members. We used the idea from (<xref ref-type="bibr" rid="B5">Chen and Dunson, 2003</xref>) and decomposed <inline-formula id="inf50">
<mml:math id="m56">
<mml:mrow>
<mml:msup>
<mml:mi mathvariant="bold">K</mml:mi>
<mml:msub>
<mml:mi>e</mml:mi>
<mml:mi>f</mml:mi>
</mml:msub>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf51">
<mml:math id="m57">
<mml:mrow>
<mml:msup>
<mml:mi mathvariant="bold">K</mml:mi>
<mml:msub>
<mml:mi>g</mml:mi>
<mml:mi>f</mml:mi>
</mml:msub>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> as <inline-formula id="inf52">
<mml:math id="m58">
<mml:mrow>
<mml:msup>
<mml:mi mathvariant="bold">K</mml:mi>
<mml:msub>
<mml:mi>e</mml:mi>
<mml:mi>f</mml:mi>
</mml:msub>
</mml:msup>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold">Q</mml:mi>
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mi>f</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mi mathvariant="bold">&#x39b;</mml:mi>
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mi>f</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msubsup>
<mml:mi mathvariant="bold">Q</mml:mi>
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mi>f</mml:mi>
</mml:mrow>
<mml:mi>T</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf53">
<mml:math id="m59">
<mml:mrow>
<mml:msup>
<mml:mi mathvariant="bold">K</mml:mi>
<mml:msub>
<mml:mi>g</mml:mi>
<mml:mi>f</mml:mi>
</mml:msub>
</mml:msup>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold">Q</mml:mi>
<mml:mrow>
<mml:mi>g</mml:mi>
<mml:mi>f</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mi mathvariant="bold">&#x39b;</mml:mi>
<mml:mrow>
<mml:mi>g</mml:mi>
<mml:mi>f</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msubsup>
<mml:mi mathvariant="bold">Q</mml:mi>
<mml:mrow>
<mml:mi>g</mml:mi>
<mml:mi>f</mml:mi>
</mml:mrow>
<mml:mi>T</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>, where <inline-formula id="inf54">
<mml:math id="m60">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold">&#x39b;</mml:mi>
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mi>f</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf55">
<mml:math id="m61">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold">&#x39b;</mml:mi>
<mml:mrow>
<mml:mi>g</mml:mi>
<mml:mi>f</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> are diagonal matrices with eigenvalues on their diagonals, and <inline-formula id="inf56">
<mml:math id="m62">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold">Q</mml:mi>
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mi>f</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf57">
<mml:math id="m63">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold">Q</mml:mi>
<mml:mrow>
<mml:mi>g</mml:mi>
<mml:mi>f</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> are matrices of the corresponding eigenvectors. Eq. <xref ref-type="disp-formula" rid="e4">4</xref> can be written as<disp-formula id="e7">
<mml:math id="m64">
<mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mi mathvariant="bold">Y</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi mathvariant="bold">X</mml:mi>
<mml:mrow>
<mml:mi mathvariant="bold">&#x393;</mml:mi>
<mml:mi mathvariant="bold">&#x3b2;</mml:mi>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>M</mml:mi>
</mml:munderover>
</mml:mstyle>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold">Z</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
<mml:msub>
<mml:mi>r</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
<mml:msub>
<mml:mi mathvariant="bold">U</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold">Z</mml:mi>
<mml:mrow>
<mml:mi>g</mml:mi>
<mml:mi>f</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mi mathvariant="bold">U</mml:mi>
<mml:mrow>
<mml:mi>g</mml:mi>
<mml:mi>f</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold">Z</mml:mi>
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mi>f</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mi mathvariant="bold">U</mml:mi>
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mi>f</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold">&#x3f5;</mml:mi>
<mml:mi>n</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(7)</label>
</disp-formula>where <inline-formula id="inf58">
<mml:math id="m65">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold">Z</mml:mi>
<mml:mrow>
<mml:mi>g</mml:mi>
<mml:mi>f</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold">Q</mml:mi>
<mml:mrow>
<mml:mi>g</mml:mi>
<mml:mi>f</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msubsup>
<mml:mi mathvariant="bold">&#x39b;</mml:mi>
<mml:mrow>
<mml:mi>g</mml:mi>
<mml:mi>f</mml:mi>
</mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:mfrac>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf59">
<mml:math id="m66">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold">Z</mml:mi>
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mi>f</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold">Q</mml:mi>
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mi>f</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msubsup>
<mml:mi mathvariant="bold">&#x39b;</mml:mi>
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mi>f</mml:mi>
</mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:mfrac>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>. We adopted the mean-field variational Bayes (VB) algorithm to estimate the parameters of FBLMM. Let <inline-formula id="inf60">
<mml:math id="m67">
<mml:mrow>
<mml:mi mathvariant="bold">&#x3be;</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold">&#x3b2;</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold">&#x3b3;</mml:mi>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mi mathvariant="bold">U</mml:mi>
<mml:mi>g</mml:mi>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold">U</mml:mi>
<mml:mrow>
<mml:mi>g</mml:mi>
<mml:mi>f</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold">U</mml:mi>
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mi>f</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold">r</mml:mi>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mi mathvariant="bold">&#x3c3;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mi>&#x3c3;</mml:mi>
<mml:mrow>
<mml:mi>g</mml:mi>
<mml:mi>f</mml:mi>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mi>&#x3c3;</mml:mi>
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mi>f</mml:mi>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mi>&#x3c3;</mml:mi>
<mml:mi>&#x3f5;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> denote all parameters of interest, where <inline-formula id="inf61">
<mml:math id="m68">
<mml:mrow>
<mml:mi mathvariant="bold">&#x3b3;</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b3;</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>&#x3b3;</mml:mi>
<mml:mi>p</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf62">
<mml:math id="m69">
<mml:mrow>
<mml:msup>
<mml:mi mathvariant="bold">U</mml:mi>
<mml:mi>g</mml:mi>
</mml:msup>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold">U</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold">U</mml:mi>
<mml:mi>M</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf63">
<mml:math id="m70">
<mml:mrow>
<mml:mi mathvariant="bold">r</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>r</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>r</mml:mi>
<mml:mi>M</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, and <inline-formula id="inf64">
<mml:math id="m71">
<mml:mrow>
<mml:msup>
<mml:mi mathvariant="bold">&#x3c3;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msubsup>
<mml:mi>&#x3c3;</mml:mi>
<mml:mn>1</mml:mn>
<mml:mn>2</mml:mn>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mi>&#x3c3;</mml:mi>
<mml:mi>M</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>. The goal is to obtain an optimal approximation <inline-formula id="inf65">
<mml:math id="m72">
<mml:mrow>
<mml:mi>q</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold">&#x3be;</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> of the posterior distribution on <inline-formula id="inf66">
<mml:math id="m73">
<mml:mrow>
<mml:mi mathvariant="bold">&#x3be;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> by maximizing the evidence lower bound (ELBO). In detail, we iteratively update the approximate distribution <inline-formula id="inf67">
<mml:math id="m74">
<mml:mrow>
<mml:mi>q</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold">&#x3be;</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> as<disp-formula id="e8">
<mml:math id="m75">
<mml:mrow>
<mml:mtable columnalign="right">
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mi>q</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold">&#x3be;</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
</mml:mrow>
</mml:mtd>
<mml:mtd>
<mml:mrow>
<mml:msub>
<mml:mi>q</mml:mi>
<mml:mi mathvariant="bold">&#x3b2;</mml:mi>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x220f;</mml:mo>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>p</mml:mi>
</mml:munderover>
</mml:mstyle>
<mml:msub>
<mml:mi>q</mml:mi>
<mml:msub>
<mml:mi>&#x3b3;</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:msub>
</mml:mrow>
<mml:mo>&#xd7;</mml:mo>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x220f;</mml:mo>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>M</mml:mi>
</mml:munderover>
</mml:mstyle>
<mml:msub>
<mml:mi>q</mml:mi>
<mml:msub>
<mml:mi mathvariant="bold">U</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
</mml:msub>
</mml:mrow>
<mml:mo>&#xd7;</mml:mo>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x220f;</mml:mo>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>M</mml:mi>
</mml:munderover>
</mml:mstyle>
<mml:msub>
<mml:mi>q</mml:mi>
<mml:msub>
<mml:mi>r</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
</mml:msub>
</mml:mrow>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>q</mml:mi>
<mml:msub>
<mml:mi mathvariant="bold">U</mml:mi>
<mml:mrow>
<mml:mi>g</mml:mi>
<mml:mi>f</mml:mi>
</mml:mrow>
</mml:msub>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>q</mml:mi>
<mml:msub>
<mml:mi mathvariant="bold">U</mml:mi>
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mi>f</mml:mi>
</mml:mrow>
</mml:msub>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd/>
<mml:mtd>
<mml:mrow>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x220f;</mml:mo>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>M</mml:mi>
</mml:munderover>
</mml:mstyle>
<mml:msub>
<mml:mi>q</mml:mi>
<mml:msubsup>
<mml:mi>&#x3c3;</mml:mi>
<mml:mi>m</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:msub>
</mml:mrow>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>q</mml:mi>
<mml:msubsup>
<mml:mi>&#x3c3;</mml:mi>
<mml:mrow>
<mml:mi>g</mml:mi>
<mml:mi>f</mml:mi>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>q</mml:mi>
<mml:msubsup>
<mml:mi>&#x3c3;</mml:mi>
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mi>f</mml:mi>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>q</mml:mi>
<mml:msubsup>
<mml:mi>&#x3c3;</mml:mi>
<mml:mi>&#x3f5;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:msub>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:math>
<label>(8)</label>
</disp-formula>where <inline-formula id="inf68">
<mml:math id="m76">
<mml:mrow>
<mml:msub>
<mml:mi>q</mml:mi>
<mml:mi mathvariant="bold">&#x3b2;</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi mathvariant="script">N</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold">M</mml:mi>
<mml:mi mathvariant="bold">&#x3b2;</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold">S</mml:mi>
<mml:mi mathvariant="bold">&#x3b2;</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>; <inline-formula id="inf69">
<mml:math id="m77">
<mml:mrow>
<mml:msub>
<mml:mi>q</mml:mi>
<mml:msub>
<mml:mi>&#x3b3;</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>B</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3c8;</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>; <inline-formula id="inf70">
<mml:math id="m78">
<mml:mrow>
<mml:msub>
<mml:mi>q</mml:mi>
<mml:msub>
<mml:mi mathvariant="bold">U</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi mathvariant="script">N</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold">M</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold">S</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>; <inline-formula id="inf71">
<mml:math id="m79">
<mml:mrow>
<mml:msub>
<mml:mi>q</mml:mi>
<mml:msub>
<mml:mi>r</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>B</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3d5;</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>; <inline-formula id="inf72">
<mml:math id="m80">
<mml:mrow>
<mml:msub>
<mml:mi>q</mml:mi>
<mml:msub>
<mml:mi mathvariant="bold">U</mml:mi>
<mml:mrow>
<mml:mi>g</mml:mi>
<mml:mi>f</mml:mi>
</mml:mrow>
</mml:msub>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi mathvariant="script">N</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold">M</mml:mi>
<mml:mrow>
<mml:mi>g</mml:mi>
<mml:mi>f</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold">S</mml:mi>
<mml:mrow>
<mml:mi>g</mml:mi>
<mml:mi>f</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>; <inline-formula id="inf73">
<mml:math id="m81">
<mml:mrow>
<mml:msub>
<mml:mi>q</mml:mi>
<mml:msub>
<mml:mi mathvariant="bold">U</mml:mi>
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mi>f</mml:mi>
</mml:mrow>
</mml:msub>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi mathvariant="script">N</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold">M</mml:mi>
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mi>f</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold">S</mml:mi>
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mi>f</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>; <inline-formula id="inf74">
<mml:math id="m82">
<mml:mrow>
<mml:msub>
<mml:mi>q</mml:mi>
<mml:msubsup>
<mml:mi>&#x3c3;</mml:mi>
<mml:mi>m</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>I</mml:mi>
<mml:mi>G</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>; <inline-formula id="inf75">
<mml:math id="m83">
<mml:mrow>
<mml:msub>
<mml:mi>q</mml:mi>
<mml:msubsup>
<mml:mi>&#x3c3;</mml:mi>
<mml:mrow>
<mml:mi>g</mml:mi>
<mml:mi>f</mml:mi>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>I</mml:mi>
<mml:mi>G</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mrow>
<mml:mi>g</mml:mi>
<mml:mi>f</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mrow>
<mml:mi>g</mml:mi>
<mml:mi>f</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>; <inline-formula id="inf76">
<mml:math id="m84">
<mml:mrow>
<mml:msub>
<mml:mi>q</mml:mi>
<mml:msubsup>
<mml:mi>&#x3c3;</mml:mi>
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mi>f</mml:mi>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>I</mml:mi>
<mml:mi>G</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mi>f</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mi>f</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>; and <inline-formula id="inf77">
<mml:math id="m85">
<mml:mrow>
<mml:msub>
<mml:mi>q</mml:mi>
<mml:msubsup>
<mml:mi>&#x3c3;</mml:mi>
<mml:mi>&#x3f5;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>I</mml:mi>
<mml:mi>G</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mi>&#x3f5;</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mi>&#x3f5;</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>. Each parameter of <inline-formula id="inf78">
<mml:math id="m86">
<mml:mrow>
<mml:mi mathvariant="bold">&#x3be;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> can be estimated using the coordinate ascent algorithm; the estimating equations used to update the parameters are listed in Appendix A.</p>
<p>The pseudo-code implementing our proposed model is shown in <xref ref-type="fig" rid="F1">Figure 1</xref>. It is worth noting that when a new subject is not from families in the training data, its predicted value only depends on demographic and genetic predictors (i.e., the family information does not contribute to the outcomes). When a new individual comes from families in the training set, the FBLMM method not only uses genetic and demographic predictors, but also utilizes the extra information provided by family design to capture unmeasured genetic and shared environmental risk factors. Therefore, FBLMM has great potential to further improve predictions. The weight function employed by FBLMM can facilitate the identification of rare variants that are predictive, enabling FBLMM to consider contributions from both common and rare variants in prediction modeling.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>Algorithm 1: Inference procedure using variational Bayes.</p>
</caption>
<graphic xlink:href="fgene-14-1267704-g001.tif"/>
</fig>
</sec>
<sec id="s3">
<title>Simulation study</title>
<p>We conducted extensive simulation studies to evaluate the performance of our proposed method under various family-based designs, and further compared FBLMM with other widely used methods, including 1) adaptive MultiBLUP (<xref ref-type="bibr" rid="B34">Speed and Balding, 2014</xref>); 2) DPRVB (<xref ref-type="bibr" rid="B43">Zeng and Zhou, 2017</xref>); and 3) BLMM (<xref ref-type="bibr" rid="B13">Hai and Wen, 2020</xref>), where family correlations are first adjusted. Note that both MultiBLUP and DPRVB have been shown to outperform other existing gBLUP-based methods (<xref ref-type="bibr" rid="B34">Speed and Balding, 2014</xref>; <xref ref-type="bibr" rid="B43">Zeng and Zhou, 2017</xref>).</p>
<p>To closely mimic the real human genome, the founders&#x2019; genotypes were drawn directly from the Alzheimer&#x2019;s Disease Neuroimaging Initiative (ADNI) study (<inline-formula id="inf79">
<mml:math id="m87">
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>808</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>). Pedigree simulator was used to simulate various types of pedigree structures and the gene-dropping method (<xref ref-type="bibr" rid="B16">Huang, Thomas, and Vieland, 2013</xref>) was implemented to generate the genotypes of offspring. Each simulation scenario was replicated 100 times. We randomly split the simulated data into a testing set with 20% of the samples and a training set with the remaining 80% of the samples. Pearson correlations and root mean square errors (RMSE) calculated based on the testing samples were reported for each method.</p>
</sec>
<sec id="s4">
<title>Scenario 1: The impact of disease model</title>
<p>In this set of simulations, we evaluated the performance of our proposed method under three types of disease models, in which outcomes are affected by 1) shared environmental factors only, 2) genetic factors only, and 3) both environmental and genetic factors.</p>
<sec id="s4-1">
<title>The outcome is affected by shared environmental factors only</title>
<p>To evaluate the impact of shared environmental factors, we randomly selected 3 genes from the ADNI dataset and none of them was set to be causal. For simplicity and without loss of generality, we considered mixed two-generation pedigree structures, including a) half-siblings (<xref ref-type="sec" rid="s13">Supplementary Figure S1A</xref>), b) parents with two offspring (<xref ref-type="sec" rid="s13">Supplementary Figure S1B</xref>), and c) parents with four offspring (<xref ref-type="sec" rid="s13">Supplementary Figure S1C</xref>). We used 808 samples from the ADNI study as founders and formed a total of 394 families including 2040 individuals, which contained 150 individuals from 30 pedigrees of half-siblings, 708 individuals from 177 pedigrees of parents with two offspring, and 1182 individuals from 197 pedigrees of parents with four offspring. We simulated the outcomes as <inline-formula id="inf80">
<mml:math id="m88">
<mml:mrow>
<mml:msub>
<mml:mi>Y</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>&#x3b1;</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>&#x3f5;</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, where <inline-formula id="inf81">
<mml:math id="m89">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b1;</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the shared environmental effect for family <inline-formula id="inf82">
<mml:math id="m90">
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf83">
<mml:math id="m91">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b1;</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x223c;</mml:mo>
<mml:mi>N</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mi>&#x3c3;</mml:mi>
<mml:mi>a</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>. It is straightforward to show that <inline-formula id="inf84">
<mml:math id="m92">
<mml:mrow>
<mml:mi mathvariant="bold">Y</mml:mi>
<mml:mo>&#x223c;</mml:mo>
<mml:mi>N</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold">K</mml:mi>
<mml:msubsup>
<mml:mi>&#x3c3;</mml:mi>
<mml:mi>a</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
<mml:mo>&#x2b;</mml:mo>
<mml:mi mathvariant="bold">I</mml:mi>
<mml:msup>
<mml:mi>&#x3c3;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, where <inline-formula id="inf85">
<mml:math id="m93">
<mml:mrow>
<mml:mi mathvariant="bold">K</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is a block diagonal matrix with each block being a matrix with all elements equal to 1. Therefore, we simulated the outcomes using <inline-formula id="inf86">
<mml:math id="m94">
<mml:mrow>
<mml:mi mathvariant="bold">Y</mml:mi>
<mml:mo>&#x223c;</mml:mo>
<mml:mi>N</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold">K</mml:mi>
<mml:msubsup>
<mml:mi>&#x3c3;</mml:mi>
<mml:mi>a</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
<mml:mo>&#x2b;</mml:mo>
<mml:mi mathvariant="bold">I</mml:mi>
<mml:msup>
<mml:mi>&#x3c3;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, where the percentage of the outcome variance explained by shared environmental factors increased from 25% to 75%.</p>
<p>Pearson correlations and RMSEs are shown in <xref ref-type="fig" rid="F2">Figure 2</xref>. As expected, FBLMM significantly outperformed DPRVB, MultiBLUP and BLMM when shared environmental factors significantly contributed to disease risk. In addition, the prediction accuracy for FBLMM increases as the effects from shared environmental factors increase, but it remains almost unchanged for the other three methods. This is mainly because FBLMM is specifically designed to utilize information from family design for improved prediction. Although adjusting for the relatedness among family members makes it statistically valid to apply population-based methods on family-based studies, overlooking information embedded in the family design can lead to sub-optimal prediction performance. While DPRVB, MultiBLUP and BLMM have similar performance, BLMM tends to be slightly better. This is mainly because BLMM is flexible to the underlying disease models. While MultiBLUP assumes an infinitesimal effect model and DPRVB assumes an isolated effect model, BLMM-based methods (i.e., BLMM and FBLMM) can easily accommodate these two commonly used model assumptions.</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>The comparison of prediction accuracy when outcomes are only impacted by shared environmental factors. The heritability increases from 25% to 75%.</p>
</caption>
<graphic xlink:href="fgene-14-1267704-g002.tif"/>
</fig>
</sec>
<sec id="s4-2">
<title>The outcome is affected by genetic factors only</title>
<p>We evaluated the performance of FBLMM when only genetic variants, including both measured and unmeasured, contributed to the familial aggregation of traits. We first randomly selected three genes and set all of them as causal regions. We simulated the outcomes as <inline-formula id="inf87">
<mml:math id="m95">
<mml:mrow>
<mml:mi mathvariant="bold">Y</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mn>3</mml:mn>
</mml:msubsup>
<mml:msub>
<mml:mi mathvariant="bold">g</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:mi mathvariant="bold">&#x3f5;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, where <inline-formula id="inf88">
<mml:math id="m96">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold">g</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the genetic effect for region <inline-formula id="inf89">
<mml:math id="m97">
<mml:mrow>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf90">
<mml:math id="m98">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold">g</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
<mml:mo>&#x223c;</mml:mo>
<mml:mi>N</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold">K</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
<mml:msubsup>
<mml:mi>&#x3c3;</mml:mi>
<mml:mi>m</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>. <inline-formula id="inf91">
<mml:math id="m99">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold">G</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is an <inline-formula id="inf92">
<mml:math id="m100">
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> matrix of genetic markers on gene <inline-formula id="inf93">
<mml:math id="m101">
<mml:mrow>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf94">
<mml:math id="m102">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold">K</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold">G</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
<mml:msub>
<mml:mi mathvariant="bold">W</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
<mml:msubsup>
<mml:mi mathvariant="bold">G</mml:mi>
<mml:mi>m</mml:mi>
<mml:mi>T</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>/</mml:mo>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>. Causal genetic variants can be unmeasured in practice (<xref ref-type="bibr" rid="B40">Wen and Lu, 2017</xref>). Therefore, we randomly selected one of the three causal genes as unmeasured (i.e., only two causal genes are in the final simulated dataset). We set the total heritability to be 60% with the proportion of heritability accounted for by unmeasured variants changing from 25% to 75%. To evaluate the performance of FBLMM across a range of phenotypes, we first considered the case where outcomes were mainly caused by common variants, and set <inline-formula id="inf95">
<mml:math id="m103">
<mml:mrow>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> for each predictor. We then simulated the cases where rare variants contributed substantially to disease risk. We simulated two models under such settings, where a beta-type of weights (denoted as BETA) <inline-formula id="inf96">
<mml:math id="m104">
<mml:mrow>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>d</mml:mi>
<mml:mi>b</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>a</mml:mi>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>A</mml:mi>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>25</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> and a weighted sum statistics type of weights were used.</p>
<p>Pearson correlations and RMSEs are shown in <xref ref-type="fig" rid="F3">Figure 3</xref>. As the proportion of genetic variance explained by unmeasured effects increases, the prediction accuracy of all methods decreases, with FBLMM decreasing the least. FBLMM has robust performance across all settings. When outcomes are mainly caused by common genetic variants (<xref ref-type="fig" rid="F3">Figure 3A</xref>), FBLMM outperforms the other methods across all simulation settings and captures most of the heritability. This is mainly because FBLMM has an advantage in capturing the genetic effects from unmeasured variants via using the theoretical kinship coefficients. Not surprisingly, the performance of BLMM, MultiBLUP and DPRVB is very similar. When the disease outcomes were simulated under the assumption that rare variants had large contributions (<xref ref-type="fig" rid="F3">Figures 3B, C</xref>), FBLMM performs much better than the existing methods, and BLMM outperforms MultiBLUP and DPRVB. This is mainly because the weights in both FBLMM and BLMM are designed to capture the effects from rare variants. Therefore, FBLMM is expected to have a more robust performance through modeling familial correlations and up-weighting rare genetic variants.</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>The comparison of prediction accuracy when outcomes are affected by unmeasured genetic variants. The total heritability is 60%, and the percentage of heritability accounted for by unmeasured variants increases from 25% to 75%. <bold>(A)</bold>: Common variants affect the outcomes (<inline-formula id="inf97">
<mml:math id="m105">
<mml:mrow>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>). <bold>(B)</bold>: Rare variants affect the outcomes (<inline-formula id="inf98">
<mml:math id="m106">
<mml:mrow>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>d</mml:mi>
<mml:mi>b</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>a</mml:mi>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>A</mml:mi>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>25</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>). <bold>(C)</bold>: Rare variants affect the outcomes <inline-formula id="inf99">
<mml:math id="m107">
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>A</mml:mi>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>M</mml:mi>
<mml:mi>A</mml:mi>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:math>
</inline-formula>.</p>
</caption>
<graphic xlink:href="fgene-14-1267704-g003.tif"/>
</fig>
</sec>
<sec id="s4-3">
<title>Outcome is affected by shared environmental and genetic factors</title>
<p>In this set of simulations, we evaluated the performance of FBLMM when outcomes were affected by both shared environmental and genetic factors. Three genes were randomly selected as causal, and outcomes were simulated under the following additive model:<disp-formula id="e9">
<mml:math id="m108">
<mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mi mathvariant="bold">Y</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi mathvariant="bold">&#x3b1;</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mn>3</mml:mn>
</mml:munderover>
</mml:mstyle>
<mml:msub>
<mml:mi mathvariant="bold">g</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:mi mathvariant="bold">&#x3f5;</mml:mi>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(9)</label>
</disp-formula>where <inline-formula id="inf100">
<mml:math id="m109">
<mml:mrow>
<mml:mi mathvariant="bold">&#x3b1;</mml:mi>
<mml:mo>&#x223c;</mml:mo>
<mml:mi>N</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold">K</mml:mi>
<mml:msubsup>
<mml:mi>&#x3c3;</mml:mi>
<mml:mi>a</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf101">
<mml:math id="m110">
<mml:mrow>
<mml:mi mathvariant="bold">K</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is a block diagonal matrix. <inline-formula id="inf102">
<mml:math id="m111">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold">g</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
<mml:mo>&#x223c;</mml:mo>
<mml:mi>N</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold">K</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
<mml:msubsup>
<mml:mi>&#x3c3;</mml:mi>
<mml:mi>m</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf103">
<mml:math id="m112">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold">K</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold">G</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
<mml:msubsup>
<mml:mi mathvariant="bold">G</mml:mi>
<mml:mi>m</mml:mi>
<mml:mi>T</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>/</mml:mo>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>. Similar to the previous section, among the three causal genes, we randomly set one of them as unmeasured in the data. We gradually increased the percentage of variability explained by the shared environmental and genetic effects from 20% to 60%, and both factors contributed equally (i.e., <inline-formula id="inf104">
<mml:math id="m113">
<mml:mrow>
<mml:msubsup>
<mml:mi>&#x3c3;</mml:mi>
<mml:mi>a</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mn>3</mml:mn>
</mml:msubsup>
<mml:msubsup>
<mml:mi>&#x3c3;</mml:mi>
<mml:mi>m</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>).</p>
<p>Pearson correlations and RMSEs are shown in <xref ref-type="fig" rid="F4">Figure 4</xref>. As the proportion of variability explained by shared environmental and genetic factors increases, the proposed method tends to perform much better than the others. This is because FBLMM is designed to capture predictive effects from both genetic and environmental risk factors simultaneously, whereas the other methods have little ability to model them if they are not measured. Although it is well accepted that family history itself is an important predictor for many complex diseases, little effort has been made to utilize information embedded in the family design. Our simulation shows that by using the design information, FBLMM can achieve robust performance and substantially improve the prediction models across a range of settings.</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>The comparison of prediction accuracy when outcomes are affected by both shared environmental factors and genetic variants, including both measured and unmeasured. The total heritability increases from 20% to 60%, with both genetic and environmental factors contributing equally.</p>
</caption>
<graphic xlink:href="fgene-14-1267704-g004.tif"/>
</fig>
</sec>
</sec>
<sec id="s5">
<title>Scenario 2: The impact of pedigree structures</title>
<p>In this set of simulations, we assessed the effects of pedigree structures on the performance of FBLMM. We considered the twin design (<xref ref-type="sec" rid="s13">Supplementary Figure S1D</xref>), the trio design (<xref ref-type="sec" rid="s13">Supplementary Figure S1E</xref>), and three-generation pedigree with mixed structures that include 24 avuncular, 30 double cousins, 42 grandparents and 278 sibling (<xref ref-type="sec" rid="s13">Supplementary Figures S1F&#x2013;I</xref>). We used Eq. <xref ref-type="disp-formula" rid="e9">9</xref> to simulate outcomes, where genetic variants on one causal gene are set as unmeasured. Let <inline-formula id="inf105">
<mml:math id="m114">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold">g</mml:mi>
<mml:mi>u</mml:mi>
</mml:msub>
<mml:mo>&#x223c;</mml:mo>
<mml:mi>N</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold">K</mml:mi>
<mml:mi>u</mml:mi>
</mml:msub>
<mml:msubsup>
<mml:mi>&#x3c3;</mml:mi>
<mml:mi>u</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> denote the cumulative predictive effects for the unmeasured gene, and Eq. <xref ref-type="disp-formula" rid="e9">9</xref> can be written as <inline-formula id="inf106">
<mml:math id="m115">
<mml:mrow>
<mml:mi mathvariant="bold">Y</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi mathvariant="bold">&#x3b1;</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msubsup>
<mml:msub>
<mml:mi mathvariant="bold">g</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold">g</mml:mi>
<mml:mi>u</mml:mi>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mi mathvariant="bold">&#x3f5;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
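<p>We considered three types of disease models (<xref ref-type="sec" rid="s13">Supplementary Table S1</xref>): <inline-formula><mml:math><mml:mrow><mml:msub><mml:mi>S</mml:mi><mml:mn>1</mml:mn></mml:msub></mml:mrow></mml:math></inline-formula>: both measured and unmeasured genetic variants have equally contributed to disease risk (i.e., <inline-formula id="inf107">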
<mml:math id="m116">
<mml:mrow>
<mml:msubsup>
<mml:mi>&#x3c3;</mml:mi>
<mml:mi>u</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msubsup>
<mml:msubsup>
<mml:mi>&#x3c3;</mml:mi>
<mml:mi>m</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>); <inline-formula id="inf108">
<mml:math id="m117">
<mml:mrow>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>: shared environmental factors have major influences on disease risk, and measured genetic factors only make small contributions (i.e., <inline-formula id="inf109">
<mml:math id="m118">
<mml:mrow>
<mml:msubsup>
<mml:mi>&#x3c3;</mml:mi>
<mml:mi>a</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
<mml:mo>&#x3e;</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msubsup>
<mml:msubsup>
<mml:mi>&#x3c3;</mml:mi>
<mml:mi>m</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>); and <inline-formula id="inf110">
<mml:math id="m119">
<mml:mrow>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mn>3</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>: both genetic and shared environmental factors were considered with unmeasured genetic variants making major contributions (i.e., <inline-formula id="inf111">
<mml:math id="m120">
<mml:mrow>
<mml:msubsup>
<mml:mi>&#x3c3;</mml:mi>
<mml:mi>u</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
<mml:mo>&#x3e;</mml:mo>
<mml:msubsup>
<mml:mi>&#x3c3;</mml:mi>
<mml:mi>a</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
<mml:mo>&#x2b;</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msubsup>
<mml:msubsup>
<mml:mi>&#x3c3;</mml:mi>
<mml:mi>m</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>). We set the total heritability for all disease models ranging from 20% to 60%, and the details of parameter settings for each disease model are summarized in <xref ref-type="sec" rid="s13">Supplementary Table S1</xref>.</p>
<p>The results when heritability is 40% are summarized in <xref ref-type="fig" rid="F5">Figure 5</xref>, and the others (i.e., <inline-formula id="inf112">
<mml:math id="m121">
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>20</mml:mn>
<mml:mo>%</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf113">
<mml:math id="m122">
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>60</mml:mn>
<mml:mo>%</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>) are shown in <xref ref-type="sec" rid="s13">Supplementary Figures S2, S3</xref>. Under disease model <inline-formula id="inf114">
<mml:math id="m123">
<mml:mrow>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, where measured and unmeasured genetic variants contributed equally to disease risk, <xref ref-type="fig" rid="F5">Figure 5A</xref> showed that the two-generation pedigree design has a higher prediction accuracy as compared to three-generation designs. This is mainly because relatives in two-generation pedigree have higher level of genetic relatedness than those that are far apart. Compared to existing methods, FBLMM worked the best across all pedigree structures under disease model <inline-formula id="inf115">
<mml:math id="m124">
<mml:mrow>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and captured most of the heritability. Under disease model <inline-formula id="inf116">
<mml:math id="m125">
<mml:mrow>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, where shared environmental factors mainly contributed to disease risk, <xref ref-type="fig" rid="F5">Figure 5B</xref> showed that the existing methods (i.e., BLMM, MultiBLUP and DPRVB) have lower prediction performance as compared to FBLMM. FBLMM tended to perform similarly across all three pedigree structures considered, as shared environmental factors affect all individuals within the family in a similar fashion. Under disease model <inline-formula id="inf117">
<mml:math id="m126">
<mml:mrow>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mn>3</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, where both unmeasured genetic variants and shared environmental factors contribute significantly to the trait, the two-generation pedigree structure tended to have higher prediction accuracy than the three-generation pedigree design (<xref ref-type="fig" rid="F5">Figure 5C</xref>). Regardless of the pedigree structures and disease models considered, our proposed FBLMM always outperformed the other methods (i.e., BLMM, MultiBLUP and DPRVB). This indicates that FBLMM has robust performance in capturing the predictive effects from shared environmental and unmeasured genetic factors regardless of the pedigree structures. When the heritability is set to be 20% and 60%, the trend remains the same (<xref ref-type="sec" rid="s13">Supplementary Figures S2, S3</xref>). By using the family design information, FBLMM has substantially enhanced the prediction accuracy, and the improvement is robust against various pedigree structures.</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>The comparison of prediction accuracy under different pedigree structures (<inline-formula id="inf118">
<mml:math id="m127">
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>40</mml:mn>
<mml:mo>%</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>). Three disease models are considered: <bold>(A)</bold> both measured and unmeasured genetic variants contributed to disease risk; <bold>(B)</bold> shared environmental and measured genetic factors affected outcomes; <bold>(C)</bold> all genetic variants (measured and unmeasured) and shared environmental factors contributed to disease risk.</p>
</caption>
<graphic xlink:href="fgene-14-1267704-g005.tif"/>
</fig>
</sec>
<sec id="s6">
<title>Real data application</title>
<p>The proposed method is applied to predict aggressive behavior utilizing the dataset obtained from the Twin Study of Behavioral and Emotional Development in Children (TBED-C). TBED-C is a family-based twin study, aimed at discovering genetic factors that contribute to conduct problems in children (<xref ref-type="bibr" rid="B2">Burt and Klump, 2012</xref>). TBED-C recruited 1000 twins aged between 6 and 10 years from 500 twin families in Michigan, including 50% monozygous twins. DNA samples were collected from each pair of twins. The sequencing was performed using the Illumina Human Core Exome chip, which includes common variants, rare variants, mitochondrial DNA, and indels. Samples with missing rate <inline-formula id="inf119">
<mml:math id="m128">
<mml:mrow>
<mml:mo>&#x3e;</mml:mo>
<mml:mn>3</mml:mn>
<mml:mo>%</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> were excluded. Single nucleotide variants (SNVs) were removed if any of the following exclusion criteria was met: 1) call rate <inline-formula id="inf120">
<mml:math id="m129">
<mml:mrow>
<mml:mo>&#x3c;</mml:mo>
<mml:mn>98</mml:mn>
<mml:mo>%</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> and 2) a <italic>p</italic>-value for Hardy&#x2013;Weinberg equilibrium test <inline-formula id="inf121">
<mml:math id="m130">
<mml:mrow>
<mml:mo>&#x3c;</mml:mo>
<mml:msup>
<mml:mn>10</mml:mn>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>5</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>. After the quality control filtering, 957 samples and 513,886 SNVs remained for the analysis. Parents completed the child behavior checklist for each twin separately by rating a series of questionnaires, where children&#x2019;s competencies, behavioral and emotional problems were assessed (<xref ref-type="bibr" rid="B2">Burt and Klump, 2012</xref>). Teacher(s) of each twin also completed the report form. Using the recommended approach (<xref ref-type="bibr" rid="B2">Burt and Klump, 2012</xref>), we assessed children&#x2019;s aggressive behavior by averaging the raw scale scores from both the parents&#x2019; and teachers&#x2019; reports. The distribution of the aggressive scales (AGG, <inline-formula id="inf122">
<mml:math id="m131">
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>n</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>3.70</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>; <inline-formula id="inf123">
<mml:math id="m132">
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>d</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>3.59</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>) is shown in <xref ref-type="sec" rid="s13">Supplementary Figure S4</xref>.</p>
<p>First, to avoid over-fitting and the chance finding problems, 20% of the samples were randomly selected for testing and the remaining 80% were used for training. In the training dataset, we assessed the marginal significance for each marker using a linear mixed model in the GCTA software package (<xref ref-type="bibr" rid="B42">Yang et al., 2011</xref>). Common variants with <italic>p</italic> values <inline-formula id="inf124">
<mml:math id="m133">
<mml:mrow>
<mml:mo>&#x3e;</mml:mo>
<mml:mn>0.1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> were filtered out from risk prediction analysis. As a result, approximately 25,168 SNVs remained. This pre-selection aimed to prune a large number of predictors down drastically to a more manageable size, and improve computational speed. We applied all evaluated methods (i.e., FBLMM, BLMM, MultiBLUP and DPRVB) to the remaining genetic variants. Finally, we validated the trained FBLMM model using the test set. The prediction performance was evaluated using Pearson correlation and RMSE. This process was repeated 100&#xa0;times.</p>
<p>Pearson correlations and the RMSEs are shown in <xref ref-type="fig" rid="F6">Figure 6</xref>. Similar to results from simulations, <xref ref-type="fig" rid="F6">Figure 6</xref> shows FBLMM performed much better than the others. This clearly indicates that simply adjusting for relatedness among family members can overlook key information, leading to a less accurate risk prediction model. On the contrary, utilizing information embedded in the family design can substantially improve prediction accuracy, as this makes the model more flexible to capture the predictive effects from unknown genetic and shared environmental risk factors.</p>
<fig id="F6" position="float">
<label>FIGURE 6</label>
<caption>
<p>The comparison of prediction accuracy for aggressive scale.</p>
</caption>
<graphic xlink:href="fgene-14-1267704-g006.tif"/>
</fig>
</sec>
<sec id="s7">
<title>Discussion and conclusions</title>
<p>In this paper, we have developed a novel FBLMM method for risk prediction analysis on sequencing data obtained from family-based genetic studies. Fundamentally different from existing methods that adjust for family correlations, FBLMM utilizes this relatedness to further improve prediction accuracy. Specifically, it forms two surrogates, including a theoretical kinship coefficient matrix (i.e., <inline-formula id="inf125">
<mml:math id="m134">
<mml:mrow>
<mml:msup>
<mml:mi mathvariant="bold">K</mml:mi>
<mml:msub>
<mml:mi>g</mml:mi>
<mml:mi>f</mml:mi>
</mml:msub>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>) and a block diagonal matrix (i.e., <inline-formula id="inf126">
<mml:math id="m135">
<mml:mrow>
<mml:msup>
<mml:mi mathvariant="bold">K</mml:mi>
<mml:msub>
<mml:mi>e</mml:mi>
<mml:mi>f</mml:mi>
</mml:msub>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>), to capture effects from unmeasured genetic and shared environmental factors. In addition, FBLMM extends the BLMM method proposed by (<xref ref-type="bibr" rid="B13">Hai and Wen, 2020</xref>), and thus it inherits all the advantages in the BLMM method. For example, it infers its parameters using variational Bayes algorithm rather than the traditional MCMC, making it much more computationally efficient. <xref ref-type="sec" rid="s13">Supplementary Table S2</xref> provided the details of computational resources needed as the sample size and the number of variants increase. Furthermore, it can capture predictive effects from both common and rare variants, and easily accommodate various model assumptions (e.g., isolated large effects and infinitesimal model). It is worth noting that although we mainly focused on genetic variants, our proposed framework has the intrinsic capacity in modeling the predictive effects from important demographic variables, where their predictive effects can be selected and modelled through the fixed effects (i.e., <inline-formula id="inf127">
<mml:math id="m136">
<mml:mrow>
<mml:mi mathvariant="bold">X</mml:mi>
<mml:mi mathvariant="bold">&#x3b2;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>) in our model. For example, in addition to genetic information, we can add family history, age and gender into the fixed effect part (i.e., <inline-formula id="inf128">
<mml:math id="m137">
<mml:mrow>
<mml:mi mathvariant="bold">X</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>) of our model, and their predictive effects can be directly estimated by our proposed framework. Through simulation studies, we have shown that FBLMM can yield higher prediction accuracy than existing methods, and our analysis on Michigan Twin data has also shown that FBLMM can better predict AGG.</p>
<p>The importance of genetic and environmental factors in risk prediction has long been appreciated (<xref ref-type="bibr" rid="B29">Nilsson et al., 2004</xref>). Many previous studies have shown that a substantial amount of heritability can be explained by family information due to a combination of genetic factors and shared environmental conditions (<xref ref-type="bibr" rid="B1">Bermejo and Hemminki, 2005</xref>; <xref ref-type="bibr" rid="B12">Gim et al., 2017</xref>). The family information can be helpful in identifying sub-populations that are at high risk (<xref ref-type="bibr" rid="B23">MacInnis et al., 2011</xref>; <xref ref-type="bibr" rid="B33">So et al., 2011</xref>; <xref ref-type="bibr" rid="B12">Gim et al., 2017</xref>). Despite its clinical importance, few methods fully use this information when building risk prediction models based on high-dimensional genomic data obtained from family-based studies. Existing analytical methods are usually an extension of the models designed for population-based studies, and thus they tend to make the observations un-correlated before estimating the predictive effects from genetic variants (<xref ref-type="bibr" rid="B27">Meigs et al., 2008</xref>). While this most common practice can allow researchers to build a statistically valid risk prediction model using genomic data from family-based study designs, it overlooks important information embedded in the design, leading to a model with decreased prediction accuracy. In this study, one of the key features of our proposed model is that it utilizes the family design to improve prediction model, rather than simply adjusting for the correlations among family members. Based on the design information, we formed two surrogate measures, including a theoretical kinship coefficient matrix (i.e., <inline-formula id="inf129">
<mml:math id="m138">
<mml:mrow>
<mml:msup>
<mml:mi mathvariant="bold">K</mml:mi>
<mml:msub>
<mml:mi>g</mml:mi>
<mml:mi>f</mml:mi>
</mml:msub>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>) and a block diagonal matrix (i.e., <inline-formula id="inf130">
<mml:math id="m139">
<mml:mrow>
<mml:msup>
<mml:mi mathvariant="bold">K</mml:mi>
<mml:msub>
<mml:mi>e</mml:mi>
<mml:mi>f</mml:mi>
</mml:msub>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>), to capture the impacts of genetic and environmental risk factors. As shown in our simulation studies (<xref ref-type="fig" rid="F2">Figure 2</xref> to <xref ref-type="fig" rid="F5">Figure 5</xref>) and the analysis of the TBED-C dataset (<xref ref-type="fig" rid="F6">Figure 6</xref>), FBLMM outperformed commonly used methods by using the design information, indicating that our proposed method has the capacity to substantially improve prediction models for family-based studies.</p>
<p>Rare variants of large effects can play an important role in complex human diseases (<xref ref-type="bibr" rid="B11">Gaukrodger et al., 2005</xref>). It has been reported that the largest contributions to genetic risk of human diseases can come from rare variants (<xref ref-type="bibr" rid="B24">Mancuso et al., 2016</xref>; <xref ref-type="bibr" rid="B15">Hernandez et al., 2019</xref>). However, few family-based genetic studies are powerful enough to model these effects, primarily due to the lack of efficient analytical methods (<xref ref-type="bibr" rid="B26">McIntosh et al., 2016</xref>). We have recently developed BLMM for risk prediction studies using genomic data from population-based study designs (<xref ref-type="bibr" rid="B13">Hai and Wen, 2020</xref>), and BLMM has achieved an improved prediction accuracy through simultaneously considering both common and rare variants. Instead of modeling individual predictive effects that are hard to estimate for rare variants, BLMM models the cumulative predictive effects from a group of variables that include both common and rare variants. BLMM uses a WSS weight function that has been used in association analysis of sequencing data to address the contributions of rare variants (<xref ref-type="bibr" rid="B41">Wu et al., 2011</xref>), and this leads to an improvement for prediction studies. Our proposed FBLMM is built within the BLMM framework, and thus it inherits BLMM&#x2019;s capacity in modeling rare variants. Like BLMM, FBLMM uses the WSS function to up-weight the rare variants so that their predictive effects can be effectively captured. As shown in simulations, FBLMM can achieve better assessment when outcomes were simulated under the assumption that rare variants significantly contribute to the risk (<xref ref-type="fig" rid="F2">Figures 2B, C</xref>).</p>
<p>One of the limitations of our method is that it overlooks the contributions of non-additive effects, especially interactions. As indicated in existing literature (<xref ref-type="bibr" rid="B39">Weissbrod, Geiger, and Rosset, 2016</xref>), non-linear predictive effects (e.g., epistasis) widely exist. Therefore, it is important to incorporate non-additive effects into risk prediction models. A potential solution within the FBLMM framework is to kernelize the variance-covariance matrix of the random effect terms, so that the assumed relationships between predictors and outcomes can be non-linear. For example, similar to MKLMM (<xref ref-type="bibr" rid="B39">Weissbrod, Geiger, and Rosset, 2016</xref>), polynomial kernel of two degrees of freedom and the saturating pathway kernel can be used to capture non-linear predictive effects. This will be a future direction of our research.</p>
<p>In summary, we have proposed a Bayesian linear mixed model for risk prediction analysis on genomic data obtained from family-based study designs. Our proposed FBLMM extends the BLMM method, and thus it can not only capture the predictive effects from both common and rare variants, but also accommodate various disease model assumptions. In addition, using study design information, FBLMM forms two surrogates to model the predictive effects from unmeasured/unknown genetic and environmental risk factors, which substantially facilitates family-based prediction studies. The algorithm implementing our proposed method is available at <ext-link ext-link-type="uri" xlink:href="https://github.com/yhai943/FBLMM">https://github.com/yhai943/FBLMM</ext-link>.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s8">
<title>Data availability statement</title>
<p>The R package is available at <ext-link ext-link-type="uri" xlink:href="https://github.com/yhai943/FBLMM">https://github.com/yhai943/FBLMM</ext-link>. The data presented in the study are deposited at figshare repository (<ext-link ext-link-type="uri" xlink:href="https://figshare.com/s/02c32e50c5b7529c0fb5">https://figshare.com/s/02c32e50c5b7529c0fb5</ext-link>). The use of the original genotype data is subject to the approval from the TBED-C study team (<ext-link ext-link-type="uri" xlink:href="https://msutwinstudies.com/msutr-data">https://msutwinstudies.com/msutr-data</ext-link>).</p>
</sec>
<sec id="s9">
<title>Author contributions</title>
<p>YH: Conceptualization, Methodology, Formal Analysis, Visualization, Writing&#x2013;original draft. WZ: Writing&#x2013;original draft, Software. QM: Formal Analysis. LL: Supervision. YW: Conceptualization, Methodology, Writing&#x2013;review and editing, Project administration, Supervision.</p>
</sec>
<sec id="s10">
<title>Funding</title>
<p>The author(s) declare financial support was received for the research, authorship, and/or publication of this article. This project is funded by the National Natural Science Foundation of China (Award No. 82173632), the Early Career Research Excellence Award from the University of Auckland, and the Marsden Fund from Royal Society of New Zealand (Project No. 19-UOA-209).</p>
</sec>
<ack>
<p>The author(s) wish to acknowledge the use of New Zealand eScience Infrastructure (NeSI) high performance computing facilities, consulting support and/or training services as part of this research. New Zealand&#x2019;s national facilities are provided by NeSI and funded jointly by NeSI&#x2019;s collaborator institutions and through the Ministry of Business, Innovation and Employment&#x2019;s Research Infrastructure programme.</p>
</ack>
<sec sec-type="COI-statement" id="s11">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s12">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec id="s13">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fgene.2023.1267704/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fgene.2023.1267704/full&#x23;supplementary-material</ext-link>
</p>
<supplementary-material xlink:href="Presentation1.PDF" id="SM1" mimetype="application/PDF" xmlns:xlink="http://www.w3.org/1999/xlink"/>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bermejo</surname>
<given-names>J. L.</given-names>
</name>
<name>
<surname>Hemminki</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2005</year>). <article-title>Familial lung cancer and aggregation of smoking habits: A simulation of the effect of shared environmental factors on the familial risk of cancer</article-title>. <source>Cancer Epidemiol. Prev. Biomarkers</source> <volume>14</volume> (<issue>7</issue>), <fpage>1738</fpage>&#x2013;<lpage>1740</lpage>. <pub-id pub-id-type="doi">10.1158/1055-9965.EPI-05-0201</pub-id>
</citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Burt</surname>
<given-names>S. A.</given-names>
</name>
<name>
<surname>Klump</surname>
<given-names>K. L.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Etiological distinctions between aggressive and non-aggressive antisocial behavior: results from a nuclear twin family model</article-title>. <source>J. Abnorm. Child Psychol.</source> <volume>40</volume> (<issue>7</issue>), <fpage>1059</fpage>&#x2013;<lpage>1071</lpage>. <pub-id pub-id-type="doi">10.1007/s10802-012-9632-9</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Carvalho</surname>
<given-names>C. M.</given-names>
</name>
<name>
<surname>Chang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Lucas</surname>
<given-names>J. E.</given-names>
</name>
<name>
<surname>Nevins</surname>
<given-names>J. R.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>West</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>High-dimensional sparse factor modeling: applications in gene expression genomics</article-title>. <source>J. Am. Stat. Assoc.</source> <volume>103</volume> (<issue>484</issue>), <fpage>1438</fpage>&#x2013;<lpage>1456</lpage>. <pub-id pub-id-type="doi">10.1198/016214508000000869</pub-id>
</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname>
<given-names>C.Y.-C.</given-names>
</name>
<name>
<surname>Scurrah</surname>
<given-names>K. J.</given-names>
</name>
<name>
<surname>Stankovich</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Garoufalis</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Dirani</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Pertile</surname>
<given-names>K. K.</given-names>
</name>
<etal/>
</person-group> (<year>2007</year>). <article-title>Heritability and shared environment estimates for myopia and associated ocular biometric traits: the genes in myopia (gem) family study</article-title>. <source>Hum. Genet.</source> <volume>121</volume> (<issue>3-4</issue>), <fpage>511</fpage>&#x2013;<lpage>520</lpage>. <pub-id pub-id-type="doi">10.1007/s00439-006-0312-0</pub-id>
</citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Dunson</surname>
<given-names>D. B.</given-names>
</name>
</person-group> (<year>2003</year>). <article-title>Random effects selection in linear mixed models</article-title>. <source>Biometrics</source> <volume>59</volume> (<issue>4</issue>), <fpage>762</fpage>&#x2013;<lpage>769</lpage>. <pub-id pub-id-type="doi">10.1111/j.0006-341x.2003.00089.x</pub-id>
</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Couillard</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Despr&#xe9;s</surname>
<given-names>J.-P.</given-names>
</name>
<name>
<surname>Lamarche</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Bergeron</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Gagnon</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Leon</surname>
<given-names>A. S.</given-names>
</name>
<etal/>
</person-group> (<year>2001</year>). <article-title>Effects of endurance exercise training on plasma HDL cholesterol levels depend on levels of triglycerides: evidence from men of the health, risk factors, exercise training and genetics (heritage) family study</article-title>. <source>Arteriosclerosis, thrombosis, Vasc. Biol.</source> <volume>21</volume> (<issue>7</issue>), <fpage>1226</fpage>&#x2013;<lpage>1232</lpage>. <pub-id pub-id-type="doi">10.1161/hq0701.092137</pub-id>
</citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cruceanu</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Ambalavanan</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Spiegelman</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Gauthier</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Lafreni&#xe8;re</surname>
<given-names>R. G.</given-names>
</name>
<name>
<surname>Dion</surname>
<given-names>P. A.</given-names>
</name>
<etal/>
</person-group> (<year>2013</year>). <article-title>Family-based exome-sequencing approach identifies rare susceptibility variants for lithium-responsive bipolar disorder</article-title>. <source>Genome</source> <volume>56</volume> (<issue>10</issue>), <fpage>634</fpage>&#x2013;<lpage>640</lpage>. <pub-id pub-id-type="doi">10.1139/gen-2013-0081</pub-id>
</citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dickson</surname>
<given-names>S. P.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Krantz</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Hakonarson</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Goldstein</surname>
<given-names>D. B.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>Rare variants create synthetic genome-wide associations</article-title>. <source>PLoS Biol.</source> <volume>8</volume> (<issue>1</issue>), <fpage>1000294</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pbio.1000294</pub-id>
</citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dirani</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Chamberlain</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Shekar</surname>
<given-names>S. N.</given-names>
</name>
<name>
<surname>Islam</surname>
<given-names>A. F.</given-names>
</name>
<name>
<surname>Garoufalis</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>C. Y.</given-names>
</name>
<etal/>
</person-group> (<year>2006</year>). <article-title>Heritability of refractive error and ocular biometrics: the genes in myopia (gem) twin study</article-title>. <source>Investigative Ophthalmol. Vis. Sci.</source> <volume>47</volume> (<issue>11</issue>), <fpage>4756</fpage>&#x2013;<lpage>4761</lpage>. <pub-id pub-id-type="doi">10.1167/iovs.06-0270</pub-id>
</citation>
</ref>
<ref id="B10">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Fernandes</surname>
<given-names>V.</given-names>
</name>
<etal/>
</person-group> (<year>2017</year>). &#x201c;<article-title>Bernoulli&#x2013;Gaussian distribution with memory as a model for power line communication noise</article-title>,&#x201d; in <conf-name>Proc. Braz. Telecommun. Signal Process. Symp</conf-name>, <conf-loc>S&#xe3;o Pedro, Brazil</conf-loc>, <conf-date>September 2017</conf-date>, <fpage>328</fpage>&#x2013;<lpage>332</lpage>.</citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gaukrodger</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Mayosi</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Imrie</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Avery</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Baker</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Connell</surname>
<given-names>J.</given-names>
</name>
<etal/>
</person-group> (<year>2005</year>). <article-title>A rare variant of the leptin gene has large effects on blood pressure and carotid intima-medial thickness: A study of 1428 individuals in 248 families</article-title>. <source>J. Med. Genet.</source> <volume>42</volume> (<issue>6</issue>), <fpage>474</fpage>&#x2013;<lpage>478</lpage>. <pub-id pub-id-type="doi">10.1136/jmg.2004.027631</pub-id>
</citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gim</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Kim</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Kwak</surname>
<given-names>S. H.</given-names>
</name>
<name>
<surname>Choi</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Park</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Park</surname>
<given-names>K. S.</given-names>
</name>
<etal/>
</person-group> (<year>2017</year>). <article-title>Improving disease prediction by incorporating family disease history in risk prediction models with large-scale genetic data</article-title>. <source>Genetics</source> <volume>207</volume> (<issue>3</issue>), <fpage>1147</fpage>&#x2013;<lpage>1155</lpage>. <pub-id pub-id-type="doi">10.1534/genetics.117.300283</pub-id>
</citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hai</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Wen</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>A Bayesian linear mixed model for prediction of complex traits</article-title>. <source>Bioinformatics</source> <volume>36</volume> (<issue>22-23</issue>), <fpage>5415</fpage>&#x2013;<lpage>5423</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btaa1023</pub-id>
</citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Helgadottir</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Gretarsdottir</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Thorleifsson</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Hjartarson</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Sigurdsson</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Magnusdottir</surname>
<given-names>A.</given-names>
</name>
<etal/>
</person-group> (<year>2016</year>). <article-title>Variants with large effects on blood lipids and the role of cholesterol and triglycerides in coronary disease</article-title>. <source>Nat. Genet.</source> <volume>48</volume> (<issue>6</issue>), <fpage>634</fpage>&#x2013;<lpage>639</lpage>. <pub-id pub-id-type="doi">10.1038/ng.3561</pub-id>
</citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hernandez</surname>
<given-names>R. D.</given-names>
</name>
<name>
<surname>Uricchio</surname>
<given-names>L. H.</given-names>
</name>
<name>
<surname>Hartman</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Ye</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Dahl</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Zaitlen</surname>
<given-names>N.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Ultrarare variants drive substantial cis heritability of human gene expression</article-title>. <source>Nat. Genet.</source> <volume>51</volume> (<issue>9</issue>), <fpage>1349</fpage>&#x2013;<lpage>1355</lpage>. <pub-id pub-id-type="doi">10.1038/s41588-019-0487-7</pub-id>
</citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Huang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Thomas</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Vieland</surname>
<given-names>V. J.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Employing MCMC under the PPL framework to analyze sequence data in large pedigrees</article-title>. <source>Front. Genet.</source> <volume>4</volume>, <fpage>59</fpage>. <pub-id pub-id-type="doi">10.3389/fgene.2013.00059</pub-id>
</citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ionita-Laza</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Ottman</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>Study designs for identification of rare disease variants in complex diseases: the utility of family-based designs</article-title>. <source>Genetics</source> <volume>189</volume> (<issue>3</issue>), <fpage>1061</fpage>&#x2013;<lpage>1068</lpage>. <pub-id pub-id-type="doi">10.1534/genetics.111.131813</pub-id>
</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ji</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Foo</surname>
<given-names>J. N.</given-names>
</name>
<name>
<surname>O&#x2019;Roak</surname>
<given-names>B. J.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Larson</surname>
<given-names>M. G.</given-names>
</name>
<name>
<surname>Simon</surname>
<given-names>D. B.</given-names>
</name>
<etal/>
</person-group> (<year>2008</year>). <article-title>Rare independent mutations in renal salt handling genes contribute to blood pressure variation</article-title>. <source>Nat. Genet.</source> <volume>40</volume> (<issue>5</issue>), <fpage>592</fpage>&#x2013;<lpage>599</lpage>. <pub-id pub-id-type="doi">10.1038/ng.118</pub-id>
</citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Laitinen</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Rasanen</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Kaprio</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Koskenvuo</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Laitinen</surname>
<given-names>L. A.</given-names>
</name>
</person-group> (<year>1998</year>). <article-title>Importance of genetic factors in adolescent asthma: A population-based twin-family study</article-title>. <source>Am. J. Respir. Crit. care Med.</source> <volume>157</volume> (<issue>4</issue>), <fpage>1073</fpage>&#x2013;<lpage>1078</lpage>. <pub-id pub-id-type="doi">10.1164/ajrccm.157.4.9704041</pub-id>
</citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lali</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Chong</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Omidi</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Mohammadi-Shemirani</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Le</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Pare</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Calibrated rare variant genetic risk scores for complex disease prediction using large exome sequence repositories</article-title>. <source>bioRxiv</source>. <pub-id pub-id-type="doi">10.1164/ajrccm.157.4.9704041</pub-id>
</citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lee</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Emond</surname>
<given-names>M. J.</given-names>
</name>
<name>
<surname>Bamshad</surname>
<given-names>M. J.</given-names>
</name>
<name>
<surname>Barnes</surname>
<given-names>K. C.</given-names>
</name>
<name>
<surname>Rieder</surname>
<given-names>M. J.</given-names>
</name>
<name>
<surname>Nickerson</surname>
<given-names>D. A.</given-names>
</name>
<etal/>
</person-group> (<year>2012</year>). <article-title>Optimal unified approach for rare-variant association testing with application to small-sample case-control whole-exome sequencing studies</article-title>. <source>Am. J. Hum. Genet.</source> <volume>91</volume> (<issue>2</issue>), <fpage>224</fpage>&#x2013;<lpage>237</lpage>. <pub-id pub-id-type="doi">10.1016/j.ajhg.2012.06.007</pub-id>
</citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lichtenstein</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Yip</surname>
<given-names>B. H.</given-names>
</name>
<name>
<surname>Bj&#xf6;rk</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Pawitan</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Cannon</surname>
<given-names>T. D.</given-names>
</name>
<name>
<surname>Sullivan</surname>
<given-names>P. F.</given-names>
</name>
<etal/>
</person-group> (<year>2009</year>). <article-title>Common genetic determinants of schizophrenia and bipolar disorder in Swedish families: A population-based study</article-title>. <source>Lancet</source> <volume>373</volume> (<issue>9659</issue>), <fpage>234</fpage>&#x2013;<lpage>239</lpage>. <pub-id pub-id-type="doi">10.1016/S0140-6736(09)60072-6</pub-id>
</citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>MacInnis</surname>
<given-names>R. J.</given-names>
</name>
<name>
<surname>Antoniou</surname>
<given-names>A. C.</given-names>
</name>
<name>
<surname>Eeles</surname>
<given-names>R. A.</given-names>
</name>
<name>
<surname>Severi</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Al Olama</surname>
<given-names>A. A.</given-names>
</name>
<name>
<surname>McGuffog</surname>
<given-names>L.</given-names>
</name>
<etal/>
</person-group> (<year>2011</year>). <article-title>A risk prediction algorithm based on family history and common genetic variants: application to prostate cancer with potential clinical impact</article-title>. <source>Genet. Epidemiol.</source> <volume>35</volume> (<issue>6</issue>), <fpage>549</fpage>&#x2013;<lpage>556</lpage>. <pub-id pub-id-type="doi">10.1002/gepi.20605</pub-id>
</citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mancuso</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Rohland</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Rand</surname>
<given-names>K. A.</given-names>
</name>
<name>
<surname>Tandon</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Allen</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Quinque</surname>
<given-names>D.</given-names>
</name>
<etal/>
</person-group> (<year>2016</year>). <article-title>The contribution of rare variation to prostate cancer heritability</article-title>. <source>Nat. Genet.</source> <volume>48</volume> (<issue>1</issue>), <fpage>30</fpage>&#x2013;<lpage>35</lpage>. <pub-id pub-id-type="doi">10.1038/ng.3446</pub-id>
</citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Marateb</surname>
<given-names>H. R.</given-names>
</name>
<name>
<surname>Mohebian</surname>
<given-names>M. R.</given-names>
</name>
<name>
<surname>Javanmard</surname>
<given-names>S. H.</given-names>
</name>
<name>
<surname>Tavallaei</surname>
<given-names>A. A.</given-names>
</name>
<name>
<surname>Tajadini</surname>
<given-names>M. H.</given-names>
</name>
<name>
<surname>Heidari-Beni</surname>
<given-names>M.</given-names>
</name>
<etal/>
</person-group> (<year>2018</year>). <article-title>Prediction of dyslipidemia using gene mutations, family history of diseases and anthropometric indicators in children and adolescents: the caspian-iii study</article-title>. <source>Comput. Struct. Biotechnol. J.</source> <volume>16</volume>, <fpage>121</fpage>&#x2013;<lpage>130</lpage>. <pub-id pub-id-type="doi">10.1016/j.csbj.2018.02.009</pub-id>
</citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>McIntosh</surname>
<given-names>A. M.</given-names>
</name>
<name>
<surname>Hall</surname>
<given-names>L. S.</given-names>
</name>
<name>
<surname>Zeng</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Adams</surname>
<given-names>M. J.</given-names>
</name>
<name>
<surname>Gibson</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Wigmore</surname>
<given-names>E.</given-names>
</name>
<etal/>
</person-group> (<year>2016</year>). <article-title>Genetic and environmental risk for chronic pain and the contribution of risk variants for major depressive disorder: A family-based mixed-model analysis</article-title>. <source>PLoS Med.</source> <volume>13</volume> (<issue>8</issue>), <fpage>1002090</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pmed.1002090</pub-id>
</citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Meigs</surname>
<given-names>J. B.</given-names>
</name>
<name>
<surname>Shrader</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Sullivan</surname>
<given-names>L. M.</given-names>
</name>
<name>
<surname>McAteer</surname>
<given-names>J. B.</given-names>
</name>
<name>
<surname>Fox</surname>
<given-names>C. S.</given-names>
</name>
<name>
<surname>Dupuis</surname>
<given-names>J.</given-names>
</name>
<etal/>
</person-group> (<year>2008</year>). <article-title>Genotype score in addition to common risk factors for prediction of type 2 diabetes</article-title>. <source>N. Engl. J. Med.</source> <volume>359</volume> (<issue>21</issue>), <fpage>2208</fpage>&#x2013;<lpage>2219</lpage>. <pub-id pub-id-type="doi">10.1056/NEJMoa0804742</pub-id>
</citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mihaescu</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Pencina</surname>
<given-names>M. J.</given-names>
</name>
<name>
<surname>Alonso</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Lunetta</surname>
<given-names>K. L.</given-names>
</name>
<name>
<surname>Heckbert</surname>
<given-names>S. R.</given-names>
</name>
<name>
<surname>Benjamin</surname>
<given-names>E. J.</given-names>
</name>
<etal/>
</person-group> (<year>2013</year>). <article-title>Incremental value of rare genetic variants for the prediction of multifactorial diseases</article-title>. <source>Genome Med.</source> <volume>5</volume> (<issue>8</issue>), <fpage>76</fpage>. <pub-id pub-id-type="doi">10.1186/gm480</pub-id>
</citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Nilsson</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Salonen Ros</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Cnattingius</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Lichtenstein</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2004</year>). <article-title>The importance of genetic and environmental effects for pre-eclampsia and gestational hypertension: A family study</article-title>. <source>BJOG Int. J. Obstetrics Gynaecol.</source> <volume>111</volume> (<issue>3</issue>), <fpage>200</fpage>&#x2013;<lpage>206</lpage>. <pub-id pub-id-type="doi">10.1111/j.1471-0528.2004.00042x.x</pub-id>
</citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Peloso</surname>
<given-names>G. M.</given-names>
</name>
<name>
<surname>Auer</surname>
<given-names>P. L.</given-names>
</name>
<name>
<surname>Bis</surname>
<given-names>J. C.</given-names>
</name>
<name>
<surname>Voorman</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Morrison</surname>
<given-names>A. C.</given-names>
</name>
<name>
<surname>Stitziel</surname>
<given-names>N. O.</given-names>
</name>
<etal/>
</person-group> (<year>2014</year>). <article-title>Association of low-frequency and rare coding-sequence variants with blood lipids and coronary heart disease in 56,000 whites and blacks</article-title>. <source>Am. J. Hum. Genet.</source> <volume>94</volume> (<issue>2</issue>), <fpage>223</fpage>&#x2013;<lpage>232</lpage>. <pub-id pub-id-type="doi">10.1016/j.ajhg.2014.01.009</pub-id>
</citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ruderfer</surname>
<given-names>D. M.</given-names>
</name>
<name>
<surname>Korn</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Purcell</surname>
<given-names>S. M.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>Family-based genetic risk prediction of multifactorial disease</article-title>. <source>Genome Med.</source> <volume>2</volume> (<issue>1</issue>), <fpage>2</fpage>. <pub-id pub-id-type="doi">10.1186/gm123</pub-id>
</citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ramachandrappa</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Raimondo</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Cali</surname>
<given-names>A. M.</given-names>
</name>
<name>
<surname>Keogh</surname>
<given-names>J. M.</given-names>
</name>
<name>
<surname>Henning</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Saeed</surname>
<given-names>S.</given-names>
</name>
<etal/>
</person-group> (<year>2013</year>). <article-title>Rare variants in single-minded 1 (SIM1) are associated with severe obesity</article-title>. <source>J. Clin. Invest.</source> <volume>123</volume> (<issue>7</issue>), <fpage>3042</fpage>&#x2013;<lpage>3050</lpage>. <pub-id pub-id-type="doi">10.1172/JCI68016</pub-id>
</citation>
</ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>So</surname>
<given-names>H.-C.</given-names>
</name>
<name>
<surname>Kwan</surname>
<given-names>J. S.</given-names>
</name>
<name>
<surname>Cherny</surname>
<given-names>S. S.</given-names>
</name>
<name>
<surname>Sham</surname>
<given-names>P. C.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>Risk prediction of complex diseases from family history and known susceptibility loci, with applications for cancer screening</article-title>. <source>Am. J. Hum. Genet.</source> <volume>88</volume> (<issue>5</issue>), <fpage>548</fpage>&#x2013;<lpage>565</lpage>. <pub-id pub-id-type="doi">10.1016/j.ajhg.2011.04.001</pub-id>
</citation>
</ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Speed</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Balding</surname>
<given-names>D. J.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>MultiBLUP: improved SNP-based prediction for complex traits</article-title>. <source>Genome Res.</source> <volume>24</volume> (<issue>9</issue>), <fpage>1550</fpage>&#x2013;<lpage>1557</lpage>. <pub-id pub-id-type="doi">10.1101/gr.169375.113</pub-id>
</citation>
</ref>
<ref id="B35">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Stefansson</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Rujescu</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Cichon</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Pietil&#xe4;inen</surname>
<given-names>O. P.</given-names>
</name>
<name>
<surname>Ingason</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Steinberg</surname>
<given-names>S.</given-names>
</name>
<etal/>
</person-group> (<year>2008</year>). <article-title>Large recurrent microdeletions associated with schizophrenia</article-title>. <source>Nature</source> <volume>455</volume> (<issue>7210</issue>), <fpage>232</fpage>&#x2013;<lpage>236</lpage>. <pub-id pub-id-type="doi">10.1038/nature07229</pub-id>
</citation>
</ref>
<ref id="B36">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Valdez</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Greenlund</surname>
<given-names>K. J.</given-names>
</name>
<name>
<surname>Khoury</surname>
<given-names>M. J.</given-names>
</name>
<name>
<surname>Yoon</surname>
<given-names>P. W.</given-names>
</name>
</person-group> (<year>2007</year>). <article-title>Is family history a useful tool for detecting children at risk for diabetes and cardiovascular diseases? A public health perspective</article-title>. <source>Pediatrics</source> <volume>120</volume>, <fpage>78</fpage>&#x2013;<lpage>86</lpage>. <comment>SUPPLEMENT 2</comment>. <pub-id pub-id-type="doi">10.1542/peds.2007-1010G</pub-id>
</citation>
</ref>
<ref id="B37">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>VanRaden</surname>
<given-names>P. M.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>Efficient methods to compute genomic predictions</article-title>. <source>J. Dairy Sci.</source> <volume>91</volume> (<issue>11</issue>), <fpage>4414</fpage>&#x2013;<lpage>4423</lpage>. <pub-id pub-id-type="doi">10.3168/jds.2007-0980</pub-id>
</citation>
</ref>
<ref id="B38">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Ma</surname>
<given-names>J. Z.</given-names>
</name>
<name>
<surname>Payne</surname>
<given-names>T. J.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>M. D.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Introduction to deep sequencing and its application to drug addiction research with a focus on rare variants</article-title>. <source>Mol. Neurobiol.</source> <volume>49</volume> (<issue>1</issue>), <fpage>601</fpage>&#x2013;<lpage>614</lpage>. <pub-id pub-id-type="doi">10.1007/s12035-013-8541-4</pub-id>
</citation>
</ref>
<ref id="B39">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Weissbrod</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Geiger</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Rosset</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Multikernel linear mixed models for complex phenotype prediction</article-title>. <source>Genome Res.</source> <volume>26</volume> (<issue>7</issue>), <fpage>969</fpage>&#x2013;<lpage>979</lpage>. <pub-id pub-id-type="doi">10.1101/gr.201996.115</pub-id>
</citation>
</ref>
<ref id="B40">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wen</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Lu</surname>
<given-names>Q.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Risk prediction modeling on family-based sequencing data using a random field method</article-title>. <source>Genetics</source> <volume>117</volume>. <pub-id pub-id-type="doi">10.1534/genetics.117.199752</pub-id>
</citation>
</ref>
<ref id="B41">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wu</surname>
<given-names>M. C.</given-names>
</name>
<name>
<surname>Lee</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Cai</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Boehnke</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>X.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>Rare-variant association testing for sequencing data with the sequence kernel association test</article-title>. <source>Am. J. Hum. Genet.</source> <volume>89</volume> (<issue>1</issue>), <fpage>82</fpage>&#x2013;<lpage>93</lpage>. <pub-id pub-id-type="doi">10.1016/j.ajhg.2011.05.029</pub-id>
</citation>
</ref>
<ref id="B42">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Lee</surname>
<given-names>S. H.</given-names>
</name>
<name>
<surname>Goddard</surname>
<given-names>M. E.</given-names>
</name>
<name>
<surname>Visscher</surname>
<given-names>P. M.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>GCTA: A tool for genome-wide complex trait analysis</article-title>. <source>Am. J. Hum. Genet.</source> <volume>88</volume> (<issue>1</issue>), <fpage>76</fpage>&#x2013;<lpage>82</lpage>. <pub-id pub-id-type="doi">10.1016/j.ajhg.2010.11.011</pub-id>
</citation>
</ref>
<ref id="B43">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zeng</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>X.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Non-parametric genetic prediction of complex traits with latent Dirichlet process regression models</article-title>. <source>Nat. Commun.</source> <volume>8</volume> (<issue>1</issue>), <fpage>456</fpage>. <pub-id pub-id-type="doi">10.1038/s41467-017-00470-2</pub-id>
</citation>
</ref>
<ref id="B44">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhou</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Carbonetto</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Stephens</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Polygenic modeling with Bayesian sparse linear mixed models</article-title>. <source>PLoS Genet.</source> <volume>9</volume> (<issue>2</issue>), <fpage>e1003264</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pgen.1003264</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>