<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Genet.</journal-id>
<journal-title>Frontiers in Genetics</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Genet.</abbrev-journal-title>
<issn pub-type="epub">1664-8021</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fgene.2014.00293</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Genetics</subject>
<subj-group>
<subject>Methods Article</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>A bioinformatics workflow for detecting signatures of selection in genomic data</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name><surname>Cadzow</surname> <given-names>Murray</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="author-notes" rid="fn003"><sup>&#x02020;</sup></xref>
<uri xlink:href="http://community.frontiersin.org/people/u/177787"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Boocock</surname> <given-names>James</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="author-notes" rid="fn003"><sup>&#x02020;</sup></xref>
<uri xlink:href="http://community.frontiersin.org/people/u/163483"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Nguyen</surname> <given-names>Hoang T.</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<uri xlink:href="http://community.frontiersin.org/people/u/163341"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Wilcox</surname> <given-names>Phillip</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
<uri xlink:href="http://community.frontiersin.org/people/u/175326"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Merriman</surname> <given-names>Tony R.</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<uri xlink:href="http://community.frontiersin.org/people/u/23431"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Black</surname> <given-names>Michael A.</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="author-notes" rid="fn001"><sup>&#x0002A;</sup></xref>
<uri xlink:href="http://community.frontiersin.org/people/u/34584"/>
</contrib>
</contrib-group>
<aff id="aff1"><sup>1</sup><institution>Department of Biochemistry, University of Otago</institution> <country>Dunedin, New Zealand</country></aff>
<aff id="aff2"><sup>2</sup><institution>Virtual Institute of Statistical Genetics</institution> <country>Rotorua, New Zealand</country></aff>
<aff id="aff3"><sup>3</sup><institution>Department of Mathematics and Statistics, University of Otago</institution> <country>Dunedin, New Zealand</country></aff>
<aff id="aff4"><sup>4</sup><institution>New Zealand Forest Research Institute Ltd</institution> <country>Rotorua, New Zealand</country></aff>
<author-notes>
<fn fn-type="edited-by"><p>Edited by: Mensur Dlakic, Montana State University, USA</p></fn>
<fn fn-type="edited-by"><p>Reviewed by: Joanna Kelley, Stanford University, USA; Felipe Da Veiga Leprevost, Fiocruz, Brazil</p></fn>
<fn fn-type="corresp" id="fn001"><p>&#x0002A;Correspondence: Michael A. Black, Department of Biochemistry, University of Otago, PO Box 56, 710 Cumberland Street, Dunedin 9054, New Zealand e-mail: <email>mik.black&#x00040;otago.ac.nz</email></p></fn>
<fn fn-type="other" id="fn002"><p>This article was submitted to Bioinformatics and Computational Biology, a section of the journal Frontiers in Genetics.</p></fn>
<fn fn-type="equal" id="fn003"><p>&#x02020; These authors have contributed equally to this work.</p></fn>
</author-notes>
<pub-date pub-type="epub">
<day>26</day>
<month>08</month>
<year>2014</year>
</pub-date>
<pub-date pub-type="collection">
<year>2014</year>
</pub-date>
<volume>5</volume>
<elocation-id>293</elocation-id>
<history>
<date date-type="received">
<day>25</day>
<month>05</month>
<year>2014</year>
</date>
<date date-type="accepted">
<day>06</day>
<month>08</month>
<year>2014</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x000A9; 2014 Cadzow, Boocock, Nguyen, Wilcox, Merriman and Black.</copyright-statement>
<copyright-year>2014</copyright-year>
<license license-type="open-access" xlink:href="http://creativecommons.org/licenses/by/3.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) or licensor are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract><p>The detection of &#x0201C;signatures of selection&#x0201D; is now possible on a genome-wide scale in many plant and animal species, and can be performed in a population-specific manner due to the wealth of per-population genome-wide genotype data that is available. With genomic regions that exhibit evidence of having been under selection shown to also be enriched for genes associated with biologically important traits, detection of evidence of selective pressure is emerging as an additional approach for identifying novel gene-trait associations. While high-density genotype data is now relatively easy to obtain, for many researchers it is not immediately obvious how to go about identifying signatures of selection in these data sets. Here we describe a basic workflow, constructed from open source tools, for detecting and examining evidence of selection in genomic data. Code to install and implement the pipeline components, and instructions to run a basic analysis using the workflow described here, can be downloaded from our public GitHub repository: <ext-link ext-link-type="uri" xlink:href="http://www.github.com/smilefreak/selectionTools/">http://www.github.com/smilefreak/selectionTools/</ext-link>.</p></abstract>
<kwd-group>
<kwd>signatures of selection</kwd>
<kwd>genomics</kwd>
<kwd>genome-wide</kwd>
<kwd>analysis pipeline</kwd>
</kwd-group>
<counts>
<fig-count count="1"/>
<table-count count="1"/>
<equation-count count="0"/>
<ref-count count="60"/>
<page-count count="8"/>
<word-count count="6493"/>
</counts>
</article-meta>
</front>
<body>
<sec sec-type="introduction" id="s1">
<title>Introduction</title>
<p>With the increased availability of whole-genome genotype data, it is possible to identify regions of the genome that exhibit evidence of having been subjected to selective pressure (e.g., Sabeti et al., <xref ref-type="bibr" rid="B44">2002</xref>, <xref ref-type="bibr" rid="B46">2007</xref>). While these &#x0201C;signatures of selection&#x0201D; can help to shed light on the evolutionary pressures experienced throughout history, they have also been shown to be associated with regions of the genome that are enriched for genes involved in cultural differentiation and complex disease in humans (Laland et al., <xref ref-type="bibr" rid="B27">2010</xref>; Lappalainen et al., <xref ref-type="bibr" rid="B28">2010</xref>) and traits of adaptive and/or commercial significance in plant and animal species. Examples include forest trees (see reviews by Gonzalez-Martinez et al., <xref ref-type="bibr" rid="B17">2011</xref>; Neale and Kremer, <xref ref-type="bibr" rid="B34">2011</xref>), wheat (Cavanagh et al., <xref ref-type="bibr" rid="B5">2013</xref>), horses (Gu et al., <xref ref-type="bibr" rid="B20">2009</xref>), sheep (Moradi et al., <xref ref-type="bibr" rid="B33">2012</xref>), and domesticated dairy cattle (e.g., Qanbari et al., <xref ref-type="bibr" rid="B40">2011</xref>). As a result, methods for detecting evidence of selection also provide a mechanism for highlighting genomic regions that may be associated with biologically important traits.</p>
<p>Recently Pybus et al. (<xref ref-type="bibr" rid="B39">2014</xref>) described the &#x0201C;Selection Browser 1.0,&#x0201D; a web-based tool for investigating selection in the human genome, based on a subset of data available from the 1000 Genomes Project (The 1000 Genomes Project Consortium, <xref ref-type="bibr" rid="B51">2010</xref>). This resource delivers easy and intuitive access to pre-computed results from a number of tests for selection, applied to the available data, and thus provides an excellent example of the type of selection-specific information that can be extracted from low-coverage resequencing studies. For researchers wishing to investigate selection in other human cohorts or populations (or other organisms), however, a non-trivial amount of data manipulation and subsequent computation is required in order to extract this type of information from the available data.</p>
<p>Although detection of putative selective pressure offers a useful approach for identifying regions of interest in the genome, a number of steps are required to move from genome-wide (re)sequence or genotype data (e.g., as can easily be obtained using high-throughput microarray-based or sequencing technologies) to identifying specific genomic regions that exhibit evidence of having been under selection. The steps required reflect to some extent the series of advances that have been made in genomics technologies in recent years, with traditional file formats and software requiring manipulation and translation as part of the analysis workflow. While the process is not complex, for researchers unfamiliar with the required tools and data formats, the path from genotypes to signatures of selection can be a difficult one.</p>
<p>Here we provide a brief overview of a relatively simple workflow for taking high-density genotype data, and using it to identify evidence of selective pressure in regions of the genome. This pipeline is applicable to any diploid species where genome-wide (re)sequence and/or genotypic data are available (e.g., genomic/transcriptomic sequencing, whole genome SNP arrays), along with an ancestral reference genome and either a genetic or physical map.</p>
</sec>
<sec>
<title>Methods for detecting evidence of selection</title>
<p>The tools used to detect evidence of selection are dependent on the nature of the selective signature being investigated, which itself depends on the time scale over which the selection occurred (Sabeti et al., <xref ref-type="bibr" rid="B45">2006</xref>). Traditionally the F<sub>ST</sub> statistic has been a popular choice for investigating selection, utilizing differences in allele frequency between populations to infer selective pressure in one population relative to the other, and allowing detection of potential selection occurring in the range 50,000 to 75,000 years prior for human populations (Sabeti et al., <xref ref-type="bibr" rid="B45">2006</xref>), equivalent to approximately 2000 to 3000 generations. A thorough review of the use of the F<sub>ST</sub> statistic is provided by Holsinger and Weir (<xref ref-type="bibr" rid="B21">2009</xref>), with recent modifications able to account for genotypic uncertainty associated with more modern technologies (Fumagalli et al., <xref ref-type="bibr" rid="B14">2013</xref>). Differences in F<sub>ST</sub> estimates have been discussed by Bhatia et al. (<xref ref-type="bibr" rid="B2">2013</xref>), who examined the effect of choosing different estimation methods and SNP sets on estimates of F<sub>ST</sub>. Both of these aspects were found to impact F<sub>ST</sub> estimates, and the authors recommend that care be taken in the choice of both the estimators, and the SNPs being used.</p>
<p>Analysis of the reduction in genetic diversity provides another approach to examining selection, allowing the detection of possible &#x0201C;selective sweeps&#x0201D; which have resulted in regions where an allele conferring a selective advantage has risen in frequency in a population, carrying other variants in linkage disequilibrium to similarly increased frequencies, and thus reduced levels of diversity at that genomic locus. Tajima&#x00027;s D statistic (Tajima, <xref ref-type="bibr" rid="B48">1989</xref>) provides a popular method for identifying such regions (see reviews by Sabeti et al., <xref ref-type="bibr" rid="B45">2006</xref>; Barrett and Hoekstra, <xref ref-type="bibr" rid="B1">2011</xref>; Iskow et al., <xref ref-type="bibr" rid="B24">2012</xref>). More recently, modified methods have been developed to account for ascertainment bias in SNP microarrays (Ram&#x000ED;rez-Soriano and Nielsen, <xref ref-type="bibr" rid="B41">2009</xref>). Tajima&#x00027;s D is suitable for detecting evidence of positive selection in human populations occurring within the past 250,000 years (Sabeti et al., <xref ref-type="bibr" rid="B45">2006</xref>) or approximately 10,000 generations, and operates by identifying an excess of low-to-intermediate frequency variants. Another commonly used measure is Fay and Wu&#x00027;s H (Fay and Wu, <xref ref-type="bibr" rid="B10">2000</xref>) which is useful for detecting evidence of more recent positive selection (&#x0003C;80,000 years: Sabeti et al., <xref ref-type="bibr" rid="B45">2006</xref>, or approximately 3000 generations), particularly for intermediate-high frequency variants, and thus complements Tajima&#x00027;s D and other methods (see Fay and Wu, <xref ref-type="bibr" rid="B10">2000</xref>).</p>
<p>The advent of genome-wide genotyping technologies has facilitated the creation of whole genome haplotype maps, exemplified by the efforts of the HapMap Consortium (International HapMap Consortium, <xref ref-type="bibr" rid="B25">2003</xref>) for studying natural variation in humans, and with more recent initiatives extending this approach to other species including bovine (The Bovine HapMap Consortium, <xref ref-type="bibr" rid="B50">2009</xref>), maize (Gore et al., <xref ref-type="bibr" rid="B18">2009</xref>), and rice (Huang et al., <xref ref-type="bibr" rid="B23">2010</xref>). Analysis of haplotypes provides another mechanism for identifying evidence of selection, with a number of methods utilizing the Extended Haplotype Homozygosity (EHH) concept (Sabeti et al., <xref ref-type="bibr" rid="B44">2002</xref>). One of the more popular of these approaches is the Integrated Haplotype Homozygosity Score (iHS) methodology, which provides a standardized measure of the decay in EHH around a point (e.g., a SNP) from the derived allele relative to the ancestral allele (Voight et al., <xref ref-type="bibr" rid="B54">2006</xref>). Regions of slowly decaying haplotype homozygosity in the derived allele (i.e., longer than expected haplotypes, relative to the ancestral allele) are thus indicative of selection at that locus.</p>
<p>Underlying all of these tools are a number of demographic assumptions about the population(s) of interest, which must be considered when attempting to detect evidence of selection. In particular, for each method it is assumed that the existence of selective pressure is the most likely explanation for the generation of a statistically significant result. If present, other potential modifiers of variant frequency in a population can cause these tests to generate significant results, even in the absence of selection. Specific examples include: random drift, population bottlenecks, and population expansion, all of which can modify variant and haplotype frequencies in ways similar to selection. Some knowledge of the evolutionary history of the populations under study is therefore essential when considering the results generated when testing for evidence of selection.</p>
</sec>
<sec>
<title>Applying selection tools to genomic data</title>
<p>A number of software tools exist which implement the various methods described above for detecting evidence of selection. In order to use a specific tool, however, the data in question must be in an appropriate format. Both the F<sub>ST</sub> statistic, and Tajima&#x00027;s D, can be calculated using standard genotype data (e.g., SNP genotypes per individual). The iHS methodology, however, requires the use of haplotypes, and thus genotype data obtained from heterozygous populations must be phased prior to calculation of iHS. Various software applications exist for phasing genotype data (e.g., see Browning and Browning, <xref ref-type="bibr" rid="B4">2011</xref>), although large differences in accuracy and speed exist between the various algorithms (Williams et al., <xref ref-type="bibr" rid="B57">2012</xref>). Traditionally the Beagle algorithm has been a popular choice for phasing (Browning and Browning, <xref ref-type="bibr" rid="B3">2007</xref>), although a number of recently developed algorithms are offering increased speed and accuracy (Williams et al., <xref ref-type="bibr" rid="B57">2012</xref>; Delaneau et al., <xref ref-type="bibr" rid="B7">2013</xref>).</p>
<p>Once phasing is complete, the rehh package (Gautier and Vitalis, <xref ref-type="bibr" rid="B16">2012</xref>) provides a relatively simple interface for implementing various EHH-based analyses (including iHS) within the R computing environment (R Core Team, <xref ref-type="bibr" rid="B42">2014</xref>). Additionally, rehh provides tools for visualizing loci under selection, such as haplotype bifurcation plots (Sabeti et al., <xref ref-type="bibr" rid="B44">2002</xref>).</p>
</sec>
<sec>
<title>Bioinformatics workflow</title>
<p>In order to simplify the process of analysing genome-wide genotype data to identify selection signatures, we have developed a collection of scripts that implement the various tools described above. These scripts are publicly available via GitHub, and include instructions for installation and usage, as well as a detailed manual containing a worked example using a downloadable data set. The following sections describe the analytical processes implemented in the workflow.</p>
<sec>
<title>Data processing and analysis via command line tools</title>
<p>The analysis pipeline described here runs within a standard Linux operating system (in our case, Ubuntu 13.04, although almost any Linux-based system would be suitable), and requires the installation of a relatively small number of software tools (Table <xref ref-type="table" rid="T1">1</xref>). The starting point of the analysis is a variant call format (VCF) file of the genotype data of interest (Danecek et al., <xref ref-type="bibr" rid="B6">2011</xref>). This is a text file containing (at a minimum) information about variant positions, reference and alternative bases, and genotypes per sample. In order to permit calculation of measures comparing selection between multiple populations (e.g., F<sub>ST</sub>), samples from at least two populations are required to be present in the VCF file. Additionally, a file listing the subject identifiers for each population is also required, along with a genetic map of the chromosome(s) of interest in either SHAPEIT (Delaneau et al., <xref ref-type="bibr" rid="B7">2013</xref>) or PLINK (Purcell et al., <xref ref-type="bibr" rid="B38">2007</xref>) format. As a genetic map may not contain distances for all markers present in the VCF file, the genetic distance is inferred by linear interpolation (Nievergelt et al., <xref ref-type="bibr" rid="B35">2004</xref>). If a genetic map is not available for the organism under study, a physical map (e.g., a reference genome) can be substituted, an approach that has recently been used in cattle (Gautier and Naves, <xref ref-type="bibr" rid="B15">2011</xref>). Alternatively, if a representative sample of the species of interest is available, the LDHat software (McVean, <xref ref-type="bibr" rid="B31">2014</xref>) can be used to generate recombination rate estimates, allowing conversion of physical distance to genetic distance, as was done in a recent analysis in <italic>Arabidopsis</italic> (Meij&#x000F3;n et al., <xref ref-type="bibr" rid="B32">2013</xref>).</p>
<table-wrap position="float" id="T1">
<label>Table 1</label>
<caption><p><bold>Software tools used in the selection analysis workflow</bold>.</p></caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left"><bold>Application and version</bold></th>
<th align="left"><bold>Use in workflow</bold></th>
<th align="left"><bold>Website</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td align="left">R &#x02265; v3.0</td>
<td align="left">rehh</td>
<td align="left"><ext-link ext-link-type="uri" xlink:href="http://www.r-project.org">http://www.r-project.org</ext-link></td>
</tr>
<tr>
<td align="left">Perl &#x02265; v5.0</td>
<td align="left">Vcftools modules vcf-subset and vcf-merge</td>
<td align="left"><ext-link ext-link-type="uri" xlink:href="http://www.perl.org/">http://www.perl.org/</ext-link></td>
</tr>
<tr>
<td align="left">Python &#x02265; v2.6</td>
<td align="left">Running pipeline, haps file filtering and ancestral allele annotation</td>
<td align="left"><ext-link ext-link-type="uri" xlink:href="https://www.python.org/">https://www.python.org/</ext-link></td>
</tr>
<tr>
<td align="left">rehh v1.11</td>
<td align="left">Calculating iHS (and other EHH-based measures)</td>
<td align="left"><ext-link ext-link-type="uri" xlink:href="http://cran.r-project.org/package&#x0003D;rehh">http://cran.r-project.org/package&#x0003D;rehh</ext-link></td>
</tr>
<tr>
<td align="left">vcftools v1.11</td>
<td align="left">Conversion of VCF genotype data to PLINK format, and calculation of F<sub>ST</sub> and Tajima&#x00027;s D</td>
<td align="left"><ext-link ext-link-type="uri" xlink:href="http://vcftools.sourceforge.net">http://vcftools.sourceforge.net</ext-link></td>
</tr>
<tr>
<td align="left">SHAPEIT v2.r790</td>
<td align="left">Phasing the PLINK formatted data to produce phased haplotype file</td>
<td align="left"><ext-link ext-link-type="uri" xlink:href="http://www.shapeit.fr">http://www.shapeit.fr</ext-link></td>
</tr>
<tr>
<td align="left">Beagle v4 r1274</td>
<td align="left">Phasing un-phased VCF data to produce phased haplotype file</td>
<td align="left"><ext-link ext-link-type="uri" xlink:href="http://faculty.washington.edu/browning/beagle/beagle.html">http://faculty.washington.edu/browning/beagle/beagle.html</ext-link></td>
</tr>
<tr>
<td align="left">PLINK v1.07</td>
<td align="left">Remove SNPs with too many genotypes missing, filter on HWE and MAF</td>
<td align="left"><ext-link ext-link-type="uri" xlink:href="http://pngu.mgh.harvard.edu/&#x0007E;purcell/plink">http://pngu.mgh.harvard.edu/&#x0007E;purcell/plink</ext-link></td>
</tr>
<tr>
<td align="left">tabix/bgzip v0.2.5</td>
<td align="left">Required to get VCF into compressed and indexed format for vcftools</td>
<td align="left"><ext-link ext-link-type="uri" xlink:href="http://samtools.sourceforge.net/tabix.shtml">http://samtools.sourceforge.net/tabix.shtml</ext-link></td>
</tr>
<tr>
<td align="left">Multicore v0.1-7</td>
<td align="left">R multicore package used to parallelise rehh runs</td>
<td align="left"><ext-link ext-link-type="uri" xlink:href="http://cran.r-project.org/web/packages/multicore/index.html">http://cran.r-project.org/web/packages/multicore/index.html</ext-link></td>
</tr>
<tr>
<td align="left">impute2 v2.3.1</td>
<td align="left">Imputing genotypes from phased haplotype data</td>
<td align="left"><ext-link ext-link-type="uri" xlink:href="http://mathgen.stats.ox.ac.uk/impute/impute_v2.html">http://mathgen.stats.ox.ac.uk/impute/impute_v2.html</ext-link></td>
</tr>
<tr>
<td align="left">Pyfasta v0.5.2</td>
<td align="left">Required to process ancestral fasta files</td>
<td align="left"><ext-link ext-link-type="uri" xlink:href="https://github.com/brentp/pyfasta">https://github.com/brentp/pyfasta</ext-link></td>
</tr>
<tr>
<td align="left">PyVcf v0.6.0</td>
<td align="left">Required to process VCF files in python scripts</td>
<td align="left"><ext-link ext-link-type="uri" xlink:href="https://github.com/jamescasbon/PyVCF">https://github.com/jamescasbon/PyVCF</ext-link></td>
</tr>
<tr>
<td align="left">Variscan v2.0.3</td>
<td align="left">Calculation of Fay and Wu&#x00027;s H</td>
<td align="left"><ext-link ext-link-type="uri" xlink:href="http://www.ub.edu/softevol/variscan/">http://www.ub.edu/softevol/variscan/</ext-link></td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec>
<title>Analysis of a single population</title>
<p>For a population VCF file that contains phase information, indels are first removed using the <italic>vcftools</italic> software (Danecek et al., <xref ref-type="bibr" rid="B6">2011</xref>), as ancestral allele data are only available for SNP genotypes. The VCF is then converted to the Haps format (phased haplotypes: SNP genotypes per haplotype, per individual).</p>
<p>For a population VCF file without phased information, the file is converted to <italic>PLINK</italic> format (ped/map files) using <italic>vcftools</italic>. The &#x0201C;Ped&#x0201D; file contains relatedness information (if any) between subjects, affection status (e.g., for human case/control studies), and genotype data, while the &#x0201C;Map&#x0201D; file contains the genomic location of each variant (e.g., SNPs). PLINK is then used to filter the data based on multiple criteria (missingness, minor allele frequency, Hardy-Weinberg equilibrium, indels), and phasing is performed via SHAPEIT v2 (Delaneau et al., <xref ref-type="bibr" rid="B7">2013</xref>) to produce a &#x0201C;Haps&#x0201D; file of phased haplotypes (SNP genotypes per haplotype, per individual) and a &#x0201C;Sample&#x0201D; file (genotype-specific information). Alternatively (or if a physical map is used), <italic>Beagle</italic> can be used to phase the data (Browning and Browning, <xref ref-type="bibr" rid="B3">2007</xref>). If imputation is required, then <italic>impute2</italic> (Howie et al., <xref ref-type="bibr" rid="B22">2009</xref>) is used, followed by a second round of indel filtering (to remove any indels introduced by the imputation process).</p>
<p>The phased data are annotated with ancestral allele information (via a custom Python script). These data are then analyzed in R (R Core Team, <xref ref-type="bibr" rid="B42">2014</xref>) where the R package rehh (Gautier and Vitalis, <xref ref-type="bibr" rid="B16">2012</xref>) is used to calculate EHH, and integrated EHH (iES).</p>
</sec>
<sec>
<title>Analysis of multiple populations</title>
<p>If genotype data from multiple populations are available, then the data from the VCF file are used to calculate F<sub>ST</sub> between each pair of populations using <italic>vcftools</italic>. F<sub>ST</sub> is calculated using both the method of Weir and Cockerham (<xref ref-type="bibr" rid="B55">1984</xref>), and the method developed as part of the HapMap project (International HapMap Consortium, <xref ref-type="bibr" rid="B26">2005</xref>). The genotype data are then split into per-population VCF files, and the analysis of each population proceeds as described above (&#x0201C;Analysis of a single population&#x0201D;), producing filtered phased data, and EHH and iHS values. Calculation of iHS requires knowledge of the ancestral allele relating to the SNP of interest. For human data, this information was traditionally generated through direct comparison of DNA to that of a close phylogenetic relative, such as the chimpanzee. More recently, however, phylogenetic trees have been used to derive ancestral alleles in humans, based on DNA sequence data from related species. The ancestral information used here comes from the ancestral FASTA files provided by the 1000 Genomes Project and derived from the 6-way Enredo-Pecan-Ortheus (EPO) alignment (Paten et al., <xref ref-type="bibr" rid="B36">2008a</xref>,<xref ref-type="bibr" rid="B37">b</xref>) from the Ensembl Compara 59 database (Flicek et al., <xref ref-type="bibr" rid="B12">2012</xref>). For non-human species, a FASTA file containing ancestral allele information is required. These are also available via Ensembl for some animal species (along with the 6-way EPO alignment for humans) from: <ext-link ext-link-type="uri" xlink:href="http://www.ensembl.org/info/genome/compara/analyses.html">http://www.ensembl.org/info/genome/compara/analyses.html</ext-link>.</p>
<p>Alternatively, the EPO pipeline can be used locally to generate an ancestral reference, or a two-way alignment can be performed between the genome of interest, and that of a species with which a recent common ancestor is shared. This approach has previously been applied to human and chimpanzee by Voight et al. (<xref ref-type="bibr" rid="B54">2006</xref>) and to <italic>Arabidopsis thaliana</italic> and <italic>A. lyrata</italic> by Meij&#x000F3;n et al. (<xref ref-type="bibr" rid="B32">2013</xref>). The script used here can annotate either a phased haps file or a phased VCF file using the ancestral allele information. Finally, for each pair of populations, Rsb (the standardized ratio of iES from two populations) is calculated using the rehh package in R (Voight et al., <xref ref-type="bibr" rid="B54">2006</xref>; Tang et al., <xref ref-type="bibr" rid="B49">2007</xref>).</p>
</sec>
<sec>
<title>Visualizing the outputs&#x02014;investigating selection at the human lactase gene locus as an example</title>
<p>Once the various measures of selection have been calculated in a genotype data set from one or more populations, it is helpful to visualize the results. As mentioned above, the public GitHub repository for the pipeline includes a worked example of running the code on a human data set. The data set used relates to a subset of genotype data from chromosome 2 of the human genome, derived from data downloaded from the 1000 Genomes Project. Of interest is the region around the gene encoding <italic>lactase</italic> (LCT - HG19 chr2: 136,545,410&#x02013;136,594,750), which has shown evidence of selection over the past 5000&#x02013;10,000 years (Bersaglieri et al., <xref ref-type="bibr" rid="B1a">2004</xref>). The CEU (European) and YRI (Yoruban) populations were used for the analysis here, comprising 85 and 88 samples respectively.</p>
<p>The analysis pipeline produced results for the following statistics: F<sub>ST</sub>, Rsb, iHS, Fay and Wu&#x00027;s H, and Tajima&#x00027;s D. A window size of 30 Kbp was used for calculating F<sub>ST</sub> and Fay and Wu&#x00027;s H (with a sliding window of 3 Kbp for the latter), and a 3 Kbp window was used for Tajima&#x00027;s D. Figure <xref ref-type="fig" rid="F1">1</xref> contains plots of Rsb and iHS for the CEU and YRI populations (chromosome-wide, and zoomed-in around the LCT gene), generated in R using the ggplot2 package (Wickham, <xref ref-type="bibr" rid="B56">2009</xref>). The plots show clear evidence for differing degrees of selective pressure in the LCT gene between the CEU and YRI populations (i.e., selection in the CEU population), supporting previous observations in the literature (e.g., Bersaglieri et al., <xref ref-type="bibr" rid="B1a">2004</xref>). Not all of the measures of selection generated by the pipeline support this conclusion, however, with similar plots for F<sub>ST</sub> (Figure <xref ref-type="supplementary-material" rid="SM1">S1</xref>), Tajima&#x00027;s D (Figure <xref ref-type="supplementary-material" rid="SM2">S2</xref>), and Fay and Wu&#x00027;s H (Figure <xref ref-type="supplementary-material" rid="SM3">S3</xref>), providing little evidence of selection in this region. These results (which agree with those for LCT available via the &#x0201C;Selection Browser 1.0&#x0201D; application of Pybus et al., <xref ref-type="bibr" rid="B39">2014</xref>) highlight the importance of utilizing multiple measures for investigating selection, with different methodologies producing quite different results when applied to the same data. This again reinforces the fact that the various methods are utilizing different patterns of genetic variation to identify evidence of selection.</p>
<fig id="F1" position="float">
<label>Figure 1</label>
<caption><p><bold>Plots of Rsb (top row) and iHS (middle and bottom rows) values across chromosome 2 (whole chromosome in the left column, and the region around the LCT gene in the right column) based on 1000 Genomes Project data for the CEU and YRI populations</bold>. Blue vertical lines/boxes on the plots indicate the location of the LCT gene, and the red horizontal lines denote a <italic>p</italic>-value of less than 5% for any Rsb value above the line. The marked deviation of iHS away from zero in the CEU population provides evidence for the region around the LCT gene having been under selective pressure in the past. In contrast, there is no such evidence in the YRI population. This difference is also reflected in the Rsb statistic, which examines the relative evidence for selection in the two populations and here indicates stronger evidence for this region having been under selective pressure in the CEU cohort.</p></caption>
<graphic xlink:href="fgene-05-00293-g0001.tif"/>
</fig>
</sec>
</sec>
<sec sec-type="discussion" id="s2">
<title>Discussion</title>
<p>Here we present a simple workflow, and an associated collection of shell and R scripts, for identifying signatures of selection in diploid organisms. The workflow allows researchers to start from a collection of genome-wide genotype data for multiple individuals, and use a collection of freely available software tools to identify regions that exhibit evidence of having undergone selection. A range of tools have been developed for specific analyses of smaller data sets (e.g., Librado and Rozas, <xref ref-type="bibr" rid="B29">2009</xref>; Delport et al., <xref ref-type="bibr" rid="B8">2010</xref>); however, the workflow presented here has the ability to analyze large data sets using multiple analytical methods to detect evidence of selection. An additional benefit of this tool set is our incorporation of parallelization capability into some of the tools to speed up analyses. These include rehh, vcf-subset, SHAPEIT, and IMPUTE2. We have also included a version of rehh which invokes the R multicore package (Urbanek, <xref ref-type="bibr" rid="B52">2011</xref>) to allow utilization of multiple CPU cores. Other tools could potentially be parallelized should they become bottlenecks in analytical performance in larger data sets. The example analysis of chromosome 2 presented here required approximately 12.5 h of computation, running on 10 cores of a recent multicore Linux server.</p>
<p>The methods described here fall broadly into three categories: frequency-based methods (Tajima&#x00027;s D and Fay and Wu&#x00027;s H), linkage disequilibrium-based methods (Rsb and iHS), and population differentiation-based methods (F<sub>ST</sub>), as reviewed by Vitti et al. (<xref ref-type="bibr" rid="B53">2013</xref>). By using each of these approaches, the differing characteristics of each method provide users with the ability to identify patterns of selective pressure arising in distinct contexts. As noted earlier, the time scale over which selection has occurred has a major impact on the ability of each method to detect evidence of its presence, with the frequency-based and population differentiation-based methods best suited to detecting events occurring further in the past. This is because these methods rely on the accumulation of additional mutations around the causal variant. In situations where the fitness advantage of the selected variant is small (particularly if it is recessive), then the time taken for the selected variant to rise to a detectable frequency in the population will be much longer, thus reducing the power of these methods. In situations where a new mutation (or a previously neutral variant encountering an environment of altered selective pressure) provides a fitness advantage and rises in frequency in the population without achieving fixation, linkage disequilibrium-based approaches provide increased power for detecting evidence of selection (Ferrer-Admetlla et al., <xref ref-type="bibr" rid="B11">2014</xref>).</p>
<p>The differences in the results produced here for the human LCT example reflect the underlying methods of detection employed by each of the approaches, with time scale likely having a major impact. The three methods which found no evidence to support selection (F<sub>ST</sub>, Tajima&#x00027;s D, and Fay and Wu&#x00027;s H) are all better placed to detect evidence of selection in the more distant past, well before the time at which the LCT gene was subjected to selective pressure. It is perhaps not surprising, therefore, that only the linkage disequilibrium-based methods (iHS and Rsb) provide any evidence of selection in this region.</p>
<p>As part of providing access to this computational workflow, it is important to mention the caveats associated with performing selection analyses. Reviews by Nielsen (<xref ref-type="bibr" rid="B34a">2005</xref>) and Vitti et al. (<xref ref-type="bibr" rid="B53">2013</xref>) provide an excellent overview of these and other issues associated with the detection of evidence of selection using genetic data. All of the tools implemented in this pipeline are designed to elucidate patterns of genotypic variation that are consistent with the presence of selective pressure at some time in the past. However, even when such patterns are identified, there is no guarantee that they are the result of selection, rather than other unrelated ancestral events. For example, Tajima&#x00027;s D is known to be sensitive to population growth (Simonsen et al., <xref ref-type="bibr" rid="B47">1995</xref>), whereas methods that assess changes in linkage disequilibrium and/or haplotype frequencies can be influenced by differences in recombination rates across the genome (Nielsen, <xref ref-type="bibr" rid="B34a">2005</xref>). Even in the case where selective pressure has led to changes in haplotype frequencies, it may not be possible to identify the type of selection involved. For example, positive selection (e.g., via hard or soft sweeps) may leave a genomic footprint that is indistinguishable from that created by background selection against deleterious mutations (Vitti et al., <xref ref-type="bibr" rid="B53">2013</xref>).</p>
<p>There are a number of improvements that could be made to this workflow. In terms of the various measures of selection that we have employed, we note that calculation of the F<sub>ST</sub> statistic is dependent on window size and step size, while calculation of Tajima&#x00027;s D statistic is dependent on window size. Ongoing work will examine how best to implement these methods on multiple scales, and allow the results to be combined. Incorporating a measure of the statistical significance of the F<sub>ST</sub> statistic would also be an improvement, along with appropriate adjustment for multiple hypothesis testing. Similarly, our pipeline could be extended to incorporate probabilistic measures of genotype, particularly relevant for modern genotyping-by-sequencing (GBS) technologies (e.g., Elshire et al., <xref ref-type="bibr" rid="B9">2011</xref>; Majewski et al., <xref ref-type="bibr" rid="B30">2011</xref>) where there is uncertainty in genotype calls (Li, <xref ref-type="bibr" rid="B28a">2011</xref>; Li et al., <xref ref-type="bibr" rid="B28b">2011</xref>), and for situations where SNP selection methods result in ascertainment bias (Ram&#x000ED;rez-Soriano and Nielsen, <xref ref-type="bibr" rid="B41">2009</xref>).</p>
<p>In addition to improving the algorithmic aspects of the pipeline, additional benefit could be gained through the inclusion of support for indel variants. Currently, calculations of Fay and Wu&#x00027;s H, iHS, and Tajima&#x00027;s D are not carried out for indels. Adding support for this feature would be difficult for human analyses involving iHS and Fay and Wu&#x00027;s H, as the available ancestral FASTA files do not contain any indel information. The inclusion of indels in the Tajima&#x00027;s D calculations is possible, however, and would require a reorganization of the pipeline to ensure indels are preserved until the point at which the D statistic is generated.</p>
<p>The pipeline is also flexible regarding input data types and biological contexts. The entr&#x000E9;e into this pipeline is via VCF-formatted files, and it can therefore be used to analyze whole genome (re)sequence, transcriptome-derived data, exomes or specific gene candidates of interest on very large samples. More recently, several more computationally-intensive analytical methods have been developed (e.g., Grossman et al., <xref ref-type="bibr" rid="B19">2010</xref>; Ronen et al., <xref ref-type="bibr" rid="B43">2013</xref>) which could potentially be integrated into workflows such as those presented here.</p>
<p>Despite the potential for ongoing enhancement, we believe that in its current state this workflow provides researchers with a valuable tool for investigating selection within a collection of individuals for which high-density genotype data are available, and we hope that the research community is able to make good use of these tools. To that end we have made the pipeline software publicly available as a GitHub repository at: <ext-link ext-link-type="uri" xlink:href="https://github.com/smilefreak/selectionTools">https://github.com/smilefreak/selectionTools</ext-link></p>
<p>The repository includes an automated installation script, and a detailed manual containing an example analysis that can be followed by new users. The pipeline version corresponding to this publication is 1.0. As additions and refinements are made, these changes will be versioned and commented. However, using the functionality of GitHub, researchers will always be able to access the original published versions of the scripts that are referred to here.</p>
<sec>
<title>Conflict of interest statement</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
</sec>
</body>
<back>
<ack>
<p>The Virtual Institute of Statistical Genetics (<ext-link ext-link-type="uri" xlink:href="http://www.visg.co.nz">www.visg.co.nz</ext-link>) is thanked for funding this research, including the PhD stipend of HTN. VISG is supported by a contract (C04X0804) from the Ministry of Business, Innovation and Employment. Computational resources from the New Zealand eScience Infrastructure (NeSI) were utilized for some of the analyses presented here (<ext-link ext-link-type="uri" xlink:href="http://www.nesi.org.nz">www.nesi.org.nz</ext-link>). We also thank Dr. John McCallum for discussions in the early phases of this project, and the reviewers of our manuscript for their constructive comments and helpful suggestions.</p>
</ack>
<sec sec-type="supplementary-material" id="s3">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="http://www.frontiersin.org/journal/10.3389/fgene.2014.00293/abstract">http://www.frontiersin.org/journal/10.3389/fgene.2014.00293/abstract</ext-link></p>
<supplementary-material xlink:href="DataSheet1.ZIP" id="SM1" mimetype="application/zip" xmlns:xlink="http://www.w3.org/1999/xlink">
<label>Figure S1</label>
<caption><p><bold>Plot of F<sub>ST</sub> values across chromosome 2 (whole chromosome, and the region around the LCT gene) based on 1000 Genomes Project data for the CEU and YRI populations</bold>. A 3 Kbp window was used for calculations. Blue vertical lines/boxes on the plots indicate the location of the LCT gene, and the red horizontal lines denote the mean plus three standard deviations for all the F<sub>ST</sub> window calculations. Although there is an indication from the whole chromosome plot that regions of this chromosome may have been under selection, there is little support for this in the region of the LCT gene.</p></caption>
</supplementary-material>
<supplementary-material xlink:href="DataSheet1.ZIP" id="SM2" mimetype="application/zip" xmlns:xlink="http://www.w3.org/1999/xlink">
<label>Figure S2</label>
<caption><p><bold>Plot of Tajima&#x00027;s D values across chromosome 2 (whole chromosome, and the region around the LCT gene) based on 1000 Genomes Project data for the CEU and YRI populations</bold>. A 3 Kbp window was used for calculations. Blue vertical lines/boxes on the plots indicate the location of the LCT gene. Based on the values of the statistic around the LCT gene, there is no evidence of this region having been under selection in either the CEU or YRI populations.</p></caption>
</supplementary-material>
<supplementary-material xlink:href="DataSheet1.ZIP" id="SM3" mimetype="application/zip" xmlns:xlink="http://www.w3.org/1999/xlink">
<label>Figure S3</label>
<caption><p><bold>Plot of Fay and Wu&#x00027;s H values across chromosome 2 (whole chromosome, and region around LCT gene) based on 1000 Genomes Project data for the CEU and YRI populations</bold>. While there appears to be some evidence for regions of the chromosome having been subjected to selective pressure, when the area around the LCT gene is examined, for both populations the values of H in this region are unremarkable relative to those in the remainder of the chromosome.</p></caption>
</supplementary-material>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Barrett</surname> <given-names>R. D. H.</given-names></name> <name><surname>Hoekstra</surname> <given-names>H. E.</given-names></name></person-group> (<year>2011</year>). <article-title>Molecular spandrels: tests of adaptation at the genetic level</article-title>. <source>Nat. Rev. Genet</source>. <volume>12</volume>, <fpage>767</fpage>&#x02013;<lpage>780</lpage>. <pub-id pub-id-type="doi">10.1038/nrg3015</pub-id><pub-id pub-id-type="pmid">22005986</pub-id></citation>
</ref>
<ref id="B1a">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bersaglieri</surname> <given-names>T.</given-names></name> <name><surname>Sabeti</surname> <given-names>P. C.</given-names></name> <name><surname>Patterson</surname> <given-names>N.</given-names></name> <name><surname>Vanderploeg</surname> <given-names>T.</given-names></name> <name><surname>Schaffner</surname> <given-names>S. F.</given-names></name> <name><surname>Drake</surname> <given-names>J. A.</given-names></name> <etal/></person-group>. (<year>2004</year>). <article-title>Genetic signatures of strong recent positive selection at the lactase gene</article-title>. <source>Am. J. Hum. Genet</source>. <volume>74</volume>, <fpage>1111</fpage>&#x02013;<lpage>1120</lpage>. <pub-id pub-id-type="doi">10.1086/421051</pub-id><pub-id pub-id-type="pmid">15114531</pub-id></citation>
</ref>
<ref id="B2">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bhatia</surname> <given-names>G.</given-names></name> <name><surname>Patterson</surname> <given-names>N.</given-names></name> <name><surname>Sankararaman</surname> <given-names>S.</given-names></name> <name><surname>Price</surname> <given-names>A. L.</given-names></name></person-group> (<year>2013</year>). <article-title>Estimating and interpreting F<sub>ST</sub>: the impact of rare variants</article-title>. <source>Genome Res</source>. <volume>23</volume>, <fpage>1514</fpage>&#x02013;<lpage>1521</lpage>. <pub-id pub-id-type="doi">10.1101/gr.154831.113</pub-id><pub-id pub-id-type="pmid">23861382</pub-id></citation>
</ref>
<ref id="B3">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Browning</surname> <given-names>S. R.</given-names></name> <name><surname>Browning</surname> <given-names>B. L.</given-names></name></person-group> (<year>2007</year>). <article-title>Rapid and accurate haplotype phasing and missing-data inference for whole-genome association studies by use of localized haplotype clustering</article-title>. <source>Am. J. Hum. Genet</source>. <volume>81</volume>, <fpage>1084</fpage>&#x02013;<lpage>1097</lpage>. <pub-id pub-id-type="doi">10.1086/521987</pub-id><pub-id pub-id-type="pmid">17924348</pub-id></citation>
</ref>
<ref id="B4">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Browning</surname> <given-names>S. R.</given-names></name> <name><surname>Browning</surname> <given-names>B. L.</given-names></name></person-group> (<year>2011</year>). <article-title>Haplotype phasing: existing methods and new developments</article-title>. <source>Nat. Rev. Genet</source>. <volume>12</volume>, <fpage>703</fpage>&#x02013;<lpage>714</lpage>. <pub-id pub-id-type="doi">10.1038/nrg3054</pub-id><pub-id pub-id-type="pmid">21921926</pub-id></citation>
</ref>
<ref id="B5">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Cavanagh</surname> <given-names>C. R.</given-names></name> <name><surname>Chao</surname> <given-names>S.</given-names></name> <name><surname>Wang</surname> <given-names>S.</given-names></name> <name><surname>Huang</surname> <given-names>B. E.</given-names></name> <name><surname>Stephen</surname> <given-names>S.</given-names></name> <name><surname>Kiani</surname> <given-names>S.</given-names></name> <etal/></person-group>. (<year>2013</year>). <article-title>Genome-wide comparative diversity uncovers multiple targets of selection for improvement in hexaploid wheat landraces and cultivars</article-title>. <source>Proc. Natl. Acad. Sci. U.S.A</source>. <volume>110</volume>, <fpage>8057</fpage>&#x02013;<lpage>8062</lpage>. <pub-id pub-id-type="doi">10.1073/pnas.1217133110</pub-id><pub-id pub-id-type="pmid">23630259</pub-id></citation>
</ref>
<ref id="B6">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Danecek</surname> <given-names>P.</given-names></name> <name><surname>Auton</surname> <given-names>A.</given-names></name> <name><surname>Abecasis</surname> <given-names>G.</given-names></name> <name><surname>Albers</surname> <given-names>C. A.</given-names></name> <name><surname>Banks</surname> <given-names>E.</given-names></name> <name><surname>DePristo</surname> <given-names>M. A.</given-names></name> <etal/></person-group>. (<year>2011</year>). <article-title>The variant call format and VCFtools</article-title>. <source>Bioinformatics</source> <volume>27</volume>, <fpage>2156</fpage>&#x02013;<lpage>2158</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btr330</pub-id><pub-id pub-id-type="pmid">21653522</pub-id></citation>
</ref>
<ref id="B7">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Delaneau</surname> <given-names>O.</given-names></name> <name><surname>Zagury</surname> <given-names>J.-F.</given-names></name> <name><surname>Marchini</surname> <given-names>J.</given-names></name></person-group> (<year>2013</year>). <article-title>Improved whole-chromosome phasing for disease and population genetic studies</article-title>. <source>Nat. Methods</source> <volume>10</volume>, <fpage>5</fpage>&#x02013;<lpage>6</lpage>. <pub-id pub-id-type="doi">10.1038/nmeth.2307</pub-id><pub-id pub-id-type="pmid">23269371</pub-id></citation>
</ref>
<ref id="B8">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Delport</surname> <given-names>W.</given-names></name> <name><surname>Poon</surname> <given-names>A. F. Y.</given-names></name> <name><surname>Frost</surname> <given-names>S. D. W.</given-names></name> <name><surname>Kosakovsky Pond</surname> <given-names>S. L.</given-names></name></person-group> (<year>2010</year>). <article-title>Datamonkey 2010: a suite of phylogenetic analysis tools for evolutionary biology</article-title>. <source>Bioinformatics</source> <volume>26</volume>, <fpage>2455</fpage>&#x02013;<lpage>2457</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btq429</pub-id><pub-id pub-id-type="pmid">20671151</pub-id></citation>
</ref>
<ref id="B9">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Elshire</surname> <given-names>R. J.</given-names></name> <name><surname>Glaubitz</surname> <given-names>J. C.</given-names></name> <name><surname>Sun</surname> <given-names>Q.</given-names></name> <name><surname>Poland</surname> <given-names>J. A.</given-names></name> <name><surname>Kawamoto</surname> <given-names>K.</given-names></name> <name><surname>Buckler</surname> <given-names>E. S.</given-names></name> <etal/></person-group>. (<year>2011</year>). <article-title>A robust, simple Genotyping-by-Sequencing (GBS) approach for high diversity species</article-title>. <source>PLoS ONE</source> <volume>6</volume>:<fpage>e19379</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pone.0019379</pub-id><pub-id pub-id-type="pmid">21573248</pub-id></citation>
</ref>
<ref id="B10">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Fay</surname> <given-names>J. C.</given-names></name> <name><surname>Wu</surname> <given-names>C.-I.</given-names></name></person-group> (<year>2000</year>). <article-title>Hitchhiking under positive Darwinian selection</article-title>. <source>Genetics</source> <volume>155</volume>, <fpage>1405</fpage>&#x02013;<lpage>1413</lpage>. <pub-id pub-id-type="pmid">10880498</pub-id></citation>
</ref>
<ref id="B11">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ferrer-Admetlla</surname> <given-names>A.</given-names></name> <name><surname>Liang</surname> <given-names>M.</given-names></name> <name><surname>Korneliussen</surname> <given-names>T.</given-names></name> <name><surname>Nielsen</surname> <given-names>R.</given-names></name></person-group> (<year>2014</year>). <article-title>On detecting incomplete soft or hard selective sweeps using haplotype structure</article-title>. <source>Mol. Biol. Evol</source>. <volume>31</volume>, <fpage>1275</fpage>&#x02013;<lpage>1291</lpage>. <pub-id pub-id-type="doi">10.1093/molbev/msu077</pub-id><pub-id pub-id-type="pmid">24554778</pub-id></citation>
</ref>
<ref id="B12">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Flicek</surname> <given-names>P.</given-names></name> <name><surname>Amode</surname> <given-names>M. R.</given-names></name> <name><surname>Barrell</surname> <given-names>D.</given-names></name> <name><surname>Beal</surname> <given-names>K.</given-names></name> <name><surname>Brent</surname> <given-names>S.</given-names></name> <name><surname>Carvalho-Silva</surname> <given-names>D.</given-names></name> <etal/></person-group>. (<year>2012</year>). <article-title>Ensembl 2012</article-title>. <source>Nucleic Acids Res</source>. <volume>40</volume>, <fpage>D84</fpage>&#x02013;<lpage>D90</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkr991</pub-id><pub-id pub-id-type="pmid">22086963</pub-id></citation>
</ref>
<ref id="B14">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Fumagalli</surname> <given-names>M.</given-names></name> <name><surname>Vieira</surname> <given-names>F. G.</given-names></name> <name><surname>Korneliussen</surname> <given-names>T. S.</given-names></name> <name><surname>Linderoth</surname> <given-names>T.</given-names></name> <name><surname>Huerta-S&#x000E1;nchez</surname> <given-names>E.</given-names></name> <name><surname>Albrechtsen</surname> <given-names>A.</given-names></name> <etal/></person-group>. (<year>2013</year>). <article-title>Quantifying population genetic differentiation from next-generation sequencing data</article-title>. <source>Genetics</source> <volume>195</volume>, <fpage>979</fpage>&#x02013;<lpage>992</lpage>. <pub-id pub-id-type="doi">10.1534/genetics.113.154740</pub-id><pub-id pub-id-type="pmid">23979584</pub-id></citation>
</ref>
<ref id="B15">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gautier</surname> <given-names>M.</given-names></name> <name><surname>Naves</surname> <given-names>M.</given-names></name></person-group> (<year>2011</year>). <article-title>Footprints of selection in the ancestral admixture of a New World Creole cattle breed</article-title>. <source>Mol. Ecol</source>. <volume>20</volume>, <fpage>3128</fpage>&#x02013;<lpage>3143</lpage>. <pub-id pub-id-type="doi">10.1111/j.1365-294X.2011.05163.x</pub-id><pub-id pub-id-type="pmid">21689193</pub-id></citation>
</ref>
<ref id="B16">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gautier</surname> <given-names>M.</given-names></name> <name><surname>Vitalis</surname> <given-names>R.</given-names></name></person-group> (<year>2012</year>). <article-title>rehh: an R package to detect footprints of selection in genome-wide SNP data from haplotype structure</article-title>. <source>Bioinformatics</source> <volume>28</volume>, <fpage>1176</fpage>&#x02013;<lpage>1177</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/bts115</pub-id><pub-id pub-id-type="pmid">22402612</pub-id></citation>
</ref>
<ref id="B17">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Gonzalez-Martinez</surname> <given-names>S. C.</given-names></name> <name><surname>Dillon</surname> <given-names>S. K.</given-names></name> <name><surname>Garnier-G&#x000E9;r&#x000E9;</surname> <given-names>P. H.</given-names></name> <name><surname>Krutovskii</surname> <given-names>K. V.</given-names></name> <name><surname>Alia</surname> <given-names>R.</given-names></name> <name><surname>Burgarella</surname> <given-names>C.</given-names></name> <etal/></person-group>. (<year>2011</year>). <article-title>Patterns of nucleotide diversity and association mapping,</article-title> in <source>Genetics, Genomics and Breeding of Conifers</source>, eds <person-group person-group-type="editor"><name><surname>Plomion</surname> <given-names>C.</given-names></name> <name><surname>Bousquet</surname> <given-names>J.</given-names></name> <name><surname>Kole</surname> <given-names>C.</given-names></name></person-group> (<publisher-loc>St Helier</publisher-loc>: <publisher-name>Science Publishers</publisher-name>), <fpage>239</fpage>&#x02013;<lpage>275</lpage>.</citation>
</ref>
<ref id="B18">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gore</surname> <given-names>M. A.</given-names></name> <name><surname>Chia</surname> <given-names>J. M.</given-names></name> <name><surname>Elshire</surname> <given-names>R. J.</given-names></name> <name><surname>Sun</surname> <given-names>Q.</given-names></name> <name><surname>Ersoz</surname> <given-names>E. S.</given-names></name> <name><surname>Hurwitz</surname> <given-names>B. L.</given-names></name> <etal/></person-group>. (<year>2009</year>). <article-title>A first-generation haplotype map of maize</article-title>. <source>Science</source> <volume>326</volume>, <fpage>1115</fpage>&#x02013;<lpage>1117</lpage>. <pub-id pub-id-type="doi">10.1126/science.1177837</pub-id><pub-id pub-id-type="pmid">19965431</pub-id></citation>
</ref>
<ref id="B19">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Grossman</surname> <given-names>S. R.</given-names></name> <name><surname>Shylakhter</surname> <given-names>I.</given-names></name> <name><surname>Karlsson</surname> <given-names>E. K.</given-names></name> <name><surname>Byrne</surname> <given-names>E. H.</given-names></name> <name><surname>Morales</surname> <given-names>S.</given-names></name> <name><surname>Frieden</surname> <given-names>G.</given-names></name> <etal/></person-group>. (<year>2010</year>). <article-title>A composite of multiple signals distinguishes causal variants in regions of positive selection</article-title>. <source>Science</source> <volume>327</volume>, <fpage>883</fpage>&#x02013;<lpage>886</lpage>. <pub-id pub-id-type="doi">10.1126/science.1183863</pub-id><pub-id pub-id-type="pmid">20056855</pub-id></citation>
</ref>
<ref id="B20">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gu</surname> <given-names>J.</given-names></name> <name><surname>Orr</surname> <given-names>N.</given-names></name> <name><surname>Park</surname> <given-names>S.</given-names></name> <name><surname>Katz</surname> <given-names>L.</given-names></name> <name><surname>Sulimova</surname> <given-names>G.</given-names></name> <name><surname>MacHugh</surname> <given-names>D.</given-names></name> <etal/></person-group>. (<year>2009</year>). <article-title>A genome scan for positive selection in thoroughbred horses</article-title>. <source>PLoS ONE</source> <volume>4</volume>:<fpage>e5767</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pone.0005767</pub-id><pub-id pub-id-type="pmid">19503617</pub-id></citation>
</ref>
<ref id="B21">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Holsinger</surname> <given-names>K. E.</given-names></name> <name><surname>Weir</surname> <given-names>B. S.</given-names></name></person-group> (<year>2009</year>). <article-title>Genetics in geographically structured populations: defining, estimating and interpreting F(ST)</article-title>. <source>Nat. Rev. Genet</source>. <volume>10</volume>, <fpage>639</fpage>&#x02013;<lpage>650</lpage>. <pub-id pub-id-type="doi">10.1038/nrg2611</pub-id><pub-id pub-id-type="pmid">19687804</pub-id></citation>
</ref>
<ref id="B22">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Howie</surname> <given-names>B. N.</given-names></name> <name><surname>Donnelly</surname> <given-names>P.</given-names></name> <name><surname>Marchini</surname> <given-names>J.</given-names></name></person-group> (<year>2009</year>). <article-title>A flexible and accurate genotype imputation method for the next generation of genome-wide association studies</article-title>. <source>PLoS Genet</source>. <volume>5</volume>:<fpage>e1000529</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pgen.1000529</pub-id><pub-id pub-id-type="pmid">19543373</pub-id></citation>
</ref>
<ref id="B23">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Huang</surname> <given-names>X.</given-names></name> <name><surname>Wei</surname> <given-names>X.</given-names></name> <name><surname>Sang</surname> <given-names>T.</given-names></name> <name><surname>Zhao</surname> <given-names>Q.</given-names></name> <name><surname>Feng</surname> <given-names>Q.</given-names></name> <name><surname>Zhao</surname> <given-names>Y.</given-names></name> <etal/></person-group>. (<year>2010</year>). <article-title>Genome-wide association studies of 14 agronomic traits in rice landraces</article-title>. <source>Nat. Genet</source>. <volume>42</volume>, <fpage>961</fpage>&#x02013;<lpage>967</lpage>. <pub-id pub-id-type="doi">10.1038/ng.695</pub-id><pub-id pub-id-type="pmid">20972439</pub-id></citation>
</ref>
<ref id="B24">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Iskow</surname> <given-names>R. C.</given-names></name> <name><surname>Gokcumen</surname> <given-names>O.</given-names></name> <name><surname>Lee</surname> <given-names>C.</given-names></name></person-group> (<year>2012</year>). <article-title>Exploring the role of copy number variants in human adaptation</article-title>. <source>Trends Genet</source>. <volume>28</volume>, <fpage>245</fpage>&#x02013;<lpage>257</lpage>. <pub-id pub-id-type="doi">10.1016/j.tig.2012.03.002</pub-id><pub-id pub-id-type="pmid">22483647</pub-id></citation>
</ref>
<ref id="B25">
<citation citation-type="journal"><person-group person-group-type="author"><collab>International HapMap Consortium.</collab></person-group> (<year>2003</year>). <article-title>The International HapMap Project</article-title>. <source>Nature</source> <volume>426</volume>, <fpage>789</fpage>&#x02013;<lpage>796</lpage>. <pub-id pub-id-type="doi">10.1038/nature02168</pub-id><pub-id pub-id-type="pmid">14685227</pub-id></citation>
</ref>
<ref id="B26">
<citation citation-type="journal"><person-group person-group-type="author"><collab>International HapMap Consortium.</collab></person-group> (<year>2005</year>). <article-title>A haplotype map of the human genome</article-title>. <source>Nature</source> <volume>437</volume>, <fpage>1299</fpage>&#x02013;<lpage>1320</lpage>. <pub-id pub-id-type="doi">10.1038/nature04226</pub-id><pub-id pub-id-type="pmid">16255080</pub-id></citation>
</ref>
<ref id="B27">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Laland</surname> <given-names>K. N.</given-names></name> <name><surname>Odling-Smee</surname> <given-names>J.</given-names></name> <name><surname>Myles</surname> <given-names>S.</given-names></name></person-group> (<year>2010</year>). <article-title>How culture shaped the human genome: bringing genetics and the human sciences together</article-title>. <source>Nat. Rev. Genet</source>. <volume>11</volume>, <fpage>137</fpage>&#x02013;<lpage>148</lpage>. <pub-id pub-id-type="doi">10.1038/nrg2734</pub-id><pub-id pub-id-type="pmid">20084086</pub-id></citation>
</ref>
<ref id="B28">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lappalainen</surname> <given-names>T.</given-names></name> <name><surname>Salmela</surname> <given-names>E.</given-names></name> <name><surname>Andersen</surname> <given-names>P. M.</given-names></name> <name><surname>Dahlman-Wright</surname> <given-names>K.</given-names></name> <name><surname>Sistonen</surname> <given-names>P.</given-names></name> <name><surname>Savontaus</surname> <given-names>M.-L.</given-names></name> <etal/></person-group>. (<year>2010</year>). <article-title>Genomic landscape of positive natural selection in Northern European populations</article-title>. <source>Eur. J. Hum. Genet</source>. <volume>18</volume>, <fpage>471</fpage>&#x02013;<lpage>478</lpage>. <pub-id pub-id-type="doi">10.1038/ejhg.2009.184</pub-id><pub-id pub-id-type="pmid">19844263</pub-id></citation>
</ref>
<ref id="B28a">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>H.</given-names></name></person-group> (<year>2011</year>). <article-title>A statistical framework for SNP calling, mutation discovery, association mapping and population genetical parameter estimation from sequencing data</article-title>. <source>Bioinformatics</source> <volume>27</volume>, <fpage>2987</fpage>&#x02013;<lpage>2993</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btr509</pub-id><pub-id pub-id-type="pmid">21903627</pub-id></citation>
</ref>
<ref id="B28b">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>Y.</given-names></name> <name><surname>Sidore</surname> <given-names>C.</given-names></name> <name><surname>Kang</surname> <given-names>H. M.</given-names></name> <name><surname>Boehnke</surname> <given-names>M.</given-names></name> <name><surname>Abecasis</surname> <given-names>G. R.</given-names></name></person-group> (<year>2011</year>). <article-title>Low-coverage sequencing: implications for design of complex trait association studies</article-title>. <source>Genome Res</source>. <volume>21</volume>, <fpage>940</fpage>&#x02013;<lpage>951</lpage>. <pub-id pub-id-type="doi">10.1101/gr.117259.110</pub-id><pub-id pub-id-type="pmid">21460063</pub-id></citation>
</ref>
<ref id="B29">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Librado</surname> <given-names>P.</given-names></name> <name><surname>Rozas</surname> <given-names>J.</given-names></name></person-group> (<year>2009</year>). <article-title>DnaSP v5: a software for comprehensive analysis of DNA polymorphism data</article-title>. <source>Bioinformatics</source> <volume>25</volume>, <fpage>1451</fpage>&#x02013;<lpage>1452</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btp187</pub-id><pub-id pub-id-type="pmid">19346325</pub-id></citation>
</ref>
<ref id="B30">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Majewski</surname> <given-names>J.</given-names></name> <name><surname>Schwartzentruber</surname> <given-names>J.</given-names></name> <name><surname>Lalonde</surname> <given-names>E.</given-names></name> <name><surname>Montpetit</surname> <given-names>A.</given-names></name> <name><surname>Jabado</surname> <given-names>N.</given-names></name></person-group> (<year>2011</year>). <article-title>What can exome sequencing do for you?</article-title> <source>J. Med. Genet</source>. <volume>48</volume>, <fpage>580</fpage>&#x02013;<lpage>589</lpage>. <pub-id pub-id-type="doi">10.1136/jmedgenet-2011-100223</pub-id><pub-id pub-id-type="pmid">21730106</pub-id></citation>
</ref>
<ref id="B31">
<citation citation-type="web"><person-group person-group-type="author"><name><surname>McVean</surname> <given-names>G.</given-names></name></person-group> (<year>2014</year>). <source>LDHat</source>. Available online at: <ext-link ext-link-type="uri" xlink:href="http://ldhat.sourceforge.net">http://ldhat.sourceforge.net</ext-link></citation>
</ref>
<ref id="B32">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Meij&#x000F3;n</surname> <given-names>M.</given-names></name> <name><surname>Satbhai</surname> <given-names>S. B.</given-names></name> <name><surname>Tsuchimatsu</surname> <given-names>T.</given-names></name> <name><surname>Busch</surname> <given-names>W.</given-names></name></person-group> (<year>2013</year>). <article-title>Genome-wide association study using cellular traits identifies a new regulator of root development in Arabidopsis</article-title>. <source>Nat. Genet</source>. <volume>46</volume>, <fpage>77</fpage>&#x02013;<lpage>81</lpage>. <pub-id pub-id-type="doi">10.1038/ng.2824</pub-id><pub-id pub-id-type="pmid">24212884</pub-id></citation>
</ref>
<ref id="B33">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Moradi</surname> <given-names>M. H.</given-names></name> <name><surname>Nejati-Javaremi</surname> <given-names>A.</given-names></name> <name><surname>Moradi-Shahrbabak</surname> <given-names>M.</given-names></name> <name><surname>Dodds</surname> <given-names>K.</given-names></name> <name><surname>McEwan</surname> <given-names>J.</given-names></name></person-group> (<year>2012</year>). <article-title>Genomic scan of selective sweeps in thin and fat tail sheep breeds for identifying of candidate regions associated with fat deposition</article-title>. <source>BMC Genet</source>. <volume>13</volume>:<fpage>10</fpage>. <pub-id pub-id-type="doi">10.1186/1471-2156-13-10</pub-id><pub-id pub-id-type="pmid">22364287</pub-id></citation>
</ref>
<ref id="B34">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Neale</surname> <given-names>D. B.</given-names></name> <name><surname>Kremer</surname> <given-names>A.</given-names></name></person-group> (<year>2011</year>). <article-title>Forest tree genomics: growing resources and applications</article-title>. <source>Nat. Rev. Genet</source>. <volume>12</volume>, <fpage>111</fpage>&#x02013;<lpage>122</lpage>. <pub-id pub-id-type="doi">10.1038/nrg2931</pub-id><pub-id pub-id-type="pmid">21245829</pub-id></citation>
</ref>
<ref id="B34a">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Nielsen</surname> <given-names>R.</given-names></name></person-group> (<year>2005</year>). <article-title>Molecular signatures of natural selection</article-title>. <source>Annu. Rev. Genet</source>. <volume>39</volume>, <fpage>197</fpage>&#x02013;<lpage>218</lpage>. <pub-id pub-id-type="doi">10.1146/annurev.genet.39.073003.112420</pub-id><pub-id pub-id-type="pmid">16285858</pub-id></citation>
</ref>
<ref id="B35">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Nievergelt</surname> <given-names>C. M.</given-names></name> <name><surname>Smith</surname> <given-names>D. W.</given-names></name> <name><surname>Kohlenberg</surname> <given-names>J. B.</given-names></name> <name><surname>Schork</surname> <given-names>N. J.</given-names></name></person-group> (<year>2004</year>). <article-title>Large-scale integration of human genetic and physical maps</article-title>. <source>Genome Res</source>. <volume>14</volume>, <fpage>1199</fpage>&#x02013;<lpage>1205</lpage>. <pub-id pub-id-type="doi">10.1101/gr.1475304</pub-id><pub-id pub-id-type="pmid">15140834</pub-id></citation>
</ref>
<ref id="B36">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Paten</surname> <given-names>B.</given-names></name> <name><surname>Herrero</surname> <given-names>J.</given-names></name> <name><surname>Beal</surname> <given-names>K.</given-names></name> <name><surname>Fitzgerald</surname> <given-names>S.</given-names></name> <name><surname>Birney</surname> <given-names>E.</given-names></name></person-group> (<year>2008a</year>). <article-title>Enredo and Pecan: genome-wide mammalian consistency-based multiple alignment with paralogs</article-title>. <source>Genome Res</source>. <volume>18</volume>, <fpage>1814</fpage>&#x02013;<lpage>1828</lpage>. <pub-id pub-id-type="doi">10.1101/gr.076554.108</pub-id><pub-id pub-id-type="pmid">18849524</pub-id></citation>
</ref>
<ref id="B37">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Paten</surname> <given-names>B.</given-names></name> <name><surname>Herrero</surname> <given-names>J.</given-names></name> <name><surname>Fitzgerald</surname> <given-names>S.</given-names></name> <name><surname>Beal</surname> <given-names>K.</given-names></name> <name><surname>Flicek</surname> <given-names>P.</given-names></name> <name><surname>Holmes</surname> <given-names>I.</given-names></name> <etal/></person-group>. (<year>2008b</year>). <article-title>Genome-wide nucleotide-level mammalian ancestor reconstruction</article-title>. <source>Genome Res</source>. <volume>18</volume>, <fpage>1829</fpage>&#x02013;<lpage>1843</lpage>. <pub-id pub-id-type="doi">10.1101/gr.076521.108</pub-id><pub-id pub-id-type="pmid">18849525</pub-id></citation>
</ref>
<ref id="B38">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Purcell</surname> <given-names>S.</given-names></name> <name><surname>Neale</surname> <given-names>B.</given-names></name> <name><surname>Todd-Brown</surname> <given-names>K.</given-names></name> <name><surname>Thomas</surname> <given-names>L.</given-names></name> <name><surname>Ferreira</surname> <given-names>M. A. R.</given-names></name> <name><surname>Bender</surname> <given-names>D.</given-names></name> <etal/></person-group>. (<year>2007</year>). <article-title>PLINK: a toolset for whole-genome association and population-based linkage analysis</article-title>. <source>Am. J. Hum. Genet</source>. <volume>81</volume>, <fpage>559</fpage>&#x02013;<lpage>575</lpage>. <pub-id pub-id-type="doi">10.1086/519795</pub-id><pub-id pub-id-type="pmid">17701901</pub-id></citation>
</ref>
<ref id="B39">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Pybus</surname> <given-names>M.</given-names></name> <name><surname>Dall&#x00027;Olio</surname> <given-names>G. M.</given-names></name> <name><surname>Luisi</surname> <given-names>P.</given-names></name> <name><surname>Uzkudun</surname> <given-names>M.</given-names></name> <name><surname>Carre&#x000F1;o-Torres</surname> <given-names>A.</given-names></name> <name><surname>Pavlidis</surname> <given-names>P.</given-names></name> <etal/></person-group>. (<year>2014</year>). <article-title>1000 Genomes Selection Browser 1.0: a genome browser dedicated to signatures of natural selection in modern humans</article-title>. <source>Nucleic Acids Res</source>. <volume>42</volume>, <fpage>D903</fpage>&#x02013;<lpage>D909</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkt1188</pub-id><pub-id pub-id-type="pmid">24275494</pub-id></citation>
</ref>
<ref id="B40">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Qanbari</surname> <given-names>S.</given-names></name> <name><surname>Gianola</surname> <given-names>D.</given-names></name> <name><surname>Hayes</surname> <given-names>B.</given-names></name> <name><surname>Schenkel</surname> <given-names>F.</given-names></name> <name><surname>Miller</surname> <given-names>S.</given-names></name> <name><surname>Moore</surname> <given-names>S.</given-names></name> <etal/></person-group>. (<year>2011</year>). <article-title>Application of site and haplotype-frequency based approaches for detecting selection signatures in cattle</article-title>. <source>BMC Genomics</source> <volume>12</volume>:<fpage>318</fpage>. <pub-id pub-id-type="doi">10.1186/1471-2164-12-318</pub-id><pub-id pub-id-type="pmid">21679429</pub-id></citation>
</ref>
<ref id="B41">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ram&#x000ED;rez-Soriano</surname> <given-names>A.</given-names></name> <name><surname>Nielsen</surname> <given-names>R.</given-names></name></person-group> (<year>2009</year>). <article-title>Correcting estimators of &#x003B8; and Tajima&#x00027;s D for ascertainment biases caused by the single-nucleotide polymorphism discovery process</article-title>. <source>Genetics</source> <volume>181</volume>, <fpage>701</fpage>&#x02013;<lpage>710</lpage>. <pub-id pub-id-type="doi">10.1534/genetics.108.094060</pub-id><pub-id pub-id-type="pmid">19087964</pub-id></citation>
</ref>
<ref id="B42">
<citation citation-type="web"><person-group person-group-type="author"><collab>R Core Team.</collab></person-group> (<year>2014</year>). <source>R: A Language and Environment for Statistical Computing</source>. <publisher-loc>Vienna</publisher-loc>: <publisher-name>R Foundation for Statistical Computing</publisher-name>. Available online at: <ext-link ext-link-type="uri" xlink:href="http://www.R-project.org/">http://www.R-project.org/</ext-link></citation>
</ref>
<ref id="B43">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ronen</surname> <given-names>R.</given-names></name> <name><surname>Udpa</surname> <given-names>N.</given-names></name> <name><surname>Halperin</surname> <given-names>E.</given-names></name> <name><surname>Bafna</surname> <given-names>V.</given-names></name></person-group> (<year>2013</year>). <article-title>Learning natural selection from the site frequency spectrum</article-title>. <source>Genetics</source> <volume>195</volume>, <fpage>181</fpage>&#x02013;<lpage>193</lpage>. <pub-id pub-id-type="doi">10.1534/genetics.113.152587</pub-id><pub-id pub-id-type="pmid">23770700</pub-id></citation>
</ref>
<ref id="B44">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sabeti</surname> <given-names>P. C.</given-names></name> <name><surname>Reich</surname> <given-names>D. E.</given-names></name> <name><surname>Higgins</surname> <given-names>J. M.</given-names></name> <name><surname>Levine</surname> <given-names>H. Z. P.</given-names></name> <name><surname>Richter</surname> <given-names>D. J.</given-names></name> <name><surname>Schaffner</surname> <given-names>S. F.</given-names></name> <etal/></person-group>. (<year>2002</year>). <article-title>Detecting recent positive selection in the human genome from haplotype structure</article-title>. <source>Nature</source> <volume>419</volume>, <fpage>832</fpage>&#x02013;<lpage>837</lpage>. <pub-id pub-id-type="doi">10.1038/nature01140</pub-id><pub-id pub-id-type="pmid">12397357</pub-id></citation>
</ref>
<ref id="B45">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sabeti</surname> <given-names>P. C.</given-names></name> <name><surname>Schaffner</surname> <given-names>S. F.</given-names></name> <name><surname>Fry</surname> <given-names>B.</given-names></name> <name><surname>Lohmueller</surname> <given-names>J.</given-names></name> <name><surname>Varilly</surname> <given-names>P.</given-names></name> <name><surname>Shamovsky</surname> <given-names>O.</given-names></name> <etal/></person-group>. (<year>2006</year>). <article-title>Positive natural selection in the human lineage</article-title>. <source>Science</source> <volume>312</volume>, <fpage>1614</fpage>&#x02013;<lpage>1620</lpage>. <pub-id pub-id-type="doi">10.1126/science.1124309</pub-id><pub-id pub-id-type="pmid">16778047</pub-id></citation>
</ref>
<ref id="B46">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sabeti</surname> <given-names>P. C.</given-names></name> <name><surname>Varilly</surname> <given-names>P.</given-names></name> <name><surname>Fry</surname> <given-names>B.</given-names></name> <name><surname>Lohmueller</surname> <given-names>J.</given-names></name> <name><surname>Hostetter</surname> <given-names>E.</given-names></name> <name><surname>Cotsapas</surname> <given-names>C.</given-names></name> <etal/></person-group>. (<year>2007</year>). <article-title>Genome-wide detection and characterization of positive selection in human populations</article-title>. <source>Nature</source> <volume>449</volume>, <fpage>913</fpage>&#x02013;<lpage>918</lpage>. <pub-id pub-id-type="doi">10.1038/nature06250</pub-id><pub-id pub-id-type="pmid">17943131</pub-id></citation>
</ref>
<ref id="B47">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Simonsen</surname> <given-names>K. L.</given-names></name> <name><surname>Churchill</surname> <given-names>G. A.</given-names></name> <name><surname>Aquadro</surname> <given-names>C. F.</given-names></name></person-group> (<year>1995</year>). <article-title>Properties of statistical tests of neutrality for DNA polymorphism data</article-title>. <source>Genetics</source> <volume>141</volume>, <fpage>413</fpage>&#x02013;<lpage>429</lpage>. <pub-id pub-id-type="pmid">8536987</pub-id></citation>
</ref>
<ref id="B48">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Tajima</surname> <given-names>F.</given-names></name></person-group> (<year>1989</year>). <article-title>Statistical method for testing the neutral mutation hypothesis by DNA polymorphism</article-title>. <source>Genetics</source> <volume>123</volume>, <fpage>585</fpage>&#x02013;<lpage>595</lpage>. <pub-id pub-id-type="pmid">2513255</pub-id></citation>
</ref>
<ref id="B49">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Tang</surname> <given-names>K.</given-names></name> <name><surname>Thornton</surname> <given-names>K. R.</given-names></name> <name><surname>Stoneking</surname> <given-names>M.</given-names></name></person-group> (<year>2007</year>). <article-title>A new approach for using genome scans to detect recent positive selection in the human genome</article-title>. <source>PLoS Biol</source>. <volume>5</volume>:<fpage>e171</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pbio.0050171</pub-id><pub-id pub-id-type="pmid">17579516</pub-id></citation>
</ref>
<ref id="B50">
<citation citation-type="journal"><person-group person-group-type="author"><collab>The Bovine HapMap Consortium.</collab></person-group> (<year>2009</year>). <article-title>Genome-wide survey of SNP variation uncovers the genetic structure of cattle breeds</article-title>. <source>Science</source> <volume>324</volume>, <fpage>528</fpage>&#x02013;<lpage>532</lpage>. <pub-id pub-id-type="doi">10.1126/science.1167936</pub-id><pub-id pub-id-type="pmid">19390050</pub-id></citation>
</ref>
<ref id="B51">
<citation citation-type="journal"><person-group person-group-type="author"><collab>The 1000 Genomes Project Consortium.</collab></person-group> (<year>2010</year>). <article-title>A map of human genome variation from population-scale sequencing</article-title>. <source>Nature</source> <volume>467</volume>, <fpage>1061</fpage>&#x02013;<lpage>1073</lpage>. <pub-id pub-id-type="doi">10.1038/nature09534</pub-id><pub-id pub-id-type="pmid">20981092</pub-id></citation>
</ref>
<ref id="B52">
<citation citation-type="web"><person-group person-group-type="author"><name><surname>Urbanek</surname> <given-names>S.</given-names></name></person-group> (<year>2011</year>). <source>multicore: Parallel Processing of R Code on Machines with Multiple Cores or CPUs</source>. R Package Version 0.1-7. Available online at: <ext-link ext-link-type="uri" xlink:href="http://cran.r-project.org/package&#x0003D;multicore">http://cran.r-project.org/package&#x0003D;multicore</ext-link></citation>
</ref>
<ref id="B53">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Vitti</surname> <given-names>J. J.</given-names></name> <name><surname>Grossman</surname> <given-names>S. R.</given-names></name> <name><surname>Sabeti</surname> <given-names>P. C.</given-names></name></person-group> (<year>2013</year>). <article-title>Detecting natural selection in genomic data</article-title>. <source>Annu. Rev. Genet</source>. <volume>47</volume>, <fpage>97</fpage>&#x02013;<lpage>120</lpage>. <pub-id pub-id-type="doi">10.1146/annurev-genet-111212-133526</pub-id><pub-id pub-id-type="pmid">24274750</pub-id></citation>
</ref>
<ref id="B54">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Voight</surname> <given-names>B. F.</given-names></name> <name><surname>Kudaravalli</surname> <given-names>S.</given-names></name> <name><surname>Wen</surname> <given-names>X.</given-names></name> <name><surname>Pritchard</surname> <given-names>J. K.</given-names></name></person-group> (<year>2006</year>). <article-title>A map of recent positive selection in the human genome</article-title>. <source>PLoS Biol</source>. <volume>4</volume>:<fpage>e72</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pbio.0040072</pub-id><pub-id pub-id-type="pmid">16494531</pub-id></citation>
</ref>
<ref id="B55">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Weir</surname> <given-names>B. S.</given-names></name> <name><surname>Cockerham</surname> <given-names>C. C.</given-names></name></person-group> (<year>1984</year>). <article-title>Estimating F-statistics for the analysis of population structure</article-title>. <source>Evolution</source> <volume>38</volume>, <fpage>1358</fpage>&#x02013;<lpage>1370</lpage>. <pub-id pub-id-type="doi">10.2307/2408641</pub-id></citation>
</ref>
<ref id="B56">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Wickham</surname> <given-names>H.</given-names></name></person-group> (<year>2009</year>). <source>ggplot2: Elegant Graphics for Data Analysis</source>. <publisher-loc>New York, NY</publisher-loc>: <publisher-name>Springer</publisher-name>.</citation>
</ref>
<ref id="B57">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Williams</surname> <given-names>A. L.</given-names></name> <name><surname>Patterson</surname> <given-names>N.</given-names></name> <name><surname>Glessner</surname> <given-names>J.</given-names></name> <name><surname>Hakonarson</surname> <given-names>H.</given-names></name> <name><surname>Reich</surname> <given-names>D.</given-names></name></person-group> (<year>2012</year>). <article-title>Phasing of many thousands of genotyped samples</article-title>. <source>Am. J. Hum. Genet</source>. <volume>91</volume>, <fpage>238</fpage>&#x02013;<lpage>251</lpage>. <pub-id pub-id-type="doi">10.1016/j.ajhg.2012.06.013</pub-id><pub-id pub-id-type="pmid">22883141</pub-id></citation>
</ref>
</ref-list>
</back>
</article>
