<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article article-type="review-article" dtd-version="2.3" xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Bioinform.</journal-id>
<journal-title>Frontiers in Bioinformatics</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Bioinform.</abbrev-journal-title>
<issn pub-type="epub">2673-7647</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1092853</article-id>
<article-id pub-id-type="doi">10.3389/fbinf.2023.1092853</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Bioinformatics</subject>
<subj-group>
<subject>Review</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Enhancer/gene relationships: Need for more reliable genome-wide reference sets</article-title>
<alt-title alt-title-type="left-running-head">Hoellinger et al.</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fbinf.2023.1092853">10.3389/fbinf.2023.1092853</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Hoellinger</surname>
<given-names>Tristan</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2175451/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Mestre</surname>
<given-names>Camille</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Aschard</surname>
<given-names>Hugues</given-names>
</name>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
<xref ref-type="aff" rid="aff5">
<sup>5</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1010183/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Le Goff</surname>
<given-names>Wilfried</given-names>
</name>
<xref ref-type="aff" rid="aff6">
<sup>6</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1640973/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Foissac</surname>
<given-names>Sylvain</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/72510/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Faraut</surname>
<given-names>Thomas</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/992086/overview"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Djebali</surname>
<given-names>Sarah</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1662295/overview"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>IRSD</institution>, <institution>Universit&#xe9; de Toulouse, INSERM, INRAE, ENVT</institution>, <institution>Univ Toulouse III - Paul Sabatier (UPS)</institution>, <addr-line>Toulouse</addr-line>, <country>France</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>INSA Toulouse</institution>, <institution>INP-ENSEEIHT</institution>, <addr-line>Toulouse</addr-line>, <country>France</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>GenPhySE, Universit&#xe9; de Toulouse, INRAE, INPT, ENVT</institution>, <addr-line>Toulouse</addr-line>, <country>France</country>
</aff>
<aff id="aff4">
<sup>4</sup>
<institution>Institut Pasteur</institution>, <institution>Universit&#xe9; Paris Cit&#xe9;</institution>, <institution>Department of Computational Biology</institution>, <addr-line>Paris</addr-line>, <country>France</country>
</aff>
<aff id="aff5">
<sup>5</sup>
<institution>Program in Genetic Epidemiology and Statistical Genetics</institution>, <institution>Harvard T.H. Chan School of Public Health</institution>, <addr-line>Boston</addr-line>, <addr-line>MA</addr-line>, <country>United States</country>
</aff>
<aff id="aff6">
<sup>6</sup>
<institution>Sorbonne Universit&#xe9;</institution>, <institution>INSERM</institution>, <institution>Institute of Cardiometabolism and Nutrition (ICAN)</institution>, <institution>UMR_S1166</institution>, <addr-line>Paris</addr-line>, <country>France</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/126999/overview">Zhi-Ping Liu</ext-link>, Shandong University, China</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1097142/overview">Oriol Fornes</ext-link>, University of British Columbia, Canada</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/142415/overview">Marcel H. Schulz</ext-link>, Goethe University Frankfurt, Germany</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Sarah Djebali, <email>sarah.djebali@inserm.fr</email>
</corresp>
<fn fn-type="other">
<p>This article was submitted to Integrative Bioinformatics, a section of the journal Frontiers in Bioinformatics</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>24</day>
<month>02</month>
<year>2023</year>
</pub-date>
<pub-date pub-type="collection">
<year>2023</year>
</pub-date>
<volume>3</volume>
<elocation-id>1092853</elocation-id>
<history>
<date date-type="received">
<day>08</day>
<month>11</month>
<year>2022</year>
</date>
<date date-type="accepted">
<day>07</day>
<month>02</month>
<year>2023</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2023 Hoellinger, Mestre, Aschard, Le Goff, Foissac, Faraut and Djebali.</copyright-statement>
<copyright-year>2023</copyright-year>
<copyright-holder>Hoellinger, Mestre, Aschard, Le Goff, Foissac, Faraut and Djebali</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>Differences in cells&#x2019; functions arise from differential activity of regulatory elements, including enhancers. Enhancers are cis-regulatory elements that cooperate with promoters through transcription factors to activate the expression of one or several genes by getting physically close to them in the 3D space of the nucleus. There is increasing evidence that genetic variants associated with common diseases are enriched in enhancers active in cell types relevant to these diseases. Identifying the enhancers associated with genes and, conversely, the sets of genes activated by each enhancer (the so-called enhancer/gene or E/G relationships) across cell types, can help understand the genetic mechanisms underlying human diseases. There are three broad approaches for the genome-wide identification of E/G relationships in a cell type: 1) genetic link methods or eQTL, 2) functional link methods based on 1D functional data such as open chromatin, histone mark or gene expression and 3) spatial link methods based on 3D data such as HiC. Since 1) and 3) are costly, the current strategy is to develop functional link methods and to use data from 1) and 3) as reference to evaluate them. However, there is still no consensus on the best functional link method to date, and method comparisons remain rare. Here, we compared the relative performances of three recent methods for the identification of enhancer-gene links, <monospace>TargetFinder</monospace>, <monospace>Average-Rank</monospace>, and the <monospace>ABC model</monospace>, using the three latest benchmarks from the field: a reference that combines 3D and eQTL data, called <monospace>BENGI</monospace>, and two genetic screening references, called <monospace>CRiFF</monospace> and <monospace>CRISPRi</monospace>. Overall, none of the three methods performed best on the three references. 
<monospace>CRiFF</monospace> and <monospace>CRISPRi</monospace> reference sets are likely more reliable, but <monospace>CRiFF</monospace> is not genome-wide and <monospace>CRiFF</monospace> and <monospace>CRISPRi</monospace> are mostly available on the K562 cancer cell line. The <monospace>BENGI</monospace> reference set is genome-wide but likely contains many false positives. This study therefore calls for new reliable and genome-wide E/G reference data rather than new functional link E/G identification methods.</p>
</abstract>
<kwd-group>
<kwd>gene expression regulation</kwd>
<kwd>identification of enhancer/gene relationships</kwd>
<kwd>method evaluation</kwd>
<kwd>chromatin structure</kwd>
<kwd>eQTL</kwd>
<kwd>functional genomic data</kwd>
<kwd>genetic screening</kwd>
</kwd-group>
<contract-sponsor id="cn001">AgreenSkills<named-content content-type="fundref-id">10.13039/501100018695</named-content>
</contract-sponsor>
<contract-sponsor id="cn002">Institut National de la Sant&#xe9; et de la Recherche M&#xe9;dicale<named-content content-type="fundref-id">10.13039/501100001677</named-content>
</contract-sponsor>
</article-meta>
</front>
<body>
<sec id="s1">
<title>1 Introduction</title>
<p>Vertebrate organisms are made of billions of cells that all have the same genome, but are able to deliver a wide range of biological functions. These functional differences are conveyed by the differential expression of genes across cell types, which is partly driven by the differential action of their regulatory elements (promoters, enhancers, insulators, etc.). Among those regulatory elements, enhancers are particularly interesting, not only because they are predominant and cover more genomic space (<xref ref-type="bibr" rid="B29">Pennacchio et al. (2013)</xref>), but also because they appear to play important roles in human diseases (<xref ref-type="bibr" rid="B42">Zhang et al. (2018)</xref>; <xref ref-type="bibr" rid="B28">Nasser et al. (2021)</xref>). Enhancers, like promoters, are DNA elements bound by transcription factors (TF). They are known to activate the expression of one or several genes by getting physically close to their promoters in the 3D space of the nucleus (<xref ref-type="bibr" rid="B20">Krivega and Dean (2012)</xref>; <xref ref-type="bibr" rid="B34">Schoenfelder and Fraser (2019)</xref>). There are several publicly available catalogs of enhancers covering many different cell types, especially for the human and the mouse genomes. Enhancers are typically identified experimentally, as, for example, in the VISTA catalog<xref ref-type="fn" rid="fn1">
<sup>1</sup>
</xref>, or bioinformatically, according to functional genomic data: a combination of open chromatin, histone modification and insulator data in the case of the ENCODE catalog (<xref ref-type="bibr" rid="B26">Moore et al. (2020b)</xref>), and Cap Analysis Gene Expression (CAGE) data in the case of the FANTOM catalog (<xref ref-type="bibr" rid="B1">Andersson et al. (2014)</xref>). Nevertheless, the degree of reliability and the coverage of these catalogs remain limited.</p>
<p>The identification of enhancers and associated genes, i.e., which genes are the targets of which enhancers in a particular cell type, is an important objective in the field. There is increasing evidence that variants associated with common diseases are located in enhancers active in cell types relevant to these diseases (<xref ref-type="bibr" rid="B9">Corradin and Scacheri (2014)</xref>; <xref ref-type="bibr" rid="B21">Kundaje et al. (2015)</xref>). Understanding the enhancer/gene (E/G) relationships active in these particular cell types can help pinpoint important and potentially new genes associated with these diseases, and prioritize variants in the context of genome-wide association studies (<xref ref-type="bibr" rid="B28">Nasser et al. (2021)</xref>). Nonetheless, this task faces important challenges because of the multivariate nature of the enhancer/gene relationship. Indeed, enhancers may 1) be far away from the genes they activate (up to several Mbp), 2) act either upstream or downstream from the activated genes, 3) activate several genes, and 4) need other enhancers to activate a given gene (<xref ref-type="bibr" rid="B20">Krivega and Dean (2012)</xref>; <xref ref-type="bibr" rid="B34">Schoenfelder and Fraser (2019)</xref>).</p>
<p>There are three broad approaches that are currently used for the genome-wide identification of E/G relationships in a given cell type (<xref ref-type="fig" rid="F1">Figure 1</xref>): 1) genetic link methods that identify eQTL genetic variants, potentially located in regulatory elements such as enhancers, using expression data (microarray, RNA-seq) applied to a given cell type (<xref ref-type="bibr" rid="B3">Bahcall (2015)</xref>; <xref ref-type="bibr" rid="B19">Kerimov et al. (2021)</xref>), 2) functional link methods that directly identify E/G using genome-wide functional genomic 1D data (open chromatin, histone mark, TF, gene expression) in one or several cell types (see next section), and 3) spatial link (3D) methods that predict E/G using a combination of genome-wide 1D and 3D data (promoter capture HiC, ChIA-PET, etc.) in a given cell type, under the assumption that true E/G relationships are in proximity in 3D space (<xref ref-type="bibr" rid="B18">Jung et al. (2019)</xref>; <xref ref-type="bibr" rid="B37">Tang et al. (2015)</xref>).</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>The genome-wide identification of enhancer/gene (E/G) relationships in a particular cell type. Illustration of the three broad approaches that have been described in the literature: <bold>(1)</bold> genetic link methods, <bold>(2)</bold> functional link methods and <bold>(3)</bold> spatial link methods. In panel (1) taken from <xref ref-type="bibr" rid="B6">Cheung and Spielman (2009)</xref>, the triangles and rectangles represent genetic variants and genes, respectively. When the variant is G the gene is highly expressed, and when it is C the gene is lowly expressed. This variant is said to be an eQTL of the gene, and if located in an enhancer the relationship between the variant and the gene becomes an E/G. Panel (2) illustrates a typical heuristic functional link method, which correlates chromatin accessibility in promoters and enhancers across several cell types and is described in more detail in <xref ref-type="fig" rid="F2">Figure 2</xref> below. Panel (3) represents a squared heatmap where both the horizontal and the vertical axes represent the same portion of the genome divided into equal size bins. The darker the red in the cell, the closer the two regions in the 3D space of the nucleus according to HiC data. Apart from the diagonal, some points far from the diagonal indicate relationships that could be E/G if one of the bins lies in an enhancer and the other one lies at the transcription start site (TSS) or promoter of a gene.</p>
</caption>
<graphic xlink:href="fbinf-03-1092853-g001.tif"/>
</fig>
<p>Because genetic 1) and spatial link 3) methods are very costly and the generation of 3D data in spatial link methods requires a specific expertise, functional link methods 2) have become the most widely used approach to identify E/G relationships. This is confirmed by the plethora of functional link methods that have been developed since 2011 (see below). On the other hand, data underlying methods of types 1) and 3) are commonly considered as references to assess the reliability of methods of type 2) (see <xref ref-type="sec" rid="s2">Section 2</xref>).</p>
<p>Functional link methods, also reviewed in <xref ref-type="bibr" rid="B15">Hariprakash and Ferrari (2019)</xref>, can be divided into two broad categories: non-supervised/heuristic methods, and supervised machine learning methods. While the former generally use few types of functional genomic data in a large number of cell types, the latter use many types of functional genomic data in a reduced number of cell types. Broadly speaking, non-supervised methods use correlations between functional genomic signals present at enhancers and promoters across many cell types. Distance between promoters and enhancers as well as correlation thresholds are determined heuristically and the evaluation of the accuracy of the method is done <italic>a posteriori</italic> using external reference data (most often 3D or genetic) (<xref ref-type="bibr" rid="B10">Ernst et al. (2011)</xref>; <xref ref-type="bibr" rid="B36">Shen et al. (2012)</xref>; <xref ref-type="bibr" rid="B38">Thurman et al. (2012)</xref>; <xref ref-type="bibr" rid="B35">Sheffield et al. (2013)</xref>; <xref ref-type="bibr" rid="B1">Andersson et al. (2014)</xref>; <xref ref-type="bibr" rid="B8">Corradin et al. (2014)</xref>; <xref ref-type="bibr" rid="B41">Yao et al. (2015)</xref>; <xref ref-type="bibr" rid="B12">Fulco et al. (2019)</xref>; <xref ref-type="bibr" rid="B25">Moore et al. (2020a)</xref>). For illustration purposes, an example of such unsupervised/heuristic methods, the open chromatin correlation method, is provided in <xref ref-type="fig" rid="F2">Figure 2</xref> (see <italic>Material and Methods</italic> for details).</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>Example of a non-supervised/heuristic method for the identification of E/G in a cell type: the open chromatin correlation method. A common approach to identify candidate E/G is to investigate the correlation between chromatin accessibility signal at two regions across several cell types. The plot represents a portion of the human genome (from position 64,502,213 to position 64,511,220 of chromosome 1 on the hg19 human genome assembly) in IGV (Integrative Genomics Viewer). The horizontal tracks represent ENCODE DNAse-seq signal in 10 different cell types, followed by gene annotation (Refseq) in dark blue and by DNAse-seq consensus peaks from the 10 cell types in light blue. The vertical green rectangles highlight two consensus peaks (and their signal) that have a high (more than 0.7) Pearson correlation between the <italic>log</italic><sub>10</sub> of their normalized accessibility across the 10 cell types (see <italic>Material and methods</italic>). If one of the consensus peaks was at the transcription start site (TSS) of a gene, then it would typically be interpreted as an E/G.</p>
</caption>
<graphic xlink:href="fbinf-03-1092853-g002.tif"/>
</fig>
<p>The second category of methods uses machine learning approaches such as random forests or neural networks. They consist in training a model to discriminate true vs false E/G based on distinctive features from the 1D data they use, from a reference dataset of known E/G (ground positives, most often a combination of 1D data for enhancer and promoter identification and 3D or genetic data for the relationship identification), and a dataset of unsupported E/G (ground negatives), as a negative control. When provided with new data, the model determines which E/G are more likely to be true (<xref ref-type="bibr" rid="B32">R&#xf6;delsperger et al. (2011)</xref>; <xref ref-type="bibr" rid="B2">Aran et al. (2013)</xref>; <xref ref-type="bibr" rid="B16">He et al. (2014)</xref>; <xref ref-type="bibr" rid="B33">Roy et al. (2015)</xref>; <xref ref-type="bibr" rid="B39">Whalen et al. (2016)</xref>; <xref ref-type="bibr" rid="B5">Cao et al. (2017)</xref>; <xref ref-type="bibr" rid="B40">Yang et al. (2017)</xref>; <xref ref-type="bibr" rid="B14">Hait et al. (2018)</xref>; <xref ref-type="bibr" rid="B23">Li et al. (2019)</xref>; <xref ref-type="bibr" rid="B4">Belokopytova et al. (2020)</xref>; <xref ref-type="bibr" rid="B17">Hong et al. (2020)</xref>; <xref ref-type="bibr" rid="B11">Fan and Peng (2022)</xref>).</p>
</sec>
<sec id="s2">
<title>2 Evaluating the most recent functional link methods</title>
<p>Two recent studies evaluated functional link methods (<xref ref-type="bibr" rid="B12">Fulco et al. (2019)</xref>; <xref ref-type="bibr" rid="B25">Moore et al. (2020a)</xref>). However, they did not evaluate the same methods and did not rely on the same reference data. In order to extend the evaluation of existing methods, we assessed the best performing methods of these two studies on the two reference sets they proposed. We also included a third reference set from a recent extended genetic screening analysis (<xref ref-type="bibr" rid="B13">Gasperini et al. (2019)</xref>).</p>
<p>The first study (<xref ref-type="bibr" rid="B12">Fulco et al. (2019)</xref>) proposes a new unsupervised/heuristic method called the <monospace>Activity-By-Contact (ABC) model</monospace> that performs best in their evaluation. The second study (<xref ref-type="bibr" rid="B25">Moore et al. (2020a)</xref>) separately evaluates unsupervised/heuristic and supervised machine learning methods. Within the first category, they propose a new method, called <monospace>Average-Rank</monospace>, that performs best within its category, while in the second category they identify <monospace>TargetFinder</monospace> (<xref ref-type="bibr" rid="B39">Whalen et al. (2016)</xref>) as the best performing one. <monospace>TargetFinder</monospace> also performed best overall. Those are the methods that will be evaluated here, together with the simplest baseline <monospace>distance method</monospace>, that consists in assigning an enhancer to its closest gene.</p>
<sec id="s2-1">
<title>2.1 Description of the evaluated methods</title>
<p>The <monospace>ABC model</monospace> defines the score of a potential E/G in a cell type as the product of the activity of the potential enhancer <italic>E</italic> in this cell type, and the contact between <italic>E</italic> and gene <italic>G</italic>, divided by the sum of the same products but across all potential enhancers in a 5&#xa0;<italic>Mb</italic> region from <italic>G</italic>. The <monospace>ABC model</monospace> starts by defining candidate regulatory regions <italic>E</italic>, as regions of open chromatin (defined by either DNAse-seq or ATAC-seq) in a cell type. It then quantifies the enhancer activity (<italic>A</italic>) of these regions <italic>E</italic> by computing the geometric mean of the read counts of chromatin accessibility (usually assessed using DNAse-seq or ATAC-seq) and H3K27ac ChIP-seq at <italic>E</italic>. The contact (<italic>C</italic>) between <italic>E</italic> and <italic>G</italic> is then computed either as the Knight-Ruiz (KR) matrix-balancing normalized Hi-C contact frequency between <italic>E</italic> and the promoter of gene <italic>G</italic>, if cell type specific Hi-C data are available, or simply as the inverse of the distance (fractal globule model) between <italic>E</italic> and <italic>G</italic> otherwise (<xref ref-type="bibr" rid="B12">Fulco et al. (2019)</xref>). In order to predict E/G only for expressed genes, the <monospace>ABC model</monospace> can either take cell type specific gene expression data in, or consider as a proxy of gene expression, the activity of its promoter as defined above using chromatin accessibility and H3K27ac ChIP-seq data<xref ref-type="fn" rid="fn2">
<sup>2</sup>
</xref>.</p>
<p>The <monospace>Average-Rank method</monospace> defines the score of a potential E/G as the inverse of the average of the ranks provided by the <monospace>Sheffield</monospace> and the <monospace>distance</monospace> methods (<xref ref-type="bibr" rid="B25">Moore et al. (2020a)</xref>). The <monospace>Sheffield method</monospace> was introduced in 2013 and defines the score of a potential E/G as the Pearson correlation between the logarithm of the chromatin accessibility at <italic>E</italic> (assessed by DNAse-seq) and the logarithm of the expression of <italic>G</italic> across many cell types (<xref ref-type="bibr" rid="B35">Sheffield et al. (2013)</xref>). The <monospace>distance method</monospace> scores a potential E/G as the inverse of the distance between <italic>E</italic> and <italic>G</italic>. Here potential enhancers are all distal enhancer elements (distal enhancer like signature elements or <monospace>dELS</monospace>) of the ENCODE registry of candidate cis-regulatory elements (<monospace>cCREs</monospace>) (<xref ref-type="bibr" rid="B26">Moore et al. (2020b)</xref>).</p>
<p>
<monospace>TargetFinder</monospace> defines true (ground positive) E/G based on 3D data (HiC) and learns features associated to those using gradient boosting. The learnt features are as diverse as open chromatin, methylation, histone marks or transcription factors, and can both be taken from enhancer and promoter regions and from the window between them (<xref ref-type="bibr" rid="B39">Whalen et al. (2016)</xref>). Indeed its authors showed that features located in enhancer-promoter windows (EPW) are also predictive of true E/G relationships and should be incorporated in the model.</p>
<p>The first two link methods mentioned above also propose their own reference/evaluation datasets.</p>
</sec>
<sec id="s2-2">
<title>2.2 Description of the reference sets used for the evaluation</title>
<p>
<xref ref-type="bibr" rid="B12">Fulco et al. (2019)</xref>&#x2019;s reference set is based on previous CRISPR-based experiments performed in K562 cells and on the output of a new genetic screening technique developed by the authors, called <monospace>CRISPRi-FlowFISH</monospace>. This technique was specifically designed to predict E/G in a cell type for a given small number of genes. As stated by the authors, it perturbs &#x201c;hundreds of non-coding elements in parallel and quantifies their effects on the expression of an RNA of interest, combining CRISPR interference, RNA fluorescence <italic>in situ</italic> hybridization (FISH) and flow cytometry&#x201d;. In this approach, they &#x201c;deliver KRAB-dCas9 to many candidate regulatory elements in a population of cells by using a library of guide RNAs&#x201d;. The results of this technique are then subjected to a statistical framework to determine the sets of E/G that are active and inactive in the cell type. The technique was then applied to thirty genes in five genomic regions (spanning 1.1&#x2013;4.0&#xa0;<italic>Mb</italic>) for which they tested all DNase I hypersensitive (DHS) elements (representing open chromatin regions) in K562 cells within 450&#xa0;<italic>kb</italic> of the gene of interest. Together with previous CRISPR experiments, this approach yielded 109 ground positives (i.e., &#x201c;positive in the evaluation set&#x201d;) and 3,754 ground negatives (i.e., &#x201c;negative in the evaluation set&#x201d;) E/G, which are considered as a reference set for the evaluation of numerous methods of the field including the <monospace>ABC model</monospace> and the <monospace>distance method</monospace> (<xref ref-type="bibr" rid="B12">Fulco et al. (2019)</xref>). We will use this reference set here and call it <monospace>CRiFF</monospace> (<xref ref-type="table" rid="T1">Table 1</xref>). 
Note that the 30 selected genes had an RPKM expression level above 20 in K562, and that some of them were erythroid-specific while others were ubiquitous. No filtering on chromatin accessibility level was applied to open chromatin regions; however, the sequences of the probes that were designed to target open chromatin regions through gRNAs had to be specific enough. Fulco et al. also guarantee a 5% FDR to detect E/G and more than 80% power to detect a 25% effect on gene expression with <monospace>CRiFF</monospace>. Finally it has to be noted that the 109 ground positives and the 3,754 ground negatives of <monospace>CRiFF</monospace> exclude repressive elements and promoter-promoter interactions (interactions where the targeted element is located less than 500&#xa0;<italic>bp</italic> away from a TSS).</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>Number of ground positive and ground negative E/G relationships for each of the three evaluation sets considered, namely, <monospace>BENGI</monospace>, <monospace>CRiFF</monospace> and <monospace>CRISPRi</monospace>.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Evaluation set (cell type)</th>
<th align="center">Source data type</th>
<th align="center">&#x23; Ground positive E/G Relationships</th>
<th align="center">&#x23; Ground negative E/G Relationships</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td rowspan="6" align="center">
<monospace>BENGI</monospace> (GM12878)</td>
<td align="center">GEUVADIS eQTL</td>
<td align="center">2,073</td>
<td align="center">48,926</td>
</tr>
<tr>
<td align="center">CHi-C</td>
<td align="center">88,245</td>
<td align="center">287,483</td>
</tr>
<tr>
<td align="center">CTCF ChIA-PET</td>
<td align="center">7,591</td>
<td align="center">97,425</td>
</tr>
<tr>
<td align="center">GTEx eQTL</td>
<td align="center">1,301</td>
<td align="center">36,899</td>
</tr>
<tr>
<td align="center">HiC</td>
<td align="center">3,404</td>
<td align="center">150,335</td>
</tr>
<tr>
<td align="center">RNA polII ChIA-PET</td>
<td align="center">23,699</td>
<td align="center">133,536</td>
</tr>
<tr>
<td align="center">
<monospace>CRiFF</monospace> (K562)</td>
<td align="center">
<monospace>CRiFF</monospace>
</td>
<td align="center">109</td>
<td align="center">3,754</td>
</tr>
<tr>
<td align="center">
<monospace>CRISPRi</monospace> (K562)</td>
<td align="center">
<monospace>CRISPRi</monospace>
</td>
<td align="center">651</td>
<td align="center">24,576</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>To complement <monospace>CRiFF</monospace> which is rather small, we decided to use another recent genetic screening set that differs from <monospace>CRiFF</monospace> in being enhancer-centric instead of gene-centric. It was also available on K562 and could be retrieved from <xref ref-type="bibr" rid="B25">Moore et al. (2020a)</xref>&#x2019;s reference set. It is made of 651 ground positive and 24,576 ground negative E/G relationships, and we will call it <monospace>CRISPRi</monospace> (<xref ref-type="table" rid="T1">Table 1</xref>).</p>
<p>Moore et al.&#x2019;s reference set is entitled <monospace>BENGI</monospace> (Benchmark of candidate Enhancer-Gene Interactions) and is made of sets of E/G active and inactive in different cell lines according to different types of data (3D, genetic). We focus our evaluation on the GM12878 cell line, which has the largest amount of annotation data, with 6 sets of active and inactive E/G available. The active E/G sets result from the processing of four types of 3D data, Hi-C (<xref ref-type="bibr" rid="B30">Rao et al. (2014)</xref>) and promoter capture Hi-C (<xref ref-type="bibr" rid="B24">Mifsud et al. (2015)</xref>) data and ChIA-PET of polymerase II and CTCF (<xref ref-type="bibr" rid="B37">Tang et al. (2015)</xref>) data, and of eQTL data from two different studies, GEUVADIS (<xref ref-type="bibr" rid="B22">Lappalainen et al. (2013)</xref>) and GTEx (<xref ref-type="bibr" rid="B7">Consortium et al. (2015)</xref>). The sets of ground negatives are built by taking, for each enhancer of a positive set, all the genes not connected to it in the positive set and lying within the 95th percentile of the positive set distances from it. The numbers of ground positive and negative E/G obtained are indicated in <xref ref-type="table" rid="T1">Table 1</xref>. Since 3D and eQTL data are not specifically generated to identify E/G relationships, the <monospace>BENGI</monospace> reference sets are expected to be overall less reliable than the <monospace>CRiFF</monospace> and the <monospace>CRISPRi</monospace> reference sets. However, the fact that <monospace>BENGI</monospace> provides genome-wide information is an advantage over <monospace>CRiFF</monospace>.</p>
<p>Given all these data we proceeded to the evaluation of the <monospace>ABC model</monospace>, the <monospace>Average-Rank method</monospace>, the <monospace>distance method</monospace>, and <monospace>TargetFinder</monospace>, on all three reference datasets: <monospace>BENGI</monospace>, <monospace>CRiFF</monospace> and <monospace>CRISPRi</monospace>.</p>
</sec>
<sec id="s2-3">
<title>2.3 Description of the evaluation</title>
<p>For the <monospace>ABC model</monospace>, the <monospace>Average-Rank method</monospace> and the <monospace>distance method</monospace>, we used the code provided by the authors (<xref ref-type="bibr" rid="B12">Fulco et al. (2019)</xref>; <xref ref-type="bibr" rid="B25">Moore et al. (2020a)</xref>), with some adjustments, while for the last one we downloaded the predictions provided by the authors (<xref ref-type="bibr" rid="B39">Whalen et al. (2016)</xref>)<xref ref-type="fn" rid="fn3">
<sup>3</sup>
</xref>. The obtained results are presented on <xref ref-type="fig" rid="F3">Figure 3</xref> for <monospace>BENGI</monospace>, and on <xref ref-type="fig" rid="F4">Figure 4</xref> for <monospace>CRiFF</monospace> and <monospace>CRISPRi</monospace> (see <italic>Material and methods</italic>). Note that while Fulco et al. provided a code associated to their proposed method<xref ref-type="fn" rid="fn4">
<sup>4</sup>
</xref>, Moore et al. only provided a code for the evaluation of their proposed method on a given evaluation set, which is less generic<xref ref-type="fn" rid="fn5">
<sup>5</sup>
</xref>. In addition, the fact that we used <monospace>TargetFinder</monospace>&#x2019;s already thresholded predictions only allowed us to compute a single pair of (precision, recall) values for each reference set, and explains the absence of AUPR curves for this tool. In fact, two different pairs of (precision, recall) values, a pessimistic one and an optimistic one, were computed and plotted for <monospace>TargetFinder</monospace>, leading to two different dots for <monospace>TargetFinder</monospace> on the plots, <monospace>TargetFinder_pes</monospace> and <monospace>TargetFinder_opt</monospace> (see <italic>Material and methods</italic> for details).</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>Performances of <monospace>Average-Rank</monospace>, <monospace>distance</monospace>, <monospace>ABC model</monospace> and <monospace>TargetFinder</monospace> methods on the six datasets of the GM12878 <monospace>BENGI</monospace> evaluation set (all pairs, natural ratio). For each method except <monospace>TargetFinder</monospace>, a Precision-Recall curve and an AUPR (Area Under the Precision-Recall curve) are provided. For <monospace>TargetFinder</monospace>, the two dots, <monospace>TargetFinder_pes</monospace> and <monospace>TargetFinder_opt</monospace>, correspond to two different ways of computing recall, a pessimistic and an optimistic one (see <italic>Material and methods</italic> and <xref ref-type="table" rid="T2">Table 2</xref> for more details).</p>
</caption>
<graphic xlink:href="fbinf-03-1092853-g003.tif"/>
</fig>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>Performances of <monospace>ABC model</monospace>, <monospace>distance</monospace>, <monospace>Average-Rank</monospace> and <monospace>TargetFinder</monospace> methods on the K562 <monospace>CRiFF</monospace> <bold>(A)</bold> and <monospace>CRISPRi</monospace> <bold>(B)</bold> evaluation sets. For each method except <monospace>TargetFinder</monospace>, a Precision-Recall curve and an AUPR (Area Under the Precision-Recall curve) are provided. For <monospace>TargetFinder</monospace>, the two dots, <monospace>TargetFinder_pes</monospace> and <monospace>TargetFinder_opt</monospace>, correspond to two different ways of computing recall, a pessimistic and an optimistic one (see <italic>Material and methods</italic> and <xref ref-type="table" rid="T2">Table 2</xref> for more details).</p>
</caption>
<graphic xlink:href="fbinf-03-1092853-g004.tif"/>
</fig>
<p>We replicated the results of the two evaluation papers (<xref ref-type="bibr" rid="B12">Fulco et al. (2019)</xref>; <xref ref-type="bibr" rid="B25">Moore et al. (2020a)</xref>). The curves and AUPRs (Area Under the Precision-Recall curve) of the <monospace>Average-Rank</monospace> and the <monospace>distance</monospace> methods of <xref ref-type="fig" rid="F3">Figure 3</xref> are in agreement with Figure S2 of <xref ref-type="bibr" rid="B25">Moore et al. (2020a)</xref> derived for the GM12878 cell line (all pairs, natural ratio). Similarly, the curves and AUPRs of the <monospace>ABC model</monospace> and the <monospace>distance method</monospace> of <xref ref-type="fig" rid="F4">Figure 4A</xref> agree with Figure 3A of <xref ref-type="bibr" rid="B12">Fulco et al. (2019)</xref>. Like <xref ref-type="bibr" rid="B25">Moore et al. (2020a)</xref>, we also found that <monospace>TargetFinder</monospace> performs better than <monospace>Average-Rank</monospace>, except on eQTL reference sets. Altogether, these positive controls confirmed the validity of the pipeline we implemented.</p>
<p>
<xref ref-type="fig" rid="F3">Figure 3</xref> further shows low AUPR values for the first three methods on the six <monospace>BENGI</monospace> datasets, and that <monospace>TargetFinder</monospace> performs best overall, followed by <monospace>Average-Rank</monospace>, <monospace>distance</monospace> and <monospace>ABC model</monospace>. Note that <monospace>TargetFinder</monospace> is much more precise than sensitive, and performs much better on HiC and CTCF sets. This last result can be explained by the fact that <monospace>TargetFinder</monospace> learns true E/G based on HiC data. Nevertheless, <xref ref-type="fig" rid="F4">Figure 4A</xref> shows larger AUPRs for the three last methods on the <monospace>CRiFF</monospace> set, and that the <monospace>ABC model</monospace> performs best (<italic>AUPR</italic> &#x3d; 0.63), before the <monospace>distance method</monospace> and finally the <monospace>Average-Rank method</monospace>. Contrary to its result on <monospace>BENGI</monospace>, <monospace>TargetFinder</monospace> does not perform well on <monospace>CRiFF</monospace>.</p>
<p>When comparing the performances of the methods on <monospace>CRISPRi</monospace> with respect to <monospace>CRiFF</monospace>, if the methods that perform best (<monospace>ABC model</monospace>) and worst (<monospace>TargetFinder</monospace>) are the same, it has to be noted that <monospace>Average-Rank</monospace> performs better than <monospace>distance</monospace> and that the AUPRs are globally lower (<xref ref-type="fig" rid="F4">Figure 4B</xref>). It is also important to note that on half of the evaluation sets used here, the number of <monospace>TargetFinder</monospace>&#x2019;s validated E/G relationships is lower than or equal to 9 (<xref ref-type="table" rid="T2">Table 2</xref>), showing that this tool is not easily applicable and might only deliver predictions for small subsets of references.</p>
<table-wrap id="T2" position="float">
<label>TABLE 2</label>
<caption>
<p>
<monospace>TargetFinder</monospace>&#x2019;s performances on <monospace>BENGI</monospace>, <monospace>CRiFF</monospace> and <monospace>CRISPRi</monospace>. <italic>&#x23;Predicted</italic> refers to the number of positive (TP &#x2b; FP) and negative (TN &#x2b; FN) predictions that were also in the reference set, and is used to compute Precision. <italic>Ground positives pes.</italic> refers to the total number of ground positive elements in the reference set, while <italic>Ground Positives opt.</italic> refers to the subset of those that were also in the set of E/G relationships <monospace>TargetFinder</monospace> started from. <italic>Recall pes.</italic> and <italic>Recall opt.</italic> are computed from <italic>Ground Positives pes.</italic> and <italic>Ground Positives opt.</italic> respectively, therefore corresponding to pessimistic and optimistic estimations of recall respectively.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Evaluation set</th>
<th align="center">Source</th>
<th align="center">True positives</th>
<th align="center">&#x23;Predicted</th>
<th align="center">Ground positives pes</th>
<th align="center">Ground positives opt</th>
<th align="center">Precision (in %)</th>
<th align="center">Recall pes (in %)</th>
<th align="center">Recall opt (in %)</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td rowspan="6" align="center">
<monospace>BENGI</monospace> (GM12878)</td>
<td align="center">GEUVADIS eQTL</td>
<td align="center">9</td>
<td align="center">35</td>
<td align="center">2,073</td>
<td align="center">61</td>
<td align="center">25.7</td>
<td align="center">0.43</td>
<td align="center">14.75</td>
</tr>
<tr>
<td align="center">CHi-C</td>
<td align="center">342</td>
<td align="center">456</td>
<td align="center">88,245</td>
<td align="center">2,986</td>
<td align="center">75.0</td>
<td align="center">0.39</td>
<td align="center">11.45</td>
</tr>
<tr>
<td align="center">CTCF ChIA-PET</td>
<td align="center">143</td>
<td align="center">211</td>
<td align="center">7,591</td>
<td align="center">382</td>
<td align="center">67.8</td>
<td align="center">1.88</td>
<td align="center">37.43</td>
</tr>
<tr>
<td align="center">GTEx eQTL</td>
<td align="center">2</td>
<td align="center">15</td>
<td align="center">1,301</td>
<td align="center">33</td>
<td align="center">13.3</td>
<td align="center">0.15</td>
<td align="center">6.06</td>
</tr>
<tr>
<td align="center">HiC</td>
<td align="center">564</td>
<td align="center">592</td>
<td align="center">3,404</td>
<td align="center">792</td>
<td align="center">95.3</td>
<td align="center">16.57</td>
<td align="center">71.21</td>
</tr>
<tr>
<td align="center">Pol II ChIA-PET</td>
<td align="center">222</td>
<td align="center">290</td>
<td align="center">23,699</td>
<td align="center">911</td>
<td align="center">76.6</td>
<td align="center">0.94</td>
<td align="center">24.37</td>
</tr>
<tr>
<td align="center">
<monospace>CRiFF</monospace> (K562)</td>
<td align="center">
<monospace>CRiFF</monospace>
</td>
<td align="center">4</td>
<td align="center">17</td>
<td align="center">103</td>
<td align="center">6</td>
<td align="center">23.5</td>
<td align="center">3.88</td>
<td align="center">66.67</td>
</tr>
<tr>
<td align="center">
<monospace>CRISPRi</monospace> (K562)</td>
<td align="center">
<monospace>CRISPRi</monospace>
</td>
<td align="center">3</td>
<td align="center">10</td>
<td align="center">651</td>
<td align="center">35</td>
<td align="center">30.0</td>
<td align="center">0.46</td>
<td align="center">12.0</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Therefore, state-of-the-art E/G identification methods do not perform very well overall, and using 3D or genetic screening data as reference provides completely opposite answers to the question of the best performing E/G identification method.</p>
</sec>
</sec>
<sec sec-type="discussion" id="s3">
<title>3 Discussion</title>
<p>The poor performance (small precision values even for small recall values) of the <monospace>ABC model</monospace> on the <monospace>BENGI</monospace> sets could be due to the fact that <monospace>BENGI</monospace>&#x2019;s underlying data (HiC, promoter capture HiC, ChIA-PET and RNA-seq) were not specifically designed to identify E/G relationships. For instance, some E/G relationships may not need spatial proximity or the presence of CTCF to operate (<xref ref-type="bibr" rid="B31">Ray-Jones and Spivakov (2021)</xref>). Likewise the presence of an eQTL in a predicted enhancer does not necessarily imply the presence of an E/G relationship. The poor performances of the <monospace>Average-Rank</monospace> and the <monospace>TargetFinder</monospace> methods (small precision values even for small recall values) on the <monospace>CRiFF</monospace> and <monospace>CRISPRi</monospace> data are more difficult to explain as these techniques should be quite exhaustive in identifying the enhancers of a given gene. However, the authors of <monospace>CRiFF</monospace> state in their paper that &#x201c;CRISPRi might fail to discover certain regulatory elements, for example, due to differential sensitivity to KRAB-mediated inhibition&#x201d; (<xref ref-type="bibr" rid="B12">Fulco et al. (2019)</xref>). The <monospace>CRISPRi</monospace> set also only includes intergenic enhancers, which could seem quite restrictive knowing that there should be a large number of intronic enhancers as well. Why this would affect the <monospace>Average-Rank</monospace> and <monospace>TargetFinder</monospace> methods more than the <monospace>ABC model</monospace> still requires further investigation. Looking for a good compromise between the two types of evaluations, at first glance, the baseline <monospace>distance method</monospace> could appear as the best one, with the most stable results across evaluation sets. 
However, in addition to the fact that it is one of the worst methods on <monospace>CRISPRi</monospace> (<xref ref-type="fig" rid="F4">Figure 4B</xref>), we know this method does not work well in many cases (<xref ref-type="bibr" rid="B20">Krivega and Dean (2012)</xref>; <xref ref-type="bibr" rid="B27">Mumbach et al. (2017)</xref>; <xref ref-type="bibr" rid="B28">Nasser et al. (2021)</xref>). Altogether our results illustrate the challenge in defining the best approach for E/G inference.</p>
<p>Because they were specifically designed to identify E/G relationships for a selected set of genes or enhancers, the <monospace>CRiFF</monospace> and the <monospace>CRISPRi</monospace> techniques seem to be better suited to generate true E/G reference/evaluation data. Therefore if we really had to select an E/G relationship identification method, then we would choose the one that performs best on the <monospace>CRiFF</monospace> and <monospace>CRISPRi</monospace> data, namely, the <monospace>ABC model</monospace>. Another, more practical, reason to select the <monospace>ABC model</monospace> over the <monospace>Average-Rank</monospace> and <monospace>TargetFinder</monospace> methods is that a dedicated and more importantly well documented software has been made available to the community by its authors<xref ref-type="fn" rid="fn6">
<sup>6</sup>
</xref>. Implementing the <monospace>Average-Rank</monospace> and <monospace>TargetFinder</monospace> methods can be more challenging. Finally, contrary to the <monospace>Average-Rank method</monospace> that requires gene expression and chromatin accessibility data on many cell types (here 112), and to <monospace>TargetFinder</monospace> that requires tens of data types on the cell type of interest, the <monospace>ABC model</monospace> only requires two types of data (open chromatin and H3K27ac) on the cell type of interest. This substantially broadens the scope of application, as the actual amount of data available is likely going to be limited in many real data settings.</p>
<p>One of the limitations of the <monospace>CRiFF</monospace> technique is that it does not provide genome-wide results. In the present study, the <monospace>CRiFF</monospace> data we used cover 58 different genes located in 21 different genomic regions ranging from 1&#xa0;<italic>Mb</italic> to 4&#xa0;<italic>Mb</italic> in size, which represents less than 1 percent of the genome. The <monospace>CRISPRi</monospace> technique is expected to be more representative of the genome, but it produced results that were similar to <monospace>CRiFF</monospace>. Another potential bias could come from the use of the K562 cancer cell line which is the only cell line for which there was sufficient <monospace>CRiFF</monospace> data. Even if the authors have performed more CRISPRi-FlowFISH experiments since our study (283 true validated and 5,756 false E/G in 11 cell types, <xref ref-type="bibr" rid="B28">Nasser et al. (2021)</xref>), this type of reference data remains not genome-wide and biased toward cancer cell lines, like <monospace>CRISPRi</monospace>.</p>
<p>Altogether our results call for the generation of more complete and reliable E/G relationship reference/evaluation data, rather than for new more elaborate E/G relationship identification methods, such as the ones that are currently being developed. A more reliable genome-wide set of E/G would indeed make it possible to better evaluate the numerous already existing E/G relationship identification methods that are based on 1D data (i.e., functional link methods), in order to finally reach a consensus in this field, and be able to answer numerous questions related to cell function and disease.</p>
</sec>
<sec sec-type="materials|methods" id="s4">
<title>4 Materials and methods</title>
<sec id="s4-1">
<title>4.1 Pairwise chromatin accessibility correlation across cell types</title>
<p>In order to illustrate unsupervised/heuristic enhancer/gene identification methods, we chose the simplest one, the pairwise chromatin accessibility correlation across cell types, and represented it on <xref ref-type="fig" rid="F2">Figure 2</xref>. The actions that led to this figure are the following: we first downloaded the ENCODE uniformly processed read alignments (<monospace>bam</monospace> files) of DNAse-seq data (single end) from 10 cell types: stomach, HepG2, K562, thymus, adrenal gland, small intestine, GM12878, IMR-90, heart and H1-hESC, with accession numbers provided in <xref ref-type="table" rid="T3">Table 3</xref>. We then called the chromatin accessibility peaks from the mapped reads in each cell type using macs2<xref ref-type="fn" rid="fn7">
<sup>7</sup>
</xref> (<xref ref-type="fig" rid="F2">Figure 2</xref>; <xref ref-type="table" rid="T3">Table 3</xref>).</p>
<table-wrap id="T3" position="float">
<label>TABLE 3</label>
<caption>
<p>ENCODE cell types and accession numbers of associated DNAse-seq alignment <monospace>bam</monospace> files.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">ENCODE cell type</th>
<th align="center">
<monospace>bam</monospace> file accession number</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">stomach</td>
<td align="center">ENCFF703DYP</td>
</tr>
<tr>
<td align="center">HepG2</td>
<td align="center">ENCFF343CEI</td>
</tr>
<tr>
<td align="center">K562</td>
<td align="center">ENCFF224FMI</td>
</tr>
<tr>
<td align="center">thymus</td>
<td align="center">ENCFF067LVL</td>
</tr>
<tr>
<td align="center">adrenal gland</td>
<td align="center">ENCFF900LLD</td>
</tr>
<tr>
<td align="center">small intestine</td>
<td align="center">ENCFF315TUQ</td>
</tr>
<tr>
<td align="center">GM12878</td>
<td align="center">ENCFF246VVI</td>
</tr>
<tr>
<td align="center">IMR-90</td>
<td align="center">ENCFF775ZJX</td>
</tr>
<tr>
<td align="center">heart</td>
<td align="center">ENCFF923SKV</td>
</tr>
<tr>
<td align="center">H1-hESC</td>
<td align="center">ENCFF869SQU</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>We obtained from about 60,000 (GM12878) to about 200,000 (IMR-90) peaks per cell type. By concatenating, sorting and merging on the genome the peaks called in each cell type using <monospace>bedtools merge</monospace>, we then obtained 473,766 consensus peaks across all cell types. We then quantified the chromatin accessibility of the 473,766 consensus peaks in each cell type by simply counting the number of mapped reads of each cell type overlapping each consensus peak using <monospace>bedtools intersect</monospace>, and normalized the number of reads of each peak in each cell type by the total number of mapped reads in peaks for this cell type. Finally we computed the consensus peak pairwise Pearson correlation between the <italic>log</italic>10 of the normalized chromatin accessibility across the 10 cell types of these peaks for all pairs of peaks less distant than 500&#xa0;<italic>kb</italic> using a script that we wrote: <monospace>compute_correlations.py</monospace>
<xref ref-type="fn" rid="fn8">
<sup>8</sup>
</xref>. We then only considered as E/G relationships, the pairs of peaks with a correlation above 0.7 and for which one of the two peaks overlapped the most 5&#x2019; bp (TSS) of a Gencode v19<xref ref-type="fn" rid="fn9">
<sup>9</sup>
</xref> gene (vertical green rectangles on <xref ref-type="fig" rid="F2">Figure 2</xref>).</p>
</sec>
<sec id="s4-2">
<title>4.2 Method evaluation</title>
<p>In addition to evaluating the <monospace>ABC model</monospace> on <monospace>BENGI</monospace> and the <monospace>Average-Rank</monospace> and the <monospace>TargetFinder</monospace> methods on <monospace>CRiFF</monospace>, and since Moore et al. provided the code of the <monospace>Average-Rank method</monospace>, and Fulco et al. the code to run the <monospace>ABC model</monospace>, we decided to try and reproduce the evaluation of the <monospace>Average-Rank method</monospace> on <monospace>BENGI</monospace> and of the <monospace>ABC model</monospace> on <monospace>CRiFF</monospace>. We also used the code of the <monospace>distance method</monospace> provided by Moore et al. (with some modifications), to evaluate the baseline <monospace>distance method</monospace> on <monospace>BENGI</monospace>, <monospace>CRiFF</monospace> and <monospace>CRISPRi</monospace> (<xref ref-type="table" rid="T1">Tables 1</xref>, <xref ref-type="table" rid="T2">2</xref>; <xref ref-type="fig" rid="F3">Figures 3</xref>, <xref ref-type="fig" rid="F4">4</xref>).</p>
<p>In total we evaluated four methods, the <monospace>ABC model</monospace>, the <monospace>Average-Rank</monospace>, the <monospace>distance</monospace> and the <monospace>TargetFinder</monospace> methods on three reference sets, <monospace>BENGI</monospace>, <monospace>CRiFF</monospace> and <monospace>CRISPRi</monospace> (<xref ref-type="fig" rid="F3">Figures 3</xref>, <xref ref-type="fig" rid="F4">4</xref>). It has to be noted that contrary to the other methods, <monospace>TargetFinder</monospace>&#x2019;s predictions were downloaded directly from its authors&#x2019; website<xref ref-type="fn" rid="fn10">
<sup>10</sup>
</xref>, therefore only allowing us to compute a single pair of (precision, recall) values, and not AUPR curves. In fact, we used two different ways to compute <monospace>TargetFinder</monospace>&#x2019;s recall, an optimistic and a pessimistic one, which led to two different dots for this tool in the evaluation plots (see sections below about <monospace>TargetFinder</monospace>). In addition, since the code to generate the Precision-Recall curves and the AUPRs was not provided in the papers, we generated our own R code to make these plots using existing R packages. The code used to perform all these analyses was stored in Jupyter notebooks that we provide below, together with additional details about these analyses.</p>
<sec id="s4-2-1">
<title>4.2.1 Method evaluation on <monospace>BENGI</monospace>
</title>
<p>The Moore et al.&#x2019;s code, reference data and annotation were first downloaded from the <monospace>BENGI</monospace> github repository<xref ref-type="fn" rid="fn11">
<sup>11</sup>
</xref>. More precisely the <monospace>Scripts</monospace> directory included, on the one hand the scripts to make the <monospace>BENGI</monospace> sets, and on the other hand the scripts to run the evaluation of the methods on a given <monospace>BENGI</monospace> set (note that other cell types than GM12878 were provided). It is important to bear in mind that the script corresponding to a method was not a generic script allowing one to retrieve all the E/G relationships called by this method in a particular cell type, but rather only produced evaluation data of this method on a given <monospace>BENGI</monospace> set, i.e., attached to each true and false E/G of a <monospace>BENGI</monospace> set, the score of the method&#x2019;s associated prediction (to be used to draw the Precision-Recall curves and compute the AUPRs). Since we could not run any of the scripts from Moore et al. without modifying them, sometimes quite deeply, we suspect these scripts were provided to give a general idea of the underlying analyses rather than to be used as such. No mention of program versions was provided either, which again hampers reproducibility.</p>
<sec id="s4-2-1-1">
<title>4.2.1.1 Evaluating the <monospace>distance method</monospace> on <monospace>BENGI</monospace>
</title>
<p>To evaluate the <monospace>distance method</monospace> on the <monospace>BENGI</monospace> sets, we used a slightly modified version of the <monospace>Run-Distance-Method.sh</monospace> script provided by Moore et al. This script takes as input a string defining the <monospace>BENGI</monospace> set (<monospace>celltype.settype</monospace>, for instance <monospace>GM12878.CHiC</monospace>), the version of the <monospace>BENGI</monospace> set (here <monospace>v3</monospace>), the mode (here <monospace>normal</monospace>), the expression threshold (here 0.2 but this parameter is not used in normal mode) and the output path. In normal mode, this script calls the <monospace>rank.distance.py</monospace> script on the set of human TSSs, the set of all <monospace>cCREs</monospace> (candidate cis-regulatory elements) and the <monospace>BENGI</monospace> set. It then outputs a 2 column file including for each E/G of the <monospace>BENGI</monospace> set on a row, 1 or 0 according to whether this E/G is true or false according to the <monospace>BENGI</monospace> set and the score provided by the <monospace>distance method</monospace> which is defined as the inverse of the smallest distance between a TSS of G and the enhancer E. Our modification consisted in adding two additional columns to this tabulated file, one for the enhancer id and one for the gene id, this for an easier downstream fusion with the evaluation result of the <monospace>Sheffield method</monospace>. For this purpose we also had to modify the <monospace>Run-Distance-Method.sh</monospace> script so that it sorts the 4 column tabulated file provided by the python script according to the enhancer id and the gene id. After running the evaluation script we plotted the Precision-Recall curves using existing R packages. 
The following Jupyter notebook provides all the necessary information for evaluating the <monospace>distance method</monospace> on the <monospace>BENGI</monospace> sets<xref ref-type="fn" rid="fn12">
<sup>12</sup>
</xref>.</p>
</sec>
<sec id="s4-2-1-2">
<title>4.2.1.2 Evaluating the <monospace>Average-Rank method</monospace> on <monospace>BENGI</monospace>
</title>
<p>To evaluate the <monospace>Average-Rank method</monospace> on the <monospace>BENGI</monospace> sets, we first had to run the <monospace>Sheffield method</monospace> (correlation between open chromatin at E and expression level at G) on each <monospace>BENGI</monospace> set.</p>
<p>For this we first downloaded the DNAse Hypersensitivity (DHS) peaks with their chromatin accessibility in 112 cell types (<monospace>dhs112_v3.bed</monospace> file) and the genes with their expression levels in the same 112 cell types (<monospace>exp112.bed</monospace> file) from the web<xref ref-type="fn" rid="fn13">
<sup>13</sup>
</xref> and as indicated in page 14 of <xref ref-type="bibr" rid="B25">Moore et al. (2020a)</xref>. We then ran the <monospace>Run-Sheffield.sh</monospace> script that evaluates the <monospace>Sheffield method</monospace> on a given <monospace>BENGI</monospace> set. This script takes as input a string defining the <monospace>BENGI</monospace> set, the version of the <monospace>BENGI</monospace> set and the output path. It then makes the set of enhancers of the <monospace>BENGI</monospace> set in <monospace>bed</monospace> format, the enhancer matrix with these enhancers in rows and their chromatin accessibility in the 112 cell types in columns, the genes of the <monospace>BENGI</monospace> set in <monospace>bed</monospace> format, the matrix of these genes in rows with their expression levels in the 112 cell types in columns, and then calls the <monospace>sheffield.correlation.py</monospace> script. This script takes as input a matrix of gene expression in the 112 cell types, the gene file in <monospace>bed</monospace> format, the enhancer matrix, a gene summary file, the <monospace>BENGI</monospace> set and the cell type. It then outputs a 6 column file including for each E/G of the <monospace>BENGI</monospace> set on a row, 1 or 0 according to whether this E/G is true or false in the <monospace>BENGI</monospace> set, the Pearson correlation between the chromatin accessibility at E and the expression level at G across the 112 cell types, the <italic>p</italic>-value, the Z-score, the enhancer id and the gene id.</p>
<p>In fact we had to modify the <monospace>Run-Sheffield.sh</monospace> script and the <monospace>sheffield.correlation.py</monospace> script to make them work. The complete process to run the <monospace>Sheffield method</monospace> on the <monospace>BENGI</monospace> sets can be found on this page<xref ref-type="fn" rid="fn14">
<sup>14</sup>
</xref>.</p>
<p>Finally we ran the <monospace>Run-Average-Rank.sh</monospace> script that evaluates the <monospace>Average-Rank method</monospace> on a <monospace>BENGI</monospace> set. This script takes as input the <monospace>BENGI</monospace> set and its version, and outputs a 7 column tabulated file including for each E/G of the <monospace>BENGI</monospace> set, 1 or 0 according to whether this E/G is true or false in <monospace>BENGI</monospace>, the average rank score, the distance score, the correlation score, the distance rank, the correlation rank and the average rank between the distance and the correlation. Here we also had to modify the bash script to make it run but more importantly to correct a bug. The exact process and modifications are provided in<xref ref-type="fn" rid="fn15">
<sup>15</sup>
</xref>.</p>
<p>Once again we plotted the Precision-Recall curves using an R code of our own. The complete process to evaluate the <monospace>Average-Rank method</monospace> on the <monospace>BENGI</monospace> sets can be found here<xref ref-type="fn" rid="fn16">
<sup>16</sup>
</xref>.</p>
</sec>
<sec id="s4-2-1-3">
<title>4.2.1.3 Evaluating the <monospace>ABC model</monospace> on <monospace>BENGI</monospace>
</title>
<p>In order to evaluate the <monospace>ABC model</monospace> on the <monospace>BENGI</monospace> sets, we downloaded the <monospace>ABC model</monospace> code from its github repository<xref ref-type="fn" rid="fn17">
<sup>17</sup>
</xref>. Although the complete process is not a pipeline but is rather made of several steps to run one after the other, the documentation was so pedagogic and complete that we had no particular issue running the <monospace>ABC model</monospace> on GM12878 data. We also found the tools and associated version to use. Nonetheless and for the sake of reproducibility the complete process is detailed in this notebook<xref ref-type="fn" rid="fn18">
<sup>18</sup>
</xref>.</p>
</sec>
<sec id="s4-2-1-4">
<title>4.2.1.4 Evaluating <monospace>TargetFinder</monospace> on <monospace>BENGI</monospace>
</title>
<p>In order to evaluate <monospace>TargetFinder</monospace> on the <monospace>BENGI</monospace> sets, we first downloaded <monospace>TargetFinder</monospace>&#x2019;s GM12878 predictions from the dedicated github repository<xref ref-type="fn" rid="fn19">
<sup>19</sup>
</xref>. We used the GBM classifier including Enhancer-Promoter windows (EPW). The prediction file was made of all true and false GM12878 HiC loops (44,313 in total, of which 2,113 are true and 42,200 are false) associated to whether <monospace>TargetFinder</monospace> predicted an E/G or not.</p>
<p>In order to compute <monospace>TargetFinder</monospace>&#x2019;s precision and recall on each of the 6 <monospace>BENGI</monospace> sets, we first computed <monospace>TargetFinder</monospace>&#x2019;s true positives (TPs) on each set, i.e., <monospace>TargetFinder</monospace>&#x2019;s predictions that corresponded to a ground positive E/G of the <monospace>BENGI</monospace> set. To do so, we first had to convert the enhancer and promoter coordinates of the <monospace>TargetFinder</monospace>&#x2019;s prediction file into <monospace>cCRE-ELS</monospace> (candidate cis-regulatory elements with enhancer like signature) ids and gene ids respectively. For that we used <monospace>bedtools intersect</monospace> on the GM12878 <monospace>cCRE-ELS</monospace> file and the Gencode v19 TSS file from Moore et al. respectively. In total we found 342, 222, 143, 564, 9 and 2&#xa0;TPs for CHIC, RNAPII ChIA-PET, CTCF ChIA-PET, HiC, Geuvadis and GTEx (<xref ref-type="table" rid="T2">Table 2</xref>).</p>
<p>Precision was then computed by dividing these numbers by the sum of these numbers and <monospace>TargetFinder</monospace>&#x2019;s false positive predictions according to <monospace>BENGI</monospace>.</p>
<p>Recall was computed in two different ways: by dividing the TPs 1) by the total number of <monospace>BENGI</monospace> ground positive E/G (Recall_pes, in reference to pessimistic recall, giving rise to the TargetFinder_pes dot on the plot) and 2) by the subset of <monospace>BENGI</monospace> ground positive E/G that were also in the initial set of 44,313&#xa0;E/G relationships given as input to <monospace>TargetFinder</monospace> (Recall_opt, in reference to optimistic recall, giving rise to the TargetFinder_opt dot on the plot).</p>
<p>The performances of <monospace>TargetFinder</monospace> on <monospace>BENGI</monospace> are indicated on <xref ref-type="table" rid="T2">Table 2</xref>.</p>
</sec>
</sec>
<sec id="s4-2-2">
<title>4.2.2 Method evaluation on CRISPRi-FlowFISH (<monospace>CRiFF</monospace>)</title>
<p>To obtain the <monospace>CRiFF</monospace> reference set we first downloaded Table S6a from <xref ref-type="bibr" rid="B12">Fulco et al. (2019)</xref> as a <monospace>tsv</monospace> file, and then obtained the 109 ground positive and the 3,754 ground negative E/G relationships by applying the filters detailed in<xref ref-type="fn" rid="fn20">
<sup>20</sup>
</xref> (the ground negatives are defined as either not significant or not associated with a decrease in gene expression).</p>
<p>In order to be able to use almost the same scripts as above for the <monospace>distance</monospace> and the <monospace>Average-Rank</monospace> methods, we first intersected the enhancers of the <monospace>CRiFF</monospace> set with the ENCODE <monospace>cCRE-ELS</monospace> (candidate cis-regulatory element with enhancer like signature) provided and used by Moore et al. This process is described in the three notebooks below. Note that we only slightly modified the <monospace>distance</monospace> and <monospace>Average-Rank</monospace> methods scripts used above for <monospace>BENGI</monospace> and GM12878 in order to run them on <monospace>CRiFF</monospace> and K562 (see notebooks below).</p>
<sec id="s4-2-2-1">
<title>4.2.2.1 Evaluating the <monospace>distance method</monospace> on <monospace>CRiFF</monospace>
</title>
<p>The complete process for this evaluation is provided in the following notebook<xref ref-type="fn" rid="fn21">
<sup>21</sup>
</xref>.</p>
</sec>
<sec id="s4-2-2-2">
<title>4.2.2.2 Evaluating the <monospace>Average-Rank method</monospace> on <monospace>CRiFF</monospace>
</title>
<p>The complete process for this evaluation is provided in the following notebook<xref ref-type="fn" rid="fn22">
<sup>22</sup>
</xref>.</p>
</sec>
<sec id="s4-2-2-3">
<title>4.2.2.3 Evaluating the <monospace>ABC model</monospace> on <monospace>CRiFF</monospace>
</title>
<p>The complete process for this evaluation is provided in the following notebook<xref ref-type="fn" rid="fn23">
<sup>23</sup>
</xref>.</p>
</sec>
<sec id="s4-2-2-4">
<title>4.2.2.4 Evaluating <monospace>TargetFinder</monospace> on <monospace>CRiFF</monospace>
</title>
<p>In order to evaluate <monospace>TargetFinder</monospace> on the <monospace>CRiFF</monospace> set, we first downloaded <monospace>TargetFinder</monospace>&#x2019;s K562 predictions from the dedicated github repository<xref ref-type="fn" rid="fn24">
<sup>24</sup>
</xref>. We used the GBM classifier including Enhancer-Promoter windows (EPW). The prediction file was made of all true and false K562 HiC loops (41,477 in total, of which 1,977 are true and 39,500 are false) associated with whether <monospace>TargetFinder</monospace> predicted an E/G or not.</p>
<p>In order to compute <monospace>TargetFinder</monospace>&#x2019;s precision and recall on the <monospace>CRiFF</monospace> set, we first computed <monospace>TargetFinder</monospace>&#x2019;s true positives (TPs), i.e., <monospace>TargetFinder</monospace>&#x2019;s predictions that corresponded to a ground positive E/G of the <monospace>CRiFF</monospace> set. To do so, we first had to convert the enhancer and promoter coordinates of the <monospace>TargetFinder</monospace>&#x2019;s prediction file into <monospace>cCRE-ELS</monospace> (candidate cis-regulatory elements with enhancer-like signature) ids and gene ids respectively. For that we used <monospace>bedtools intersect</monospace> on the K562 <monospace>cCRE-ELS</monospace> file and the Gencode v19 TSS file from Moore et al. respectively. In total we only found 4&#xa0;TPs (<xref ref-type="table" rid="T2">Table 2</xref>).</p>
<p>Precision was then computed by dividing these numbers by the sum of these numbers and <monospace>TargetFinder</monospace>&#x2019;s false positive predictions according to <monospace>CRiFF</monospace>.</p>
<p>Recall was computed in two different ways: by dividing the TPs 1) by the total number of <monospace>CRiFF</monospace> ground positive E/G (Recall_pes, in reference to pessimistic recall, giving rise to the TargetFinder_pes dot on the plot) and 2) by the subset of <monospace>CRiFF</monospace> ground positive E/G that were also in the initial set of 41,477&#xa0;E/G relationships given as input to <monospace>TargetFinder</monospace> (Recall_opt, in reference to optimistic recall, giving rise to the TargetFinder_opt dot on the plot).</p>
<p>The performances of <monospace>TargetFinder</monospace> on <monospace>CRiFF</monospace> are indicated on <xref ref-type="table" rid="T2">Table 2</xref>.</p>
</sec>
</sec>
<sec id="s4-2-3">
<title>4.2.3 Method evaluation on <monospace>CRISPRi</monospace>
</title>
<p>The K562 <monospace>CRISPRi</monospace> set was obtained from the <monospace>BENGI</monospace>&#x2019;s github repository<xref ref-type="fn" rid="fn25">
<sup>25</sup>
</xref>. It included 651 ground positive and 24,576 ground negative E/G (<xref ref-type="table" rid="T1">Table 1</xref>).</p>
<sec id="s4-2-3-1">
<title>4.2.3.1 Evaluating the <monospace>distance method</monospace> on <monospace>CRISPRi</monospace>
</title>
<p>The evaluation of the <monospace>distance method</monospace> on the K562 <monospace>CRISPRi</monospace> set was done exactly the same way as on the GM12878 <monospace>BENGI</monospace> sets, but replacing the GM12878 <monospace>cCREs</monospace> by the K562 <monospace>cCREs</monospace> (see above).</p>
</sec>
<sec id="s4-2-3-2">
<title>4.2.3.2 Evaluating the <monospace>Average-Rank method</monospace> on <monospace>CRISPRi</monospace>
</title>
<p>The evaluation of the <monospace>Average-Rank method</monospace> on the K562 <monospace>CRISPRi</monospace> set was done exactly the same way as on the GM12878 <monospace>BENGI</monospace> sets, but replacing the GM12878 <monospace>cCREs</monospace> by the K562 <monospace>cCREs</monospace> (see above).</p>
</sec>
<sec id="s4-2-3-3">
<title>4.2.3.3 Evaluating the <monospace>ABC model</monospace> on <monospace>CRISPRi</monospace>
</title>
<p>To evaluate the <monospace>ABC model</monospace> on the K562 <monospace>CRISPRi</monospace> set, we had to rerun the <monospace>ABC model</monospace> on K562 but using a different white list from the one used for the evaluation on <monospace>CRiFF</monospace>. Indeed, the <monospace>ABC model</monospace>&#x2019;s step 1.3 called <monospace>make candidate region</monospace> can take as input a <monospace>white list</monospace> of promoters and enhancers on which to enforce predictions, and it was important to use it to ensure that all <monospace>CRISPRi</monospace> ground positives and negatives could be predicted by the <monospace>ABC model</monospace>. Here the white list we used was made of the union of all K562 <monospace>cCRE-ELS</monospace> from Moore et al., and of the Gencode v19 TSS from Moore et al. that we extended by 250<italic>bp</italic> on each side.</p>
</sec>
<sec id="s4-2-3-4">
<title>4.2.3.4 Evaluating <monospace>TargetFinder</monospace> on <monospace>CRISPRi</monospace>
</title>
<p>The evaluation of <monospace>TargetFinder</monospace> on the K562 <monospace>CRISPRi</monospace> set was done exactly the same way as on the GM12878 <monospace>BENGI</monospace> set (see above). The number of TPs was only 3. The performances of <monospace>TargetFinder</monospace> on <monospace>CRISPRi</monospace> are indicated on <xref ref-type="table" rid="T2">Table 2</xref>.</p>
</sec>
</sec>
</sec>
</sec>
</body>
<back>
<sec id="s5">
<title>Author contributions</title>
<p>TH performed the entire method evaluation, supervised by SD. CM worked on the chromatin accessibility correlation method under the supervision of SF, TF, and SD. HA, WLG, SF, and TF provided critical views on the evaluation results. SD designed the study.</p>
</sec>
<sec id="s6">
<title>Funding</title>
<p>TH was funded by INSERM and SD was partly supported by the AgreenSkills&#x2b; fellowship program with funding from the EU&#x2019;s Seventh Framework Program under grant agreement FP7-609398, and partly by INSERM (salary and young researcher grant).</p>
</sec>
<ack>
<p>We would like to thank the authors of <xref ref-type="bibr" rid="B12">Fulco et al. (2019)</xref> and <xref ref-type="bibr" rid="B25">Moore et al. (2020a)</xref> for the detailed <italic>Material and methods</italic> sections of their papers, which allowed us to reproduce the curves and AUPRs of <xref ref-type="fig" rid="F3">Figure 3A</xref> of <xref ref-type="bibr" rid="B12">Fulco et al. (2019)</xref>, and the part of Figure S2 corresponding to the GM12878 cell line (all pairs, natural ratio) for <xref ref-type="bibr" rid="B25">Moore et al. (2020a)</xref>. We would also like to thank the authors of <xref ref-type="bibr" rid="B12">Fulco et al. (2019)</xref> for their responsiveness in answering the questions we had about their study, Nathalie Vialaneix from INRAE for her advice on statistical analyses and C&#xe9;dric Cabau from INRAE for his technical help. Finally, we would like to thank the Toulouse genotoul bioinformatics platform, and in particular Marie-St&#xe9;phane Trotard and Didier Laborie, for their very efficient computing infrastructure and their help in installing and running software.</p>
</ack>
<sec sec-type="COI-statement" id="s7">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s8">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<fn-group>
<fn id="fn1">
<label>1</label>
<p>
<ext-link ext-link-type="uri" xlink:href="http://enhancer.lbl.gov/">http://enhancer.lbl.gov/</ext-link>
</p>
</fn>
<fn id="fn2">
<label>2</label>
<p>
<ext-link ext-link-type="uri" xlink:href="https://github.com/broadinstitute/ABC-Enhancer-Gene-Prediction">https://github.com/broadinstitute/ABC-Enhancer-Gene-Prediction</ext-link>
</p>
</fn>
<fn id="fn3">
<label>3</label>
<p>
<ext-link ext-link-type="uri" xlink:href="https://github.com/shwhalen/targetfinder/">https://github.com/shwhalen/targetfinder/</ext-link>
</p>
</fn>
<fn id="fn4">
<label>4</label>
<p>
<ext-link ext-link-type="uri" xlink:href="https://github.com/broadinstitute/ABC-Enhancer-Gene-Prediction">https://github.com/broadinstitute/ABC-Enhancer-Gene-Prediction</ext-link>
</p>
</fn>
<fn id="fn5">
<label>5</label>
<p>
<ext-link ext-link-type="uri" xlink:href="https://github.com/weng-lab/BENGI">https://github.com/weng-lab/BENGI</ext-link>
</p>
</fn>
<fn id="fn6">
<label>6</label>
<p>
<ext-link ext-link-type="uri" xlink:href="https://github.com/broadinstitute/ABC-Enhancer-Gene-Prediction">https://github.com/broadinstitute/ABC-Enhancer-Gene-Prediction</ext-link>
</p>
</fn>
<fn id="fn7">
<label>7</label>
<p>
<ext-link ext-link-type="uri" xlink:href="https://hoellin.github.io/eg/guidebooks/compute_correlations.html">https://hoellin.github.io/eg/guidebooks/compute_correlations.html</ext-link>
</p>
</fn>
<fn id="fn8">
<label>8</label>
<p>
<ext-link ext-link-type="uri" xlink:href="https://github.com/sdjebali/EnhancerGene">https://github.com/sdjebali/EnhancerGene</ext-link>
</p>
</fn>
<fn id="fn9">
<label>9</label>
<p>
<ext-link ext-link-type="uri" xlink:href="https://www.gencodegenes.org/">https://www.gencodegenes.org/</ext-link>
</p>
</fn>
<fn id="fn10">
<label>10</label>
<p>
<ext-link ext-link-type="uri" xlink:href="https://github.com/shwhalen/targetfinder">https://github.com/shwhalen/targetfinder</ext-link>
</p>
</fn>
<fn id="fn11">
<label>11</label>
<p>
<ext-link ext-link-type="uri" xlink:href="https://github.com/weng-lab/BENGI">https://github.com/weng-lab/BENGI</ext-link>
</p>
</fn>
<fn id="fn12">
<label>12</label>
<p>
<ext-link ext-link-type="uri" xlink:href="https://hoellin.github.io/eg/notes_BENGI/distance_method/distance_evaluation_with_code.html">https://hoellin.github.io/eg/notes_BENGI/distance_method/distance_evaluation_with_code.html</ext-link>
</p>
</fn>
<fn id="fn13">
<label>13</label>
<p>
<ext-link ext-link-type="uri" xlink:href="http://big.databio.org/papers/RED/supplement/">http://big.databio.org/papers/RED/supplement/</ext-link>
</p>
</fn>
<fn id="fn14">
<label>14</label>
<p>
<ext-link ext-link-type="uri" xlink:href="https://hoellin.github.io/eg/notes_BENGI/dnase_expression_correlation/correlation_method_with_code.html">https://hoellin.github.io/eg/notes_BENGI/dnase_expression_correlation/correlation_method_with_code.html</ext-link>
</p>
</fn>
<fn id="fn15">
<label>15</label>
<p>
<ext-link ext-link-type="uri" xlink:href="https://hoellin.github.io/eg/notes_BENGI/avg_rank_method/avg_rank_method_with_code.html#partial-reimplementation-of-run-average-ranksh">https://hoellin.github.io/eg/notes_BENGI/avg_rank_method/avg_rank_method_with_code.html&#x23;partial-reimplementation-of-run-average-ranksh</ext-link>
</p>
</fn>
<fn id="fn16">
<label>16</label>
<p>
<ext-link ext-link-type="uri" xlink:href="https://hoellin.github.io/eg/notes_BENGI/avg_rank_method/avg_rank_method_with_code.html">https://hoellin.github.io/eg/notes_BENGI/avg_rank_method/avg_rank_method_with_code.html</ext-link>
</p>
</fn>
<fn id="fn17">
<label>17</label>
<p>
<ext-link ext-link-type="uri" xlink:href="https://github.com/broadinstitute/ABC-Enhancer-Gene-Prediction">https://github.com/broadinstitute/ABC-Enhancer-Gene-Prediction</ext-link>
</p>
</fn>
<fn id="fn18">
<label>18</label>
<p>
<ext-link ext-link-type="uri" xlink:href="https://hoellin.github.io/eg/notes_ABC/BENGI/notebook_ABC_over_BENGI_GM12878_from_ccRE_ELSs.html">https://hoellin.github.io/eg/notes_ABC/BENGI/notebook_ABC_over_BENGI_GM12878_from_ccRE_ELSs.html</ext-link>
</p>
</fn>
<fn id="fn19">
<label>19</label>
<p>
<ext-link ext-link-type="uri" xlink:href="https://github.com/shwhalen/targetfinder/blob/master/paper/targetfinder/GM12878/output-epw/predictions-gbm.csv">https://github.com/shwhalen/targetfinder/blob/master/paper/targetfinder/GM12878/output-epw/predictions-gbm.csv</ext-link>
</p>
</fn>
<fn id="fn20">
<label>20</label>
<p>
<ext-link ext-link-type="uri" xlink:href="https://hoellin.github.io/eg/notes_ABC/K562/ABC_K562_CRISPRi_FlowFISH.html">https://hoellin.github.io/eg/notes_ABC/K562/ABC_K562_CRISPRi_FlowFISH.html</ext-link>
</p>
</fn>
<fn id="fn21">
<label>21</label>
<p>
<ext-link ext-link-type="uri" xlink:href="https://hoellin.github.io/eg/notes_BENGI/CRISPRi_FlowFISH/distance_method/distance_over_fulco_et_al_crispri.html">https://hoellin.github.io/eg/notes_BENGI/CRISPRi_FlowFISH/distance_method/distance_over_fulco_et_al_crispri.html</ext-link>
</p>
</fn>
<fn id="fn22">
<label>22</label>
<p>
<ext-link ext-link-type="uri" xlink:href="https://hoellin.github.io/eg/notes_BENGI/CRISPRi_FlowFISH/avg_rank_method/avg_rank_method_with_code.html">https://hoellin.github.io/eg/notes_BENGI/CRISPRi_FlowFISH/avg_rank_method/avg_rank_method_with_code.html</ext-link>
</p>
</fn>
<fn id="fn23">
<label>23</label>
<p>
<ext-link ext-link-type="uri" xlink:href="https://hoellin.github.io/eg/notes_ABC/K562/april_K562_56_genes/april_K562_56_genes.html">https://hoellin.github.io/eg/notes_ABC/K562/april_K562_56_genes/april_K562_56_genes.html</ext-link>
</p>
</fn>
<fn id="fn24">
<label>24</label>
<p>
<ext-link ext-link-type="uri" xlink:href="https://github.com/shwhalen/targetfinder/blob/master/paper/targetfinder/K562/output-epw/predictions-gbm.csv">https://github.com/shwhalen/targetfinder/blob/master/paper/targetfinder/K562/output-epw/predictions-gbm.csv</ext-link>
</p>
</fn>
<fn id="fn25">
<label>25</label>
<p>
<ext-link ext-link-type="uri" xlink:href="https://github.com/weng-lab/BENGI/tree/master/Benchmark/All-Pairs.Natural-Ratio">https://github.com/weng-lab/BENGI/tree/master/Benchmark/All-Pairs.Natural-Ratio</ext-link>
</p>
</fn>
</fn-group>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Andersson</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Gebhard</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Miguel-Escalada</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Hoof</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Bornholdt</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Boyd</surname>
<given-names>M.</given-names>
</name>
<etal/>
</person-group> (<year>2014</year>). <article-title>An atlas of active enhancers across human cell types and tissues</article-title>. <source>Nature</source> <volume>507</volume>, <fpage>455</fpage>&#x2013;<lpage>461</lpage>. <pub-id pub-id-type="doi">10.1038/nature12787</pub-id>
</citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Aran</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Sabato</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Hellman</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>DNA methylation of distal regulatory sites characterizes dysregulation of cancer genes</article-title>. <source>Genome Biol.</source> <volume>14</volume>, <fpage>R21</fpage>&#x2013;<lpage>R14</lpage>. <pub-id pub-id-type="doi">10.1186/gb-2013-14-3-r21</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bahcall</surname>
<given-names>O. G.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>GTEx pilot quantifies eQTL variation across tissues and individuals</article-title>. <source>Nat. Rev. Genet.</source> <volume>16</volume>, <fpage>375</fpage>. <pub-id pub-id-type="doi">10.1038/nrg3969</pub-id>
</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Belokopytova</surname>
<given-names>P. S.</given-names>
</name>
<name>
<surname>Nuriddinov</surname>
<given-names>M. A.</given-names>
</name>
<name>
<surname>Mozheiko</surname>
<given-names>E. A.</given-names>
</name>
<name>
<surname>Fishman</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Fishman</surname>
<given-names>V.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Quantitative prediction of enhancer&#x2013;promoter interactions</article-title>. <source>Genome Res.</source> <volume>30</volume>, <fpage>72</fpage>&#x2013;<lpage>84</lpage>. <pub-id pub-id-type="doi">10.1101/gr.249367.119</pub-id>
</citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cao</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Anyansi</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Hu</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Xiong</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Tang</surname>
<given-names>W.</given-names>
</name>
<etal/>
</person-group> (<year>2017</year>). <article-title>Reconstruction of enhancer&#x2013;target networks in 935 samples of human primary cells, tissues and cell lines</article-title>. <source>Nat. Genet.</source> <volume>49</volume>, <fpage>1428</fpage>&#x2013;<lpage>1436</lpage>. <pub-id pub-id-type="doi">10.1038/ng.3950</pub-id>
</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cheung</surname>
<given-names>V. G.</given-names>
</name>
<name>
<surname>Spielman</surname>
<given-names>R. S.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>Genetics of human gene expression: Mapping DNA variants that influence gene expression</article-title>. <source>Nat. Rev. Genet.</source> <volume>10</volume>, <fpage>595</fpage>&#x2013;<lpage>604</lpage>. <pub-id pub-id-type="doi">10.1038/nrg2630</pub-id>
</citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Consortium</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Ardlie</surname>
<given-names>K. G.</given-names>
</name>
<name>
<surname>Deluca</surname>
<given-names>D. S.</given-names>
</name>
<name>
<surname>Segr&#xe8;</surname>
<given-names>A. V.</given-names>
</name>
<name>
<surname>Sullivan</surname>
<given-names>T. J.</given-names>
</name>
<name>
<surname>Young</surname>
<given-names>T. R.</given-names>
</name>
<etal/>
</person-group> (<year>2015</year>). <article-title>Human genomics. The genotype-tissue expression (GTEx) pilot analysis: Multitissue gene regulation in humans</article-title>. <source>Science</source> <volume>348</volume>, <fpage>648</fpage>&#x2013;<lpage>660</lpage>. <pub-id pub-id-type="doi">10.1126/science.1262110</pub-id>
</citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Corradin</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Saiakhova</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Akhtar-Zaidi</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Myeroff</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Willis</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Cowper-Sal</surname>
<given-names>R.</given-names>
</name>
<etal/>
</person-group> (<year>2014</year>). <article-title>Combinatorial effects of multiple enhancer variants in linkage disequilibrium dictate levels of gene expression to confer susceptibility to common traits</article-title>. <source>Genome Res.</source> <volume>24</volume>, <fpage>1</fpage>&#x2013;<lpage>13</lpage>. <pub-id pub-id-type="doi">10.1101/gr.164079.113</pub-id>
</citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Corradin</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Scacheri</surname>
<given-names>P. C.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Enhancer variants: Evaluating functions in common disease</article-title>. <source>Genome Med.</source> <volume>6</volume>, <fpage>85</fpage>&#x2013;<lpage>14</lpage>. <pub-id pub-id-type="doi">10.1186/s13073-014-0085-3</pub-id>
</citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ernst</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Kheradpour</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Mikkelsen</surname>
<given-names>T. S.</given-names>
</name>
<name>
<surname>Shoresh</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Ward</surname>
<given-names>L. D.</given-names>
</name>
<name>
<surname>Epstein</surname>
<given-names>C. B.</given-names>
</name>
<etal/>
</person-group> (<year>2011</year>). <article-title>Mapping and analysis of chromatin state dynamics in nine human cell types</article-title>. <source>Nature</source> <volume>473</volume>, <fpage>43</fpage>&#x2013;<lpage>49</lpage>. <pub-id pub-id-type="doi">10.1038/nature09906</pub-id>
</citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fan</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Peng</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>StackEPI: Identification of cell line-specific enhancer&#x2013;promoter interactions based on stacking ensemble learning</article-title>. <source>BMC Bioinforma.</source> <volume>23</volume>, <fpage>272</fpage>&#x2013;<lpage>289</lpage>. <pub-id pub-id-type="doi">10.1186/s12859-022-04821-9</pub-id>
</citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fulco</surname>
<given-names>C. P.</given-names>
</name>
<name>
<surname>Nasser</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Jones</surname>
<given-names>T. R.</given-names>
</name>
<name>
<surname>Munson</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Bergman</surname>
<given-names>D. T.</given-names>
</name>
<name>
<surname>Subramanian</surname>
<given-names>V.</given-names>
</name>
<etal/>
</person-group> (<year>2019</year>). <article-title>Activity-by-contact model of enhancer&#x2013;promoter regulation from thousands of CRISPR perturbations</article-title>. <source>Nat. Genet.</source> <volume>51</volume>, <fpage>1664</fpage>&#x2013;<lpage>1669</lpage>. <pub-id pub-id-type="doi">10.1038/s41588-019-0538-0</pub-id>
</citation>
</ref>
<ref id="B13">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Gasperini</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Hill</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Figueroa</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Martin</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Trapnell</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Ahituv</surname>
<given-names>N.</given-names>
</name>
<etal/>
</person-group> (<year>2019</year>). &#x201c;<article-title>CRISPR-QTL mapping as a genome-wide association framework for cellular genetic screens of the noncoding genome</article-title>,&#x201d; in <source>European Journal of Human Genetics</source> (<publisher-name>Nature Publishing Group, Macmillan Building, 4 Crinan St</publisher-name>), <publisher-loc>London N1 9XW, England</publisher-loc>, <volume>27</volume>, <fpage>749</fpage>&#x2013;<lpage>750</lpage>.</citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hait</surname>
<given-names>T. A.</given-names>
</name>
<name>
<surname>Amar</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Shamir</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Elkon</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>FOCS: A novel method for analyzing enhancer and gene activity patterns infers an extensive enhancer&#x2013;promoter map</article-title>. <source>Genome Biol.</source> <volume>19</volume>, <fpage>56</fpage>&#x2013;<lpage>14</lpage>. <pub-id pub-id-type="doi">10.1186/s13059-018-1432-2</pub-id>
</citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hariprakash</surname>
<given-names>J. M.</given-names>
</name>
<name>
<surname>Ferrari</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Computational biology solutions to identify enhancers-target gene pairs</article-title>. <source>Comput. Struct. Biotechnol. J.</source> <volume>17</volume>, <fpage>821</fpage>&#x2013;<lpage>831</lpage>. <pub-id pub-id-type="doi">10.1016/j.csbj.2019.06.012</pub-id>
</citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>He</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Teng</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Tan</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Global view of enhancer&#x2013;promoter interactome in human cells</article-title>. <source>Proc. Natl. Acad. Sci.</source> <volume>111</volume>, <fpage>E2191</fpage>&#x2013;<lpage>E2199</lpage>. <pub-id pub-id-type="doi">10.1073/pnas.1320308111</pub-id>
</citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hong</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Zeng</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Wei</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>X.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Identifying enhancer&#x2013;promoter interactions with neural network based on pre-trained DNA vectors and attention mechanism</article-title>. <source>Bioinformatics</source> <volume>36</volume>, <fpage>1037</fpage>&#x2013;<lpage>1043</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btz694</pub-id>
</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jung</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Schmitt</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Diao</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Lee</surname>
<given-names>A. J.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>D.</given-names>
</name>
<etal/>
</person-group> (<year>2019</year>). <article-title>A compendium of promoter-centered long-range chromatin interactions in the human genome</article-title>. <source>Nat. Genet.</source> <volume>51</volume>, <fpage>1442</fpage>&#x2013;<lpage>1449</lpage>. <pub-id pub-id-type="doi">10.1038/s41588-019-0494-8</pub-id>
</citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kerimov</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Hayhurst</surname>
<given-names>J. D.</given-names>
</name>
<name>
<surname>Peikova</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Manning</surname>
<given-names>J. R.</given-names>
</name>
<name>
<surname>Walter</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Kolberg</surname>
<given-names>L.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>A compendium of uniformly processed human gene expression and splicing quantitative trait loci</article-title>. <source>Nat. Genet.</source> <volume>53</volume>, <fpage>1290</fpage>&#x2013;<lpage>1299</lpage>. <pub-id pub-id-type="doi">10.1038/s41588-021-00924-w</pub-id>
</citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Krivega</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Dean</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Enhancer and promoter interactions&#x2014;Long distance calls</article-title>. <source>Curr. Opin. Genet. Dev.</source> <volume>22</volume>, <fpage>79</fpage>&#x2013;<lpage>85</lpage>. <pub-id pub-id-type="doi">10.1016/j.gde.2011.11.001</pub-id>
</citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kundaje</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Meuleman</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Ernst</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Bilenky</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Yen</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Heravi-Moussavi</surname>
<given-names>A.</given-names>
</name>
<etal/>
</person-group> (<year>2015</year>). <article-title>Integrative analysis of 111 reference human epigenomes</article-title>. <source>Nature</source> <volume>518</volume>, <fpage>317</fpage>&#x2013;<lpage>330</lpage>. <pub-id pub-id-type="doi">10.1038/nature14248</pub-id>
</citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lappalainen</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Sammeth</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Friedl&#xe4;nder</surname>
<given-names>M. R.</given-names>
</name>
<name>
<surname>&#x2019;t Hoen</surname>
<given-names>P. A.</given-names>
</name>
<name>
<surname>Monlong</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Rivas</surname>
<given-names>M. A.</given-names>
</name>
<etal/>
</person-group> (<year>2013</year>). <article-title>Transcriptome and genome sequencing uncovers functional variation in humans</article-title>. <source>Nature</source> <volume>501</volume>, <fpage>506</fpage>&#x2013;<lpage>511</lpage>. <pub-id pub-id-type="doi">10.1038/nature12531</pub-id>
</citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Wong</surname>
<given-names>W. H.</given-names>
</name>
<name>
<surname>Jiang</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>DeepTACT: Predicting 3D chromatin contacts via bootstrapping deep learning</article-title>. <source>Nucleic Acids Res.</source> <volume>47</volume>, <fpage>e60</fpage>. <pub-id pub-id-type="doi">10.1093/nar/gkz167</pub-id>
</citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mifsud</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Tavares-Cadete</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Young</surname>
<given-names>A. N.</given-names>
</name>
<name>
<surname>Sugar</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Schoenfelder</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Ferreira</surname>
<given-names>L.</given-names>
</name>
<etal/>
</person-group> (<year>2015</year>). <article-title>Mapping long-range promoter contacts in human cells with high-resolution capture Hi-C</article-title>. <source>Nat. Genet.</source> <volume>47</volume>, <fpage>598</fpage>&#x2013;<lpage>606</lpage>. <pub-id pub-id-type="doi">10.1038/ng.3286</pub-id>
</citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Moore</surname>
<given-names>J. E.</given-names>
</name>
<name>
<surname>Pratt</surname>
<given-names>H. E.</given-names>
</name>
<name>
<surname>Purcaro</surname>
<given-names>M. J.</given-names>
</name>
<name>
<surname>Weng</surname>
<given-names>Z.</given-names>
</name>
</person-group> (<year>2020a</year>). <article-title>A curated benchmark of enhancer-gene interactions for evaluating enhancer-target gene prediction methods</article-title>. <source>Genome Biol.</source> <volume>21</volume>, <fpage>17</fpage>. <pub-id pub-id-type="doi">10.1186/s13059-019-1924-8</pub-id>
</citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Moore</surname>
<given-names>J. E.</given-names>
</name>
<name>
<surname>Purcaro</surname>
<given-names>M. J.</given-names>
</name>
<name>
<surname>Pratt</surname>
<given-names>H. E.</given-names>
</name>
<name>
<surname>Epstein</surname>
<given-names>C. B.</given-names>
</name>
<name>
<surname>Shoresh</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Adrian</surname>
<given-names>J.</given-names>
</name>
<etal/>
</person-group> (<year>2020b</year>). <article-title>Expanded encyclopaedias of DNA elements in the human and mouse genomes</article-title>. <source>Nature</source> <volume>583</volume>, <fpage>699</fpage>&#x2013;<lpage>710</lpage>. <pub-id pub-id-type="doi">10.1038/s41586-020-2493-4</pub-id>
</citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mumbach</surname>
<given-names>M. R.</given-names>
</name>
<name>
<surname>Satpathy</surname>
<given-names>A. T.</given-names>
</name>
<name>
<surname>Boyle</surname>
<given-names>E. A.</given-names>
</name>
<name>
<surname>Dai</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Gowen</surname>
<given-names>B. G.</given-names>
</name>
<name>
<surname>Cho</surname>
<given-names>S. W.</given-names>
</name>
<etal/>
</person-group> (<year>2017</year>). <article-title>Enhancer connectome in primary human cells identifies target genes of disease-associated DNA elements</article-title>. <source>Nat. Genet.</source> <volume>49</volume>, <fpage>1602</fpage>&#x2013;<lpage>1612</lpage>. <pub-id pub-id-type="doi">10.1038/ng.3963</pub-id>
</citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Nasser</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Bergman</surname>
<given-names>D. T.</given-names>
</name>
<name>
<surname>Fulco</surname>
<given-names>C. P.</given-names>
</name>
<name>
<surname>Guckelberger</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Doughty</surname>
<given-names>B. R.</given-names>
</name>
<name>
<surname>Patwardhan</surname>
<given-names>T. A.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Genome-wide enhancer maps link risk variants to disease genes</article-title>. <source>Nature</source> <volume>593</volume>, <fpage>238</fpage>&#x2013;<lpage>243</lpage>. <pub-id pub-id-type="doi">10.1038/s41586-021-03446-x</pub-id>
</citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pennacchio</surname>
<given-names>L. A.</given-names>
</name>
<name>
<surname>Bickmore</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Dean</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Nobrega</surname>
<given-names>M. A.</given-names>
</name>
<name>
<surname>Bejerano</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Enhancers: Five essential questions</article-title>. <source>Nat. Rev. Genet.</source> <volume>14</volume>, <fpage>288</fpage>&#x2013;<lpage>295</lpage>. <pub-id pub-id-type="doi">10.1038/nrg3458</pub-id>
</citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rao</surname>
<given-names>S. S.</given-names>
</name>
<name>
<surname>Huntley</surname>
<given-names>M. H.</given-names>
</name>
<name>
<surname>Durand</surname>
<given-names>N. C.</given-names>
</name>
<name>
<surname>Stamenova</surname>
<given-names>E. K.</given-names>
</name>
<name>
<surname>Bochkov</surname>
<given-names>I. D.</given-names>
</name>
<name>
<surname>Robinson</surname>
<given-names>J. T.</given-names>
</name>
<etal/>
</person-group> (<year>2014</year>). <article-title>A 3D map of the human genome at kilobase resolution reveals principles of chromatin looping</article-title>. <source>Cell</source> <volume>159</volume>, <fpage>1665</fpage>&#x2013;<lpage>1680</lpage>. <pub-id pub-id-type="doi">10.1016/j.cell.2014.11.021</pub-id>
</citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ray-Jones</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Spivakov</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Transcriptional enhancers and their communication with gene promoters</article-title>. <source>Cell. Mol. Life Sci.</source> <volume>78</volume>, <fpage>6453</fpage>&#x2013;<lpage>6485</lpage>. <pub-id pub-id-type="doi">10.1007/s00018-021-03903-w</pub-id>
</citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>R&#xf6;delsperger</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Guo</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Kolanczyk</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Pletschacher</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>K&#xf6;hler</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Bauer</surname>
<given-names>S.</given-names>
</name>
<etal/>
</person-group> (<year>2011</year>). <article-title>Integrative analysis of genomic, functional and protein interaction data predicts long-range enhancer-target gene interactions</article-title>. <source>Nucleic Acids Res.</source> <volume>39</volume>, <fpage>2492</fpage>&#x2013;<lpage>2502</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkq1081</pub-id>
</citation>
</ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Roy</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Siahpirani</surname>
<given-names>A. F.</given-names>
</name>
<name>
<surname>Chasman</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Knaack</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Ay</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Stewart</surname>
<given-names>R.</given-names>
</name>
<etal/>
</person-group> (<year>2015</year>). <article-title>A predictive modeling approach for cell line-specific long-range regulatory interactions</article-title>. <source>Nucleic Acids Res.</source> <volume>43</volume>, <fpage>8694</fpage>&#x2013;<lpage>8712</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkv865</pub-id>
</citation>
</ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Schoenfelder</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Fraser</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Long-range enhancer&#x2013;promoter contacts in gene expression control</article-title>. <source>Nat. Rev. Genet.</source> <volume>20</volume>, <fpage>437</fpage>&#x2013;<lpage>455</lpage>. <pub-id pub-id-type="doi">10.1038/s41576-019-0128-0</pub-id>
</citation>
</ref>
<ref id="B35">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sheffield</surname>
<given-names>N. C.</given-names>
</name>
<name>
<surname>Thurman</surname>
<given-names>R. E.</given-names>
</name>
<name>
<surname>Song</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Safi</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Stamatoyannopoulos</surname>
<given-names>J. A.</given-names>
</name>
<name>
<surname>Lenhard</surname>
<given-names>B.</given-names>
</name>
<etal/>
</person-group> (<year>2013</year>). <article-title>Patterns of regulatory activity across diverse human cell types predict tissue identity, transcription factor binding, and long-range interactions</article-title>. <source>Genome Res.</source> <volume>23</volume>, <fpage>777</fpage>&#x2013;<lpage>788</lpage>. <pub-id pub-id-type="doi">10.1101/gr.152140.112</pub-id>
</citation>
</ref>
<ref id="B36">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shen</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Yue</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>McCleary</surname>
<given-names>D. F.</given-names>
</name>
<name>
<surname>Ye</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Edsall</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Kuan</surname>
<given-names>S.</given-names>
</name>
<etal/>
</person-group> (<year>2012</year>). <article-title>A map of the cis-regulatory sequences in the mouse genome</article-title>. <source>Nature</source> <volume>488</volume>, <fpage>116</fpage>&#x2013;<lpage>120</lpage>. <pub-id pub-id-type="doi">10.1038/nature11243</pub-id>
</citation>
</ref>
<ref id="B37">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Luo</surname>
<given-names>O. J.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Zheng</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Zhu</surname>
<given-names>J. J.</given-names>
</name>
<name>
<surname>Szalaj</surname>
<given-names>P.</given-names>
</name>
<etal/>
</person-group> (<year>2015</year>). <article-title>CTCF-mediated human 3D genome architecture reveals chromatin topology for transcription</article-title>. <source>Cell</source> <volume>163</volume>, <fpage>1611</fpage>&#x2013;<lpage>1627</lpage>. <pub-id pub-id-type="doi">10.1016/j.cell.2015.11.024</pub-id>
</citation>
</ref>
<ref id="B38">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Thurman</surname>
<given-names>R. E.</given-names>
</name>
<name>
<surname>Rynes</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Humbert</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Vierstra</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Maurano</surname>
<given-names>M. T.</given-names>
</name>
<name>
<surname>Haugen</surname>
<given-names>E.</given-names>
</name>
<etal/>
</person-group> (<year>2012</year>). <article-title>The accessible chromatin landscape of the human genome</article-title>. <source>Nature</source> <volume>489</volume>, <fpage>75</fpage>&#x2013;<lpage>82</lpage>. <pub-id pub-id-type="doi">10.1038/nature11232</pub-id>
</citation>
</ref>
<ref id="B39">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Whalen</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Truty</surname>
<given-names>R. M.</given-names>
</name>
<name>
<surname>Pollard</surname>
<given-names>K. S.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Enhancer&#x2013;promoter interactions are encoded by complex genomic signatures on looping chromatin</article-title>. <source>Nat. Genet.</source> <volume>48</volume>, <fpage>488</fpage>&#x2013;<lpage>496</lpage>. <pub-id pub-id-type="doi">10.1038/ng.3539</pub-id>
</citation>
</ref>
<ref id="B40">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Singh</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Ma</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Exploiting sequence-based features for predicting enhancer&#x2013;promoter interactions</article-title>. <source>Bioinformatics</source> <volume>33</volume>, <fpage>i252</fpage>&#x2013;<lpage>i260</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btx257</pub-id>
</citation>
</ref>
<ref id="B41">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yao</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Shen</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Laird</surname>
<given-names>P. W.</given-names>
</name>
<name>
<surname>Farnham</surname>
<given-names>P. J.</given-names>
</name>
<name>
<surname>Berman</surname>
<given-names>B. P.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Inferring regulatory element landscapes and transcription factor networks from cancer methylomes</article-title>. <source>Genome Biol.</source> <volume>16</volume>, <fpage>105</fpage>&#x2013;<lpage>121</lpage>. <pub-id pub-id-type="doi">10.1186/s13059-015-0668-3</pub-id>
</citation>
</ref>
<ref id="B42">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Shi</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Zhu</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Lan</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Yuan</surname>
<given-names>H.</given-names>
</name>
<etal/>
</person-group> (<year>2018</year>). <article-title>DiseaseEnhancer: A resource of human disease-associated enhancer catalog</article-title>. <source>Nucleic Acids Res.</source> <volume>46</volume>, <fpage>D78</fpage>&#x2013;<lpage>D84</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkx920</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>