<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.3" xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Cell Dev. Biol.</journal-id>
<journal-title>Frontiers in Cell and Developmental Biology</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Cell Dev. Biol.</abbrev-journal-title>
<issn pub-type="epub">2296-634X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1124266</article-id>
<article-id pub-id-type="doi">10.3389/fcell.2023.1124266</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Cell and Developmental Biology</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>
<italic>In silico</italic> characterisation of minor wave genes and LINE-1s transcriptional dynamics at murine zygotic genome activation</article-title>
<alt-title alt-title-type="left-running-head">Ansaloni et al.</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fcell.2023.1124266">10.3389/fcell.2023.1124266</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Ansaloni</surname>
<given-names>Federico</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1412426/overview"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Gustincich</surname>
<given-names>Stefano</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/798001/overview"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Sanges</surname>
<given-names>Remo</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/383456/overview"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>Area of Neuroscience</institution>, <institution>Scuola Internazionale Superiore di Studi Avanzati (SISSA)</institution>, <addr-line>Trieste</addr-line>, <country>Italy</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Central RNA Laboratory</institution>, <institution>Istituto Italiano di Tecnologia&#x2014;IIT</institution>, <addr-line>Genova</addr-line>, <country>Italy</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1146992/overview">Luisa Di Stefano</ext-link>, FR3743 Centre de Biologie Int&#xe9;grative (CBI), France</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2158532/overview">Sergio Ruiz Macias</ext-link>, National Institutes of Health (NIH), United States</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2168088/overview">Federica Marasca</ext-link>, National Institute of Molecular Genetics (INGM), Italy</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Stefano Gustincich, <email>stefano.gustincich@iit.it</email>; Remo Sanges, <email>remo.sanges@gmail.com</email>
</corresp>
</author-notes>
<pub-date pub-type="epub">
<day>14</day>
<month>06</month>
<year>2023</year>
</pub-date>
<pub-date pub-type="collection">
<year>2023</year>
</pub-date>
<volume>11</volume>
<elocation-id>1124266</elocation-id>
<history>
<date date-type="received">
<day>14</day>
<month>12</month>
<year>2022</year>
</date>
<date date-type="accepted">
<day>05</day>
<month>06</month>
<year>2023</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2023 Ansaloni, Gustincich and Sanges.</copyright-statement>
<copyright-year>2023</copyright-year>
<copyright-holder>Ansaloni, Gustincich and Sanges</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>
<bold>Introduction:</bold> In mouse, the zygotic genome activation (ZGA) is coordinated by MERVL elements, a class of LTR retrotransposons. In addition to MERVL, another class of retrotransposons, LINE-1 elements, recently came under the spotlight as key regulators of murine ZGA. In particular, LINE-1 transcripts seem to be required to switch-off the transcriptional program started by MERVL sequences, suggesting an antagonistic interplay between LINE-1 and MERVL pathways.</p>
<p>
<bold>Methods:</bold> To better investigate the activities of LINE-1 and MERVL elements at ZGA, we integrated publicly available transcriptomics (RNA-seq), chromatin accessibility (ATAC-seq) and Pol-II binding (Stacc-seq) datasets and characterised the transcriptional and epigenetic dynamics of such elements during murine ZGA.</p>
<p>
<bold>Results:</bold> We identified two likely distinct transcriptional activities characterising the murine zygotic genome at ZGA onset. On the one hand, our results confirmed that ZGA minor wave genes are preferentially transcribed from MERVL-rich and gene-dense genomic compartments, such as gene clusters. On the other hand, we identified a set of evolutionary young and likely transcriptionally autonomous LINE-1s located in intergenic and gene-poor regions showing, at the same stage, features such as open chromatin and RNA Pol II binding suggesting them to be, at least, poised for transcription.</p>
<p>
<bold>Discussion:</bold> These results suggest that, across evolution, transcription of two different classes of transposable elements, MERVLs and LINE-1s, have likely been confined in genic and intergenic regions respectively in order to maintain and regulate two successive transcriptional programs at ZGA.</p>
</abstract>
<kwd-group>
<kwd>zygotic genome activation</kwd>
<kwd>transposable elements (TEs)</kwd>
<kwd>LINE-1 (L1)</kwd>
<kwd>MERVL</kwd>
<kwd>transcriptional regulation</kwd>
<kwd>computational genomics</kwd>
</kwd-group>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Epigenomics and Epigenetics</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec id="s1">
<title>Introduction</title>
<p>Upon fertilisation, the fusion of highly differentiated sperm and oocyte cells generates a totipotent embryo. The newly generated embryo is transcriptionally inactive and, in absence of transcription, its development relies on maternally supplied transcripts and proteins originally deposited into the oocyte cytoplasm (<xref ref-type="bibr" rid="B16">Eckersley-Maslin et al., 2018</xref>; <xref ref-type="bibr" rid="B45">Schulz and Harrison, 2019</xref>). However, for the embryo to continue its development, zygotic genome activation (ZGA) must occur (<xref ref-type="bibr" rid="B45">Schulz and Harrison, 2019</xref>).</p>
<p>ZGA is characterised by a minor and a major transcriptional waves which, in <italic>Mus musculus</italic> (mouse), occur in the early 2-cell and 2-cell stages respectively (<xref ref-type="bibr" rid="B47">Tadros and Lipshitz, 2009</xref>; <xref ref-type="bibr" rid="B30">Lee et al., 2014</xref>; <xref ref-type="bibr" rid="B26">Jukam et al., 2017</xref>). Murine ZGA appears to be activated by the transcription factor (TF) <italic>Dux</italic> (encoded by the <italic>Duxf3</italic> gene) which coordinates a specific transcriptional programme characterised by the expression of, among others, the <italic>Zscan4</italic>, <italic>Prame</italic>, <italic>Eif1a</italic>-like gene family members and of the endogenous retroviruses (ERVs) MERVLs (<xref ref-type="bibr" rid="B14">De Iaco et al., 2017</xref>; <xref ref-type="bibr" rid="B23">Hendrickson et al., 2017</xref>). MERVL elements, however, are not simply <italic>Dux</italic> targets since they act as potent ZGA activators by themselves. By providing alternative promoter sequences, MERVLs activate the transcription of many ZGA minor wave genes thus enabling a robust and coordinated transcriptional activation of the zygotic genome (<xref ref-type="bibr" rid="B37">Peaston et al., 2004</xref>; <xref ref-type="bibr" rid="B34">Macfarlan et al., 2012</xref>; <xref ref-type="bibr" rid="B49">Torres-Padilla, 2020</xref>).</p>
<p>In addition to MERVLs, long interspersed nuclear element 1 (LINE-1), another class of transposable elements (TEs), play crucial roles during murine ZGA (<xref ref-type="bibr" rid="B25">Jachowicz et al., 2017</xref>; <xref ref-type="bibr" rid="B38">Percharde et al., 2018</xref>). Evolutionary young LINE-1 transcripts (A, G<sub>
<italic>f</italic>
</sub> and T<sub>
<italic>f</italic>
</sub> subfamilies) act as chromatin remodellers in the 2-cell stage (<xref ref-type="bibr" rid="B25">Jachowicz et al., 2017</xref>). In particular, LINE-1 function appears to be necessary and stage-specific as both the elongation of LINE-1 transcription beyond the 2-cell stage and its transcriptional repression immediately after fertilisation lead to failures in embryonic development (<xref ref-type="bibr" rid="B25">Jachowicz et al., 2017</xref>). In addition, recent evidence described how LINE-1 RNAs, together with Nucleolin and Kap1 proteins, are required to repress <italic>Dux</italic> and its 2-cell stage-specific transcriptional program (<xref ref-type="bibr" rid="B38">Percharde et al., 2018</xref>).</p>
<p>In summary, MERVL elements activate a ZGA-specific transcriptional program and LINE-1 RNAs are required to switch it off, allowing the embryo to develop beyond this stage. This model leaves several important questions unanswered. How is the transcription of LINE-1 elements activated at ZGA and from which loci? Is this linked to the <italic>Dux</italic>/MERVL transcriptional program or LINE-1s are activated by an independent program?</p>
<p>In this study, we integrated publicly available RNA-seq, ATAC-seq and Stacc&#x2013;seq datasets to characterise the dynamics regulating the activation of MERVL and LINE-1 elements at murine ZGA at the transcriptional and epigenetic levels. We identified and characterised a set of LINE-1 elements which result likely transcribed at ZGA minor wave onset, according to our analyses of ATAC-seq and Pol-II binding site datasets. We propose that the transcription of these LINE-1s might be regulated by YY1. Altogether, our results show that MERVL elements are transcribed from gene-dense regions while LINE-1 RNAs are expressed from intergenic regions, suggesting the interplay of two, distinct, transcriptional programs co-ordinately regulating murine ZGA.</p>
</sec>
<sec sec-type="materials|methods" id="s2">
<title>Materials and methods</title>
<sec id="s2-1">
<title>RNA-seq data collection</title>
<p>The RNA-seq data analysed in this study were retrieved from a previous publication (<xref ref-type="bibr" rid="B55">Wu et al., 2016</xref>). The RNA-seq dataset is composed by 5 different samples: MII-oocyte, zygote, early 2-cell (30&#xa0;h post fertilization [hpf]), 2-cell (39&#x2013;43 hpf), 4-cell (54&#x2013;56 hpf) and 8-cell (68&#x2013;70 hpf) stages. Each stage is represented by two biological replicates. Reads are in paired-end (PE) layout (2 &#xd7; 126bp). Raw RNA-seq reads were retrieved from the ENA-EBI database (accession number: PRJNA277361), the technical replicates corresponding to the same experiment were merged. Next, the quality of the raw reads was assessed using FastQC (<xref ref-type="bibr" rid="B3">Andrews, 2010</xref>). Having detected the presence of both adapters and low-quality reads, the sequencing reads were trimmed using Trimmomatic (v0.38, parameters: ILLUMINACLIP:{adapter.fa}:2:30:10:2:keepBothReads, LEADING:5 TRAILING:5, SLIDINGWINDOW:4:15 MINLEN:30) (<xref ref-type="bibr" rid="B10">Bolger et al., 2014</xref>).</p>
</sec>
<sec id="s2-2">
<title>Gene expression quantification</title>
<p>Trimmed RNA-seq reads were mapped to the murine genome (mm10 version&#x2014;gencode vM22 version) using STAR (v2.6.0c) (<xref ref-type="bibr" rid="B15">Dobin et al., 2013</xref>). Default parameters were used except for the number of multimapping reads that was set to 80 (--outFilterMultimapNmax 80). Reads were counted by using htseq-count (v0.11.2, parameters: --stranded no -m union --nonunique all) (<xref ref-type="bibr" rid="B2">Anders et al., 2015</xref>).</p>
</sec>
<sec id="s2-3">
<title>TE expression quantification&#x2014;locus specific</title>
<p>TE locus specific expression levels were calculated using SQuIRE (<xref ref-type="bibr" rid="B56">Yang et al., 2019</xref>). First, the reference genome and the annotation datasets referring to the murine mm10 genome version were downloaded and prepared for the subsequent analyses using the SQuIRE Fetch and Clean modules, then the trimmed reads were mapped to the reference genome using the Map module and finally read counts were estimated using the Count module (strandedness &#x3d; &#x201c;0&#x201d;). Only TEs annotated as DNA, LINE, SINE, LTR and RC were selected for subsequent analyses. TEs annotated as &#x201c;MERVL-int&#x201d;, &#x201c;MERVL_2A-int&#x201d;, &#x201c;MT2A&#x2033;, &#x201c;MT2B&#x2033;, &#x201c;MT2B1&#x201d;, &#x201c;MT2B2&#x201d;, &#x201c;MT2C_Mm&#x201d; and &#x201c;MT2_Mm&#x201d;, were classified as MERVL.</p>
</sec>
<sec id="s2-4">
<title>TE expression quantification&#x2014;consensus level</title>
<p>Expression levels of the TE consensus were calculated using TEspeX (<xref ref-type="bibr" rid="B5">Ansaloni et al., 2019</xref>; <xref ref-type="bibr" rid="B4">2022</xref>). Here is reported a brief description of the TEspeX workflow. The reference transcriptome was built merging the RepBase TE sequences (<xref ref-type="bibr" rid="B9">Bao et al., 2015</xref>) and the Ensembl transcript sequences containing all the coding and non-coding annotated transcripts (<xref ref-type="bibr" rid="B58">Zerbino et al., 2018</xref>). Reads were then mapped on the reference transcriptome using STAR (v2.6.0c) (<xref ref-type="bibr" rid="B15">Dobin et al., 2013</xref>) assigning primary alignment flag to all the alignments with the best score. All alignments flagged as primary (&#x2212;F 0 &#xd7; 100 parameter) were then selected using samtools (v1.3.1) (<xref ref-type="bibr" rid="B32">Li et al., 2009</xref>). To avoid counting reads mapping to TE fragments embedded in coding and/or long non-coding transcripts, reads mapping with best-scoring alignments to any Ensembl transcript were discarded using Python scripts and Picard FilterSamReads (v2.18.4) (<ext-link ext-link-type="uri" xlink:href="http://broadinstitute.github.io/Picard">http://broadinstitute.github.io/Picard</ext-link>). Selected reads mapping exclusively on TEs and in the proper orientation were finally counted in each sample.</p>
</sec>
<sec id="s2-5">
<title>Differentially expressed gene analyses</title>
<p>Differentially expressed genes (DEG) between the early 2-cell and the zygote stages were identified using edgeR (<xref ref-type="bibr" rid="B42">Robinson et al., 2010</xref>). Normalisation of raw read counts was applied using the TMM method whereas the common, trended and tagwise dispersions were estimated by maximizing the negative binomial likelihood (default). Next, DEG were tested performing a quasi-likelihood F-tests (glmQLFit and glmQLTest). Genes were considered as differentially expressed (DE) if showing FDR &#x3c; 0.05 and log2FC &#x3c; &#x2212;1 or &#x3e; 1. The same workflow was used for all the DE gene analyses and also to identify the DE TE (single loci). DE TE consensus sequences were identified by using the same workflow previously described with the exception that the library size of each sample was calculated providing the total number of reads mapped on the TEspeX transcriptome (coding, non-coding and TE consensus sequences) instead of using the default values.</p>
</sec>
<sec id="s2-6">
<title>GO enrichment analysis</title>
<p>GO enrichment analysis was performed using topGO (<xref ref-type="bibr" rid="B1">Alexa and Rahnenfuhrer, 2019</xref>). GO enrichment analysis was conducted on the GO terms associated to the early 2-cell/zygote upregulated genes, using as background the GO terms associated to the whole set of coding and non-coding annotated genes. First, the statistical significance of the enrichments was tested with the Fisher&#x2019;s Exact Test (algorithm &#x3d; &#x201c;weight&#x201d;). Then, GO terms associated to less than 15 significant genes were discarded prior to FDR calculation (Benjamini and Hochberg). Significant threshold was imposed to FDR &#x3c; 0.05.</p>
</sec>
<sec id="s2-7">
<title>Upregulated gene TSS enrichment analysis</title>
<p>Transcription start sites (TSS) of the upregulated genes were defined as the first nucleotide at the 5&#x2019; of each given gene. To identify TEs enriched nearby the TSS of the early 2-cell vs. zygote upregulated genes, the TSS of each gene was elongated both at the 5&#x2019; and 3&#x2019; by 100 nucleotides. The genomic coordinates of the elongated TSS were then overlapped with the genomic coordinates of all the murine TEs using bedtools intersect (v.2.27.0) (<xref ref-type="bibr" rid="B39">Quinlan and Hall, 2010</xref>) and its python wrapper pybedtools (<xref ref-type="bibr" rid="B13">Dale et al., 2011</xref>). Finally, the number of genes overlapping each TE subfamily was counted. The same analysis was repeated selecting an equal number of randomly selected genes for 1,000 times. Z-scores and <italic>p</italic>-values were then calculated and corrected using the FDR Benjamini and Hochberg correction. FDR significant threshold was set to 0.05.</p>
</sec>
<sec id="s2-8">
<title>Identification of gene clusters in the murine genome and overlap with upregulated genes</title>
<p>Gene clusters in the murine genome were identified using ClusterScan (parameters: -n 5 -d 500,000 &#x2013;singletons) (<xref ref-type="bibr" rid="B51">Volpe et al., 2018</xref>). Next, the number of minor/major wave genes overlapping at least one gene cluster calculated by overlapping the gene and cluster coordinates by bedtools intersect (parameter: -f 0.5). The same analysis was repeated by shuffling 1,000 times the coordinates of the gene clusters using bedtools shuffle. Z-scores, <italic>p</italic>-values and FDR were calculated and filtered as already described.</p>
</sec>
<sec id="s2-9">
<title>RNA-seq from oocyte treated with alpha-amanitin</title>
<p>Raw RNA-seq data was retrieved from two previous studies (<xref ref-type="bibr" rid="B52">Wang et al., 2019</xref>; <xref ref-type="bibr" rid="B6">Asami et al., 2023</xref>). In the first dataset (<xref ref-type="bibr" rid="B52">Wang et al., 2019</xref>), denuded GV oocytes were treated with alpha-amanitin and RNA was isolated and sequenced at the 2-cell stage (4 biological replicates, SE layout, 75 bp). In the second dataset (<xref ref-type="bibr" rid="B6">Asami et al., 2023</xref>), embryos were generated by sperm-oocyte mixing for 1&#xa0;h and then incubated in KSOM containing &#x3b1;-amanitin (100&#xa0;&#x3bc;g/mL) for a further 10&#xa0;h, when bipronuclear embryo samples (1-cell stage) were collected (4 biological replicates, PE layout, 2 &#xd7; 50 bp). RNA-seq reads were treated as previously described and TE expression was calculated at the consensus level using TEspeX (<xref ref-type="bibr" rid="B4">Ansaloni et al., 2022</xref>) as previously described.</p>
</sec>
<sec id="s2-10">
<title>ATAC-seq data collection</title>
<p>ATAC-seq data were retrieved from a previous publication (<xref ref-type="bibr" rid="B55">Wu et al., 2016</xref>) (accession number: PRJNA277362). The dataset is composed by a total of 5 stages (2 biological replicates, PE layout, 2 &#xd7; 101bp). The stages were matched with the ones of the RNA-seq except for MII-oocyte ad zygote stages that were missing in the ATAC-seq data as it is technically challenging to extract enough chromatin to perform an ATAC-seq at these stages. To overcome this issue, the authors of the paper treated early zygotes (PN3&#x2014;pronucleus phase 3) in CZB medium supplemented with alpha-amanitin for about 14&#xa0;h (<xref ref-type="bibr" rid="B55">Wu et al., 2016</xref>). Alpha-amanitin is an inhibitor of the RNA-pol-II, thus alpha-amanitin treated zygotes are transcriptionally inactive stages and likely represent a pre-ZGA sample. Having detected the presence of both adapters and low-quality reads, the ATAC-seq reads were trimmed using Trimmomatic as previously described.</p>
</sec>
<sec id="s2-11">
<title>Identification of ATAC-seq peaks</title>
<p>Trimmed ATAC-seq reads were mapped to the murine genome (mm10 version&#x2014;gencode vM22 version) using bowtie2 (v2.3.5.1) (<xref ref-type="bibr" rid="B28">Langmead and Salzberg, 2012</xref>). Default parameters were used when mapping single-ended reads whereas paired-ended reads were mapped avoiding the selection of both discordant pairs and singleton reads (--no-mixed, --no-discordant parameters). If a sequencing read multimaps to <italic>n</italic> different loci, bowtie2 defines the best alignment (based on mismatches, gaps and other mapping parameters) among these <italic>n</italic> and reports only this to the output file. Next, the Genrich tool was used to perform the ATAC peak calling analysis merging the biological replicates (v0.6, parameters: -j -d 250 -r -a 0 -q 0.05 -e chrM and genomic scaffolds) (<ext-link ext-link-type="uri" xlink:href="https://github.com/jsh58/genrich">https://github.com/jsh58/genrich</ext-link>). Only ATAC peaks longer than 10 nucleotides were retained for further analyses. The same analysis was repeated by using bwa (v 0.7.15-r1140), bowtie1 (v 1.2.3; --chunkmbs 200 --sam --best) and bowtie2 to rule out possible biases introduced by the random assignment of non-uniquely mapping reads.</p>
</sec>
<sec id="s2-12">
<title>ATAC peaks annotation</title>
<p>ATAC peaks were annotated respective to the transcript genic features using the R/Bioconductor package ChIPseeker (v1.24.0) (<xref ref-type="bibr" rid="B57">Yu et al., 2015</xref>). The TxDB object was generated from the gtf annotation file used for previous analyses (gencode vM22 version, primary assembly) using the R/Bioconductor package GenomicFeatures (v1.40.1) (<xref ref-type="bibr" rid="B29">Lawrence et al., 2013</xref>). ATAC peaks were next annotated using the ChIPseeker annotatePeak function defining a promoter region of &#x2b;/&#x2212; 3&#xa0;kb (tssRegion &#x3d; c(-3,000, 3,000)). In case of ATAC peaks overlapping more than one genomic feature, priorities were assigned following the ChIPseeker default parameters (promoter, 5&#x2019; UTR, 3&#x2019; UTR, exon, intron, downstream, intergenic). In order to define how many of the intergenic ATAC peaks were associated to TEs, the genomic coordinates of the ATAC-seq peaks were overlapped with the genomic coordinates of the TEs annotated in the mm10 genome (downloaded from repeatmasker). The overlap was performed by using bedtools intersect (v2.27.0, parameters: -f 0.5, -wao) (<xref ref-type="bibr" rid="B39">Quinlan and Hall, 2010</xref>). Since it might happen that one single ATAC-seq peak overlaps more than one annotated TEs, the output file of the bedtools intersect was furtherly parsed in order to reduce this redundancy. Namely, in case of ATAC-seq peaks overlapping more than one annotated TEs, the longest intersection was selected.</p>
</sec>
<sec id="s2-13">
<title>LINE-1 monomer annotation</title>
<p>Genomic coordinates of the LINE-1 monomers were retrieved from a previous publication (<xref ref-type="bibr" rid="B59">Zhou and Smith, 2019</xref>). Monomers were then associated to the closest starting coordinate of the annotated LINE-1 elements by using bedtools closest (parameters: -s, -D ref). A given monomer was associated to the closest LINE-1 starting coordinate if: 1) the two features were on the same strand and 2) the monomer overlapped the LINE-1 starting coordinate or was located no more than 100 nucleotides downstream to it. Of note, while performing this analysis we discovered that in many cases two portions of the same LINE-1 element were annotated as distinct LINE-1s by repeatmasker. For instance, in our TE annotation file, two different LINE-1s were annotate at coordinates chr1:3037726&#x2013;3043047 and chr1:3043047&#x2013;3044040 and they represented the coordinates of the L1Md_A consensus sequence 279&#x2013;5,586 and 5,587&#x2013;6,580, respectively. We believe that these two elements actually belong to a unique LINE-1 element as the two elements are contingent and also represent contingent portions of the respective consensus. To fill this gap, we used custom code to join all the contingent LINE-1s in the murine genome if: 1) the given LINE-1s belonged to the same LINE-1 subfamily (e.g., L1 Md_A), 2) the distance between the two elements on the genome was less than 2 nucleotides and 3) the distance between the represented portions on the LINE-1 consensus sequence was less than 5 nucleotides.</p>
</sec>
<sec id="s2-14">
<title>LINE-1 expression</title>
<p>To define whether the young LINE-1s overlapping early2-cell intergenic ATAC peaks were expressed during the developmental time-course, the expression levels of the LINE-1s of interest were compared to the expression levels of randomly selected young LINE-1s. To this end, the normalised expression levels of the LINE-1s of interest were retrieved from the previous TE single loci expression quantification analysis (see &#x201c;TE expression quantification&#x2014;locus specific&#x201d; section). Expression of contingent LINE-1s whose coordinates were joined (see &#x201c;LINE-1 monomer annotation&#x201d;) was summed. The same analysis was then repeated 1,000 times on an equal number of randomly selected young LINE-1s regardless of their ATAC status in the early2-cell stage. This analysis was performed separately for young LINE-1s with (n &#x3d; 2,116) and without (n &#x3d; 1,298) a monomer.</p>
</sec>
<sec id="s2-15">
<title>Stacc-seq data collection</title>
<p>Stacc-seq data were retrieved from a previous publication (accession number: PRJNA558961) (<xref ref-type="bibr" rid="B33">Liu et al., 2020</xref>). In total, the dataset was composed by: MII-oocyte (13 hpf), PN2 1-cell (23 hpf), PN3 1-cell (26 hpf), PN5 1-cell (30 hpf), early 2-cell (36 hpf), late 2-cell (48 hpf) and 8-cell (71 hpf). In order to match these stages with the ones previously analysed in the RNA-seq and ATAC-seq dataset, we retrieved 5 stages: 1) MII-oocyte, corresponding to MII-oocyte of the RNA-seq dataset; 2) PN3 1-cell embryos, corresponding to the zygote stage of the RNA-seq dataset; 3) PN5 1-cell embryos, corresponding to the early 2-cell stage of the RNA-seq and ATAC-seq dataset (they were both collected at 30 hpf, but in the Stacc-seq dataset is called &#x201c;PN5 1-cell&#x201d;, whereas in the RNA-seq and ATAC-seq it is called &#x201c;early 2-cell&#x201d;); 4) late 2-cell, corresponding to the 2-cell stage of the RNA-seq and ATAC-seq datasets and v) 8-cell stage, corresponding to the 8-cell stage of the RNA-seq and ATAC-seq datasets. Each stage is represented by two biological replicates. Reads are in PE layout (2 &#xd7; 150bp). Having detected the presence of both adapters and low-quality reads, the Stacc-seq reads were trimmed using Trimmomatic as previously described.</p>
</sec>
<sec id="s2-16">
<title>Identification of Pol-II binding sites</title>
<p>Mapping to the reference genome, peaks identification and bigwigs files generation were done as previously described for the ATAC-seq data with the exception that no threshold was set on the minimum length of the Pol-II binding sites and that only binding sites scoring an area under the curve (AUC) higher than 500 were selected for further analyses.</p>
</sec>
<sec id="s2-17">
<title>Enrichment of Pol-II binding sites on LINE-1 monomers</title>
<p>Genomic coordinates of the monomers of the LINE-1s of interest were elongated by 100 nucleotides at both far ends and intersected (bedtools intersect) with the genomic coordinates of the Pol-II binding sites identified in each of the analysed stages. The same analysis was repeated by shuffling 1,000 times the coordinates of the Pol-II binding sites by using bedtools shuffle. Z-scores, <italic>p</italic>-values and FDR were calculated as previously described. The same analysis was also repeated intersecting the end-point coordinate of the same LINE-1 elements. Upset plots were generated by using the ComplexUpset R library (<xref ref-type="bibr" rid="B27">Krassowski et al., 2022</xref>), ggplot2 extension of UpSetR (<xref ref-type="bibr" rid="B12">Conway et al., 2017</xref>), based on the &#x201c;UpSet&#x201d; technique firstly conceived by (<xref ref-type="bibr" rid="B31">Lex et al., 2014</xref>).</p>
</sec>
<sec id="s2-18">
<title>Motif enrichment analysis</title>
<p>Motif enrichment analysis was performed by using the findMotifs tool of the Homer package (<xref ref-type="bibr" rid="B22">Heinz et al., 2010</xref>). Motifs were searched in the monomer sequences of the LINE-1s of interest (evolutionary young LINE-1 overlapping early 2-cell intergenic ATAC peaks and carrying a monomer) using as background the sequences of evolutionary young LINE-1 overlapping early 2-cell intergenic ATAC peaks and not carrying a monomer. To validate the results of this analysis, the same analysis was repeated using the <italic>runAme()</italic> function of the memes R package, which is a wrapper of the AME tool of MEME Suite (<xref ref-type="bibr" rid="B36">McLeay and Bailey, 2010</xref>). Motif enrichment analysis on the LINE-1 sequences overlapping early 2-cell intergenic ATAC-seq peaks was instead performed using as background the nucleotide sequence of the portion of the LINE-1s of interest not overlapping any ATAC-peaks.</p>
</sec>
<sec id="s2-19">
<title>Metagene plot</title>
<p>The metagene plots represented in <xref ref-type="fig" rid="F1">Figures 1C</xref>, <xref ref-type="fig" rid="F3">3D</xref> were generated using custom scripts developed in python3. Briefly, the genomic features of interest were subdivided in a given number of bins. Each bin of each genomic feature of interest was then overlapped with the second genomic feature of interest (e.g., MERVL coordinates or ATAC-seq peaks). When at least 50% of the nucleotides of the bin overlapped the investigated element, 1 was assigned to the bin otherwise 0. In the plots, on the y-axis is represented the mean number of genomic features of interest overlapping a given element (e.g., MERVL coordinates or ATAC-seq peaks). This number was multiplied by 100, thus representing the percentage of genomic features of interest covered by the investigated element. The random signal depicted in <xref ref-type="fig" rid="F1">Figure 1C</xref> was obtained applying the same workflow to randomly selected genes (1,000 randomisations). Metagene in <xref ref-type="fig" rid="F4">Figure 4D</xref> was generated by using the computeMatrix scale-regions tool of the deeptools package (<xref ref-type="bibr" rid="B41">Ram&#xed;rez et al., 2016</xref>). The random signal depicted in <xref ref-type="fig" rid="F4">Figure 4D</xref> was obtained applying the same workflow to shuffled (bedtools shuffle) LINE-1 coordinates (1,000 randomisations). Since the genomic tracks at these developmental stages are quite noisy, the bedGraph/bigwig genomic regions falling outside significantly identified peaks (see Genrich analysis) were assigned a value of 0.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>ZGA minor wave genes are enriched in MERVL sequences and located within gene clusters. <bold>(A)</bold> Volcano plot showing DE genes between early 2-cell and zygote stages. Up and downregulated genes in early 2-cell vs. zygote stages are reported. log<sub>2</sub>FC is reported on x-axis, whereas -log<sub>10</sub> of the FDR on the y-axis. Significantly downregulated genes are coloured in red, the upregulated in light orange (FDR&#x3c;0.05 and log<sub>2</sub>FC &#x3e; 1 or &#x3c; -1). Gene names are reported for 2C-specific genes. <bold>(B)</bold> GO terms enriched in the set of genes upregulated in early 2-cell vs. zygote. Only GO terms relative to biological process (BP) are reported. Significant threshold set to FDR&#x3c;0.05. <bold>(C)</bold> Metagene showing the MERVL enrichment nearby the 861 genes upregulated in early 2-cell <sub>
<italic>vs</italic>
</sub> zygote. MERVL enrichment is reported on y-axis as the fraction of the 861 genes overlapping MERVL sequences at a given position. The same is reported for randomly selected genes (grey, 1,000 randomisations, mean is reported) (<italic>p</italic> &#x3c; 2.2e-16, z-statistics). <bold>(D)</bold> Percentage of upregulated genes in early 2-cell vs. zygote (ZGA minor wave) and 2-cell vs. early 2-cell (ZGA major wave) overlapping gene clusters (darker colours). The same analysis was repeated by shuffling (i.e., randomly permuting) 1,000 times the coordinates of the gene clusters (shuffled clusters, lighter colours). In the shuffled bars mean &#xb1; sem is reported, however, due to the small variability among the 1,000 randomisations they are not visually appreciable. (Shuffled values are 9.94 &#xb1; 0.70 and 9.88 &#xb1; 0.031, respectively. &#x2a;&#x2a;&#x2a;<italic>p</italic> &#x3d; 2.9e-10 and <italic>p</italic> &#x3d; 0.36, respectively, z-statistics).</p>
</caption>
<graphic xlink:href="fcell-11-1124266-g001.tif"/>
</fig>
</sec>
<sec id="s2-20">
<title>Bigwig track generation</title>
<p>Bigwigs track generated in this study for both ATAC-seq and Stacc-seq (Pol-II binding) data are deposited on Zenodo at: <ext-link ext-link-type="uri" xlink:href="https://zenodo.org/record/7807839">https://zenodo.org/record/7807839&#x23;.ZC_N3y8RrEo</ext-link>. The bigwig tracks were generated by using the bamCoverage tool of the deepTools package (v3.5.0, parameters: --binSize 10 --centerReads --ignoreDuplicates --normalizeUsing RPKM --extendReads 250) (<xref ref-type="bibr" rid="B41">Ram&#xed;rez et al., 2016</xref>). In addition, since at these developmental stages chromatin has a very dynamic conformation, bigwig tracks can be very noisy (<xref ref-type="bibr" rid="B55">Wu et al., 2016</xref>) and the signal can be of hard interpretation. To overcome this issue, we also generated bigwig tracks by including only reads overlapping ATAC-seq or Stacc-seq significant peaks previously identified by Genrich. To this end, bamCoverage &#x201c;&#x2014;blackListFileName&#x201d; parameter was provided with a bed file containing all the regions of the mm10 genome not overlapping any significant ATAC-seq/Stacc-seq peak elongated at both ends by 1&#xa0;kb.</p>
</sec>
<sec id="s2-21">
<title>Statistical analysis</title>
<p>All the statistical analyses performed externally to previously reported software (edgeR, topGO) were conducted either in R (v3.6.2) (<xref ref-type="bibr" rid="B40">R Core Team, 2018</xref>) or in python (v3.7.6) (<xref ref-type="bibr" rid="B43">Rossum and Drake, 2001</xref>) taking advantage of the numpy (<xref ref-type="bibr" rid="B21">Harris et al., 2020</xref>) and scipy (<xref ref-type="bibr" rid="B50">SciPy 1.0 Contributors et al., 2020</xref>) libraries. All the plots were generated in R, using either generic R plotting functions or the ggplot2 library (<xref ref-type="bibr" rid="B54">Wickham, 2016</xref>). All the <italic>p</italic>-values were FDR corrected (Benjamini&#x2013;Hochberg). &#x2a;<italic>p</italic> &#x3c; 0.05, &#x2a;&#x2a;<italic>p</italic> &#x3c; 0.01, &#x2a;&#x2a;&#x2a;<italic>p</italic> &#x3c; 0.001.</p>
</sec>
<sec id="s2-22">
<title>Naming of different groups of LINE-1s</title>
<p>In this article, &#x201c;old L1&#x201d; refers to all the LINE-1s not belonging to A, G<sub>
<italic>f</italic>
</sub> and T<sub>
<italic>f</italic>
</sub>, &#x201c;young L1&#x201d; refers to the LINE-1s belonging to A, G<sub>
<italic>f</italic>
</sub> and T<sub>
<italic>f</italic>
</sub> and &#x201c;LINE-1&#x201d; refers to all the LINE-1, with no distinctions among evolutionary ages.</p>
</sec>
</sec>
<sec sec-type="results" id="s3">
<title>Results</title>
<sec id="s3-1">
<title>Genes transcriptionally activated during ZGA minor wave are enriched in MERVL elements and in gene clusters</title>
<p>To identify the genes transcriptionally activated during the minor wave of the murine ZGA, we performed a differentially expressed (DE) gene analysis between the early 2-cell (ZGA minor wave stage) and the zygote (transcriptionally inactive) stages. A total of 1,060 genes resulted differentially expressed between the two conditions with 861 genes being upregulated in early 2-cell compared to zygote (<xref ref-type="fig" rid="F1">Figure 1A</xref>; <xref ref-type="sec" rid="s10">Supplementary Table S1</xref>, FDR&#x3c;0.05, log2FC &#x3e; 1 or &#x3c; -1). These 861 upregulated genes represented the set of genes transcriptionally activated upon the ZGA minor wave. Consistent with this observation, the 861 upregulated genes were significantly enriched in the &#x201c;2C genes&#x201d; gene set previously defined by Macfarlan and others (<xref ref-type="bibr" rid="B34">Macfarlan et al., 2012</xref>) (<xref ref-type="sec" rid="s10">Supplementary Table S1</xref>, <italic>p</italic> &#x3c; 2.2e-16) with several of the well-known <italic>Dux</italic> targets belonging to the set of upregulated genes (e.g., <italic>Zscan4</italic>, <italic>Prame</italic>, <italic>Gm4340, Dub1</italic> [<italic>Usp17la</italic>]<italic>, Zfp352</italic>, <italic>Eif1a</italic>-like gene family members). Gene ontology enrichment analysis revealed that, in agreement with the analysed biological context, the upregulated gene functions were mainly related to GO terms such as transcription regulation, translation initiation, proteolysis and cell death/proliferation (<xref ref-type="fig" rid="F1">Figure 1B</xref>). Confirming previous evidence (<xref ref-type="bibr" rid="B34">Macfarlan et al., 2012</xref>), the 861 ZGA minor wave genes were significantly associated with MERVL (<italic>p</italic> &#x3c; 2.2e-16), with the transcription start site (TSS) of 147 of these genes (17%) located in the proximity (&#x3c;100&#xa0;nt) of annotated MERVLs, while this is expected by chance for less than 1% of genes in the murine genome (<xref ref-type="fig" rid="F1">Figure 1C</xref>; <xref ref-type="sec" rid="s10">Supplementary Tables S1, S2</xref>). Although the percentage of ZGA minor wave genes associated with MERVL sequences is not predominant, ZGA minor wave gene enrichments in MERVL elements resulted both highly significant and specific as these genes resulted enriched in no other TE sequences (<xref ref-type="sec" rid="s10">Supplementary Table S2</xref>; <xref ref-type="sec" rid="s10">Supplementary Figure S1</xref>).</p>
<p>Next, we wondered whether other genomic features, besides MERVL enrichment nearby the TSS, characterised the ZGA minor wave genes. In particular, starting from previous evidence showing how high gene/promoter density facilitates the transcription initiation at zebrafish ZGA (<xref ref-type="bibr" rid="B20">Hadzhiev et al., 2023</xref>), we tested whether the murine ZGA minor wave genes were located in gene-dense compartments. First, gene clusters were defined in the murine genome as genomic compartments characterised by at least 5 consecutive genes associated to the same functional domain (see Methods, <xref ref-type="sec" rid="s10">Supplementary Table S3</xref>). Then, the number of ZGA minor wave genes located within the clusters was calculated. ZGA minor wave genes resulted significantly enriched in gene clusters with respect to the rest of the transcriptome, with 207 of the 861 ZGA minor wave genes (24%) being located within a cluster, while this was expected by chance for only &#x223c;83 genes (10%) (<xref ref-type="fig" rid="F1">Figure 1D</xref>; <xref ref-type="sec" rid="s10">Supplementary Table S1</xref>, <italic>p</italic> &#x3d; 2.9e-10). To define whether this was a feature specific of the minor wave genes or it was characteristic also of other gene sets, we identified the ZGA major wave genes (upregulated genes in 2-cell vs<italic>.</italic> early 2-cell stage) and overlapped their genomic coordinates with the ones of gene clusters. Of note, cluster enrichment appeared to be ZGA minor wave-specific, as ZGA major wave genes did not display the same enrichment (<xref ref-type="fig" rid="F1">Figure 1D</xref>, <italic>p</italic> &#x3d; 0.36).</p>
</sec>
<sec id="s3-2">
<title>MERVL elements are the most frequently activated TEs at ZGA minor wave</title>
<p>Having identified the genes transcriptionally activated at ZGA minor wave, we sought to define the TEs that were transcriptionally activated at the same stage. To this end, we identified the TE loci differentially expressed between the early 2-cell and the zygote stages. A total of 2,695&#xa0;TEs resulted DE between the two conditions and, of these, 2,589 were upregulated and only 106 were downregulated in early 2-cell compared to zygote (<xref ref-type="fig" rid="F2">Figure 2A</xref>; <xref ref-type="sec" rid="s10">Supplementary Table S4</xref>; <xref ref-type="sec" rid="s10">Supplementary Figure S2A</xref>, FDR&#x3c;0.05, log2FC &#x3e; 1 or &#x3c; -1). As expected, MERVL elements were the TEs most frequently upregulated at this stage with 53% (1,382/2,589) of the upregulated TE loci being annotated as MERVL, while they represent only 1% of the total TEs in the murine genome (<xref ref-type="fig" rid="F2">Figure 2B</xref>; <xref ref-type="sec" rid="s10">Supplementary Figure S2A</xref>, <italic>p</italic> &#x3c; 2.2e-16). In addition, in line with previous evidence (<xref ref-type="bibr" rid="B48">Taylor and Pik&#xf3;, 1987</xref>; <xref ref-type="bibr" rid="B8">Bachvarova, 1988</xref>; <xref ref-type="bibr" rid="B37">Peaston et al., 2004</xref>), the upregulated TEs resulted enriched also in MaLR (observed 19%, expected 12%, <italic>p</italic> &#x3c; 2.2e-16) and B2 elements (observed 13%, expected 10%, <italic>p</italic> &#x3c; 2.2e-16) (<xref ref-type="fig" rid="F2">Figure 2B</xref>). The remaining upregulated TEs were mostly old LINE-1s (not belonging to A, G<sub>
<italic>f</italic>
</sub> and T<sub>
<italic>f</italic>
</sub> subfamilies), ERVs (ERVK and ERV1) and SINE elements (Alu and B4) (<xref ref-type="fig" rid="F2">Figure 2B</xref>; <xref ref-type="sec" rid="s10">Supplementary Figure S2A</xref>). However, these TE subfamilies are amongst the most common in the murine genome and none of these resulted significantly over-represented in the set of upregulated TEs (<xref ref-type="fig" rid="F2">Figure 2B</xref>). The majority of the old LINE-1s resulting significantly upregulated in early 2-cell compared to zygote were evolutionary ancient LINE-1s belonging to the Lx subfamily (<xref ref-type="sec" rid="s10">Supplementary Figure S2B</xref>). Since it is unlikely that these LINE-1s have maintained an autonomous internal promoter, it is reasonable to believe that they are transcribed as part of nearby/overlapping transcripts. This suggestion is supported by the evidence that they result located closer to annotated transcripts than shuffled LINE-1s obtained by randomly permuting these LINE-1 coordinates among the genome (<italic>t</italic>-test <italic>p</italic> &#x3d; 1.84E-08) (<xref ref-type="sec" rid="s10">Supplementary Figure S2C</xref>).</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>MERVL, but not LINE-1, transcription is detected at ZGA minor wave onset. <bold>(A)</bold> Volcano plot showing DE TE loci between early 2-cell and zygote stages. Up- and downregulated TE loci in early 2-cell vs. zygote stages are reported. log<sub>2</sub>FC is reported on x-axis, whereas -log<sub>10</sub> of the FDR on the y-axis. Significantly downregulated genes are reported in the left part of the plot, the upregulated in the right one (FDR&#x3c;0.05 and log<sub>2</sub>FC &#x3e; 1 or &#x3c; -1). <bold>(B)</bold> Fraction of upregulated TE loci in early 2-cell vs. zygote stages for each TE subfamily compared to the genome occupancy of each TE subfamily. Darker bars indicate the number of TEs upregulated for each TE subfamily divided by the total number of TEs upregulated, multiplied by 100. Lighter bars indicate the number of TEs annotated in the mm10 genome or each TE subfamily divided by the total number of TEs in the mm10 genome, multiplied by 100. The number of upregulated TEs for each TE subfamily is reported above each bar. &#x201c;young LINE-1s&#x201d; are considered LINE-1 elements belonging to A, G<sub>
<italic>f</italic>
</sub> and T<sub>
<italic>f</italic>
</sub> subfamilies. &#x201c;old LINE-1&#x201d; instead refers to all the other LINE-1 subfamilies. (&#x2a;&#x2a;&#x2a;<italic>p</italic> &#x3c; 0.001, two-proportions z-test). Number of TEs annotated in the mm10 genome (lighter bars) are as follow: old L1 863,564 (23.6%), young L1 41,612 (1.1%), Alu 574,557 (15.7%), B2 372,923 (10.2%), B4 397,726 (10.9%), ERV1 71,980 (2.0%), ERVK 319317 (8.7%), MaLR 454918 (12.4%), MERVL 46168 (1.3%), Other TEs 515,652 (14.1%). <bold>(C)</bold> Volcano plot showing DE TE consensus between early 2-cell and zygote stages. Significantly downregulated genes are reported in the left part of the plot, the upregulated in the right one (FDR&#x3c;0.05 and log<sub>2</sub>FC &#x3e; 1 or &#x3c; -1). TE subfamily names are reported for all the DE TEs. <bold>(D)</bold> Normalised expression levels (TMM) of the young LINE-1 element consensus sequences (subfamilies A, G<sub>
<italic>f</italic>
</sub>, T<sub>
<italic>f</italic>
</sub>) as calculated by TEspeX in the zygote (light grey), early 2-cell (grey) and 2-cell (dark grey) stages. Bars indicate mean of expression across different LINE-1 consensus sequences belonging to the same subfamily (e.g., L1MdA_I, II, III, IV, etc.) and among the two biological replicates (individual points).</p>
</caption>
<graphic xlink:href="fcell-11-1124266-g002.tif"/>
</fig>
<p>Of note, almost no evolutionary young LINE-1 elements (A, G<sub>
<italic>f</italic>
</sub> and T<sub>
<italic>f</italic>
</sub> subfamilies) resulted upregulated in early 2-cell compared to zygote (<xref ref-type="fig" rid="F2">Figure 2B</xref>; <xref ref-type="sec" rid="s10">Supplementary Figure S2A</xref>). The same results were confirmed at the TE consensus level by using TEspeX, a tool which discriminates between TE expression and passive transcription of exonised TE fragments embedded in annotated transcripts (<xref ref-type="bibr" rid="B5">Ansaloni et al., 2019</xref>; <xref ref-type="bibr" rid="B4">2022</xref>) (<xref ref-type="fig" rid="F2">Figure 2C</xref>; <xref ref-type="sec" rid="s10">Supplementary Table S5</xref>; <xref ref-type="sec" rid="s10">Supplementary Figure S3</xref>). This result might appear contrasting previous observations showing that mature LINE-1 transcripts are required at the 2-cell stage for proper embryo development (<xref ref-type="bibr" rid="B25">Jachowicz et al., 2017</xref>). Here, the small number of biological replicates (i.e., 2) might have compromised the statistical power of the differential expression analysis. However, this apparent discrepancy can be solved when keeping into account that Jachowicz and others collected 2-cell embryos at 46 hpf (<xref ref-type="bibr" rid="B25">Jachowicz et al., 2017</xref>), whereas in the dataset herein analysed early 2-cell embryos were collected at 30 hpf (<xref ref-type="bibr" rid="B55">Wu et al., 2016</xref>). Taken together, these observations indicate that the LINE-1 transcripts needed at 46 hpf are likely not yet transcribed or, at least, mature at 30 hpf.</p>
<p>Furthermore, it is also worth noticing how LINE-1 RNAs are part of the set of transcripts deposited in the oocyte cytoplasm ahead of fertilisation and consequently part of the zygote cytoplasm (<xref ref-type="bibr" rid="B37">Peaston et al., 2004</xref>; <xref ref-type="bibr" rid="B35">Malki et al., 2014</xref>). We confirmed this by measuring the amount of young LINE-1 transcripts in the transcriptionally inactive zygote (<xref ref-type="fig" rid="F2">Figure 2D</xref>) and by analysing embryos where transcription was blocked prior to ZGA with alpha-amanitin (<xref ref-type="sec" rid="s10">Supplementary Figure S4</xref>). From this analysis LINE-1 transcripts resulted detectable in the cytoplasm of the embryos even when zygotic transcription was blocked. LINE-1 transcripts were therefore already present in the zygote prior to ZGA.</p>
<p>The presence of LINE-1 transcripts prior to ZGA makes it impossible to define, by only analysing transcriptomic data, whether LINE-1 transcription is actually started between the zygote and the early 2-cell stage. To overcome these limitations, we decided to integrate ATAC-seq data to investigate the chromatin accessibility of the early embryo genome at ZGA onset especially at LINE-1 loci. This approach is based on the hypothesis that zygotically transcribed mRNAs should require specific chromatin conformations in the zygote facilitating their transcription, while maternal transcripts should not.</p>
</sec>
<sec id="s3-3">
<title>More than 25% of the total open chromatin domains at ZGA minor wave onset reside in LINE-1 elements</title>
<p>By taking advantage of the ATAC-seq dataset from a previous publication (<xref ref-type="bibr" rid="B55">Wu et al., 2016</xref>), we identified the open chromatin domains during mouse preimplantation embryo development (see Methods) (<xref ref-type="sec" rid="s10">Supplementary Table S6</xref>). Our results showed that the number of ATAC-seq peaks increased by 10-fold upon ZGA minor wave (between the early 2-cell &#x2b; alpha-amanitin and the early 2-cell samples), further increasing in correspondence of the ZGA major wave onset (2-cell stage) and then decreasing in 4- and 8-cell stages (<xref ref-type="fig" rid="F3">Figure 3A</xref>). Importantly, at ZGA minor wave onset (early 2-cell), only 50% of the peaks overlapped genic features, with the remaining portion (9,553 peaks) localised in intergenic regions (<xref ref-type="fig" rid="F3">Figure 3B</xref>, <italic>p</italic> &#x3c; 2.2e-16). This is in stark contrast with all the other stages where 70% of the peaks resulted annotated in genic regions. A further investigation of the localisation of these 9,553 early 2-cell intergenic peaks showed that more than 50% of them were in correspondence of LINE-1 elements (5,683 peaks overlapping 3,731 LINE-1s) (<xref ref-type="fig" rid="F3">Figure 3C</xref>). This resulted in a strong enrichment of intergenic open chromatin domains within LINE-1 elements at ZGA minor wave onset (<xref ref-type="fig" rid="F3">Figure 3C</xref>, <italic>p</italic> &#x3c; 2.2e-16). Additionally, such enrichment resulted stage-specific with no other analysed embryonic stage displaying a similar pattern. The results were confirmed either by repeating the analysis of the same dataset with the same tool (bowtie2) or by mapping the reads to the reference genome using other two tools (bwa and bowtie1) to rule out possible biases introduced by the random assignment of non-uniquely mapping reads (<xref ref-type="sec" rid="s10">Supplementary Figure S5</xref>). These results suggest that LINE-1s are associated to chromatin opening and/or to the initiation of transcription at ZGA onset. To discriminate between these two contributions, we investigated the localisation of the ATAC peaks on the 3,731 LINE-1s overlapping early 2-cell intergenic open chromatin domains. Our results showed that the majority of these LINE-1s overlapped open chromatin domains at the 3&#x2019; and not at the 5&#x2019; end (<xref ref-type="fig" rid="F3">Figure 3D</xref>). In addition, these LINE-1s appeared to be marked by open chromatin domains exclusively in the early 2-cell stage (<xref ref-type="fig" rid="F3">Figure 3D</xref>) and no evidence of open chromatin was observed either upstream or downstream the LINE-1s of interest, confirming the specificity of the localisation of these open chromatin domains (<xref ref-type="fig" rid="F3">Figure 3D</xref>). The enrichment of open chromatin at the 3&#x2019; of transcriptional units is of difficult interpretation without additional specific experiments. However, previous evidence suggested that open chromatin at transcription termination sites (TTSs) in early embryos denotes active transcription (<xref ref-type="bibr" rid="B55">Wu et al., 2016</xref>) as it reflects the binding and pausing of factors engaged in transcription termination, including RNA polymerase II (Pol-II) (<xref ref-type="bibr" rid="B18">Glover-Cutter et al., 2008</xref>). In line with this observation, no specific TFs were predicted to bind the open chromatin domains overlapping these LINE-1s (see Methods).</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>Accessible chromatin at the ZGA onset. <bold>(A)</bold> Number of open chromatin domains during preimplantation embryo development. Number of significant (<italic>p</italic> &#x3c; 0.05) open chromatin domains (ATAC-seq peaks) are reported for each of the analysed stages. Pre-ZGA sample is represented by &#x201c;early2&#x2b;a-aman.&#x201d;, which represents an early 2-cell embryo treated with alpha-amanitin, an inhibitor of Pol-II. ZGA minor wave sample is the early 2-cell, whereas the major ZGA wave occurs in the 2-cell stage. <bold>(B)</bold> Distribution of ATAC-seq peaks respective to genomic features. The distribution of the ATAC-seq peaks respective to transcript genomic features is reported for each of the analyse stages as percentage of the total peaks identified in each analysed stage (&#x2a;&#x2a;&#x2a;<italic>p</italic> &#x3c; 0.001, two-proportions z-test). <bold>(C)</bold> Fraction of intergenic ATAC-peaks overlapping annotated TEs in each analysed stage. Only LINE-1, ERVL, MaLR, ERVK and Alu subfamilies are reported. All the other TE subfamilies are classified as &#x201c;Others&#x201d; (&#x2a;&#x2a;&#x2a;<italic>p</italic> &#x3c; 0.001, two-proportions z-test). &#x201c;LINE-1&#x201d; here refers to all the LINE-1s annotated in the mm10 genome with no distinctions between old, young, full-length non-full-length. <bold>(D)</bold> Metagene plot showing the localisation of the ATAC-seq peaks on the genomic loci of the LINE-1 elements overlapping early 2-cell intergenic ATAC peaks identified in <xref ref-type="fig" rid="F3">Figure 3C</xref> (n &#x3d; 3,731). ATAC-seq peak enrichment on LINE-1s of interest is reported on y-axis as percentage of LINE-1s covered by ATAC-seq peak in each position (&#x2b;/- 5&#xa0;kb) of the LINE-1s in analysis.</p>
</caption>
<graphic xlink:href="fcell-11-1124266-g003.tif"/>
</fig>
</sec>
<sec id="s3-4">
<title>Pol-II positioning suggests transcription of LINE-1 elements overlapping intergenic open chromatin domains in the early 2-cell stage might initiate at ZGA minor wave onset</title>
<p>The murine genome is composed by evolutionary old (V, N, Mus and Lx, evolved 4&#x2013;10 MY ago) and young (A, G<sub>
<italic>f</italic>
</sub> or T<sub>
<italic>f</italic>
</sub>, 0.2&#x2013;4 MY) LINE-1 elements and, while the former are usually represented by transcriptionally inactive elements, the latter are more likely to have retained their transcriptional competence (<xref ref-type="bibr" rid="B46">Sookdeo et al., 2013</xref>). Based on this evidence, we classified the previously identified LINE-1 elements overlapping early 2-cell intergenic ATAC-peaks according to the subfamily they belong to, which is a direct indicator of their evolutionary age. The results showed that 92% of such LINE-1s (3,414/3,731) belonged to evolutionary young subfamilies (A, G<sub>
<italic>f</italic>
</sub> or T<sub>
<italic>f</italic>
</sub>), while these subfamilies represent only 4% of the total LINE-1s annotated in the murine genome (<xref ref-type="fig" rid="F4">Figure 4A</xref>; <xref ref-type="sec" rid="s10">Supplementary Table S7</xref>, <italic>p</italic> &#x3c; 2.2e-16). Since most of the LINE-1 elements in the murine genome are 5&#x2019; truncated (<xref ref-type="bibr" rid="B53">Mouse Genome Sequencing Consortium et al., 2002</xref>), these young LINE-1s could still lack an intact promoter thus representing transcriptionally inactive fragments. Therefore, the 3,414 evolutionary young LINE-1s of interest were classified based on the presence or absence of intact A, G<sub>
<italic>f</italic>
</sub> or T<sub>
<italic>f</italic>
</sub> monomers in their internal promoters. To this end, the coordinates of the A, G<sub>
<italic>f</italic>
</sub> or T<sub>
<italic>f</italic>
</sub> monomers in the reference murine genome were retrieved from a previous publication (<xref ref-type="bibr" rid="B59">Zhou and Smith, 2019</xref>) and overlapped with the genomic coordinates of the LINE-1s of interest. Our results showed that 62% of the evolutionary young LINE-1s overlapping intergenic open chromatin domains at ZGA onset (2,116/3,414) contained an intact A, G<sub>
<italic>f</italic>
</sub> or T<sub>
<italic>f</italic>
</sub> monomer, whereas the same feature was displayed by only 30% of the young LINE-1 elements in the reference genome (<xref ref-type="fig" rid="F4">Figure 4B</xref>; <xref ref-type="sec" rid="s10">Supplementary Table S7</xref>, <italic>p</italic> &#x3c; 2.2e-16). These results support the idea that the LINE-1s overlapping early 2-cell intergenic ATAC-peaks could be autonomously transcribed. To define the transcriptional dynamics of these LINE-1s, we interrogated again the RNA-seq data. To this end, we compared the expression levels of the 2,116 intact and 1,298 truncated LINE-1s overlapping early 2-cell intergenic ATAC peaks with the ones of randomly selected young LINE-1s. As expected, when lacking an A, G<sub>
<italic>f</italic>
</sub> or T<sub>
<italic>f</italic>
</sub> monomer in their internal promoters, both groups of LINE-1s resulted not to be expressed (<xref ref-type="fig" rid="F4">Figure 4C</xref>, bottom).When containing A, G<sub>
<italic>f</italic>
</sub> or T<sub>
<italic>f</italic>
</sub> monomers, the 2,116 LINE-1s overlapping early 2-cell intergenic ATAC-seq peaks did not result significantly more expressed in the early 2-cell stage than randomly selected young LINE-1s (<xref ref-type="fig" rid="F4">Figure 4C</xref>, top). Moreover, the expression levels of these 2,116 LINE-1s in the early 2-cell stage were low (&#x3c;1 normalised read), resembling more transcriptional noise rather than a real expression and thus questioning whether these elements, despite showing chromatin accessibility at the early 2-cel stage, are really transcribed in the early 2-cell stage (<xref ref-type="fig" rid="F4">Figure 4C</xref>, top). On the contrary, these LINE-1 RNAs, although lacking chromatin accessibility at these stages (<xref ref-type="sec" rid="s10">Supplementary Figure S6</xref>), were significantly more expressed in the 2-, 4- and 8-cell stages, when compared to randomly selected young LINE-1s (<xref ref-type="fig" rid="F4">Figure 4C</xref>, top). These results highlighted an apparent inconsistency between ATAC-seq and RNA-seq results. To solve this issue, we retrieved publicly available small-scale Tn5-assisted chromatin cleavage with sequencing (Stacc&#x2013;seq) data to profile the Pol-II binding sites in the very same developmental stages (<xref ref-type="bibr" rid="B33">Liu et al., 2020</xref>). First, the genome-wide Pol-II binding sites in the entire dataset (from MII-Oocyte to 8-cell stage) were identified (<xref ref-type="sec" rid="s10">Supplementary Table S8</xref>). Then, Pol-II binding profile nearby the genomic <italic>loci</italic> of the 2,116 LINE-1s of interest was analysed. Coherently with the expectations, Pol-II did not bind the LINE-1s of interest at any stage when they lacked monomers in their internal promoters (<xref ref-type="fig" rid="F4">Figure 4D</xref>, bottom panel). On the contrary, Pol-II appeared to be enriched as early as in the early 2-cell stage on the young LINE-1s of interest when they had a monomer in their internal promoter region (<xref ref-type="fig" rid="F4">Figure 4D</xref> upper panel; <xref ref-type="sec" rid="s10">Supplementary Figure S7A</xref>, <italic>p</italic> &#x3c; 2.2e-16). In addition, Pol-II was enriched also in the monomers of the LINE-1s of interest at the 2-cell stage (<xref ref-type="fig" rid="F4">Figure 4D</xref>; <xref ref-type="sec" rid="s10">Supplementary Figure S7A</xref>, <italic>p</italic> &#x3c; 2.2e-16). Of note, at the 2-cell stage, Pol-II was enriched also at the 3&#x2019; end of such LINE-1s (<xref ref-type="fig" rid="F4">Figure 4D</xref>; <xref ref-type="sec" rid="s10">Supplementary Figure S7B</xref>, <italic>p</italic> &#x3c; 2.2e-16). Since Pol-II positioning at the transcript 3&#x2019; end marks transcription termination (<xref ref-type="bibr" rid="B18">Glover-Cutter et al., 2008</xref>), it is likely that the transcription of such LINE-1s is terminated at the end of the late 2-cell stage, thus suggesting that the transcription of the LINE-1s of interest was initiated at the early 2- and terminated at the late 2-cell stage. However, Pol-II resulted unexpectedly enriched in the monomers of the 2,116 LINE-1s of interest also at the 8-cell stage (<xref ref-type="fig" rid="F4">Figure 4D</xref>; <xref ref-type="sec" rid="s10">Supplementary Figure S7A</xref>, <italic>p</italic> &#x3c; 2.2e-16). Analysis of the specific set of LINE-1 monomers bound by Pol-II at the 8-cell stage revealed how 87% of such monomers were bound by Pol-II already at the early or late 2-cell stage (<xref ref-type="sec" rid="s10">Supplementary Figure S7C</xref>). According to this data, the set of LINE-1s of interest bound by Pol-II at the 8-cell stage is roughly the same which is bound by Pol-II in the early and late 2-cell stages, suggesting the stalling of the Pol-II on these loci at the 8-cell stage, rather than their transcription. This is coherent with previous studies showing LINE-1 transcription exclusively at the 2-cell stage (<xref ref-type="bibr" rid="B25">Jachowicz et al., 2017</xref>; <xref ref-type="bibr" rid="B38">Percharde et al., 2018</xref>), and it is further corroborated by the observation that these LINE-1s showed chromatin accessibility exclusively at the early 2-cell stage (<xref ref-type="fig" rid="F3">Figure 3D</xref>) and that a possible evidence of their transcription termination (Pol-II enrichment at the 3&#x2019; end) is observed exclusively at the late 2-cell stage, and not at the 8-cell stage (<xref ref-type="fig" rid="F4">Figure 4D</xref>; <xref ref-type="sec" rid="s10">Supplementary Figure S7B</xref>).</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>Structural, transcriptional and epigenetic characterisation of LINE-1 elements overlapping early 2-cell intergenic ATAC-seq peaks. <bold>(A)</bold> Subfamily classification of the 3,731 LINE-1s of interest. Left bar represents the percentage of the 3,731 LINE-1s identified in <xref ref-type="fig" rid="F3">Figure 3C</xref> (overlapping intergenic early 2-cell ATAC-seq peaks) belonging to young LINE-1 subfamilies (A, G<sub>
<italic>f</italic>
</sub>, T<sub>
<italic>f</italic>
</sub>, blue) or to other LINE-1 subfamilies (white). Bar on the right represents the same, but for all the LINE-1s annotated in the murine reference genome. Numbers reported within the bars indicate the number of LINE-1s counted for each condition. Numbers above the bars indicate the total number of LINE-1s considered. (&#x2a;&#x2a;&#x2a;<italic>p</italic> &#x3c; 0.001, two-proportions z-test) <bold>(B)</bold> Presence/absence of monomers in the 3,414 young LINE-1s of interest. Left bar represents the percentage of the 3,414 young LINE-1s of interest (overlapping early 2-cell intergenic ATAC-seq peaks and belonging to young subfamilies) containing or not containing a A, G<sub>
<italic>f</italic>
</sub> or T<sub>
<italic>f</italic>
</sub>, monomer (blue and light green, respectively). The bar on the right represents the same, but for all the young LINE-1s annotated in the murine reference genome. Numbers reported within the bars indicate the number of LINE-1s counted for each condition. Numbers above the bars indicate the total number of LINE-1s considered. (&#x2a;&#x2a;&#x2a;<italic>p</italic> &#x3c; 0.001, two-proportions z-test) <bold>(C)</bold> Transcriptional characterisation of the 3,414 young LINE-1s of interest (overlapping early 2-cell intergenic ATAC-seq peaks and belonging to young subfamilies). Normalised expression levels (TMM) of the young LINE-1s overlapping early 2-cell intergenic ATAC-seq peaks are reported for each analysed stage for the 2,116 LINE-1 elements with a monomer (upper panel, blue circles) and for the 1,298 LINE-1s without a monomer (bottom panel, light green circles) monomer. The same is depicted for randomly selected young LINE-1 elements (grey triangles). Expression levels are reported as mean between all the analysed LINE-1s and between the biological replicates of each stage. Black arrow indicates the early 2-cell stage, where chromatin opening is observed in correspondence of the LINE-1s of interest 3&#x2019; end, but no significant increased expression is observed compared to randomly selected young LINE-1s. (&#x2a;&#x2a;&#x2a;<italic>p</italic> &#x3c; 0.001, z-statistics) <bold>(D)</bold> Metagene plot showing the Pol-II enrichment on the genomic loci of the 3,414 young LINE-1s of interest. Pol-II enrichment is calculated on: i) young LINE-1s overlapping early 2-cell intergenic ATAC-seq peaks with a monomer (upper panel, blue, n &#x3d; 2,116), ii) young LINE-1s overlapping early 2-cell intergenic ATAC-seq peaks without a monomer (bottom panel, light green, n &#x3d; 1,298) and iii) LINE-1 coordinates shuffled for 1,000 times (grey). Enrichment of Pol-II is reported as RPKM (number of reads per bin/number of million mapped reads &#x2a; bin length in kb). For the shuffle LINE-1s, mean signal among the randomisations is reported. S &#x3d; start coordinate, E &#x3d; end coordinate.</p>
</caption>
<graphic xlink:href="fcell-11-1124266-g004.tif"/>
</fig>
</sec>
<sec id="s3-5">
<title>YY1 is a potential candidate as transcriptional regulator at ZGA minor wave of young LINE-1s containing a monomer and mapping within intergenic open chromatin</title>
<p>Since our ATAC-seq and Pol-II binding sites analysis suggested that the young LINE-1s overlapping early 2-cell open chromatin domains and containing an A, G<sub>
<italic>f</italic>
</sub> or T<sub>
<italic>f</italic>
</sub> monomer were actively transcribed at ZGA onset, we wondered which TF might be responsible for their transcriptional activation. To this end, a motif enrichment analysis was performed searching for known motifs predicted to bind the monomer sequences of the 2,116 young LINE-1s containing a monomer and overlapping early 2-cell open chromatin domains, using as background the sequences of the 1,298 young LINE-1s overlapping open chromatin domains in the early 2-cell stage but not containing a monomer in their promoter sequence. According to this analysis, the most significantly enriched motif in the monomer containing group was YY1, a well-known TF binding LINE-1 monomers (<xref ref-type="fig" rid="F5">Figure 5A</xref>, <italic>p</italic> &#x3c; 2.2e-16) (<xref ref-type="bibr" rid="B7">Athanikar et al., 2004</xref>). In particular, while 60% (1,259/2,116) of the LINE-1s of interest carrying a monomer were predicted to contain YY1 binding site, this was observed for only 3% (35/1,298) of the LINE-1s with the same features as the ones of interest but lacking a monomer in their promoter (<xref ref-type="fig" rid="F5">Figure 5B</xref>, <italic>p</italic> &#x3c; 2.2e-16). We next sought to define the transcriptional profile of <italic>Yy1</italic> in the preimplantation embryo. According to the analyzed RNA-seq data, <italic>Yy1</italic> mRNA appeared to be part of the maternal transcripts deposited in the oocyte cytoplasm, as its mRNA is already detectable, although at very low levels, in the transcriptionally inactive MII-Oocyte and zygote stages (<xref ref-type="fig" rid="F5">Figure 5C</xref>). In addition, RNA-seq data showed that <italic>Yy1</italic> expression was significantly increased between the early 2- and the 2-cell stage, suggesting that <italic>Yy1</italic> transcription is activated upon ZGA major wave (<xref ref-type="fig" rid="F5">Figure 5C</xref>). However, Pol-II binding data indicated that Pol-II bound <italic>Yy1</italic> TSS as early as in the early 2-cell stage (<xref ref-type="fig" rid="F5">Figure 5D</xref>).</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>YY1 is predicted to bind LINE-1s of interest. <bold>(A)</bold> YY1 motif. YY1 is the known motif most significantly enriched in the monomer regions of the young LINE-1s containing a monomer and overlapping early 2-cell intergenic ATAC-seq peaks. <bold>(B)</bold> YY1 enrichment in LINE-1s of interest. Left bar (blue) reports the percentage of the 2,116 young LINE-1s containing a monomer and overlapping early 2-cell intergenic ATAC-seq peaks predicted to bind YY1 in their monomer region is reported on y-axis. Right bar (light green) reports the same, but for the 1,298 young LINE-1s overlapping early 2-cell intergenic ATAC-seq peaks and not containing a monomer. (&#x2a;&#x2a;&#x2a;<italic>p</italic> &#x3c; 0.001, two-proportions z-test). <bold>(C)</bold> <italic>Yy1</italic> expression levels. Normalised expression levels (TMM) of <italic>Yy1</italic> are reported on y-axis for all the analysed stages. Data points indicate the <italic>Yy1</italic> expression in the two replicates analysed for each stage. Dashed line connects the mean expression levels calculated between the two replicates of each stage. Expression levels are reported as log<sub>2</sub>(TMM&#x2b;1). (&#x2a;&#x2a;&#x2a;<italic>p</italic> &#x3c; 0.001 and log<sub>2</sub>FC &#x3e; 1). <bold>(D)</bold> Screenshot showing <italic>Yy1</italic> RNA-seq and Pol-II binding sites normalised signal. The first five tracks report the normalised expression levels from RNA-seq data. The last five, the Pol-II binding profile. Annotated genes and TEs (blue) are also reported.</p>
</caption>
<graphic xlink:href="fcell-11-1124266-g005.tif"/>
</fig>
</sec>
</sec>
<sec sec-type="discussion" id="s4">
<title>Discussion</title>
<p>The role of MERVL elements as potent regulators of murine ZGA is well known (<xref ref-type="bibr" rid="B37">Peaston et al., 2004</xref>; <xref ref-type="bibr" rid="B34">Macfarlan et al., 2012</xref>). However, another class of TEs, LINE-1 elements, recently came under the spotlight as key regulator of murine ZGA (<xref ref-type="bibr" rid="B25">Jachowicz et al., 2017</xref>; <xref ref-type="bibr" rid="B38">Percharde et al., 2018</xref>). Yet, how LINE-1 transcription is activated and how, and if, the transcriptional program of these elements differs from the one of MERVL is unclear. To confront these important molecular questions, we integrated RNA-seq, ATAC-seq and Stacc&#x2013;seq data to characterise MERVL and LINE-1 transcriptional dynamics during murine ZGA at the transcriptional and epigenetic levels.</p>
<p>The transcriptional characterisation of the murine ZGA highlights a remarkable number of both genes (861) and TEs (2,589) transcriptionally activated at ZGA minor wave. On the one hand, our results confirm previous evidence highlighting that ZGA minor wave genes are enriched in MERVL elements nearby their TSS (<xref ref-type="bibr" rid="B34">Macfarlan et al., 2012</xref>) and that MERVL undergo transcriptional activation at ZGA onset (<xref ref-type="bibr" rid="B14">De Iaco et al., 2017</xref>; <xref ref-type="bibr" rid="B23">Hendrickson et al., 2017</xref>). On the other hand, our analysis highlights new genomic features characterising the ZGA minor wave genes. In particular, the 861 ZGA minor wave genes result enriched in gene-dense compartments such as gene clusters. Although the function of gene clusters is still unclear, we speculate that, likewise MERVL, gene clusters could act as single regulators controlling multiple genes at the same time. In addition, the colocalization of different copies of similar and co-expressed genes might ensure the production of the needed molecular effector even in cases of mutations inactivating one of the members of the cluster. Moreover, since gene clusters are transcriptionally dense genomic compartments where multiple activators are bound in a tight genomic portion, it could also be proposed that such a high density of activating factors facilitates the initiation of the embryo transcription, as recently observed in zebrafish (<xref ref-type="bibr" rid="B20">Hadzhiev et al., 2023</xref>).</p>
<p>Our data, together with previous evidence (<xref ref-type="bibr" rid="B37">Peaston et al., 2004</xref>; <xref ref-type="bibr" rid="B35">Malki et al., 2014</xref>), show that young LINE-1 transcripts are already present in the embryo cytoplasm ahead of fertilisation, as LINE-1s belong to the set of maternal transcripts deposited in the oocyte cytoplasm, making it difficult to detect upregulated young LINE-1s in early 2-cell compared to zygote stages to identify the elements first transcribed by the embryo. To investigate the possible zygotic transcription upon ZGA minor wave of LINE-1s, we integrated RNA-seq data with ATAC-seq data of the same developmental stages. The analysis of the ATAC-seq data reveals that, specifically at the ZGA onset (early 2-cell), the identified chromatin domains are enriched in intergenic regions (50% of the total peaks). Of the intergenic open chromatin domains identified at ZGA onset, 50% specifically overlapped LINE-1 elements, with no other stages displaying a similar feature. These results indicate that more than 25% of the total open chromatin domains identified at ZGA onset are located in intergenic regions in correspondence of LINE-1s. The investigation of the localisation of these open chromatin domains with respect to LINE-1 <italic>loci</italic>, reveals that the ATAC-seq peaks are preferentially located at the 3&#x2019; end of the LINE-1s of interest. Although the interpretation of open chromatin at the 3&#x2019; end of genes/TEs still remains elusive, previous evidence suggests that this phenomenon, at least in early embryos, denotes active transcription (<xref ref-type="bibr" rid="B55">Wu et al., 2016</xref>) as it reflects the binding and pausing of factors engaged in transcription termination, including RNA polymerase II (Pol-II) (<xref ref-type="bibr" rid="B18">Glover-Cutter et al., 2008</xref>). In addition, LINE-1s within intergenic open chromatin domains result enriched in evolutionary young elements containing an intact monomer in their internal promoter, a pre-requisite for active transcription. These results support the idea that LINE-1s are transcribed at ZGA onset. However, when interrogating again the RNA-seq dataset comparing the expression levels of these LINE-1s with a set of randomly selected elements, no significant differences were observed at the early 2-cell stage, but only in the 2- 4- and 8-cell stages, although chromatin accessibility on these LINE-1s is observed specifically at the early 2-cell stage.</p>
<p>This lack of consistency between RNA-seq and ATAC-seq data can be explained in a model in which the protein of a transcriptional activator for these LINE-1s, such as YY1, is not present at the early 2-cell stage. Interestingly, proteomic data (<xref ref-type="bibr" rid="B24">Israel et al., 2019</xref>) show that YY1 expression reaches its top of expression at the 2-cell stage (<xref ref-type="bibr" rid="B24">Israel et al., 2019</xref>) supporting the proposed model. This model, however, does not explain how at the 2-, 4- and 8-cell stages the transcription of the LINE-1s of interest occurs despite the lack of accessible chromatin. Here methodological differences can be taken into consideration. In the analysed RNA-seq library, the sequenced transcripts were selected based on the presence of the poly-A tail, thus the sequenced library was composed exclusively of mature transcripts. LINE-1 transcripts detected at 2-, 4- and 8-cell stage should not be considered necessarily transcribed in the very same stage as their transcription could have begun in earlier stages (<italic>i.e.</italic>, early 2-cell) while the maturation happened in later ones. We therefore believe that, at least in this highly dynamic time-course dataset, it is difficult to make direct correlations between ATAC-seq and RNA-seq dataset as the former is probably measuring chromatin accessibility (transcription occurrence/priming) whereas the latter is possibly detecting mature transcripts (transcription termination). Although reasonable, our model is speculative and based exclusively on meta-analysis and therefore needs to be properly validated.</p>
<p>To further investigate the transcriptional dynamics of the intergenic LINE-1s containing monomers localized within open chromatin at ZGA, we retrieved Pol-II Stacc-seq data identifying Pol-II binding during mouse preimplantation development. Remarkably, Pol-II is significantly enriched in the monomers of the LINE-1s of interest with respect to the rest of the genome as early as at ZGA onset, whereas it does not bind LINE-1s lacking a monomer in their internal promoters. In addition, Pol-II is enriched at both 5&#x2019; and 3&#x2019; ends of the LINE-1s of interest at the 2-cell stage and at the 5&#x2019; end of these LINE-1s at the 8-cell stage. Since Pol-II enrichment at the TTS marks transcriptional termination and that the LINE-1s elements bound by Pol-II at the 8-cell stage were the same bound at early 2- or late 2-cell stage, we hypothesise that LINE-1 transcription started at the early 2-cell stage and terminated at the late 2-cell stage with Pol-II stalling on the monomers of a portion of these LINE-1s, without transcribing them, at the 8-cell stage.</p>
<p>Finally, our motif enrichment analyses predicts that YY1, a well-known TF binding LINE-1 monomers (<xref ref-type="bibr" rid="B7">Athanikar et al., 2004</xref>), could be the TF activating the transcription of these LINE-1s. Although this observation only results from an <italic>in silico</italic> prediction, it is interesting to observe how YY1 was previously described to contribute to the silencing of ERVs in mouse embryonic stem cells (<xref ref-type="bibr" rid="B19">Guallar et al., 2012</xref>; <xref ref-type="bibr" rid="B44">Schlesinger et al., 2013</xref>). Thus, it is possible that YY1 might participate to the repression of the <italic>Dux</italic>/MERVL transcriptional program by acting through two pathways. On the one hand, YY1 indirectly switches-off the <italic>Dux</italic>/MERVL pathway by activating the transcription of LINE-1s, whose RNAs participate to <italic>Dux</italic>/MERVL pathway silencing (<xref ref-type="bibr" rid="B38">Percharde et al., 2018</xref>). On the other one, YY1 directly participates to <italic>Dux</italic>/MERVL silencing as described in mouse embryonic stem cells (<xref ref-type="bibr" rid="B19">Guallar et al., 2012</xref>; <xref ref-type="bibr" rid="B44">Schlesinger et al., 2013</xref>) reinforcing the LINE-1 silencing activity. Again, it is worth mentioning that these results derive from a pure computational analysis thus in need of further experimental validation.</p>
<p>The major limitation of our computational study is given by the usage of short reads to infer signals from transposable elements. Using short reads to map the activity of TEs is difficult due to the repetitive nature of these elements. Being sure of the location of a read from a repetitive element is not always possible. There is no solution to this problem, especially considering that the use of uniquely mapping reads alone often prevents the identification of signals coming from certain portions of repeated elements, generally the youngest and most active ones. However, the lack of a solution to this problem should not prevent the execution of exploratory analyses and their usage to propose models to be validated with alternative and more specific techniques. Models that were unknown before and that could be biologically important to increase our knowledge in a given topic. In our study, the uncertainty about the specific position of a certain portion of reads was mitigated by the robustness of the identified enrichments that resulted to characterize the majority of the TEs belonging to a specific subgroup (young LINE-1s) in a specific developmental stage (early 2-cell). Even if we could not be sure about the exact location of each specific elements active at a given time, our analysis allowed us to understand that young LINE-1 elements show specific features as a group, which, according to us, paves the way for future and more focused analysis.</p>
<p>In summary, we have identified a set of evolutionary young LINE-1 elements ready to be transcribed at ZGA minor wave onset marked by chromatin accessibility at the 3&#x2019; ends and Pol II binding at the promoter regions. In particular, these LINE-1s are confined in intergenic regions, in contrast with <italic>Dux</italic>/MERVL targets whose transcription is started from gene-dense compartments. Altogether, our results suggest that <italic>Dux</italic>/MERVL and LINE-1 transcriptional pathways are two distinct and antagonistic pathways whose activation at ZGA minor wave is started from spatially separated genomic compartments of the murine genome.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s5">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/<xref ref-type="sec" rid="s10">Supplementary Material</xref>, further inquiries can be directed to the corresponding authors. All the analysed-omic datasets were retrieved from previous publications: RNA-seq: <ext-link ext-link-type="uri" xlink:href="https://www.ebi.ac.uk/ena/browser/view/PRJNA277361">https://www.ebi.ac.uk/ena/browser/view/PRJNA277361</ext-link> ATAC-seq: <ext-link ext-link-type="uri" xlink:href="https://www.ebi.ac.uk/ena/browser/view/PRJNA277362">https://www.ebi.ac.uk/ena/browser/view/PRJNA277362</ext-link> Stacc-seq: <ext-link ext-link-type="uri" xlink:href="https://www.ebi.ac.uk/ena/browser/view/PRJNA558961">https://www.ebi.ac.uk/ena/browser/view/PRJNA558961</ext-link>.</p>
</sec>
<sec id="s6">
<title>Author contributions</title>
<p>FA conceived the study, performed all the analysis, interpreted the results, wrote the manuscript. SG supervised the study. RS conceived and supervised the study, wrote the manuscript. All authors contributed to the article and approved the submitted version.</p>
</sec>
<sec id="s7">
<title>Funding</title>
<p>This work has been funded by intramural grants to RS by SISSA and to SG by IIT.</p>
</sec>
<ack>
<p>We thank Geppino Falco and Pellegrino Mazzone for discussion and interpretation of results on gene clusters. We are indebted to all the members of the RS and SG laboratories for data discussion and interpretation. The authors would like to thank the reviewers for their constructive comments and all researchers who made the data used in this work publicly available, without their data this study would have not been possible. We acknowledge that the research activity herein was carried out using the IIT HPC infrastructure.</p>
</ack>
<sec sec-type="COI-statement" id="s8">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s9">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec id="s10">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fcell.2023.1124266/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fcell.2023.1124266/full&#x23;supplementary-material</ext-link>
</p>
<supplementary-material>
<label>Supplementary Figure S1</label>
<caption>
<p>TE enrichment in the TSS of the minor wave genes. TSS coordinates of the 861 early 2-cell / zygote up-regulated genes (minor wave genes) have been elongated by 100&#x00a0;nt at both ends and overlapped with the coordinates of all the TEs annotated in the mm10 genome. The number of overlaps for each TE subfamily is indicated by the dark-coloured bars (Observed). The same analysis was repeated on randomly selected genes (Expected, light-coloured bars, 1,000 randomisations, mean &#x2b;/&#x2212; sd is reported). Only enriched, and not depletion, are considered as significant (&#x2a;&#x2a;&#x2a;<italic>p</italic>&#x003c;2.2e-16, z-statistics).</p>
</caption>
</supplementary-material>
<supplementary-material>
<label>Supplementary Figure S2</label>
<caption>
<p>Number of down- and up-regulated TEs in early 2-cell vs zygote. <bold>(A)</bold> Number of TE loci down (left panel) and up-regulated (right panel) in early 2-cell vs zygote for each TE subfamily. The number of DE loci is also reported on the top of each bar. &#x201c;young LINE-1&#x201d; refers to LINE-1 elements belonging to A, Gf or Tf subfamilies. &#x201c;old LINE-1&#x201d; refers to all the other LINE-1 elements. <bold>(B)</bold> Number of old LINE-1s up-regulated between early 2-cell and zygote stages. The number of up-regulated LINE-1 loci for each old LINE-1s subfamily is reported on y-axis. The old LINE-1s here represented are the same LINE-1s classified as &#x201c;old LINE-1&#x201d; in panel A (<italic>n</italic> &#x2d; 136). <bold>(C)</bold> Distance of the 136 old LINE-1s from the closest gene. Histogram reporting the distance in nucleotides between each of the 136 up-regulated old LINE-1s and the closest annotated gene. Each bin groups 3&#x00a0;kb.</p>
</caption>
</supplementary-material>
<supplementary-material>
<label>Supplementary Figure S3</label>
<caption>
<p>TE consensus expression levels. Heatmap reporting the normalised expression levels for all the TE consensus annotated in the murine genome. TE consensus with no expression in any of the analysed stages were removed. Expression values are reported as TMM (edgeR&#x2019;s normalised counts) in log2 scale. Pseudocount of 1 was summed to TMM to prevent infinite values after log transformation. Names of the TE consensus are reported on the right of the heatmap. TEs are grouped according to major TE subfamilies (Alu, IAP, MERVL, other ERV, young LINE-1 and all the other LINE-1 elements).</p>
</caption>
</supplementary-material>
<supplementary-material>
<label>Supplementary Figure S4</label>
<caption>
<p>Detection of young LINE-1 transcripts in transcriptionally blocked embryos. Normalised expression levels (TMM) of young LINE-1 element consensus sequences are reported for embryos treated with alpha-amanitin. Alpha-amanitin treatment was started at oocyte stage and RNA-seq sequencing performed at 1-cell (left panel) and late 2-cell stage (right panel). The different TE consensus sequences of each young LINE-1 subfamilies are reported on x-axis. Data points indicated the expression values for each biological replica (<italic>n</italic> &#x3d; 4). In these embryos the endogenous transcription, and consequently ZGA, has been blocked. Therefore, the signal detected from LINE-1 transcripts derives exclusively from maternally supplied transcripts originally deposited into the oocyte cytoplasm.</p>
</caption>
</supplementary-material>
<supplementary-material>
<label>Supplementary Figure S5</label>
<caption>
<p>LINE-1 enrichment in intergenic regions at early 2-cell is confirmed by three independent analyses. <bold>(A, B)</bold> The same analysis performed to generate panels A and B of <xref ref-type="fig" rid="F3">Figure 3</xref> was repeated by performing an independent analysis by mapping the reads to the reference genome by using bwa. <bold>(C, D)</bold> Same as in A and B, but reads were mapped to the reference genome by bowtie2. <bold>(E, F)</bold> Same as in A and B, but reads were mapped to the reference genome by bowtie1. This was done in order to rule out possible biases introduced by the random assignment of non-uniquely mapping reads.</p>
</caption>
</supplementary-material>
<supplementary-material>
<label>Supplementary Figure S6</label>
<caption>
<p>Number of 2,116 LINE-1s of interest overlapping ATAC-seq peaks. The number of the 2,116 LINE-1s of interest (young LINE-1s with a monomer, overlapping early 2-cell intergenic ATAC-peaks) overlapping ATAC-seq peaks in each analysed stage are reported on x-axis.</p>
</caption>
</supplementary-material>
<supplementary-material>
<label>Supplementary Figure S7</label>
<caption>
<p>Enrichment of Pol-II in the 2,116 young LINE-1s with a monomer overlapping early 2-cell intergenic ATAC-seq peaks. <bold>(A)</bold> Pol-II enrichment in the monomers of the LINE-1s of interest. Blue bars represent the number of young LINE-1s early 2-cell intergenic ATAC-positive containing a monomer which overlap Pol-II binding sites (&#x2b;/&#x2212; 100&#x00a0;nt). Grey bars represent the same, but the overlap was performed between the coordinates of the monomer sequences of the LINE-1s of interest and shuffled Pol-II coordinates (1,000 times). In shuffled bars, mean &#x2b;/&#x2212; sem among the 1,000 randomisations is reported <bold>(B)</bold> Same as in <bold>(A)</bold>, but 3&#x2019; end coordinates (TTS &#x2b;/&#x2212; 100&#x00a0;nt), instead of monomer, were considered. <bold>(C)</bold> Upset plot representing the number of monomers of the 2,116 LINE-1s of interest that are bound by Pol-II in early 2-cell, late 2-cell and 8-cell stages. &#x201c;Set size&#x201d; indicates the total number of monomers of the LINE-1s of interest bound by Pol-II (the same as indicated by blue bars in panel A). Vertical bars indicate the number of monomers commonly or individually bound by Pol-II in each of the three stages of interest. (&#x2a;&#x2a;&#x2a;<italic>p</italic> &#x003c; 0.001, z-statistics, in both A and B).</p>
</caption>
</supplementary-material>
<supplementary-material>
<label>Supplementary Table S1</label>
<caption>
<p>DE gene analysis results. Beside statistics from the edgeR analysis, the normalized expression levels (TMM) are reported. In addition, each gene was marked as Macfarlan positive or negative, ERVL positive or negative and the localization inside or outside a gene cluster is also reported.</p>
</caption>
</supplementary-material>
<supplementary-material>
<label>Supplementary Table S2</label>
<caption>
<p>TEs enriched nearby the TSS (&#x2b;/&#x2212;100&#x00a0;nt) of the 861 ZGA minor wave genes.</p>
</caption>
</supplementary-material>
<supplementary-material>
<label>Supplementary Table S3</label>
<caption>
<p>Genomic coordinates of the gene clusters as identified by ClusterScan.</p>
</caption>
</supplementary-material>
<supplementary-material>
<label>Supplementary Table S4</label>
<caption>
<p>DE results, TE loci analysis (SQuIRE).</p>
</caption>
</supplementary-material>
<supplementary-material>
<label>Supplementary Table S5</label>
<caption>
<p>DE results, TE consensus analysis (TEspeX).</p>
</caption>
</supplementary-material>
<supplementary-material>
<label>Supplementary Table S6</label>
<caption>
<p>Genomic coordinates of the significantly identified ATAC-seq peaks.</p>
</caption>
</supplementary-material>
<supplementary-material>
<label>Supplementary Table S7</label>
<caption>
<p>Genomic coordinates of the 3,731 LINE-1s overlapping intergenic early 2-cell ATAC-seq peaks.</p>
</caption>
</supplementary-material>
<supplementary-material>
<label>Supplementary Table S8</label>
<caption>
<p>Genomic coordinates of the significantly identified Stacc-seq Pol-II binding sites.</p>
</caption>
</supplementary-material>
<supplementary-material xlink:href="Image3.TIFF" id="SM1" mimetype="application/TIFF" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Image1.TIFF" id="SM2" mimetype="application/TIFF" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="DataSheet4.CSV" id="SM3" mimetype="application/CSV" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="DataSheet6.CSV" id="SM4" mimetype="application/CSV" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="DataSheet3.CSV" id="SM5" mimetype="application/CSV" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Image5.TIFF" id="SM6" mimetype="application/TIFF" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="DataSheet8.CSV" id="SM7" mimetype="application/CSV" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="DataSheet7.CSV" id="SM8" mimetype="application/CSV" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Image6.TIFF" id="SM9" mimetype="application/TIFF" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="DataSheet5.CSV" id="SM10" mimetype="application/CSV" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Image2.TIFF" id="SM11" mimetype="application/TIFF" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Image4.TIFF" id="SM12" mimetype="application/TIFF" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="DataSheet1.CSV" id="SM13" mimetype="application/CSV" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Image7.TIFF" id="SM14" mimetype="application/TIFF" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="DataSheet2.CSV" id="SM15" mimetype="application/CSV" xmlns:xlink="http://www.w3.org/1999/xlink"/>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Alexa</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Rahnenfuhrer</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2019</year>). <source>topGO: Enrichment analysis for gene ontology</source>.</citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Anders</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Pyl</surname>
<given-names>P. T.</given-names>
</name>
<name>
<surname>Huber</surname>
<given-names>W.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>HTSeq-a Python framework to work with high-throughput sequencing data</article-title>. <source>Bioinformatics</source> <volume>31</volume>, <fpage>166</fpage>&#x2013;<lpage>169</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btu638</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Andrews</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2010</year>). <source>FastQC: A quality control tool for high throughput sequence data</source>. <comment>Available at: <ext-link ext-link-type="uri" xlink:href="http://www.bioinformatics.babraham.ac.uk/projects/fastqc/">http://www.bioinformatics.babraham.ac.uk/projects/fastqc/</ext-link>
</comment>.</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ansaloni</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Gualandi</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Esposito</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Gustincich</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Sanges</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>TEspeX: Consensus-specific quantification of transposable element expression preventing biases from exonized fragments</article-title>. <source>Bioinformatics</source> <volume>38</volume>, <fpage>4430</fpage>&#x2013;<lpage>4433</lpage>. <comment>btac526</comment>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btac526</pub-id>
</citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ansaloni</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Scarpato</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Di Schiavi</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Gustincich</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Sanges</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Exploratory analysis of transposable elements expression in the <italic>C. elegans</italic> early embryo</article-title>. <source>BMC Bioinforma.</source> <volume>20</volume>, <fpage>484</fpage>. <pub-id pub-id-type="doi">10.1186/s12859-019-3088-7</pub-id>
</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Asami</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Lam</surname>
<given-names>B. Y. H.</given-names>
</name>
<name>
<surname>Hoffmann</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Suzuki</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Lu</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Yoshida</surname>
<given-names>N.</given-names>
</name>
<etal/>
</person-group> (<year>2023</year>). <article-title>A program of successive gene expression in mouse one-cell embryos</article-title>. <source>Cell Rep.</source> <volume>42</volume>, <fpage>112023</fpage>. <pub-id pub-id-type="doi">10.1016/j.celrep.2023.112023</pub-id>
</citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Athanikar</surname>
<given-names>J. N.</given-names>
</name>
<name>
<surname>Badge</surname>
<given-names>R. M.</given-names>
</name>
<name>
<surname>Moran</surname>
<given-names>J. V.</given-names>
</name>
</person-group> (<year>2004</year>). <article-title>A YY1-binding site is required for accurate human LINE-1 transcription initiation</article-title>. <source>Nucleic Acids Res.</source> <volume>32</volume>, <fpage>3846</fpage>&#x2013;<lpage>3855</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkh698</pub-id>
</citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bachvarova</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>1988</year>). <article-title>Small B2 RNAs in mouse oocytes, embryos, and somatic tissues</article-title>. <source>Dev. Biol.</source> <volume>130</volume>, <fpage>513</fpage>&#x2013;<lpage>523</lpage>. <pub-id pub-id-type="doi">10.1016/0012-1606(88)90346-6</pub-id>
</citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bao</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Kojima</surname>
<given-names>K. K.</given-names>
</name>
<name>
<surname>Kohany</surname>
<given-names>O.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Repbase Update, a database of repetitive elements in eukaryotic genomes</article-title>. <source>Mob. DNA</source> <volume>6</volume>, <fpage>11</fpage>. <pub-id pub-id-type="doi">10.1186/s13100-015-0041-9</pub-id>
</citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bolger</surname>
<given-names>A. M.</given-names>
</name>
<name>
<surname>Lohse</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Usadel</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Trimmomatic: A flexible trimmer for illumina sequence data</article-title>. <source>Bioinformatics</source> <volume>30</volume>, <fpage>2114</fpage>&#x2013;<lpage>2120</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btu170</pub-id>
</citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Conway</surname>
<given-names>J. R.</given-names>
</name>
<name>
<surname>Lex</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Gehlenborg</surname>
<given-names>N.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>UpSetR: an R package for the visualization of intersecting sets and their properties</article-title>. <source>Bioinformatics</source> <volume>33</volume>, <fpage>2938</fpage>&#x2013;<lpage>2940</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btx364</pub-id>
</citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dale</surname>
<given-names>R. K.</given-names>
</name>
<name>
<surname>Pedersen</surname>
<given-names>B. S.</given-names>
</name>
<name>
<surname>Quinlan</surname>
<given-names>A. R.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>Pybedtools: A flexible Python library for manipulating genomic datasets and annotations</article-title>. <source>Bioinformatics</source> <volume>27</volume>, <fpage>3423</fpage>&#x2013;<lpage>3424</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btr539</pub-id>
</citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>De Iaco</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Planet</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Coluccio</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Verp</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Duc</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Trono</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>DUX-family transcription factors regulate zygotic genome activation in placental mammals</article-title>. <source>Nat. Genet.</source> <volume>49</volume>, <fpage>941</fpage>&#x2013;<lpage>945</lpage>. <pub-id pub-id-type="doi">10.1038/ng.3858</pub-id>
</citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dobin</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Davis</surname>
<given-names>C. A.</given-names>
</name>
<name>
<surname>Schlesinger</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Drenkow</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Zaleski</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Jha</surname>
<given-names>S.</given-names>
</name>
<etal/>
</person-group> (<year>2013</year>). <article-title>Star: Ultrafast universal RNA-seq aligner</article-title>. <source>Bioinformatics</source> <volume>29</volume>, <fpage>15</fpage>&#x2013;<lpage>21</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/bts635</pub-id>
</citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Eckersley-Maslin</surname>
<given-names>M. A.</given-names>
</name>
<name>
<surname>Alda-Catalinas</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Reik</surname>
<given-names>W.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Dynamics of the epigenetic landscape during the maternal-to-zygotic transition</article-title>. <source>Nat. Rev. Mol. Cell Biol.</source> <volume>19</volume>, <fpage>436</fpage>&#x2013;<lpage>450</lpage>. <pub-id pub-id-type="doi">10.1038/s41580-018-0008-z</pub-id>
</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Glover-Cutter</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Kim</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Espinosa</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Bentley</surname>
<given-names>D. L.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>RNA polymerase II pauses and associates with pre-mRNA processing factors at both ends of genes</article-title>. <source>Nat. Struct. Mol. Biol.</source> <volume>15</volume>, <fpage>71</fpage>&#x2013;<lpage>78</lpage>. <pub-id pub-id-type="doi">10.1038/nsmb1352</pub-id>
</citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Guallar</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>P&#xe9;rez-Palacios</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Climent</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Mart&#xed;nez-Abad&#xed;a</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Larraga</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Fern&#xe1;ndez-Juan</surname>
<given-names>M.</given-names>
</name>
<etal/>
</person-group> (<year>2012</year>). <article-title>Expression of endogenous retroviruses is negatively regulated by the pluripotency marker Rex1/Zfp42</article-title>. <source>Nucleic Acids Res.</source> <volume>40</volume>, <fpage>8993</fpage>&#x2013;<lpage>9007</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gks686</pub-id>
</citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hadzhiev</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Wheatley</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Cooper</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Ansaloni</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Whalley</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Z.</given-names>
</name>
<etal/>
</person-group> (<year>2023</year>). <article-title>The miR-430 locus with extreme promoter density forms a transcription body during the minor wave of zygotic genome activation</article-title>. <source>Dev. Cell</source> <volume>58</volume>, <fpage>155</fpage>&#x2013;<lpage>170.e8</lpage>. <pub-id pub-id-type="doi">10.1016/j.devcel.2022.12.007</pub-id>
</citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Harris</surname>
<given-names>C. R.</given-names>
</name>
<name>
<surname>Millman</surname>
<given-names>K. J.</given-names>
</name>
<name>
<surname>van der Walt</surname>
<given-names>S. J.</given-names>
</name>
<name>
<surname>Gommers</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Virtanen</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Cournapeau</surname>
<given-names>D.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Array programming with NumPy</article-title>. <source>Nature</source> <volume>585</volume>, <fpage>357</fpage>&#x2013;<lpage>362</lpage>. <pub-id pub-id-type="doi">10.1038/s41586-020-2649-2</pub-id>
</citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Heinz</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Benner</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Spann</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Bertolino</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>Y. C.</given-names>
</name>
<name>
<surname>Laslo</surname>
<given-names>P.</given-names>
</name>
<etal/>
</person-group> (<year>2010</year>). <article-title>Simple combinations of lineage-determining transcription factors prime cis-regulatory elements required for macrophage and B cell identities</article-title>. <source>Mol. Cell</source> <volume>38</volume>, <fpage>576</fpage>&#x2013;<lpage>589</lpage>. <pub-id pub-id-type="doi">10.1016/j.molcel.2010.05.004</pub-id>
</citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hendrickson</surname>
<given-names>P. G.</given-names>
</name>
<name>
<surname>Dor&#xe1;is</surname>
<given-names>J. A.</given-names>
</name>
<name>
<surname>Grow</surname>
<given-names>E. J.</given-names>
</name>
<name>
<surname>Whiddon</surname>
<given-names>J. L.</given-names>
</name>
<name>
<surname>Lim</surname>
<given-names>J.-W.</given-names>
</name>
<name>
<surname>Wike</surname>
<given-names>C. L.</given-names>
</name>
<etal/>
</person-group> (<year>2017</year>). <article-title>Conserved roles of mouse DUX and human DUX4 in activating cleavage-stage genes and MERVL/HERVL retrotransposons</article-title>. <source>Nat. Genet.</source> <volume>49</volume>, <fpage>925</fpage>&#x2013;<lpage>934</lpage>. <pub-id pub-id-type="doi">10.1038/ng.3844</pub-id>
</citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Israel</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Ernst</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Psathaki</surname>
<given-names>O. E.</given-names>
</name>
<name>
<surname>Drexler</surname>
<given-names>H. C. A.</given-names>
</name>
<name>
<surname>Casser</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Suzuki</surname>
<given-names>Y.</given-names>
</name>
<etal/>
</person-group> (<year>2019</year>). <article-title>An integrated genome-wide multi-omics analysis of gene expression dynamics in the preimplantation mouse embryo</article-title>. <source>Sci. Rep.</source> <volume>9</volume>, <fpage>13356</fpage>. <pub-id pub-id-type="doi">10.1038/s41598-019-49817-3</pub-id>
</citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jachowicz</surname>
<given-names>J. W.</given-names>
</name>
<name>
<surname>Bing</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Pontabry</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Bo&#x161;kovi&#x107;</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Rando</surname>
<given-names>O. J.</given-names>
</name>
<name>
<surname>Torres-Padilla</surname>
<given-names>M.-E.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>LINE-1 activation after fertilization regulates global chromatin accessibility in the early mouse embryo</article-title>. <source>Nat. Genet.</source> <volume>49</volume>, <fpage>1502</fpage>&#x2013;<lpage>1510</lpage>. <pub-id pub-id-type="doi">10.1038/ng.3945</pub-id>
</citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jukam</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Shariati</surname>
<given-names>S. A. M.</given-names>
</name>
<name>
<surname>Skotheim</surname>
<given-names>J. M.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Zygotic genome activation in vertebrates</article-title>. <source>Dev. Cell</source> <volume>42</volume>, <fpage>316</fpage>&#x2013;<lpage>332</lpage>. <pub-id pub-id-type="doi">10.1016/j.devcel.2017.07.026</pub-id>
</citation>
</ref>
<ref id="B27">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Krassowski</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Arts</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Lagger</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Max</surname>
</name>
</person-group> (<year>2022</year>). <source>krassowski/complex-upset: v1.3.5</source>. <pub-id pub-id-type="doi">10.5281/ZENODO.3700590</pub-id>
</citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Langmead</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Salzberg</surname>
<given-names>S. L.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Fast gapped-read alignment with Bowtie 2</article-title>. <source>Nat. Methods</source> <volume>9</volume>, <fpage>357</fpage>&#x2013;<lpage>359</lpage>. <pub-id pub-id-type="doi">10.1038/nmeth.1923</pub-id>
</citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lawrence</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Huber</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Pag&#xe8;s</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Aboyoun</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Carlson</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Gentleman</surname>
<given-names>R.</given-names>
</name>
<etal/>
</person-group> (<year>2013</year>). <article-title>Software for computing and annotating genomic ranges</article-title>. <source>PLoS Comput. Biol.</source> <volume>9</volume>, <fpage>e1003118</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pcbi.1003118</pub-id>
</citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lee</surname>
<given-names>M. T.</given-names>
</name>
<name>
<surname>Bonneau</surname>
<given-names>A. R.</given-names>
</name>
<name>
<surname>Giraldez</surname>
<given-names>A. J.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Zygotic genome activation during the maternal-to-zygotic transition</article-title>. <source>Annu. Rev. Cell Dev. Biol.</source> <volume>30</volume>, <fpage>581</fpage>&#x2013;<lpage>613</lpage>. <pub-id pub-id-type="doi">10.1146/annurev-cellbio-100913-013027</pub-id>
</citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lex</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Gehlenborg</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Strobelt</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Vuillemot</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Pfister</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>UpSet: Visualization of intersecting sets</article-title>. <source>IEEE Trans. Vis. Comput. Graph.</source> <volume>20</volume>, <fpage>1983</fpage>&#x2013;<lpage>1992</lpage>. <pub-id pub-id-type="doi">10.1109/TVCG.2014.2346248</pub-id>
</citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Handsaker</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Wysoker</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Fennell</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Ruan</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Homer</surname>
<given-names>N.</given-names>
</name>
<etal/>
</person-group> (<year>2009</year>). <article-title>The sequence alignment/map format and SAMtools</article-title>. <source>Bioinformatics</source> <volume>25</volume>, <fpage>2078</fpage>&#x2013;<lpage>2079</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btp352</pub-id>
</citation>
</ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Feng</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Lai</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>P.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>The landscape of RNA Pol II binding reveals a stepwise transition during ZGA</article-title>. <source>Nature</source> <volume>587</volume>, <fpage>139</fpage>&#x2013;<lpage>144</lpage>. <pub-id pub-id-type="doi">10.1038/s41586-020-2847-y</pub-id>
</citation>
</ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Macfarlan</surname>
<given-names>T. S.</given-names>
</name>
<name>
<surname>Gifford</surname>
<given-names>W. D.</given-names>
</name>
<name>
<surname>Driscoll</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Lettieri</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Rowe</surname>
<given-names>H. M.</given-names>
</name>
<name>
<surname>Bonanomi</surname>
<given-names>D.</given-names>
</name>
<etal/>
</person-group> (<year>2012</year>). <article-title>Embryonic stem cell potency fluctuates with endogenous retrovirus activity</article-title>. <source>Nature</source> <volume>487</volume>, <fpage>57</fpage>&#x2013;<lpage>63</lpage>. <pub-id pub-id-type="doi">10.1038/nature11244</pub-id>
</citation>
</ref>
<ref id="B35">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Malki</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>van der Heijden</surname>
<given-names>G. W.</given-names>
</name>
<name>
<surname>O&#x2019;Donnell</surname>
<given-names>K. A.</given-names>
</name>
<name>
<surname>Martin</surname>
<given-names>S. L.</given-names>
</name>
<name>
<surname>Bortvin</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>A role for retrotransposon LINE-1 in fetal oocyte attrition in mice</article-title>. <source>Dev. Cell</source> <volume>29</volume>, <fpage>521</fpage>&#x2013;<lpage>533</lpage>. <pub-id pub-id-type="doi">10.1016/j.devcel.2014.04.027</pub-id>
</citation>
</ref>
<ref id="B36">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>McLeay</surname>
<given-names>R. C.</given-names>
</name>
<name>
<surname>Bailey</surname>
<given-names>T. L.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>Motif enrichment analysis: A unified framework and an evaluation on ChIP data</article-title>. <source>BMC Bioinforma.</source> <volume>11</volume>, <fpage>165</fpage>. <pub-id pub-id-type="doi">10.1186/1471-2105-11-165</pub-id>
</citation>
</ref>
<ref id="B37">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Peaston</surname>
<given-names>A. E.</given-names>
</name>
<name>
<surname>Evsikov</surname>
<given-names>A. V.</given-names>
</name>
<name>
<surname>Graber</surname>
<given-names>J. H.</given-names>
</name>
<name>
<surname>de Vries</surname>
<given-names>W. N.</given-names>
</name>
<name>
<surname>Holbrook</surname>
<given-names>A. E.</given-names>
</name>
<name>
<surname>Solter</surname>
<given-names>D.</given-names>
</name>
<etal/>
</person-group> (<year>2004</year>). <article-title>Retrotransposons regulate host genes in mouse oocytes and preimplantation embryos</article-title>. <source>Dev. Cell</source> <volume>7</volume>, <fpage>597</fpage>&#x2013;<lpage>606</lpage>. <pub-id pub-id-type="doi">10.1016/j.devcel.2004.09.004</pub-id>
</citation>
</ref>
<ref id="B38">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Percharde</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>C.-J.</given-names>
</name>
<name>
<surname>Yin</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Guan</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Peixoto</surname>
<given-names>G. A.</given-names>
</name>
<name>
<surname>Bulut-Karslioglu</surname>
<given-names>A.</given-names>
</name>
<etal/>
</person-group> (<year>2018</year>). <article-title>A LINE1-nucleolin partnership regulates early development and ESC identity</article-title>. <source>Cell</source> <volume>174</volume>, <fpage>391</fpage>&#x2013;<lpage>405</lpage>. <pub-id pub-id-type="doi">10.1016/j.cell.2018.05.043</pub-id>
</citation>
</ref>
<ref id="B39">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Quinlan</surname>
<given-names>A. R.</given-names>
</name>
<name>
<surname>Hall</surname>
<given-names>I. M.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>BEDTools: A flexible suite of utilities for comparing genomic features</article-title>. <source>Bioinformatics</source> <volume>26</volume>, <fpage>841</fpage>&#x2013;<lpage>842</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btq033</pub-id>
</citation>
</ref>
<ref id="B40">
<citation citation-type="book">
<collab>R Core Team</collab> (<year>2018</year>). <source>R: A language and environment for statistical computing</source>. <publisher-loc>Vienna, Austria</publisher-loc>: <publisher-name>R Foundation for Statistical Computing</publisher-name>.</citation>
</ref>
<ref id="B41">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ram&#xed;rez</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Ryan</surname>
<given-names>D. P.</given-names>
</name>
<name>
<surname>Gr&#xfc;ning</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Bhardwaj</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Kilpert</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Richter</surname>
<given-names>A. S.</given-names>
</name>
<etal/>
</person-group> (<year>2016</year>). <article-title>deepTools2: a next generation web server for deep-sequencing data analysis</article-title>. <source>Nucleic Acids Res.</source> <volume>44</volume>, <fpage>W160</fpage>&#x2013;<lpage>W165</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkw257</pub-id>
</citation>
</ref>
<ref id="B42">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Robinson</surname>
<given-names>M. D.</given-names>
</name>
<name>
<surname>McCarthy</surname>
<given-names>D. J.</given-names>
</name>
<name>
<surname>Smyth</surname>
<given-names>G. K.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>edgeR: a Bioconductor package for differential expression analysis of digital gene expression data</article-title>. <source>Bioinformatics</source> <volume>26</volume>, <fpage>139</fpage>&#x2013;<lpage>140</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btp616</pub-id>
</citation>
</ref>
<ref id="B43">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Rossum</surname>
<given-names>G. V.</given-names>
</name>
<name>
<surname>Drake</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2001</year>). <source>Pyhton reference manual</source>.</citation>
</ref>
<ref id="B44">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Schlesinger</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Lee</surname>
<given-names>A. H.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>G. Z.</given-names>
</name>
<name>
<surname>Green</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Goff</surname>
<given-names>S. P.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Proviral silencing in embryonic cells is regulated by Yin Yang 1</article-title>. <source>Cell Rep.</source> <volume>4</volume>, <fpage>50</fpage>&#x2013;<lpage>58</lpage>. <pub-id pub-id-type="doi">10.1016/j.celrep.2013.06.003</pub-id>
</citation>
</ref>
<ref id="B45">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Schulz</surname>
<given-names>K. N.</given-names>
</name>
<name>
<surname>Harrison</surname>
<given-names>M. M.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Mechanisms regulating zygotic genome activation</article-title>. <source>Nat. Rev. Genet.</source> <volume>20</volume>, <fpage>221</fpage>&#x2013;<lpage>234</lpage>. <pub-id pub-id-type="doi">10.1038/s41576-018-0087-x</pub-id>
</citation>
</ref>
<ref id="B46">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sookdeo</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Hepp</surname>
<given-names>C. M.</given-names>
</name>
<name>
<surname>McClure</surname>
<given-names>M. A.</given-names>
</name>
<name>
<surname>Boissinot</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Revisiting the evolution of mouse LINE-1 in the genomic era</article-title>. <source>Mob. DNA</source> <volume>4</volume>, <fpage>3</fpage>. <pub-id pub-id-type="doi">10.1186/1759-8753-4-3</pub-id>
</citation>
</ref>
<ref id="B47">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tadros</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Lipshitz</surname>
<given-names>H. D.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>The maternal-to-zygotic transition: A play in two acts</article-title>. <source>Development</source> <volume>136</volume>, <fpage>3033</fpage>&#x2013;<lpage>3042</lpage>. <pub-id pub-id-type="doi">10.1242/dev.033183</pub-id>
</citation>
</ref>
<ref id="B48">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Taylor</surname>
<given-names>K. D.</given-names>
</name>
<name>
<surname>Pik&#xf3;</surname>
<given-names>L.</given-names>
</name>
</person-group> (<year>1987</year>). <article-title>Patterns of mRNA prevalence and expression of B1 and B2 transcripts in early mouse embryos</article-title>. <source>Development</source> <volume>101</volume>, <fpage>877</fpage>&#x2013;<lpage>892</lpage>. <pub-id pub-id-type="doi">10.1242/dev.101.4.877</pub-id>
</citation>
</ref>
<ref id="B49">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Torres-Padilla</surname>
<given-names>M.-E.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>On transposons and totipotency</article-title>. <source>Phil. Trans. R. Soc. B</source> <volume>375</volume>, <fpage>20190339</fpage>. <pub-id pub-id-type="doi">10.1098/rstb.2019.0339</pub-id>
</citation>
</ref>
<ref id="B50">
<citation citation-type="journal">
<collab>SciPy 1.0 Contributors</collab>
<person-group person-group-type="author">
<name>
<surname>Virtanen</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Gommers</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Oliphant</surname>
<given-names>T. E.</given-names>
</name>
<name>
<surname>Haberland</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Reddy</surname>
<given-names>T.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>SciPy 1.0: Fundamental algorithms for scientific computing in Python</article-title>. <source>Nat. Methods</source> <volume>17</volume>, <fpage>261</fpage>&#x2013;<lpage>272</lpage>. <pub-id pub-id-type="doi">10.1038/s41592-019-0686-2</pub-id>
</citation>
</ref>
<ref id="B51">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Volpe</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Miralto</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Gustincich</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Sanges</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>ClusterScan: Simple and generalistic identification of genomic clusters</article-title>. <source>Bioinformatics</source> <volume>34</volume>, <fpage>3921</fpage>&#x2013;<lpage>3923</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/bty486</pub-id>
</citation>
</ref>
<ref id="B52">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Paulson</surname>
<given-names>E. E.</given-names>
</name>
<name>
<surname>Ma</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Ross</surname>
<given-names>P. J.</given-names>
</name>
<name>
<surname>Schultz</surname>
<given-names>R. M.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Paternal genome rescues mouse preimplantation embryo development in the absence of maternally-recruited EZH2 activity</article-title>. <source>Epigenetics</source> <volume>14</volume>, <fpage>94</fpage>&#x2013;<lpage>108</lpage>. <pub-id pub-id-type="doi">10.1080/15592294.2019.1570771</pub-id>
</citation>
</ref>
<ref id="B53">
<citation citation-type="journal">
<collab>Mouse Genome Sequencing Consortium</collab>
<person-group person-group-type="author">
<name>
<surname>Waterston</surname>
<given-names>R. H.</given-names>
</name>
<name>
<surname>Lindblad-Toh</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Birney</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Rogers</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Abril</surname>
<given-names>J. F.</given-names>
</name>
<etal/>
</person-group> (<year>2002</year>). <article-title>Initial sequencing and comparative analysis of the mouse genome</article-title>. <source>Nature</source> <volume>420</volume>, <fpage>520</fpage>&#x2013;<lpage>562</lpage>. <pub-id pub-id-type="doi">10.1038/nature01262</pub-id>
</citation>
</ref>
<ref id="B54">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Wickham</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2016</year>). <source>ggplot2: Elegant graphics for data analysis</source>.</citation>
</ref>
<ref id="B55">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wu</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Yin</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Xiang</surname>
<given-names>Y.</given-names>
</name>
<etal/>
</person-group> (<year>2016</year>). <article-title>The landscape of accessible chromatin in mammalian preimplantation embryos</article-title>. <source>Nature</source> <volume>534</volume>, <fpage>652</fpage>&#x2013;<lpage>657</lpage>. <pub-id pub-id-type="doi">10.1038/nature18606</pub-id>
</citation>
</ref>
<ref id="B56">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yang</surname>
<given-names>W. R.</given-names>
</name>
<name>
<surname>Ardeljan</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Pacyna</surname>
<given-names>C. N.</given-names>
</name>
<name>
<surname>Payer</surname>
<given-names>L. M.</given-names>
</name>
<name>
<surname>Burns</surname>
<given-names>K. H.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>SQuIRE reveals locus-specific regulation of interspersed repeat expression</article-title>. <source>Nucleic Acids Res.</source> <volume>47</volume>, <fpage>e27</fpage>. <pub-id pub-id-type="doi">10.1093/nar/gky1301</pub-id>
</citation>
</ref>
<ref id="B57">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yu</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>L.-G.</given-names>
</name>
<name>
<surname>He</surname>
<given-names>Q.-Y.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>ChIPseeker: An R/bioconductor package for ChIP peak annotation, comparison and visualization</article-title>. <source>Bioinformatics</source> <volume>31</volume>, <fpage>2382</fpage>&#x2013;<lpage>2383</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btv145</pub-id>
</citation>
</ref>
<ref id="B58">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zerbino</surname>
<given-names>D. R.</given-names>
</name>
<name>
<surname>Achuthan</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Akanni</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Amode</surname>
<given-names>M. R.</given-names>
</name>
<name>
<surname>Barrell</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Bhai</surname>
<given-names>J.</given-names>
</name>
<etal/>
</person-group> (<year>2018</year>). <article-title>Ensembl 2018</article-title>. <source>Nucleic Acids Res.</source> <volume>46</volume>, <fpage>D754</fpage>&#x2013;<lpage>D761</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkx1098</pub-id>
</citation>
</ref>
<ref id="B59">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhou</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Smith</surname>
<given-names>A. D.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Subtype classification and functional annotation of L1Md retrotransposon promoters</article-title>. <source>Mob. DNA</source> <volume>10</volume>, <fpage>14</fpage>. <pub-id pub-id-type="doi">10.1186/s13100-019-0156-5</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>