<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.3" xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Genet.</journal-id>
<journal-title>Frontiers in Genetics</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Genet.</abbrev-journal-title>
<issn pub-type="epub">1664-8021</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1265808</article-id>
<article-id pub-id-type="doi">10.3389/fgene.2023.1265808</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Genetics</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>QUFIND: tool for comparative prediction and mining of G4 quadruplexes overlapping with CpG islands</article-title>
<alt-title alt-title-type="left-running-head">Kaur et al.</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fgene.2023.1265808">10.3389/fgene.2023.1265808</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Kaur</surname>
<given-names>Baljeet</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Sharma</surname>
<given-names>Priya</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Arora</surname>
<given-names>Pooja</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1004382/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Sood</surname>
<given-names>Vikas</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/232334/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>Department of Computer Science</institution>, <institution>Hansraj College</institution>, <institution>University of Delhi</institution>, <addr-line>Malka Ganj</addr-line>, <country>India</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Department of Biochemistry</institution>, <institution>Jamia Hamdard</institution>, <addr-line>Delhi</addr-line>, <country>India</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>Department of Zoology</institution>, <institution>Hansraj College</institution>, <institution>University of Delhi</institution>, <addr-line>Malka Ganj</addr-line>, <country>India</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1887963/overview">Federico Zambelli</ext-link>, University of Milan, Italy</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/555378/overview">Anja Kovanda</ext-link>, University Medical Centre Ljubljana, Slovenia</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2321123/overview">Matja Zalar</ext-link>, University of Maribor, Slovenia</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Baljeet Kaur, <email>baljeetkaur26@hotmail.com</email>; Vikas Sood, <email>vikas1101@gmail.com</email>
</corresp>
</author-notes>
<pub-date pub-type="epub">
<day>25</day>
<month>10</month>
<year>2023</year>
</pub-date>
<pub-date pub-type="collection">
<year>2023</year>
</pub-date>
<volume>14</volume>
<elocation-id>1265808</elocation-id>
<history>
<date date-type="received">
<day>01</day>
<month>08</month>
<year>2023</year>
</date>
<date date-type="accepted">
<day>29</day>
<month>09</month>
<year>2023</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2023 Kaur, Sharma, Arora and Sood.</copyright-statement>
<copyright-year>2023</copyright-year>
<copyright-holder>Kaur, Sharma, Arora and Sood</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>G-quadruplexes (G4s) are secondary structures in DNA that have been shown to be involved in gene regulation. They play a vital role in the cellular processes and several pathogens including bacteria, fungi, and viruses have also been shown to possess G4s that help them in their pathogenesis. Additionally, cross-talk among the CpG islands and G4s has been shown to influence biological processes. The virus-encoded G4s are affected by the mutational landscape leading to the formation/deletion of these G4s. Therefore, understanding and predicting these multivariate effects on traditional and non-traditional quadruplexes forms an important area of research that is yet to be investigated. We have designed a user-friendly webserver QUFIND (<ext-link ext-link-type="uri" xlink:href="http://soodlab.com/qufinder/">http://soodlab.com/qufinder/</ext-link>) that can predict traditional as well as non-traditional quadruplexes in a given sequence. QUFIND is connected with ENSEMBL and NCBI so that the sequences can be fetched in a real-time manner. The algorithm is designed in such a way that the user is provided with multiple options to customize the base (A, T, G, or C), size of the stem (2&#x2013;5), loop length (1&#x2013;30), number of bulges (1&#x2013;5) as well as the number of mismatches (0&#x2013;2) enabling the identification of any secondary structure of interest. QUFIND is designed to predict both CpG islands as well as G4s in a given sequence. Since G4s are very short compared to CpG islands, QUFIND can also predict the overlapping G4s within CpG islands. Therefore, the user has the flexibility to identify either overlapping or non-overlapping G4s along with the CpG islands. Additionally, one section of QUFIND is dedicated to comparing the G4s in two viral sequences. The visualization is designed in such a manner that the user is able to see the unique quadruplexes in both the input sequences. 
The efficiency of QUFIND is calculated on G4s obtained from G4 high throughput sequencing data (<italic>n</italic> &#x3d; 1000) or experimentally validated G4s (<italic>n</italic> &#x3d; 329). Our results revealed that QUFIND is able to predict G4-quadruplexes obtained from G4-sequencing data with 90.06% prediction accuracy whereas experimentally validated quadruplexes were predicted with 97.26% prediction accuracy.</p>
</abstract>
<kwd-group>
<kwd>G4 quadruplexes</kwd>
<kwd>CpG islands</kwd>
<kwd>guanine tetrads</kwd>
<kwd>G4 quadruplex and viruses</kwd>
<kwd>G4 quadruplex prediction</kwd>
</kwd-group>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Computational Genomics</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec id="s1">
<title>Introduction</title>
<p>The most widely accepted DNA structure is the classical B-DNA form, which is a right-handed double helix in nature and contains the hydrogen bonds between the nucleobases as described by Watson and Crick in 1953 (<xref ref-type="bibr" rid="B57">Watson and Crick, 1953</xref>). Yet, it is evident that DNA is structurally dynamic and can also adopt alternative secondary structures like guanine-rich tetrads (<xref ref-type="bibr" rid="B16">Gellert et al., 1962</xref>) and non-guanine rich tetrads (<xref ref-type="bibr" rid="B30">Liu et al., 2018</xref>). Guanine-rich DNA strands are capable of folding into four-stranded helical structures, called G-quadruplexes (G4s). The four guanine residues present in the core of a G4 are bonded through the Hoogsteen hydrogen bonds and are stabilized by monovalent cations like K<sup>&#x2b;</sup> and Na<sup>&#x2b;</sup> to attain a planar form (<xref ref-type="bibr" rid="B16">Gellert et al., 1962</xref>; <xref ref-type="bibr" rid="B43">Sen and Gilbert, 1990</xref>). A minimum of three consecutive guanine residues is stacked four times successively (called G-tracts or G-runs) with the intervening sequences extruded as loops and taking the shape of a G4 scaffold (<xref ref-type="bibr" rid="B4">Bochman et al., 2012</xref>). It is believed that longer perfect G-tracts increase the stability of the G4 structure while unusual G-tracts tend to decrease the stability of the structure (<xref ref-type="bibr" rid="B21">Huppert and Balasubramanian, 2005</xref>). On the contrary, new models have predicted the robustness of the unusual G-tracts (<xref ref-type="bibr" rid="B52">Varizhuk et al., 2017</xref>; <xref ref-type="bibr" rid="B11">Doluca, 2019</xref>). Furthermore, in the early models of G4 structures, it was assumed that loop lengths up to 7 bases could form stable quadruplexes (<xref ref-type="bibr" rid="B21">Huppert and Balasubramanian, 2005</xref>). 
However, it has been since then observed that the quadruplexes with several long loop lengths (up to 30&#xa0;bp) could also exist (<xref ref-type="bibr" rid="B17">Guedin et al., 2010</xref>). It has been shown that RNA can also adopt this type of non-canonical structure under physiological conditions (<xref ref-type="bibr" rid="B10">Davis, 2004</xref>). G4s can be unimolecular or multi-molecular and can attain a variety of topologies arising from different combinations of strand direction, length, and loop composition and both intramolecular and intermolecular G4s can also be observed (<xref ref-type="bibr" rid="B45">Spiegel et al., 2020</xref>). During the last few years, there has been a lot of growing interest in the scientific community in exploring these G4 structures and their regulatory roles among biological processes (<xref ref-type="bibr" rid="B31">Maizels, 2006</xref>; <xref ref-type="bibr" rid="B40">Rhodes and Lipps, 2015</xref>; <xref ref-type="bibr" rid="B19">Hansel-Hertsch et al., 2017</xref>; <xref ref-type="bibr" rid="B45">Spiegel et al., 2020</xref>). They are widely distributed in prokaryotes, eukaryotes, and viruses and play key roles in regulating several physiological and pathological processes. Some of the biological processes known to be regulated by G4 structures include DNA replication (<xref ref-type="bibr" rid="B50">Valton and Prioleau, 2016</xref>), damage and repair system (<xref ref-type="bibr" rid="B13">Fleming et al., 2017</xref>), genomic instability (<xref ref-type="bibr" rid="B56">Wang et al., 2019</xref>), gene expression (<xref ref-type="bibr" rid="B6">Cave and Willis, 2022</xref>), chromatin rearrangement (<xref ref-type="bibr" rid="B39">Reina and Cavalieri, 2020</xref>), and viral latency (<xref ref-type="bibr" rid="B41">Ruggiero and Richter, 2018</xref>). 
Their role in diverse biological processes renders them interesting potential therapeutic targets (<xref ref-type="bibr" rid="B41">Ruggiero and Richter, 2018</xref>; <xref ref-type="bibr" rid="B5">Carvalho et al., 2020</xref>). Different studies in various organisms have revealed that G4 secondary structures are located in a non-random manner within genomes and tend to cluster in particular/functional genomic regions like telomeres (<xref ref-type="bibr" rid="B46">Sundquist and Klug, 1989</xref>), promoters (<xref ref-type="bibr" rid="B44">Siddiqui-Jain et al., 2002</xref>; <xref ref-type="bibr" rid="B9">Dai et al., 2006</xref>; <xref ref-type="bibr" rid="B12">Fernando et al., 2006</xref>; <xref ref-type="bibr" rid="B59">Xu and Sugiyama, 2006</xref>), and untranslated regions (UTRs) of mRNA (<xref ref-type="bibr" rid="B22">Huppert et al., 2008</xref>). The role of G4s is associated with several diseases including cancer (<xref ref-type="bibr" rid="B53">Varshney et al., 2020</xref>), neurodegenerative disorders (<xref ref-type="bibr" rid="B55">Wang et al., 2021</xref>; <xref ref-type="bibr" rid="B54">Vijay Kumar et al., 2023</xref>) and rare genetic disorders including fragile X syndrome (<xref ref-type="bibr" rid="B1">Asamitsu et al., 2021</xref>).</p>
<p>Initial <italic>in silico</italic> approaches for the prediction of putative G4 structures on a genome scale were based on the experiments conducted biophysically (<xref ref-type="bibr" rid="B21">Huppert and Balasubramanian, 2005</xref>; <xref ref-type="bibr" rid="B48">Todd et al., 2005</xref>). The ongoing era of next-generation sequencing has made whole genome sequencing relatively easy and affordable thereby creating a wealth of genomic data which can be used to obtain a bird&#x2019;s eye view of the cellular processes. Using techniques like rG4-seq and G4-seq, scientists have developed the transcriptome-wide (<xref ref-type="bibr" rid="B26">Kwok et al., 2016</xref>), genome-wide (<xref ref-type="bibr" rid="B7">Chambers et al., 2015</xref>) experimental map of G4s in humans, and recently, the more exhaustive whole-genome landscape of G4s in 12 species (<xref ref-type="bibr" rid="B33">Marsico et al., 2019</xref>). Several improved computational algorithms have been developed by employing the G4-seq dataset to train a machine learning model for the characterization of G4 structures on a genomic level (<xref ref-type="bibr" rid="B14">Garant et al., 2017</xref>; <xref ref-type="bibr" rid="B20">Hon et al., 2017</xref>; <xref ref-type="bibr" rid="B42">Sahakyan et al., 2017</xref>). Notably, the vast majority (80%&#x2013;90%) of the G4 structures predicted by these improved computational approaches were confirmed to exist in genomes by the G4-seq approach (<xref ref-type="bibr" rid="B33">Marsico et al., 2019</xref>). A powerful tool called pqsfinder (<xref ref-type="bibr" rid="B20">Hon et al., 2017</xref>) provides a flexible framework for its users and allows them to define the custom criteria for scoring and matching. It allows the user to input up to three imperfections (mismatches, bulges in G-runs, and/or long loops &#x3e;9&#xa0;nt) in a single sequence of DNA or RNA and has the advantage of assigning a score to each predicted G4 sequence. 
The scoring scheme emphasizes the stability of the predicted structure because it gives a bonus score to the perfect G-tetrad stacking and a penalty score in case of mismatch and bulges. Quadron (<xref ref-type="bibr" rid="B42">Sahakyan et al., 2017</xref>) is a machine learning (ML) model based on a tree gradient boosting machine and trained on the G4-seq data for the human genome, which allows the user to predict G4 structures in DNA as well as RNA sequences. G4RNA screener (<xref ref-type="bibr" rid="B14">Garant et al., 2017</xref>) applies an ML model based on an artificial neural network and trained on experimentally validated G4s from sequences deposited in the G4RNA database. It allows the user to predict G4s in RNA sequences only and incorporates the cG/cC and G4 hunter algorithms for better or comparable outcomes. Additionally, ImGQfinder (<xref ref-type="bibr" rid="B51">Varizhuk et al., 2014</xref>) is another tool where a user can predict G-quadruplexes. This tool allows one mismatch or bulge in G-tract. QPARSE (<xref ref-type="bibr" rid="B3">Berselli et al., 2020</xref>) is a graph-based search algorithm where users can look for monomeric and multimeric quadruplex forming sequences and G4s with long, hairpin loops. Users are allowed to enter the query sequence of a maximum 10,000&#xa0;bp length or upload a fasta file of a maximum 15&#xa0;Kb size. G4-iM Grinder (<xref ref-type="bibr" rid="B2">Belmonte-Reche and Morales, 2020</xref>) looks for G4s and i-Motifs within a given DNA or RNA sequence. It has three distinct methods: the G4 search engine with 13 customizable functions (for example, showing G4 on both strands, loop sequence, size, <italic>etc.</italic>), G4 qualification functions, and quantification functions. It incorporates cG/cC and G4 hunter algorithms to evaluate better results. 
The continuous progress in literature providing evidence on the <italic>in-vitro</italic> existence of G4 structures containing more than four G-tracts (<xref ref-type="bibr" rid="B37">Phan et al., 2005</xref>; <xref ref-type="bibr" rid="B36">Omaga et al., 2018</xref>) and G4 structures containing all the possible tetrads, A:T:A:T tetrads and bulged nucleotides in one single structure (<xref ref-type="bibr" rid="B30">Liu et al., 2018</xref>) still remained to be incorporated into the search algorithm.</p>
<p>Another interesting role of G4 structures is to influence the methylation at CpG islands (CGIs), which are guanine-cytosine-rich regions and are usually hypomethylated. The CGIs are widespread at the promoters of housekeeping, tissue-specific, and developmental genes and co-localize with G4s in these actively transcribed regions for gene regulation (<xref ref-type="bibr" rid="B23">Jara-Espejo and Line, 2020</xref>). Recently, it was proposed that the G4 structures protect the CGIs from methylation by sequestering and inhibiting DNA methyltransferases and hold an important place in epigenetic control mechanisms (<xref ref-type="bibr" rid="B8">Cree et al., 2016</xref>; <xref ref-type="bibr" rid="B32">Mao et al., 2018</xref>). As both G4s and CGIs are tightly associated with actively transcribed regions, their accurate identification in the genome is of great significance.</p>
<p>Several computational tools for the identification of CGI in a given DNA sequence are accessible to users nowadays. Three of the widely used conditions for CGI analysis are as follows: 1) the moving window should be of 200 nucleotides, 2) the GC content should be higher than 50%, and 3) the CpG O/E (Observed/Expected) ratio should be higher than 0.6 (<xref ref-type="bibr" rid="B15">Gardiner-Garden and Frommer, 1987</xref>). Some improved versions for CGI identification are also available which include the additional parameters (<xref ref-type="bibr" rid="B27">Larsen et al., 1992</xref>; <xref ref-type="bibr" rid="B38">Ponger and Mouchiroud, 2002</xref>; <xref ref-type="bibr" rid="B47">Takai and Jones, 2003</xref>; <xref ref-type="bibr" rid="B18">Hackenberg et al., 2006</xref>).</p>
<p>The presence of stable and conserved G4s in all known human viruses and their variants has been successfully presented by <xref ref-type="bibr" rid="B28">Lavezzo et al. (2018)</xref> but their analysis is mainly based on the reference genomes (RefSeq). Additionally, the analysis pipeline has failed to provide any criteria where user-defined sequences can be handled. It is evident that mutations in viruses have played a major role in evolution (<xref ref-type="bibr" rid="B34">Moelling and Broecker, 2019</xref>) enabling them to evade host immune responses efficiently (<xref ref-type="bibr" rid="B58">Xia et al., 2018</xref>). Several studies related to mutations in quadruplex-forming structures have shown that mutations in G4s may hinder normal cellular activities (<xref ref-type="bibr" rid="B24">Khristich and Mirkin, 2020</xref>). Therefore, understanding the potential effects of mutations on quadruplex-forming structures warrants further research.</p>
<p>All quadruplexes and CGIs search models have limitations despite the advancements in the field as none of the tools has been explicitly designed to detect and analyse all possible G4s. Keeping in mind that G4 forming sequences within a genome harbour CpG sites, we have developed a web-based server, QUFIND (QUadruplex FINDer), where users are allowed to predict either the G4 structures or CGIs and G4 structures simultaneously, in a wide range of organisms. This is the first-ever tool that allows users to find the G4 forming sequences within CGIs so that the user can analyse G4 secondary structures in context with the CGIs. The web server interfaces with ENSEMBL&#x2019;s REST API and NCBI API to mine CGIs and/or G4 secondary structures. It provides options to search the entire ENSEMBL and NCBI databases in order to retrieve the desired nucleotide sequence entries for analysis. The web server is organized in such a way that users are free to choose as many possible parameters by themselves. The web program is divided into three modules: QUFINDU which allows querying CGIs and/or G4 secondary structures in all the species for which the sequences are available in the ENSEMBL and NCBI databases, QUFINDV which allows querying G4 secondary structures in viruses and their variants and lastly, QUFIND which allows searching for CGIs and/or G4 secondary structures in user-provided sequences. QUFINDV offers an interactive graphical representation of the G4 sequences in viruses and their variants for comparative studies on a single screen. The program is also designed to handle the analysis of non-guanine-based quadruplexes as well as the prediction of overlapping G4 structures among the CpG islands.</p>
</sec>
<sec sec-type="methods" id="s2">
<title>Methodology</title>
<sec id="s2-1">
<title>Definition of model</title>
<p>The design of this tool is focused on the mining of G4 secondary structures contained within the CGIs. For this study, CpG regions were defined as the moving segment of DNA or RNA of 200&#xa0;bp constant length, GC content in that region should be greater than 50%, and the O/E (Observed/Expected) ratio should be higher than 0.6. Observed CpG is the number of CpG dinucleotides in the segment and expected CpG is calculated by multiplying the number of &#x201c;C&#x201d;s and the number of &#x201c;G&#x201d;s in the segment and then dividing the product by the length of the segment (<xref ref-type="bibr" rid="B15">Gardiner-Garden and Frommer, 1987</xref>). The typical G4 secondary structures are identified using the following configuration:<disp-formula id="equ1">
<mml:math id="m1">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">G</mml:mi>
<mml:mi mathvariant="normal">x</mml:mi>
</mml:msub>
<mml:msub>
<mml:mi mathvariant="normal">N</mml:mi>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mi mathvariant="normal">y</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mi mathvariant="normal">G</mml:mi>
<mml:mi mathvariant="normal">x</mml:mi>
</mml:msub>
<mml:msub>
<mml:mi mathvariant="normal">N</mml:mi>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mi mathvariant="normal">y</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mi mathvariant="normal">G</mml:mi>
<mml:mi mathvariant="normal">x</mml:mi>
</mml:msub>
<mml:msub>
<mml:mi mathvariant="normal">N</mml:mi>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mi mathvariant="normal">y</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mi mathvariant="normal">G</mml:mi>
<mml:mi mathvariant="normal">x</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</disp-formula>
</p>
<p>In the motif represented above, G<sub>x</sub> is the continuous stretch of guanine (G) bases repeated &#x201c;x&#x201d; times (where x &#x2208; [2, 5]), called G-tract also referred to as stem length and N<sub>1-y</sub> represents the loop which ranges from one to &#x201c;y&#x201d; and y &#x2208; [1, 30]. The G4 motif is composed of four G-tracts and three loops but some defects like bulges and mismatches also exist in G-tracts, and give rise to atypical G4 secondary structures. In these, continuous stacking of guanine bases is interrupted by non-guanine bases in the form of bulges (<xref ref-type="bibr" rid="B35">Mukundan and Phan, 2013</xref>), and substitution of a non-guanine base for one of the guanine bases in a G-tract can also occur (<xref ref-type="bibr" rid="B49">Toma&#x161;ko et al., 2009</xref>). The other type of mismatch is the vacancy of a guanine base in one of the G-tracts (<xref ref-type="bibr" rid="B29">Li et al., 2015</xref>). The users are provided with an option to search for these atypical G4 secondary structures. The G-tract configuration for atypical motifs changes to G<sub>x</sub>DG<sub>n-x</sub> (in case of bulged tracts) and G<sub>x</sub>DG<sub>n-x-1</sub> (in case of mismatched tracts) where 2 &#x2264; x &#x2264; n, &#x201c;n&#x201d; is the maximum length of the G-tract and &#x2018;D&#x2019; is the defect. In this section, users are required to select the type of defect and the number of defects. We have limited the number of bulges up to 5 and the number of mismatches up to 2. The strategy to mine typical and atypical G4 secondary structures is based upon a &#x201c;regular expression&#x201d; which is purely dependent upon the selection of parameters and search within a query sequence. 
The algorithm mines overlapping or non-overlapping CGIs/G4 secondary structures in a given sequence. Overlapping G4 secondary structures may contain multiple internal G4s but non-overlapping G4 secondary structures will not have coinciding G4 coordinates. The same algorithm is applied to search for other tetrads including T, A and C.</p>
</sec>
<sec id="s2-2">
<title>Architecture and features</title>
<p>QUFIND web server backend is written in Python programming language using Flask microframework and all visualizations are generated using Matplotlib library. The front end is written using HTML, CSS, JavaScript, and the jQuery library of JavaScript. The web server interfaces with ENSEMBL&#x2019;s REST API and NCBI&#x2019;s E-utilities to fetch a nucleotide sequence in FASTA format from the database and analyze it for the presence of quadruplexes in an overlapping or non-overlapping model. This keeps the server up to date with ENSEMBL and NCBI&#x2019;s latest release. The individual modules of the web server are described in the following subsections.</p>
</sec>
<sec id="s2-3">
<title>QUFINDU (QUadruplex FINDer for UserID)</title>
<p>This module allows users to mine G4 secondary structures in all species available in the two databases, ENSEMBL and NCBI. By default, the ENSEMBL is selected and the user can mine typical or atypical secondary structures by providing the ENSEMBL ID of the gene or sequence of interest (<xref ref-type="fig" rid="F1">Figure 1A</xref>). The user can change the database for fetching the sequence. In the case of NCBI, the user can enter the accession number of a sequence. The server is set to search for G4s only by default but the users can opt to search for G4s along with CGIs, as in QUFIND. The server can mine overlapping or non-overlapping CGIs/G4s (G4 secondary structures search occurs internally in case of CGIs) for both the searching options. The G4 secondary structure motif configuration parameters in this module are the same as described in QUFIND. Various options to predict secondary structures are represented in <xref ref-type="fig" rid="F1">Figure 1B</xref>.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>Screenshot of example input. <bold>(A)</bold> The screenshot shows the input interface for the QUFINDU module. The user needs to select the database (1) (&#x201c;Ensembl&#x201d; is set by default) and then paste multiple Ensembl IDs of genes in the text box. In the case of NCBI, a single accession number of a sequence can be pasted in the provided box. The users can click on &#x201c;Yes&#x201d; if they want to search for CpG islands first (the CpG islands option is not selected by default) (2) and then select the search model to mine G4 motifs: Non-overlapping or overlapping (3). <bold>(B)</bold> The screenshot showing the different parameters for G4 motif configuration. The users can directly click on the &#x201c;Submit&#x201d; button with the default parameters or can change the parameters according to their interests. The different parameters include Nucleobase to search, minimum stem size, maximum stem size, minimum loop length, maximum loop length, and strand option. The user can also choose to search for defects in the G-tract or stem. Defects are of two types: bulges and mismatches. Both types of defects cannot be chosen simultaneously.</p>
</caption>
<graphic xlink:href="fgene-14-1265808-g001.tif"/>
</fig>
<p>On submission, a result page is displayed on which the user can click on any sequence ID to view its result in the form of a table and can download the sequence-specific result. The user can also click on the &#x201c;Show Plot&#x201d; button to view the image (in the form of a &#x201c;Lollipop&#x201d; chart) representing G4 secondary structures with their annotated length and position (i.e., length, position) (<xref ref-type="fig" rid="F2">Figure 2A</xref>). If the CpG islands option is selected then a result page is displayed on which the users can click on &#x201c;CpG Positive&#x201d; to view the table containing CGIs rich region positions, their GC content, and CpG ratio value and can download the CGIs-specific result. The position of G4 secondary structures that lie within CGIs can be found in the form of an image by clicking on the hyperlink given on CGIs positions (<xref ref-type="fig" rid="F2">Figure 2B</xref>).</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>Screenshots of example output. <bold>(A)</bold> The result page of QUFINDU/QUFIND module. The result page shows the gene or sequence IDs, clicking on which a table is shown for the selected strand (s) (i.e., &#x201c;Motif Positive&#x201d; or &#x201c;Motif Negative&#x201d; or both). The table contains all the information obtained for mined G4 motifs. The users can click on the &#x201c;show plot&#x201d; button to view the plot and a click on &#x201c;Download Result&#x201d; will download the sequence and strand-specific results. <bold>(B)</bold> The result page of QUFINDU or QUFIND module if the &#x201c;CpG islands&#x201d; option is selected. Every ID contains a sub-option &#x201c;CpG Positive&#x201d; or &#x201c;CpG Negative&#x201d; or both depending on the selection made by the user, clicking on which a table is shown containing detected regions of CpG islands, their GC content, and CpG ratio. In this case, the G4 motifs mining process occurs internally and hence the detected regions of CpG islands are provided with a hyperlink. Clicking on the hyperlink opens an image showing the start and end position of G4 motifs contained within that CpG island.</p>
</caption>
<graphic xlink:href="fgene-14-1265808-g002.tif"/>
</fig>
</sec>
<sec id="s2-4">
<title>QUFINDV (QUadruplex FINDer for viruses)</title>
<p>QUFINDV allows querying quadruplexes in viruses and their variants. The appearance of mutations in the genome of a virus leads to the generation or disruption of quadruplexes. Hence, the detection of unique G-quadruplexes in the variants of concern should be explored. The users need to first align the two query sequences using any sequence alignment tool (e.g., Clustal W) and then trim the overhangs found in the respective sequences. The trimming step is performed to make the two sequences equal in length. After trimming, users are required to paste/upload the two query sequences in the two text boxes for comparison (<xref ref-type="fig" rid="F3">Figure 3A</xref>). The user can then choose the model of the algorithm for fetching the G4 secondary structures, i.e., either overlapping or non-overlapping. The motif configuration parameters in this module are the same as those used in QUFIND (<xref ref-type="fig" rid="F3">Figure 3B</xref>). This module offers an interactive graphical representation of the G4s in viruses and their variants for comparative studies on a single screen.</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>Screenshot of the QUFINDV module. <bold>(A)</bold> The figure shows the input interface for the QUFINDV module. The user can paste or upload the first sequence in the box placed at left and the second sequence in the box placed at right. These two sequences should be equal in length for comparison. Then, select the search model to mine G4 motifs: non-overlapping or overlapping. <bold>(B)</bold> The screenshot shows the different parameters for G4 motif configuration. The users can directly click on the &#x2018;Submit&#x2019; button with the default parameters or can change the parameters according to their interest. The different parameters include: nucleobase to search, minimum stem size, maximum stem size, minimum loop length, maximum loop length and strand option. The user can also choose to search for defects in the G-tract or stem. Defects are of two types: bulges and mismatches. Both types of defects cannot be chosen simultaneously.</p>
</caption>
<graphic xlink:href="fgene-14-1265808-g003.tif"/>
</fig>
<p>On submitting a query, an image representing unique quadruplexes is displayed with its annotation (<xref ref-type="fig" rid="F4">Figure 4A</xref>) and a file containing information related to the detected quadruplexes can be easily downloaded (<xref ref-type="fig" rid="F4">Figure 4B</xref>).</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>Screenshot of the QUFINDV output page. <bold>(A)</bold> A &#x201c;lollipop&#x201d; plot representing the unique quadruplexes with their annotated length and position (i.e., length, position). The stems in the &#x2b;<italic>y</italic>-axis represent the quadruplexes in the first viral sequence (NCBI Ref seq ID: NC_045512.2) and the stems in the &#x2212;<italic>y</italic>-axis represent the quadruplexes in the second viral sequence (NCBI ID: BS001249.1). <bold>(B)</bold> Detected unique quadruplexes information in the form of a table for each ID.</p>
</caption>
<graphic xlink:href="fgene-14-1265808-g004.tif"/>
</fig>
</sec>
<sec id="s2-5">
<title>QUFIND</title>
<p>QUFIND is the core search module of the web server that is responsible for performing G4 quadruplex search in custom sequences. It allows users to search for their query sequences by either uploading sequence-containing files in FASTA format or pasting multiple FASTA formatted sequences. The users can either choose to run CGIs with G4s or G4s alone. The server can mine overlapping or non-overlapping CGIs/G4s (G4 secondary structure search occurs internally in the case of CGIs) in both search options. The users have the flexibility to choose the nucleobase G, C, A, or T (G is used by default). The users can then define the minimum and maximum stem length of Gs required per G-tract, the minimum and maximum size of the loops, and the maximum number of bulges/mismatches allowed. The default values for the minimum and maximum stem length of Gs are set at 2 and 4, respectively, and the minimum and maximum size of the loops for the default condition are set at 1 and 7, respectively. The user can select up to 5 maximum bulges or up to 2 maximum mismatches. The presence of bulges and mismatches at the same time is not allowed.</p>
<p>Upon the submission of a query, a progress page is displayed and users can bookmark it to access the results of their submitted sequences later on. After the analysis is complete, a result page is displayed where users can find the information related to all the submitted sequences along with the visualization. By clicking on any sequence ID, its corresponding nucleic acid secondary structure can be found and its representative figure shows the position of the mined quadruplexes in the sequence. The users can also download results for all the sequences and/or only specific sequences in CSV format. In the CSV file, the sequence and length of each mined quadruplex are given along with its start and end positions.</p>
</sec>
<sec id="s2-6">
<title>Evaluation of QUFIND</title>
<p>The model was tested on 1000 positive sequences (<xref ref-type="sec" rid="s9">Supplementary Table S1</xref>) obtained from the high-throughput dataset in which quadruplexes can be formed (<xref ref-type="bibr" rid="B7">Chambers et al., 2015</xref>; <xref ref-type="bibr" rid="B25">Klimentova et al., 2020</xref>). Another dataset consisting of 329 experimentally validated sequences (<xref ref-type="sec" rid="s9">Supplementary Table S2</xref>) was prepared through literature mining. The validation of the model was performed by simply passing the G-quadruplex containing sequences which were obtained via 1) high throughput sequencing (<italic>n</italic> &#x3d; 1000) and 2) experimental validation (<italic>n</italic> &#x3d; 329) through the tool. The number of correctly predicted sequences is represented as the percentage of the total sequences and is presented as the prediction accuracy. The tool was able to predict G4 quadruplexes obtained from a high-throughput dataset with 90.70% prediction accuracy whereas experimentally validated quadruplexes were predicted with 97.26% prediction accuracy (<xref ref-type="fig" rid="F5">Figure 5</xref>).</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>Graphical representation of the performance of validation data. <bold>(A)</bold> Prediction accuracy for high throughput G4 sequences (907/1000 &#x3d; 90.70%) and <bold>(B)</bold> Prediction accuracy for experimentally validated G4 sequences (320/329 &#x3d; 97.26%).</p>
</caption>
<graphic xlink:href="fgene-14-1265808-g005.tif"/>
</fig>
</sec>
</sec>
<sec sec-type="conclusion" id="s3">
<title>Conclusion</title>
<p>QUFIND is the first web server that allows users to find quadruplexes either without or within the CGIs. The tool provides an opportunity for the users to analyze G4s in the context of the CGIs. In addition, the server is able to mine G4s in viruses and their variants. This web server is connected with ENSEMBL&#x2019;s REST API and NCBI&#x2019;s e-utilities to get the latest gene models or genomic assembly of any organism to detect the presence of secondary structures in them. This server also provides an interactive graphical representation of the mined secondary structures and the results can be downloaded in a convenient format.</p>
<p>The methodology of secondary structure mining allows flexible customization of stem length, loop length, nucleobase, and inclusion of defects. Generally, motifs with three or four nucleic acid-tracts and a loop length of 7 nucleotides are considered to be more stable but due to continuous developments in literature, unusual G4-forming structures can also be seen. Hence, QUFIND is meant to be a flexible and comprehensive tool for investigating G4s.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s4">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/<xref ref-type="sec" rid="s9">Supplementary Material</xref>, further inquiries can be directed to the corresponding authors.</p>
</sec>
<sec id="s5">
<title>Author contributions</title>
<p>BK: Writing&#x2013;original draft, Writing&#x2013;review and editing, Investigation, Methodology, Software. PS: Writing&#x2013;original draft, Investigation, Formal Analysis. PA: Methodology, Project administration, Writing&#x2013;review and editing. VS: Conceptualization, Funding acquisition, Project administration, Resources, Supervision, Writing&#x2013;original draft, Writing&#x2013;review and editing.</p>
</sec>
<sec id="s6">
<title>Funding</title>
<p>The author(s) declare that no financial support was received for the research, authorship, and/or publication of this article.</p>
</sec>
<ack>
<p>PS gratefully acknowledges Ph.D. funding from CSIR. VS is thankful to UGC for the Faculty Recharge award and start-up grant. We are thankful to Ishpreet Singh for his assistance in optimizing the functionality.</p>
</ack>
<sec sec-type="COI-statement" id="s7">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s8">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec id="s9">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fgene.2023.1265808/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fgene.2023.1265808/full&#x23;supplementary-material</ext-link>
</p>
<supplementary-material xlink:href="Table1.docx" id="SM1" mimetype="application/docx" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Table2.docx" id="SM2" mimetype="application/docx" xmlns:xlink="http://www.w3.org/1999/xlink"/>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Asamitsu</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Yabuki</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Ikenoshita</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Kawakubo</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Kawasaki</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Usuki</surname>
<given-names>S.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>CGG repeat RNA G-quadruplexes interact with FMRpolyG to cause neuronal dysfunction in fragile X-related tremor/ataxia syndrome</article-title>. <source>Sci. Adv.</source> <volume>7</volume>, <fpage>eabd9440</fpage>. <pub-id pub-id-type="doi">10.1126/sciadv.abd9440</pub-id>
</citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Belmonte-Reche</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Morales</surname>
<given-names>J. C.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>G4-iM Grinder: when size and frequency matter. G-Quadruplex, i-Motif and higher order structure search and analysis tool</article-title>. <source>NAR Genomics Bioinforma.</source> <volume>2</volume>, <fpage>lqz005</fpage>. <pub-id pub-id-type="doi">10.1093/nargab/lqz005</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Berselli</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Lavezzo</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Toppo</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>QPARSE: searching for long-looped or multimeric G-quadruplexes potentially distinctive and druggable</article-title>. <source>Bioinformatics</source> <volume>36</volume>, <fpage>393</fpage>&#x2013;<lpage>399</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btz569</pub-id>
</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bochman</surname>
<given-names>M. L.</given-names>
</name>
<name>
<surname>Paeschke</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Zakian</surname>
<given-names>V. A.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>DNA secondary structures: stability and function of G-quadruplex structures</article-title>. <source>Nat. Rev. Genet.</source> <volume>13</volume>, <fpage>770</fpage>&#x2013;<lpage>780</lpage>. <pub-id pub-id-type="doi">10.1038/nrg3296</pub-id>
</citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Carvalho</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Mergny</surname>
<given-names>J.-L.</given-names>
</name>
<name>
<surname>Salgado</surname>
<given-names>G. F.</given-names>
</name>
<name>
<surname>Queiroz</surname>
<given-names>J. A.</given-names>
</name>
<name>
<surname>Cruz</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>G-quadruplex, Friend or Foe: the role of the g-quartet in anticancer strategies</article-title>. <source>Trends Mol. Med.</source> <volume>26</volume>, <fpage>848</fpage>&#x2013;<lpage>861</lpage>. <pub-id pub-id-type="doi">10.1016/j.molmed.2020.05.002</pub-id>
</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cave</surname>
<given-names>J. W.</given-names>
</name>
<name>
<surname>Willis</surname>
<given-names>D. E.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>G&#x2010;quadruplex regulation of neural gene expression</article-title>. <source>FEBS J.</source> <volume>289</volume>, <fpage>3284</fpage>&#x2013;<lpage>3303</lpage>. <pub-id pub-id-type="doi">10.1111/febs.15900</pub-id>
</citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chambers</surname>
<given-names>V. S.</given-names>
</name>
<name>
<surname>Marsico</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Boutell</surname>
<given-names>J. M.</given-names>
</name>
<name>
<surname>Di Antonio</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Smith</surname>
<given-names>G. P.</given-names>
</name>
<name>
<surname>Balasubramanian</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>High-throughput sequencing of DNA G-quadruplex structures in the human genome</article-title>. <source>Nat. Biotechnol.</source> <volume>33</volume>, <fpage>877</fpage>&#x2013;<lpage>881</lpage>. <pub-id pub-id-type="doi">10.1038/nbt.3295</pub-id>
</citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cree</surname>
<given-names>S. L.</given-names>
</name>
<name>
<surname>Fredericks</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Miller</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Pearce</surname>
<given-names>F. G.</given-names>
</name>
<name>
<surname>Filichev</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Fee</surname>
<given-names>C.</given-names>
</name>
<etal/>
</person-group> (<year>2016</year>). <article-title>DNA G-quadruplexes show strong interaction with DNA methyltransferases <italic>in vitro</italic>
</article-title>. <source>FEBS Lett.</source> <volume>590</volume>, <fpage>2870</fpage>&#x2013;<lpage>2883</lpage>. <pub-id pub-id-type="doi">10.1002/1873-3468.12331</pub-id>
</citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dai</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Jones</surname>
<given-names>R. A.</given-names>
</name>
<name>
<surname>Hurley</surname>
<given-names>L. H.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2006</year>). <article-title>NMR solution structure of the major G-quadruplex structure formed in the human BCL2 promoter region</article-title>. <source>Nucleic Acids Res.</source> <volume>34</volume>, <fpage>5133</fpage>&#x2013;<lpage>5144</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkl610</pub-id>
</citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Davis</surname>
<given-names>J. T.</given-names>
</name>
</person-group> (<year>2004</year>). <article-title>G-quartets 40 years later: from 5&#x2032;-GMP to molecular biology and supramolecular chemistry</article-title>. <source>Angew. Chem. Int. Ed.</source> <volume>43</volume>, <fpage>668</fpage>&#x2013;<lpage>698</lpage>. <pub-id pub-id-type="doi">10.1002/anie.200300589</pub-id>
</citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Doluca</surname>
<given-names>O.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>G4Catchall: a G-quadruplex prediction approach considering atypical features</article-title>. <source>J. Theor. Biol.</source> <volume>463</volume>, <fpage>92</fpage>&#x2013;<lpage>98</lpage>. <pub-id pub-id-type="doi">10.1016/j.jtbi.2018.12.007</pub-id>
</citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fernando</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Reszka</surname>
<given-names>A. P.</given-names>
</name>
<name>
<surname>Huppert</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Ladame</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Rankin</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Venkitaraman</surname>
<given-names>A. R.</given-names>
</name>
<etal/>
</person-group> (<year>2006</year>). <article-title>A conserved quadruplex motif located in a transcription activation site of the human c-kit oncogene</article-title>. <source>Biochemistry</source> <volume>45</volume>, <fpage>7854</fpage>&#x2013;<lpage>7860</lpage>. <pub-id pub-id-type="doi">10.1021/bi0601510</pub-id>
</citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fleming</surname>
<given-names>A. M.</given-names>
</name>
<name>
<surname>Ding</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Burrows</surname>
<given-names>C. J.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Oxidative DNA damage is epigenetic by regulating gene transcription via base excision repair</article-title>. <source>Proc. Natl. Acad. Sci.</source> <volume>114</volume>, <fpage>2604</fpage>&#x2013;<lpage>2609</lpage>. <pub-id pub-id-type="doi">10.1073/pnas.1619809114</pub-id>
</citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Garant</surname>
<given-names>J.-M.</given-names>
</name>
<name>
<surname>Perreault</surname>
<given-names>J.-P.</given-names>
</name>
<name>
<surname>Scott</surname>
<given-names>M. S.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Motif independent identification of potential RNA G-quadruplexes by G4RNA screener</article-title>. <source>Bioinformatics</source> <volume>33</volume>, <fpage>3532</fpage>&#x2013;<lpage>3537</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btx498</pub-id>
</citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gardiner-Garden</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Frommer</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>1987</year>). <article-title>CpG islands in vertebrate genomes</article-title>. <source>J. Mol. Biol.</source> <volume>196</volume>, <fpage>261</fpage>&#x2013;<lpage>282</lpage>. <pub-id pub-id-type="doi">10.1016/0022-2836(87)90689-9</pub-id>
</citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gellert</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Lipsett</surname>
<given-names>M. N.</given-names>
</name>
<name>
<surname>Davies</surname>
<given-names>D. R.</given-names>
</name>
</person-group> (<year>1962</year>). <article-title>Helix formation by guanylic acid</article-title>. <source>Proc. Natl. Acad. Sci.</source> <volume>48</volume>, <fpage>2013</fpage>&#x2013;<lpage>2018</lpage>. <pub-id pub-id-type="doi">10.1073/pnas.48.12.2013</pub-id>
</citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Guedin</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Gros</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Alberti</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Mergny</surname>
<given-names>J. L.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>How long is too long? Effects of loop size on G-quadruplex stability</article-title>. <source>Nucleic Acids Res.</source> <volume>38</volume>, <fpage>7858</fpage>&#x2013;<lpage>7868</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkq639</pub-id>
</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hackenberg</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Previti</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Luque-Escamilla</surname>
<given-names>P. L.</given-names>
</name>
<name>
<surname>Carpena</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Mart&#xed;nez-Aroza</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Oliver</surname>
<given-names>J. L.</given-names>
</name>
</person-group> (<year>2006</year>). <article-title>CpGcluster: a distance-based algorithm for CpG-island detection</article-title>. <source>BMC Bioinforma.</source> <volume>7</volume>, <fpage>446</fpage>&#x2013;<lpage>513</lpage>. <pub-id pub-id-type="doi">10.1186/1471-2105-7-446</pub-id>
</citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hansel-Hertsch</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Di Antonio</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Balasubramanian</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>DNA G-quadruplexes in the human genome: detection, functions and therapeutic potential</article-title>. <source>Nat. Rev. Mol. Cell Biol.</source> <volume>18</volume>, <fpage>279</fpage>&#x2013;<lpage>284</lpage>. <pub-id pub-id-type="doi">10.1038/nrm.2017.3</pub-id>
</citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hon</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Mart&#xed;nek</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Zendulka</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Lexa</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>pqsfinder: an exhaustive and imperfection-tolerant search tool for potential quadruplex-forming sequences in R</article-title>. <source>Bioinformatics</source> <volume>33</volume>, <fpage>3373</fpage>&#x2013;<lpage>3379</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btx413</pub-id>
</citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Huppert</surname>
<given-names>J. L.</given-names>
</name>
<name>
<surname>Balasubramanian</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2005</year>). <article-title>Prevalence of quadruplexes in the human genome</article-title>. <source>Nucleic Acids Res.</source> <volume>33</volume>, <fpage>2908</fpage>&#x2013;<lpage>2916</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gki609</pub-id>
</citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Huppert</surname>
<given-names>J. L.</given-names>
</name>
<name>
<surname>Bugaut</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Kumari</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Balasubramanian</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>G-quadruplexes: the beginning and end of UTRs</article-title>. <source>Nucleic Acids Res.</source> <volume>36</volume>, <fpage>6260</fpage>&#x2013;<lpage>6268</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkn511</pub-id>
</citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jara-Espejo</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Line</surname>
<given-names>S. R.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>DNA G-quadruplex stability, position and chromatin accessibility are associated with CpG island methylation</article-title>. <source>FEBS J.</source> <volume>287</volume>, <fpage>483</fpage>&#x2013;<lpage>495</lpage>. <pub-id pub-id-type="doi">10.1111/febs.15065</pub-id>
</citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Khristich</surname>
<given-names>A. N.</given-names>
</name>
<name>
<surname>Mirkin</surname>
<given-names>S. M.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>On the wrong DNA track: molecular mechanisms of repeat-mediated genome instability</article-title>. <source>J. Biol. Chem.</source> <volume>295</volume>, <fpage>4134</fpage>&#x2013;<lpage>4170</lpage>. <pub-id pub-id-type="doi">10.1074/jbc.REV119.007678</pub-id>
</citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Klimentova</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Polacek</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Simecek</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Alexiou</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>PENGUINN: precise exploration of nuclear g-quadruplexes using interpretable neural networks</article-title>. <source>Front. Genet.</source> <volume>11</volume>, <fpage>568546</fpage>. <pub-id pub-id-type="doi">10.3389/fgene.2020.568546</pub-id>
</citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kwok</surname>
<given-names>C. K.</given-names>
</name>
<name>
<surname>Marsico</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Sahakyan</surname>
<given-names>A. B.</given-names>
</name>
<name>
<surname>Chambers</surname>
<given-names>V. S.</given-names>
</name>
<name>
<surname>Balasubramanian</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>rG4-seq reveals widespread formation of G-quadruplex structures in the human transcriptome</article-title>. <source>Nat. Methods</source> <volume>13</volume>, <fpage>841</fpage>&#x2013;<lpage>844</lpage>. <pub-id pub-id-type="doi">10.1038/nmeth.3965</pub-id>
</citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Larsen</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Gundersen</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Lopez</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Prydz</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>1992</year>). <article-title>CpG islands as gene markers in the human genome</article-title>. <source>Genomics</source> <volume>13</volume>, <fpage>1095</fpage>&#x2013;<lpage>1107</lpage>. <pub-id pub-id-type="doi">10.1016/0888-7543(92)90024-m</pub-id>
</citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lavezzo</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Berselli</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Frasson</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Perrone</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Pal&#xf9;</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Brazzale</surname>
<given-names>A. R.</given-names>
</name>
<etal/>
</person-group> (<year>2018</year>). <article-title>G-quadruplex forming sequences in the genome of all known human viruses: a comprehensive guide</article-title>. <source>PLoS Comput. Biol.</source> <volume>14</volume>, <fpage>e1006675</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pcbi.1006675</pub-id>
</citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Zheng</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>H. H.</given-names>
</name>
<name>
<surname>He</surname>
<given-names>Y. D.</given-names>
</name>
<name>
<surname>Yuan</surname>
<given-names>B. F.</given-names>
</name>
<etal/>
</person-group> (<year>2015</year>). <article-title>Guanine-vacancy&#x2013;bearing G-quadruplexes responsive to guanine derivatives</article-title>. <source>Proc. Natl. Acad. Sci.</source> <volume>112</volume>, <fpage>14581</fpage>&#x2013;<lpage>14586</lpage>. <pub-id pub-id-type="doi">10.1073/pnas.1516925112</pub-id>
</citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Shen</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Lan</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Haruehanroengra</surname>
<given-names>P.</given-names>
</name>
<etal/>
</person-group> (<year>2018</year>). <article-title>High-resolution DNA quadruplex structure containing all the A-G-C-T-tetrads</article-title>. <source>Nucleic Acids Res.</source> <volume>46</volume>, <fpage>11627</fpage>&#x2013;<lpage>11638</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gky902</pub-id>
</citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Maizels</surname>
<given-names>N.</given-names>
</name>
</person-group> (<year>2006</year>). <article-title>Dynamic roles for G4 DNA in the biology of eukaryotic cells</article-title>. <source>Nat. Struct. Mol. Biol.</source> <volume>13</volume>, <fpage>1055</fpage>&#x2013;<lpage>1059</lpage>. <pub-id pub-id-type="doi">10.1038/nsmb1171</pub-id>
</citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mao</surname>
<given-names>S.-Q.</given-names>
</name>
<name>
<surname>Ghanbarian</surname>
<given-names>A. T.</given-names>
</name>
<name>
<surname>Spiegel</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Mart&#xed;nez Cuesta</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Beraldi</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Di Antonio</surname>
<given-names>M.</given-names>
</name>
<etal/>
</person-group> (<year>2018</year>). <article-title>DNA G-quadruplex structures mold the DNA methylome</article-title>. <source>Nat. Struct. Mol. Biol.</source> <volume>25</volume>, <fpage>951</fpage>&#x2013;<lpage>957</lpage>. <pub-id pub-id-type="doi">10.1038/s41594-018-0131-8</pub-id>
</citation>
</ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Marsico</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Chambers</surname>
<given-names>V. S.</given-names>
</name>
<name>
<surname>Sahakyan</surname>
<given-names>A. B.</given-names>
</name>
<name>
<surname>McCauley</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Boutell</surname>
<given-names>J. M.</given-names>
</name>
<name>
<surname>Di Antonio</surname>
<given-names>M.</given-names>
</name>
<etal/>
</person-group> (<year>2019</year>). <article-title>Whole genome experimental maps of DNA G-quadruplexes in multiple species</article-title>. <source>Nucleic Acids Res.</source> <volume>47</volume>, <fpage>3862</fpage>&#x2013;<lpage>3874</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkz179</pub-id>
</citation>
</ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Moelling</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Broecker</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Viruses and evolution&#x2013;viruses first? A personal perspective</article-title>. <source>Front. Microbiol.</source> <volume>10</volume>, <fpage>523</fpage>. <pub-id pub-id-type="doi">10.3389/fmicb.2019.00523</pub-id>
</citation>
</ref>
<ref id="B35">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mukundan</surname>
<given-names>V. T.</given-names>
</name>
<name>
<surname>Phan</surname>
<given-names>A. T.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Bulges in G-quadruplexes: broadening the definition of G-quadruplex-forming sequences</article-title>. <source>J. Am. Chem. Soc.</source> <volume>135</volume>, <fpage>5017</fpage>&#x2013;<lpage>5028</lpage>. <pub-id pub-id-type="doi">10.1021/ja310251r</pub-id>
</citation>
</ref>
<ref id="B36">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Omaga</surname>
<given-names>C. A.</given-names>
</name>
<name>
<surname>Fleming</surname>
<given-names>A. M.</given-names>
</name>
<name>
<surname>Burrows</surname>
<given-names>C. J.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>The fifth domain in the G-quadruplex-forming sequence of the human NEIL3 promoter locks DNA folding in response to oxidative damage</article-title>. <source>Biochemistry</source> <volume>57</volume>, <fpage>2958</fpage>&#x2013;<lpage>2970</lpage>. <pub-id pub-id-type="doi">10.1021/acs.biochem.8b00226</pub-id>
</citation>
</ref>
<ref id="B37">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Phan</surname>
<given-names>A. T.</given-names>
</name>
<name>
<surname>Kuryavyi</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Gaw</surname>
<given-names>H. Y.</given-names>
</name>
<name>
<surname>Patel</surname>
<given-names>D. J.</given-names>
</name>
</person-group> (<year>2005</year>). <article-title>Small-molecule interaction with a five-guanine-tract G-quadruplex structure from the human MYC promoter</article-title>. <source>Nat. Chem. Biol.</source> <volume>1</volume>, <fpage>167</fpage>&#x2013;<lpage>173</lpage>. <pub-id pub-id-type="doi">10.1038/nchembio723</pub-id>
</citation>
</ref>
<ref id="B38">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ponger</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Mouchiroud</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2002</year>). <article-title>CpGProD: identifying CpG islands associated with transcription start sites in large genomic mammalian sequences</article-title>. <source>Bioinformatics</source> <volume>18</volume>, <fpage>631</fpage>&#x2013;<lpage>633</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/18.4.631</pub-id>
</citation>
</ref>
<ref id="B39">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Reina</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Cavalieri</surname>
<given-names>V.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Epigenetic modulation of chromatin states and gene expression by G-quadruplex structures</article-title>. <source>Int. J. Mol. Sci.</source> <volume>21</volume>, <fpage>4172</fpage>. <pub-id pub-id-type="doi">10.3390/ijms21114172</pub-id>
</citation>
</ref>
<ref id="B40">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rhodes</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Lipps</surname>
<given-names>H. J.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>G-quadruplexes and their regulatory roles in biology</article-title>. <source>Nucleic Acids Res.</source> <volume>43</volume>, <fpage>8627</fpage>&#x2013;<lpage>8637</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkv862</pub-id>
</citation>
</ref>
<ref id="B41">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ruggiero</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Richter</surname>
<given-names>S. N.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>G-quadruplexes and G-quadruplex ligands: targets and tools in antiviral therapy</article-title>. <source>Nucleic Acids Res.</source> <volume>46</volume>, <fpage>3270</fpage>&#x2013;<lpage>3283</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gky187</pub-id>
</citation>
</ref>
<ref id="B42">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sahakyan</surname>
<given-names>A. B.</given-names>
</name>
<name>
<surname>Chambers</surname>
<given-names>V. S.</given-names>
</name>
<name>
<surname>Marsico</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Santner</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Di Antonio</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Balasubramanian</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Machine learning model for sequence-driven DNA G-quadruplex formation</article-title>. <source>Sci. Rep.</source> <volume>7</volume>, <fpage>14535</fpage>. <pub-id pub-id-type="doi">10.1038/s41598-017-14017-4</pub-id>
</citation>
</ref>
<ref id="B43">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sen</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Gilbert</surname>
<given-names>W.</given-names>
</name>
</person-group> (<year>1990</year>). <article-title>A sodium-potassium switch in the formation of four-stranded G4-DNA</article-title>. <source>Nature</source> <volume>344</volume>, <fpage>410</fpage>&#x2013;<lpage>414</lpage>. <pub-id pub-id-type="doi">10.1038/344410a0</pub-id>
</citation>
</ref>
<ref id="B44">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Siddiqui-Jain</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Grand</surname>
<given-names>C. L.</given-names>
</name>
<name>
<surname>Bearss</surname>
<given-names>D. J.</given-names>
</name>
<name>
<surname>Hurley</surname>
<given-names>L. H.</given-names>
</name>
</person-group> (<year>2002</year>). <article-title>Direct evidence for a G-quadruplex in a promoter region and its targeting with a small molecule to repress c-MYC transcription</article-title>. <source>Proc. Natl. Acad. Sci.</source> <volume>99</volume>, <fpage>11593</fpage>&#x2013;<lpage>11598</lpage>. <pub-id pub-id-type="doi">10.1073/pnas.182256799</pub-id>
</citation>
</ref>
<ref id="B45">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Spiegel</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Adhikari</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Balasubramanian</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>The structure and function of DNA G-quadruplexes</article-title>. <source>Trends Chem.</source> <volume>2</volume>, <fpage>123</fpage>&#x2013;<lpage>136</lpage>. <pub-id pub-id-type="doi">10.1016/j.trechm.2019.07.002</pub-id>
</citation>
</ref>
<ref id="B46">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sundquist</surname>
<given-names>W. I.</given-names>
</name>
<name>
<surname>Klug</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>1989</year>). <article-title>Telomeric DNA dimerizes by formation of guanine tetrads between hairpin loops</article-title>. <source>Nature</source> <volume>342</volume>, <fpage>825</fpage>&#x2013;<lpage>829</lpage>. <pub-id pub-id-type="doi">10.1038/342825a0</pub-id>
</citation>
</ref>
<ref id="B47">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Takai</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Jones</surname>
<given-names>P. A.</given-names>
</name>
</person-group> (<year>2003</year>). <article-title>The CpG island searcher: a new WWW resource</article-title>. <source>In Silico Biol.</source> <volume>3</volume>, <fpage>235</fpage>&#x2013;<lpage>240</lpage>.</citation>
</ref>
<ref id="B48">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Todd</surname>
<given-names>A. K.</given-names>
</name>
<name>
<surname>Johnston</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Neidle</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2005</year>). <article-title>Highly prevalent putative quadruplex sequence motifs in human DNA</article-title>. <source>Nucleic Acids Res.</source> <volume>33</volume>, <fpage>2901</fpage>&#x2013;<lpage>2907</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gki553</pub-id>
</citation>
</ref>
<ref id="B49">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Toma&#x161;ko</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Vorl&#xed;&#x10d;kov&#xe1;</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Sagi</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>Substitution of adenine for guanine in the quadruplex-forming human telomere DNA sequence G<sub>3</sub>(T<sub>2</sub>AG<sub>3</sub>)<sub>3</sub>
</article-title>. <source>Biochimie</source> <volume>91</volume>, <fpage>171</fpage>&#x2013;<lpage>179</lpage>. <pub-id pub-id-type="doi">10.1016/j.biochi.2008.07.012</pub-id>
</citation>
</ref>
<ref id="B50">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Valton</surname>
<given-names>A.-L.</given-names>
</name>
<name>
<surname>Prioleau</surname>
<given-names>M.-N.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>G-quadruplexes in DNA replication: a problem or a necessity?</article-title> <source>Trends Genet.</source> <volume>32</volume>, <fpage>697</fpage>&#x2013;<lpage>706</lpage>. <pub-id pub-id-type="doi">10.1016/j.tig.2016.09.004</pub-id>
</citation>
</ref>
<ref id="B51">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Varizhuk</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Ischenko</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Smirnov</surname>
<given-names>I.</given-names>
</name>
<etal/>
</person-group> (<year>2014</year>). <article-title>An improved search algorithm to find G-quadruplexes in genome sequences</article-title>. <source>bioRxiv</source>, <fpage>001990</fpage>. <pub-id pub-id-type="doi">10.1101/001990</pub-id>
</citation>
</ref>
<ref id="B52">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Varizhuk</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Ischenko</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Tsvetkov</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Novikov</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Kulemin</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Kaluzhny</surname>
<given-names>D.</given-names>
</name>
<etal/>
</person-group> (<year>2017</year>). <article-title>The expanding repertoire of G4 DNA structures</article-title>. <source>Biochimie</source> <volume>135</volume>, <fpage>54</fpage>&#x2013;<lpage>62</lpage>. <pub-id pub-id-type="doi">10.1016/j.biochi.2017.01.003</pub-id>
</citation>
</ref>
<ref id="B53">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Varshney</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Spiegel</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Zyner</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Tannahill</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Balasubramanian</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>The regulation and functions of DNA and RNA G-quadruplexes</article-title>. <source>Nat. Rev. Mol. Cell Biol.</source> <volume>21</volume>, <fpage>459</fpage>&#x2013;<lpage>474</lpage>. <pub-id pub-id-type="doi">10.1038/s41580-020-0236-x</pub-id>
</citation>
</ref>
<ref id="B54">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Vijay Kumar</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Morales</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Tsvetkov</surname>
<given-names>A. S.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>G-quadruplexes and associated proteins in aging and Alzheimer&#x2019;s disease</article-title>. <source>Front. Aging</source> <volume>4</volume>, <fpage>1164057</fpage>. <pub-id pub-id-type="doi">10.3389/fragi.2023.1164057</pub-id>
</citation>
</ref>
<ref id="B55">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Thombre</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Shah</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Latanich</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>G-Quadruplexes as pathogenic drivers in neurodegenerative disorders</article-title>. <source>Nucleic Acids Res.</source> <volume>49</volume>, <fpage>4816</fpage>&#x2013;<lpage>4830</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkab164</pub-id>
</citation>
</ref>
<ref id="B56">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Wild</surname>
<given-names>A. T.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>W. H.</given-names>
</name>
<name>
<surname>Shah</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Danussi</surname>
<given-names>C.</given-names>
</name>
<etal/>
</person-group> (<year>2019</year>). <article-title>G-quadruplex DNA drives genomic instability and represents a targetable molecular abnormality in ATRX-deficient malignant glioma</article-title>. <source>Nat. Commun.</source> <volume>10</volume>, <fpage>943</fpage>. <pub-id pub-id-type="doi">10.1038/s41467-019-08905-8</pub-id>
</citation>
</ref>
<ref id="B57">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Watson</surname>
<given-names>J. D.</given-names>
</name>
<name>
<surname>Crick</surname>
<given-names>F. H.</given-names>
</name>
</person-group> (<year>1953</year>). <article-title>Molecular structure of nucleic acids: a structure for deoxyribose nucleic acid</article-title>. <source>Nature</source> <volume>171</volume>, <fpage>737</fpage>&#x2013;<lpage>738</lpage>. <pub-id pub-id-type="doi">10.1038/171737a0</pub-id>
</citation>
</ref>
<ref id="B58">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xia</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Luo</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Shan</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Muruato</surname>
<given-names>A. E.</given-names>
</name>
<name>
<surname>Nunes</surname>
<given-names>B. T. D.</given-names>
</name>
<name>
<surname>Medeiros</surname>
<given-names>D. B. A.</given-names>
</name>
<etal/>
</person-group> (<year>2018</year>). <article-title>An evolutionary NS1 mutation enhances Zika virus evasion of host interferon induction</article-title>. <source>Nat. Commun.</source> <volume>9</volume>, <fpage>414</fpage>. <pub-id pub-id-type="doi">10.1038/s41467-017-02816-2</pub-id>
</citation>
</ref>
<ref id="B59">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Sugiyama</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2006</year>). <article-title>Formation of the G-quadruplex and i-motif structures in retinoblastoma susceptibility genes (Rb)</article-title>. <source>Nucleic Acids Res.</source> <volume>34</volume>, <fpage>949</fpage>&#x2013;<lpage>954</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkj485</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>