<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.3" xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Genet.</journal-id>
<journal-title>Frontiers in Genetics</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Genet.</abbrev-journal-title>
<issn pub-type="epub">1664-8021</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1118183</article-id>
<article-id pub-id-type="doi">10.3389/fgene.2022.1118183</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Genetics</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>DeteX: A highly accurate software for detecting SNV and InDel in single and paired NGS data in cancer research</article-title>
<alt-title alt-title-type="left-running-head">Cui et al.</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fgene.2022.1118183">10.3389/fgene.2022.1118183</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Cui</surname>
<given-names>Yunlong</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="fn" rid="fn1">
<sup>&#x2020;</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2145145/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Li</surname>
<given-names>Hongfeng</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="fn" rid="fn1">
<sup>&#x2020;</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Liu</surname>
<given-names>Pengfei</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<xref ref-type="fn" rid="fn1">
<sup>&#x2020;</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Wang</surname>
<given-names>Hailong</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Zhang</surname>
<given-names>Zhenzhen</given-names>
</name>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Qu</surname>
<given-names>Hongzhu</given-names>
</name>
<xref ref-type="aff" rid="aff5">
<sup>5</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/881977/overview"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Tian</surname>
<given-names>Caijuan</given-names>
</name>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Fang</surname>
<given-names>Xiangdong</given-names>
</name>
<xref ref-type="aff" rid="aff5">
<sup>5</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2130483/overview"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>Department of Hepatobiliary Oncology</institution>, <institution>National Clinical Research Center for Cancer</institution>, <institution>Key Laboratory of Cancer Prevention and Therapy of Tianjin</institution>, <institution>Tianjin&#x2019;s Clinical Research Center for Cancer</institution>, <institution>Tianjin Medical University Cancer Institute and Hospital</institution>, <addr-line>Tianjin</addr-line>, <country>China</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Department of Clinical Laboratory</institution>, <institution>Tianjin Academy of Traditional Chinese Medicine Affiliated Hospital</institution>, <addr-line>Tianjin</addr-line>, <country>China</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>Department of Oncology</institution>, <institution>Tianjin Academy of Traditional Chinese Medicine Affiliated Hospital</institution>, <addr-line>Tianjin</addr-line>, <country>China</country>
</aff>
<aff id="aff4">
<sup>4</sup>
<institution>Tianjin Marvel Medical Laboratory</institution>, <institution>Tianjin Marvelbio Technology Co., Ltd.</institution>, <addr-line>Tianjin</addr-line>, <country>China</country>
</aff>
<aff id="aff5">
<sup>5</sup>
<institution>Beijing Institute of Genomics</institution>, <institution>Chinese Academy of Sciences/China National Center for Bioinformation</institution>, <addr-line>Beijing</addr-line>, <country>China</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2112396/overview">Xiaozhou Yu</ext-link>, Northwestern University, United States</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1378836/overview">Ti Zhang</ext-link>, Fudan University, China</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1336346/overview">Chunzhi Zhang</ext-link>, Tianjin Hospital, China</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Caijuan Tian, <email>tiancaijuan06@mails.ucas.ac.cn</email>; Xiangdong Fang, <email>fangxd@big.ac.cn</email>
</corresp>
<fn fn-type="equal" id="fn1">
<label>&#x2020;</label>
<p>These authors have contributed equally to this work and share first authorship</p>
</fn>
<fn fn-type="other">
<p>This article was submitted to Human and Medical Genomics, a section of the journal Frontiers in Genetics</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>06</day>
<month>01</month>
<year>2023</year>
</pub-date>
<pub-date pub-type="collection">
<year>2022</year>
</pub-date>
<volume>13</volume>
<elocation-id>1118183</elocation-id>
<history>
<date date-type="received">
<day>07</day>
<month>12</month>
<year>2022</year>
</date>
<date date-type="accepted">
<day>19</day>
<month>12</month>
<year>2022</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2023 Cui, Li, Liu, Wang, Zhang, Qu, Tian and Fang.</copyright-statement>
<copyright-year>2023</copyright-year>
<copyright-holder>Cui, Li, Liu, Wang, Zhang, Qu, Tian and Fang</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>
<bold>Background:</bold> Genetic testing is becoming more and more accepted in the auxiliary diagnosis and treatment of tumors. Due to the different performance of the existing bioinformatics software and the different analysis results, the needs of clinical diagnosis and treatment cannot be met. To this end, we combined a Bayesian classification model (BC) and Fisher&#x2019;s exact test (FET), and developed an efficient software, DeteX, to detect SNV and InDel mutations. It can detect the somatic mutations in tumor-normal paired samples as well as mutations in a single sample.</p>
<p>
<bold>Methods:</bold> Combination of a Bayesian classification model (BC) and Fisher&#x2019;s exact test (FET).</p>
<p>
<bold>Results:</bold> We detected SNVs and InDels in 11 TCGA glioma samples, 28 clinically targeted capture samples and 2 NCCL-EQA standard samples with DeteX, VarDict, Mutect, VarScan and GatkSNV. The results show that, among the three groups of samples, DeteX has higher sensitivity and precision whether it detects SNVs or InDels than other callers and the F1 value of DeteX is the highest. Especially in the detection of substitution and complex mutations, only DeteX can accurately detect these two kinds of mutations. In terms of single-sample mutation detection, DeteX is much more sensitive than the HaplotypeCaller program in Gatk. In addition, although DeteX has higher mutation detection capabilities, its running time is only .609 of VarDict, which is .704 and .343 longer than VarScan and MuTect, respectively.</p>
<p>
<bold>Conclusion:</bold> In this study, we developed DeteX to detect SNV and InDel mutations in single and paired samples. DeteX has high sensitivity and precision especially in the detection of substitution and complex mutations. In summary, DeteX from NGS data is a good SNV and InDel caller.</p>
</abstract>
<abstract abstract-type="graphical">
<title>Graphical Abstract</title>
<p>
<fig>
<caption>
<p>Software running process and output results.</p>
</caption>
<graphic xlink:href="FGENE_fgene-2022-1118183_wc_abs.tif" position="anchor"/>
</fig>
</p>
</abstract>
<kwd-group>
<kwd>snv/InDel caller</kwd>
<kwd>NGS</kwd>
<kwd>substitution and complex mutations</kwd>
<kwd>substitution</kwd>
<kwd>mutations</kwd>
</kwd-group>
<contract-num rid="cn001">2016YFC0901700 2018YFC0910700</contract-num>
<contract-sponsor id="cn001">National Key Research and Development Program of China<named-content content-type="fundref-id">10.13039/501100012166</named-content>
</contract-sponsor>
</article-meta>
</front>
<body>
<sec id="s1">
<title>Background</title>
<p>After a long period of extensive and intensive research, it was believed that tumor is a disease driven by genetic mutations, which is closely related to clinical diagnosis and personalized therapy (<xref ref-type="bibr" rid="B28">Stratton, 2011</xref>; <xref ref-type="bibr" rid="B7">Ding et al., 2012</xref>; <xref ref-type="bibr" rid="B10">Frampton et al., 2013</xref>; <xref ref-type="bibr" rid="B11">Kandoth et al., 2013</xref>; <xref ref-type="bibr" rid="B15">Landau et al., 2013</xref>). Although next-generation sequencing (NGS) has become more widely accessible in cancer research for its high-throughput advantages, the accuracy of this technology depends to a large extent on the optimization of data analysis. Cancer genome analysis is expected to reveal the patterns of genetic alterations, including single nucleotide variations (SNVs), multi-nucleotide variations (MNVs), insertions and deletions (InDels), complex variations, etc. Among all mutation types, SNVs and InDels get the most attention from variant callers (<xref ref-type="bibr" rid="B20">McKenna et al., 2010</xref>; <xref ref-type="bibr" rid="B13">Koboldt et al., 2012</xref>; <xref ref-type="bibr" rid="B5">Cibulskis et al., 2013</xref>; <xref ref-type="bibr" rid="B14">Lai et al., 2016</xref>). Due to the variable sample material, the rare frequency of alteration and the complex mutation event, it is a tough and urgent need to accurately detect mutations from NGS data (<xref ref-type="bibr" rid="B27">Stransky et al., 2011</xref>; <xref ref-type="bibr" rid="B2">Banerji et al., 2012</xref>; <xref ref-type="bibr" rid="B3">Carter et al., 2012</xref>).</p>
<p>Errors in bioinformatics processes and experimental steps may confuse the real variants of clinical samples (<xref ref-type="bibr" rid="B23">Robasky et al., 2014</xref>; <xref ref-type="bibr" rid="B22">O&#x27;Rawe et al., 2013</xref>; <xref ref-type="bibr" rid="B12">Kircher et al., 2011</xref>; <xref ref-type="bibr" rid="B21">Metzker, 2010</xref>). As a consequence, several regularly cited tools such as VarScan2, VarDict, MuTect and GATK, have been developed to solve or partially solve these problems. VarScan2 and VarDict apply the Fisher&#x2019;s Exact Test (FET) to detect mutations (<xref ref-type="bibr" rid="B13">Koboldt et al., 2012</xref>; <xref ref-type="bibr" rid="B14">Lai et al., 2016</xref>). Even though FET performs well at calling crucial and slight variables that other callers are likely to miss or ignore, the accuracy is still insufficient due to inadequate false positive filtering. In addition, VarDict&#x2019;s versatile capability drags down its running speed performance at computing the specific type of mutation.</p>
<p>Another statistical method is using Bayesian Classifier (BC) to detect somatic point alterations (<xref ref-type="bibr" rid="B5">Cibulskis et al., 2013</xref>), such as MuTect. MuTect is a highly sensitive mutation caller, only requiring a few supporting reads to detect, and a series of filters are used to ensure its specificity. Meanwhile, MuTect applies severe penalties to somatic variant candidates if the variant sites are also found in the matched normal. While this approach filters out the most false positives of germline variants, it adversely affects sensitivity in cases when the normal sample is contaminated. GATK is a comprehensive variants caller that combines multiple methods to detect germline or somatic mutations (<xref ref-type="bibr" rid="B20">McKenna et al., 2010</xref>). However, GATK has poor sensitivity and accuracy in detecting low-frequency variants, especially those with variant allele frequencies (VAFs) less than 5%. Generally, most of these approaches would be confounded by false negative or false positives owing to impure sample compositions, deviations in experimental operation and imperfect detecting strategies.</p>
<p>To address these problems mentioned above, we have developed a high-confidence variant caller, DeteX which can detect variants in tumor-only or tumor-normal matched NGS data. This software is developed by integrating FET and BC algorithms in Perl language to adapt to different conditions and purposes. Considering the artifacts caused by polymerase chain reaction (PCR) and mutations occurring around InDels, many filtering conditions were optimized to ensure the accuracy of detecting mutations, and the efficiency of DeteX is improved by running sub-regions in parallel. In this study, <italic>F</italic>1 score was used as a quantitative indicator of accuracy to compare the performance of DeteX with that of other callers. Moreover, three group sample datasets, including 11 pairs of TCGA glioma samples, 28 pairs of real tumor targeted sequencing samples and two standard samples from External Quality Assessment of High-throughput Sequencing for Tumor Somatic Mutation in China (NCCL-EQA), were used to assess the performance of DeteX. This study proved that DeteX can improve the accuracy of mutation detection, especially in detecting substitution and complex mutations, and it could be used as a convenient tool to replace the multiplex calling and filtration pipeline.</p>
</sec>
<sec sec-type="materials|methods" id="s2">
<title>Materials and Methods</title>
<sec id="s2-1">
<title>Datasets</title>
<p>In order to evaluate the performance of DeteX, we used a range of real and standard datasets as shown below:<list list-type="simple">
<list-item>
<p>1 The public whole genome sequencing datasets from TCGA: 11 pairs of glioma data in sam format, with average sequencing depth of 50X-100X. The variant set of Mutect2 software for these 11 samples (<xref ref-type="sec" rid="s12">Supplementary Table S2</xref>), obtained from the TCGA (<ext-link ext-link-type="uri" xlink:href="https://portal.gdc.cancer.gov/">https://portal.gdc.cancer.gov/</ext-link>) website, was used as the standard variant set for subsequent analysis.</p>
</list-item>
<list-item>
<p>2 The real clinical targeted sequencing data: Blood from 28 pairs from clinical patients with lung cancer and intestinal cancer were selected. ctDNA from blood samples was used as tumor samples, and leukocytes were used as normal samples. These DNA samples were extracted, library was constructed and sequenced to obtain sequencing data in fastq format.</p>
</list-item>
</list>
</p>
<p>The sequencing data were processed by reads filtering, alignment with Bwa (0.7.12-r1039) (<xref ref-type="bibr" rid="B20">McKenna et al., 2010</xref>), marking duplication with the Picard MarkDuplicates.jar (1.119) package, realignment with Gatk (<xref ref-type="bibr" rid="B14">Lai et al., 2016</xref>) and correction of base quality values to obtain the bam data of the samples. The average sequencing depth of samples ranged from 500X to 2500X.<list list-type="simple">
<list-item>
<p>3 The NCCL-EQA data: Illumina Hiseq platform data of NCCL-EQA in 2017 and 2019 were analyzed to obtain bam data through the same process of reads filtering, alignment, marking duplication, realignment and correction of base quality value.</p>
</list-item>
</list>
</p>
</sec>
<sec id="s2-2">
<title>Requirements and implementation</title>
<p>DeteX starts with Binary Alignment/Map (BAM) file, which is generated from NGS reads alignment or procedures, such as BWA (<xref ref-type="bibr" rid="B19">Li and Durbin, 2009</xref>), TopHat (<xref ref-type="bibr" rid="B29">Trapnell et al., 2009</xref>; <xref ref-type="bibr" rid="B8">Eenst et al., 2017</xref>), Bowtie (<xref ref-type="bibr" rid="B17">Langmead et al., 2009</xref>), and Bowtie2 (<xref ref-type="bibr" rid="B16">Langmead and Salzberg, 2012</xref>; <xref ref-type="bibr" rid="B18">Langmead et al., 2019</xref>). It is developed in Perl language. The source code can be downloaded at <ext-link ext-link-type="uri" xlink:href="https://github.com/mvlzwtd/DeteX">https://github.com/mvlzwtd/DeteX</ext-link>. This website is maintained by Marvelbio O&#x26;M team.</p>
</sec>
<sec id="s2-3">
<title>Structure and workflow</title>
<p>DeteX can detect somatic and germline mutations in tumor-only or somatic mutations in tumor-normal paired samples. The detection strategy includes the following steps:<list list-type="simple">
<list-item>
<p>(i) Filtering reads with low base quality, low mapping quality, duplicates, multiple mapped and outside of the detection interval.</p>
</list-item>
<list-item>
<p>(ii) Detecting variants, if there is a control sample, the process of filtering variation should be added to the control sample.</p>
</list-item>
<list-item>
<p>(iii) After filtering the false positive variants based on multiple filtering conditions, high confidence variants are ultimately output. The workflow of DeteX is illustrated in <xref ref-type="fig" rid="F1">Figure 1</xref>.</p>
</list-item>
</list>
</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>The workflow of DeteX. DeteX uses a BAM format file as input. The preliminary filtering is mainly concerned with base quality and mapping quality. Then it applies the Bayesian classifier (BC) method for mutation calling. For tumor-normal matched data, Fisher&#x2019;s exact test (FET) is further performed to identify the candidate variants. After false positives are reduced by applying several screening criteria (<xref ref-type="sec" rid="s12">Supplementary Table S1</xref>), the final high-confidence variants are obtained. SigVar indicates single tumor variant and SomVar indicates somatic variant.</p>
</caption>
<graphic xlink:href="fgene-13-1118183-g001.tif"/>
</fig>
</sec>
</sec>
<sec id="s3">
<title>Variant detection principle</title>
<sec id="s3-1">
<title>Reads filtering</title>
<p>In order to reduce the number of false positive variants caused by non-independence errors and sequencing errors, we have set various filtering conditions in our software, which are derived from the experience of developers in data analysis and suggestions of other software. The filtering conditions applied in our software, including reads and variants filtering, are shown in <xref ref-type="sec" rid="s12">Supplementary Table S1</xref>. These conditional values can be set by parameters except for mapping type.</p>
<p>Reads that meet all the reads filtering conditions and the first 4 variants filtering conditions in <xref ref-type="sec" rid="s12">Supplementary Table S1</xref> can then be used to detect mutations by the following BC and FET models.</p>
</sec>
<sec id="s3-2">
<title>SNV detection</title>
<p>We calculate the ratio of the maximum likelihood values (LOD) of the two models for each variant locus by using the model introduced in the MuTect supplemental method (<xref ref-type="bibr" rid="B5">Cibulskis et al., 2013</xref>). The larger LOD value of a variant, the more reliable it is. We calculate the LOD value for each variant in the tumor sample, and filter out the variants with LOD value less than 3.9.</p>
<p>If there is no control sample, variation filtering is performed according to conditions 5, 6, and 7 in <xref ref-type="sec" rid="s12">Supplementary Table S1</xref>, and the final variations are output in VCF format. The results contain germline and somatic variants. If there is no control sample and only somatic mutations are needed, we can screen out somatic mutations by dbSNP (<xref ref-type="bibr" rid="B25">Smigielski et al., 2000</xref>; <xref ref-type="bibr" rid="B24">Sherry et al., 2001</xref>; <xref ref-type="bibr" rid="B4">Chiang et al., 2017</xref>; <xref ref-type="bibr" rid="B1">Arifuzzaman et al., 2020</xref>) (<ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/snp/">https://www.ncbi.nlm.nih.gov/snp/</ext-link>), Cosmic (<xref ref-type="bibr" rid="B9">Forbes et al., 2017</xref>) (<ext-link ext-link-type="uri" xlink:href="https://cancer.sanger.ac.uk/cosmic">https://cancer.sanger.ac.uk/cosmic</ext-link>), 1000G (<ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/variation/tools/1000genomes/">https://www.ncbi.nlm.nih.gov/variation/tools/1000genomes/</ext-link>), gnomAD (<ext-link ext-link-type="uri" xlink:href="http://hgdownload.cse.ucsc.edu/gbdb/hg19/gnomAD/vcf/">http://hgdownload.cse.ucsc.edu/gbdb/hg19/gnomAD/vcf/</ext-link>), EXAC (<xref ref-type="bibr" rid="B26">Song et al., 2016</xref>) (<ext-link ext-link-type="uri" xlink:href="http://exac.broadinstitute.org">http://exac.broadinstitute.org</ext-link>) or other databases. The dbSNP, 1000G, gnomAD and EXAC databases are healthy human variant databases. If a variant exists in these databases and its population variant frequency is greater than or equal to 1%, it is considered a germline variant; otherwise it is considered a somatic mutation. The Cosmic database is a database of tumor variants. If a variant exists in it, it is considered to be a somatic mutation.
If a variant is considered as both a somatic and a germline variant, it means that both possibilities exist.</p>
<p>If there is a control sample, the frequency of variation in the control sample needs to be less than the value of parameter &#x201c;-nf&#x201d; or .2 times frequency in the tumor sample. The variants that are not in control sample or supporting reads in the same direction are directly output. The LOD value of a variation in the control sample whose frequency is less than .5 times that in the tumor is calculated. If the LOD value is less than 3.9, the variation will be output. If it is greater than or equal to 3.9, the significance <italic>p</italic>-value of the variation is calculated according to Fisher&#x2019;s exact test. If the <italic>p</italic>-value is less than or equal to the parameter setting value, the variation is output, otherwise it is not output. The <italic>p</italic>-value of Fisher&#x2019;s exact test of a variation is calculated as follows: We denote supporting variant reads in normal sample as <inline-formula id="inf1">
<mml:math id="m1">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">n</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, supporting reference reads in normal sample as <inline-formula id="inf2">
<mml:math id="m2">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">n</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, supporting variant reads in tumor sample as <inline-formula id="inf3">
<mml:math id="m3">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">c</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, supporting reference reads in tumor sample as <inline-formula id="inf4">
<mml:math id="m4">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">c</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>. The significance value <italic>p</italic> is given by<disp-formula id="equ1">
<mml:math id="m5">
<mml:mrow>
<mml:mi mathvariant="normal">p</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>n</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>n</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>!</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>!</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>n</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>!</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>n</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>!</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>n</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>!</mml:mo>
<mml:msub>
<mml:mi>n</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
<mml:mo>!</mml:mo>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>!</mml:mo>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
<mml:mo>!</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>n</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>n</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>!</mml:mo>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
</p>
<p>Next, according to conditions 5, 6, 7, and 8 in <xref ref-type="sec" rid="s12">Supplementary Table S1</xref>, variation filtering is performed, and then the final variations are output in VCF format. If two adjacent loci are mutated at the same time, that is, they are mutated in the same reads, we call this type of mutation a substitution mutation. Based on this feature, we can correctly output this type of mutation. Reads with three or more mutations, most of which are caused by alignment errors, are deleted.</p>
</sec>
<sec id="s3-3">
<title>InDel detection</title>
<p>The software applies Fisher&#x2019;s exact test algorithm to detect InDels. After meeting the reads filtering conditions, reads with one or two gaps/insert sequences (inss) are retained. If SNV variants exist in the same reads within five bases of a gap/ins, they are combined into one variant. If the gap or the distance among the gaps or inss in the same reads is less than 10 bases, they are also combined into a variant. If the two mutations are in different reads, they are considered to be two variants. These conditions ensure that complex mutations are detected correctly. Variant filter conditions 5, 6, 7, 8, 9, and 10 in <xref ref-type="sec" rid="s12">Supplementary Table S1</xref> are also applicable to InDel detection.</p>
</sec>
<sec id="s3-4">
<title>Variation frequency calculation</title>
<p>Due to PCR amplification, duplicate reads may occur at the variant site. Duplicate reads are counted as one in variation frequency calculation. In repeated reads, if the proportion of mutated reads is greater than .8, it is considered as a read supporting variation. The maximum base quality value of mutated reads at the variation site is considered as the base quality value of this site. Only reads that cover all the InDel sites are counted in InDel frequency calculation. Final variation frequency VAF is given by<disp-formula id="equ2">
<mml:math id="m6">
<mml:mrow>
<mml:mi>V</mml:mi>
<mml:mi>A</mml:mi>
<mml:mi>F</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi mathvariant="normal">t</mml:mi>
<mml:mi mathvariant="normal">o</mml:mi>
<mml:mi mathvariant="normal">t</mml:mi>
<mml:mi mathvariant="normal">a</mml:mi>
<mml:mi mathvariant="normal">l</mml:mi>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi mathvariant="normal">m</mml:mi>
<mml:mi mathvariant="normal">u</mml:mi>
<mml:mi mathvariant="normal">t</mml:mi>
<mml:mi mathvariant="normal">a</mml:mi>
<mml:mi mathvariant="normal">t</mml:mi>
<mml:mi mathvariant="normal">i</mml:mi>
<mml:mi mathvariant="normal">o</mml:mi>
<mml:mi mathvariant="normal">n</mml:mi>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi mathvariant="normal">r</mml:mi>
<mml:mi mathvariant="normal">e</mml:mi>
<mml:mi mathvariant="normal">a</mml:mi>
<mml:mi mathvariant="normal">d</mml:mi>
<mml:mi mathvariant="normal">s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="normal">t</mml:mi>
<mml:mi mathvariant="normal">o</mml:mi>
<mml:mi mathvariant="normal">t</mml:mi>
<mml:mi mathvariant="normal">a</mml:mi>
<mml:mi mathvariant="normal">l</mml:mi>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mi mathvariant="normal">r</mml:mi>
<mml:mi mathvariant="normal">e</mml:mi>
<mml:mi mathvariant="normal">a</mml:mi>
<mml:mi mathvariant="normal">d</mml:mi>
<mml:mi mathvariant="normal">s</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#xd7;</mml:mo>
<mml:mn>100</mml:mn>
<mml:mo>%</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
</p>
</sec>
<sec id="s3-5">
<title>Softwares to detect SNV/InDel</title>
<p>To evaluate the performance of DeteX, we used DeteX, VarScan, MuTect and VarDict to detect SNVs and DeteX, VarScan, Gatk and VarDict to detect InDels. In the detection of SNVs and InDels in the three groups of datasets, DeteX and MuTect were run with the default parameter values. We added parameters of &#x201c;--min-avg-qual 13, --min-tumor-freq .01, --max-normal-freq .02&#x201d; to VarScan, &#x201c;-filter T_INDEL_F &#x3c; .01&#x201d; to Gatk, and &#x201c;-m 5 -O 40 -V .02 -x 0 -k 0 -X 3 -c 1 -S 2 -E 3 -g 4&#x201d; to VarDict.</p>
</sec>
<sec id="s3-6">
<title>Performance evaluation metrics</title>
<p>We use sensitivity, precision and F1 value to evaluate the performance of the software. Sensitivity is the proportion of detected true variations in all the true positive variations. The higher the sensitivity, the lower the rate of missed detection. Precision is the proportion of detected true variations in all the observed variations. The higher the precision, the higher the accuracy. The value of F1 is the product of sensitivity and precision, which comprehensively reflects the variation detection performance of the software.</p>
</sec>
</sec>
<sec sec-type="results" id="s4">
<title>Results</title>
<sec id="s4-1">
<title>Benchmark variant datasets</title>
<p>Benchmark variant data is very important for assessing the performance of variant detection software. Few typical real samples contain all types of mutations, and simulation data usually cannot contain all kinds of random errors in sequencing; therefore, public authoritative samples and standard samples can be used to evaluate software performance. In this paper, three groups of samples are selected. Each group of samples has a different way to get the benchmark variant data.</p>
<p>11 TCGA samples. The Mutect2 software can obtain the most reliable variant results. So we used variants detected by Mutect2 as the final benchmark variant dataset. The mutations with sequencing depth below eight and less than 2 variant reads are dropped. Finally, 760 mutations, including 37 InDels and 723 SNVs (<xref ref-type="sec" rid="s12">Supplementary Table S2</xref>), were found in the 11 Mutect2-labeled glioma samples. Because substitution and complex mutations are considered as two mutations in the public mutation database, 763 variants are displayed in the table. We put the substitution mutations into the SNV set and the complex mutations into the InDel set.</p>
<p>28 real clinical samples. All SNVs and InDels with variant frequency &#x2265;1% were detected. Those detected by at least two software tools were put into the final benchmark variant set. The categorization method is the same as for the TCGA sample variants, with substitution mutations categorized as SNVs and complex mutations categorized as InDels. There are 1,481 variants (<xref ref-type="sec" rid="s12">Supplementary Table S3</xref>), including 1,045 SNVs and 436 InDels, in the benchmark variant set.</p>
<p>NCCL-EQA external quality assessment data. There is a set of standard variants to assess the testing capabilities of laboratories across China. 94 variants are available in the 2017 sample (<xref ref-type="sec" rid="s12">Supplementary Table S4</xref>) and 23 variants are available in the 2019 sample (<xref ref-type="sec" rid="s12">Supplementary Table S5</xref>). These standard datasets enable accurate evaluation of the performance of the software.</p>
</sec>
<sec id="s4-2">
<title>Higher precision and sensitivity of DeteX in SNVs and InDel detection</title>
<p>We compared the accuracy and sensitivity of each software in detecting SNVs and InDels in 11 pairs of TCGA samples (<xref ref-type="table" rid="T1">Table 1</xref>). As shown in the table, in SNV detection, VarScan.hc and MuTect have the best precision but at the expense of sensitivity. In contrast, VarScan has the best sensitivity, but it reports a large number of false positive variants, resulting in the lowest accuracy. VarDict and DeteX are relatively balanced in both metrics, but DeteX slightly outperforms VarDict in all metrics, so its score is the highest among all the software when measured by F1 value. In InDel detection, VarScan performs similarly to its SNV detection, with the lowest accuracy. VarScan.hc has a relatively high precision but the lowest sensitivity. DeteX has a great advantage over the other software in all metrics, and it reaches the maximum F1 of 1.00.</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>Results of each caller to detect variants in TCGA samples.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Mutation type</th>
<th align="center">Caller</th>
<th align="center">Benchmark mutation num</th>
<th align="center">Detected mutation num</th>
<th align="center">True positive mutation num</th>
<th align="center">Precision (%)</th>
<th align="center">Sensitivity (%)</th>
<th align="center">F1</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td rowspan="5" align="center">SNV</td>
<td align="center">VarDict</td>
<td rowspan="5" align="center">723</td>
<td align="center">681</td>
<td align="center">673</td>
<td align="center">98.83</td>
<td align="center">93.08</td>
<td align="center">.96</td>
</tr>
<tr>
<td align="center">DeteX</td>
<td align="center">686</td>
<td align="center">684</td>
<td align="center">99.71</td>
<td align="center">94.61</td>
<td align="center">.97</td>
</tr>
<tr>
<td align="center">VarScan.hc</td>
<td align="center">610</td>
<td align="center">610</td>
<td align="center">100.00</td>
<td align="center">84.37</td>
<td align="center">.92</td>
</tr>
<tr>
<td align="center">VarScan</td>
<td align="center">934</td>
<td align="center">707</td>
<td align="center">75.70</td>
<td align="center">97.79</td>
<td align="center">.85</td>
</tr>
<tr>
<td align="center">MuTect</td>
<td align="center">654</td>
<td align="center">654</td>
<td align="center">100.00</td>
<td align="center">90.46</td>
<td align="center">.95</td>
</tr>
<tr>
<td rowspan="5" align="center">InDel</td>
<td align="center">VarDict</td>
<td rowspan="5" align="center">37</td>
<td align="center">39</td>
<td align="center">35</td>
<td align="center">89.74</td>
<td align="center">94.59</td>
<td align="center">.92</td>
</tr>
<tr>
<td align="center">DeteX</td>
<td align="center">37</td>
<td align="center">37</td>
<td align="center">100.00</td>
<td align="center">100.00</td>
<td align="center">1.00</td>
</tr>
<tr>
<td align="center">VarScan.hc</td>
<td align="center">31</td>
<td align="center">29</td>
<td align="center">93.55</td>
<td align="center">78.38</td>
<td align="center">.85</td>
</tr>
<tr>
<td align="center">VarScan</td>
<td align="center">49</td>
<td align="center">35</td>
<td align="center">71.43</td>
<td align="center">94.59</td>
<td align="center">.81</td>
</tr>
<tr>
<td align="center">Gatk</td>
<td align="center">44</td>
<td align="center">35</td>
<td align="center">79.55</td>
<td align="center">94.59</td>
<td align="center">.86</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>We also compared the results of each software for SNVs and InDels in 28 pairs of clinically targeted sequencing samples (<xref ref-type="table" rid="T2">Table 2</xref>; <xref ref-type="fig" rid="F2">Figure 2</xref>). As shown in <xref ref-type="table" rid="T2">Table 2</xref>, DeteX and VarDict have similar sensitivity in SNV detection, and both are significantly higher than VarScan and MuTect. The accuracy of DeteX is lower than that of MuTect, but higher than that of VarDict and VarScan. Overall, DeteX has the highest F1 value. In InDel detection, DeteX and VarDict have similar sensitivity, which is significantly higher than that of VarScan and Gatk. The accuracy of DeteX is moderate. The final F1 value of DeteX is also the highest. <xref ref-type="fig" rid="F2">Figure 2</xref> shows the sensitivity and accuracy of each software in detecting SNVs and InDels in the 28 samples in a more intuitive and detailed way. In some samples, no variant was detected by MuTect. Its precision in these samples is marked as 0, so values of both 1 and 0 appear in <xref ref-type="fig" rid="F2">Figure 2A</xref>. All variants marked somatic, including &#x201c;StrongSomatic&#x201d; and &#x201c;LikelySomatic&#x201d;, are counted in the VarDict results. Therefore, in InDel detection, the precision of VarDict is the worst (<xref ref-type="fig" rid="F2">Figure 2C</xref>). DeteX and VarDict are significantly better than the other two software tools in terms of sensitivity in detecting both SNVs and InDels (<xref ref-type="fig" rid="F2">Figures 2B, D</xref>).</p>
<table-wrap id="T2" position="float">
<label>TABLE 2</label>
<caption>
<p>Results of each caller to detect variants in clinical samples.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Mutation type</th>
<th align="center">Caller</th>
<th align="center">Detected mutation num</th>
<th align="center">True positive mutation num</th>
<th align="center">Benchmark mutation num</th>
<th align="center">Precision</th>
<th align="center">Sensitivity</th>
<th align="center">F1</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td rowspan="4" align="center">SNV</td>
<td align="center">DeteX</td>
<td align="center">1,445</td>
<td align="center">968</td>
<td rowspan="4" align="center">1,045</td>
<td align="center">.67</td>
<td align="center">.93</td>
<td align="center">.62</td>
</tr>
<tr>
<td align="center">VarDict</td>
<td align="center">1,637</td>
<td align="center">962</td>
<td align="center">.59</td>
<td align="center">.92</td>
<td align="center">.54</td>
</tr>
<tr>
<td align="center">VarScan</td>
<td align="center">1,424</td>
<td align="center">693</td>
<td align="center">.49</td>
<td align="center">.66</td>
<td align="center">.32</td>
</tr>
<tr>
<td align="center">Mutect</td>
<td align="center">485</td>
<td align="center">484</td>
<td align="center">1.00</td>
<td align="center">.46</td>
<td align="center">.46</td>
</tr>
<tr>
<td rowspan="4" align="center">InDel</td>
<td align="center">DeteX</td>
<td align="center">854</td>
<td align="center">355</td>
<td rowspan="4" align="center">436</td>
<td align="center">.42</td>
<td align="center">.81</td>
<td align="center">.34</td>
</tr>
<tr>
<td align="center">VarDict</td>
<td align="center">2,371</td>
<td align="center">375</td>
<td align="center">.16</td>
<td align="center">.86</td>
<td align="center">.14</td>
</tr>
<tr>
<td align="center">VarScan</td>
<td align="center">260</td>
<td align="center">151</td>
<td align="center">.58</td>
<td align="center">.35</td>
<td align="center">.20</td>
</tr>
<tr>
<td align="center">Gatk</td>
<td align="center">152</td>
<td align="center">96</td>
<td align="center">.63</td>
<td align="center">.22</td>
<td align="center">.14</td>
</tr>
</tbody>
</table>
</table-wrap>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>Sensitivity and precision of each software for the detection of SNV and InDel in 28 clinical samples. <bold>(A)</bold> Precision of SNVs, <bold>(B)</bold> Sensitivity of SNVs, <bold>(C)</bold> Precision of InDels, <bold>(D)</bold> Sensitivity of InDels.</p>
</caption>
<graphic xlink:href="fgene-13-1118183-g002.tif"/>
</fig>
<p>We also analyzed the variant results of each software for the NCCL-EQA external quality assessment samples. Each software could detect SNVs accurately (<xref ref-type="sec" rid="s12">Supplementary Table S6</xref>). For InDel, substitution and complex mutations (<xref ref-type="table" rid="T4">Table 4</xref>), only DeteX detected all of them correctly, VarDict detected 22 and both VarScan and Gatk detected 16.</p>
</sec>
<sec id="s4-3">
<title>DeteX detects substitutions and complex mutations more accurately</title>
<p><xref ref-type="table" rid="T3">Table 3</xref> and <xref ref-type="fig" rid="F3">Figure 3</xref> clearly show how each software detected substitution and complex mutations in the TCGA samples. From these results, it can be seen that for substitution mutations such as variant 1 in <xref ref-type="table" rid="T3">Table 3</xref> (<xref ref-type="fig" rid="F3">Figure 3A</xref>), two adjacent bases are mutated in the same reads, and the standard result is chr10:28409253&#x2013;28409254, CA-&#x3e;AG, which was detected as two adjacent mutations by VarScan and MuTect, but accurately detected by VarDict and DeteX. If two adjacent bases are mutated, but one of them is also mutated in the normal sample, it is not a substitution mutation. For example, the standard result of variant 2 in <xref ref-type="table" rid="T3">Table 3</xref> (<xref ref-type="fig" rid="F3">Figure 3B</xref>) is chr21:46074201, C-&#x3e;T. VarDict detected it as CA-&#x3e;TG, whereas the other software detected it correctly. The above two cases indicate that VarScan, MuTect and VarDict have certain detection errors for mutations occurring at adjacent sites. For complex mutations, variant 3 in <xref ref-type="table" rid="T3">Table 3</xref> (<xref ref-type="fig" rid="F3">Figure 3C</xref>) is a deletion accompanied by an SNV. The standard result is chr8:145540703, GG-&#x3e;A. The results show that only DeteX could detect it correctly. VarDict detected it as two mutations. VarScan and Gatk detected a deletion only. MuTect did not detect the mutation. Variant 4 in <xref ref-type="table" rid="T3">Table 3</xref> (<xref ref-type="fig" rid="F3">Figure 3D</xref>) is similar to variant 2 except that the two mutated loci are not adjacent to each other, so the result is the same as for variant 2.</p>
<table-wrap id="T3" position="float">
<label>TABLE 3</label>
<caption>
<p>Four substitution and complex variations in TCGA glioma data.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th rowspan="2" align="center">Caller</th>
<th colspan="4" align="center">Variation I</th>
<th colspan="4" align="center">Variation II</th>
<th colspan="4" align="center">Variation III</th>
<th colspan="4" align="center">Variation IV</th>
</tr>
<tr>
<th align="left">Chr</th>
<th align="left">Pos</th>
<th align="left">Ref</th>
<th align="center">Var</th>
<th align="center">Chr</th>
<th align="center">Pos</th>
<th align="center">Ref</th>
<th align="center">Var</th>
<th align="center">Chr</th>
<th align="center">Pos</th>
<th align="center">Ref</th>
<th align="center">Var</th>
<th align="center">Chr</th>
<th align="center">Pos</th>
<th align="center">Ref</th>
<th align="center">Var</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td rowspan="2" align="center">VarScan</td>
<td align="center">chr10</td>
<td align="center">28409253</td>
<td align="center">C</td>
<td align="center">A</td>
<td rowspan="2" align="center">chr21</td>
<td rowspan="2" align="center">46074201</td>
<td rowspan="2" align="center">C</td>
<td rowspan="2" align="center">T</td>
<td rowspan="2" align="center">chr8</td>
<td rowspan="2" align="center">145540703</td>
<td rowspan="2" align="center">G</td>
<td rowspan="2" align="center">&#x2014;</td>
<td rowspan="2" align="center">chr8</td>
<td rowspan="2" align="center">25287394</td>
<td rowspan="2" align="center">G</td>
<td rowspan="2" align="center">A</td>
</tr>
<tr>
<td align="center">chr10</td>
<td align="center">28409254</td>
<td align="center">A</td>
<td align="center">G</td>
</tr>
<tr>
<td rowspan="2" align="center">Mutect</td>
<td align="center">chr10</td>
<td align="center">28409253</td>
<td align="center">C</td>
<td align="center">A</td>
<td rowspan="2" align="center">chr21</td>
<td rowspan="2" align="center">46074201</td>
<td rowspan="2" align="center">C</td>
<td rowspan="2" align="center">T</td>
<td rowspan="2" align="center">&#x2014;</td>
<td rowspan="2" align="center">&#x2014;</td>
<td rowspan="2" align="center">&#x2014;</td>
<td rowspan="2" align="center">&#x2014;</td>
<td rowspan="2" align="center">chr8</td>
<td rowspan="2" align="center">25287394</td>
<td rowspan="2" align="center">G</td>
<td rowspan="2" align="center">A</td>
</tr>
<tr>
<td align="center">chr10</td>
<td align="center">28409254</td>
<td align="center">A</td>
<td align="center">G</td>
</tr>
<tr>
<td rowspan="2" align="center">VarDict</td>
<td rowspan="2" align="center">chr10</td>
<td rowspan="2" align="center">28409253</td>
<td rowspan="2" align="center">CA</td>
<td rowspan="2" align="center">AG</td>
<td rowspan="2" align="center">chr21</td>
<td rowspan="2" align="center">46074201</td>
<td rowspan="2" align="center">CA</td>
<td rowspan="2" align="center">TG</td>
<td align="center">chr8</td>
<td align="center">145540703</td>
<td align="center">G</td>
<td align="center">&#x2014;</td>
<td rowspan="2" align="center">chr8</td>
<td rowspan="2" align="center">25287394</td>
<td rowspan="2" align="center">GTGT</td>
<td rowspan="2" align="center">ATGC</td>
</tr>
<tr>
<td align="center">chr8</td>
<td align="center">145540704</td>
<td align="center">G</td>
<td align="center">A</td>
</tr>
<tr>
<td align="center">Gatk</td>
<td align="center">&#x2014;</td>
<td align="center">&#x2014;</td>
<td align="center">&#x2014;</td>
<td align="center">&#x2014;</td>
<td align="center">&#x2014;</td>
<td align="center">&#x2014;</td>
<td align="center">&#x2014;</td>
<td align="center">&#x2014;</td>
<td align="center">chr8</td>
<td align="center">145540703</td>
<td align="center">G</td>
<td align="center">&#x2014;</td>
<td align="center">&#x2014;</td>
<td align="center">&#x2014;</td>
<td align="center">&#x2014;</td>
<td align="center">&#x2014;</td>
</tr>
<tr>
<td align="center">DeteX</td>
<td align="center">chr10</td>
<td align="center">28409253</td>
<td align="center">CA</td>
<td align="center">AG</td>
<td align="center">chr21</td>
<td align="center">46074201</td>
<td align="center">C</td>
<td align="center">T</td>
<td align="center">chr8</td>
<td align="center">145540703</td>
<td align="center">GG</td>
<td align="center">A</td>
<td align="center">chr8</td>
<td align="center">25287394</td>
<td align="center">G</td>
<td align="center">A</td>
</tr>
</tbody>
</table>
</table-wrap>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>The distribution of reads for four variants in the TCGA sample (obtained by samtools tview program). <bold>(A)</bold> The distribution of reads for CA-&#x003e;AG in the TCGA sample. <bold>(B)</bold> The distribution of reads for C-&#x003e;T in the TCGA sample. <bold>(C)</bold> The distribution of reads for GG-&#x003e;A in the TCGA sample. <bold>(D)</bold> The distribution of reads for G-&#x003e;A in the TCGA sample.</p>
</caption>
<graphic xlink:href="fgene-13-1118183-g003.tif"/>
</fig>
<p>The results of NCCL-EQA (<xref ref-type="table" rid="T4">Table 4</xref>) once again confirmed the excellent detection ability of DeteX for complex mutations and substitution mutations. Only DeteX detected all of them. VarDict could accurately detect substitution mutations, while complex mutations could only be partially detected or not detected at all. The other two software tools could only partially detect or failed to detect these two kinds of mutations. The detection ability of DeteX is excellent, especially for complex mutations which contain SNVs and gaps. For example, four events, AAdel, TTmap, AAGAGAAGCAdel, and A-&#x3e;C, occurred simultaneously in the mutation chr7:55242467&#x2013;55242481 AATTAAGAGAAGCAA-&#x3e;TTC (<xref ref-type="fig" rid="F4">Figure 4</xref>). It is difficult to output it accurately without considering the adjacent InDel and SNV together.</p>
<table-wrap id="T4" position="float">
<label>TABLE 4</label>
<caption>
<p>The results of InDel, substitution, and complex mutations in two NCCL-EQA samples by each software.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">SampleID</th>
<th align="center">Num</th>
<th align="center">Chr</th>
<th align="center">Start</th>
<th align="center">End</th>
<th align="center">Ref</th>
<th align="center">Var</th>
<th align="center">Gene</th>
<th align="center">Type</th>
<th align="center">VAF (%)</th>
<th align="center">VarScan</th>
<th align="center">GATK</th>
<th align="center">DeteX</th>
<th align="center">VarDict</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td rowspan="20" align="center">201711</td>
<td align="center">1</td>
<td align="center">1</td>
<td align="center">115256520</td>
<td align="center">&#x2014;</td>
<td align="center">&#x2014;</td>
<td align="center">CCCGGCAC</td>
<td align="center">NRAS</td>
<td align="center">Insertion</td>
<td align="center">2</td>
<td align="center">&#x221a;</td>
<td align="center">&#x221a;</td>
<td align="center">&#x221a;</td>
<td align="center">&#x221a;</td>
</tr>
<tr>
<td align="center">2</td>
<td align="center">2</td>
<td align="center">29445271</td>
<td align="center">&#x2014;</td>
<td align="center">&#x2014;</td>
<td align="center">CGT</td>
<td align="center">ALK</td>
<td align="center">Insertion</td>
<td align="center">15</td>
<td align="center">&#x221a;</td>
<td align="center">&#x221a;</td>
<td align="center">&#x221a;</td>
<td align="center">&#x221a;</td>
</tr>
<tr>
<td align="center">3</td>
<td align="center">3</td>
<td align="center">37089070</td>
<td align="center">37089072</td>
<td align="center">ACA</td>
<td align="center">&#x2014;</td>
<td align="center">MLH1</td>
<td align="center">Deletion</td>
<td align="center">5</td>
<td align="center">&#x221a;</td>
<td align="center">&#x221a;</td>
<td align="center">&#x221a;</td>
<td align="center">&#x221a;</td>
</tr>
<tr>
<td align="center">4</td>
<td align="center">3</td>
<td align="center">41266107</td>
<td align="center">41266108</td>
<td align="center">TC</td>
<td align="center">AA</td>
<td align="center">CTNNB1</td>
<td align="center">Complex</td>
<td align="center">2</td>
<td align="center">&#xd7;</td>
<td align="center">&#xd7;</td>
<td align="center">&#x221a;</td>
<td align="center">&#x221a;</td>
</tr>
<tr>
<td align="center">5</td>
<td align="center">3</td>
<td align="center">178916946</td>
<td align="center">178916948</td>
<td align="center">GAT</td>
<td align="center">&#x2014;</td>
<td align="center">PIK3CA</td>
<td align="center">Deletion</td>
<td align="center">7</td>
<td align="center">&#x221a;</td>
<td align="center">&#x2014;</td>
<td align="center">&#x221a;</td>
<td align="center">&#x221a;</td>
</tr>
<tr>
<td align="center">6</td>
<td align="center">4</td>
<td align="center">55152092</td>
<td align="center">55152100</td>
<td align="center">GACATCATG</td>
<td align="center">&#x2014;</td>
<td align="center">PDGFRA</td>
<td align="center">Deletion</td>
<td align="center">6</td>
<td align="center">&#x2014;</td>
<td align="center">&#x221a;</td>
<td align="center">&#x221a;</td>
<td align="center">&#x221a;</td>
</tr>
<tr>
<td align="center">7</td>
<td align="center">4</td>
<td align="center">106180857</td>
<td align="center">106180857</td>
<td align="center">C</td>
<td align="center">&#x2014;</td>
<td align="center">TET2</td>
<td align="center">Deletion</td>
<td align="center">10</td>
<td align="center">&#x221a;</td>
<td align="center">&#x221a;</td>
<td align="center">&#x221a;</td>
<td align="center">&#x221a;</td>
</tr>
<tr>
<td align="center">8</td>
<td align="center">5</td>
<td align="center">112175210</td>
<td align="center">&#x2014;</td>
<td align="center">&#x2014;</td>
<td align="center">A</td>
<td align="center">APC</td>
<td align="center">Insertion</td>
<td align="center">6</td>
<td align="center">&#x221a;</td>
<td align="center">&#x221a;</td>
<td align="center">&#x221a;</td>
<td align="center">&#x221a;</td>
</tr>
<tr>
<td align="center">9</td>
<td align="center">7</td>
<td align="center">55242467</td>
<td align="center">55242481</td>
<td align="center">AATTAAGAGAAGCAA</td>
<td align="center">TTC</td>
<td align="center">EGFR</td>
<td align="center">Complex</td>
<td align="center">3</td>
<td align="center">&#xd7;</td>
<td align="center">&#x2014;</td>
<td align="center">&#x221a;</td>
<td align="center">&#xd7;</td>
</tr>
<tr>
<td align="center">10</td>
<td align="center">7</td>
<td align="center">140453132</td>
<td align="center">140453136</td>
<td align="center">TTTCA</td>
<td align="center">AT</td>
<td align="center">BRAF</td>
<td align="center">Complex</td>
<td align="center">5</td>
<td align="center">&#x2014;</td>
<td align="center">&#xd7;</td>
<td align="center">&#x221a;</td>
<td align="center">&#xd7;</td>
</tr>
<tr>
<td align="center">11</td>
<td align="center">9</td>
<td align="center">5070022</td>
<td align="center">5070027</td>
<td align="center">TCACAA</td>
<td align="center">&#x2014;</td>
<td align="center">JAK2</td>
<td align="center">Deletion</td>
<td align="center">4</td>
<td align="center">&#x221a;</td>
<td align="center">&#x221a;</td>
<td align="center">&#x221a;</td>
<td align="center">&#x221a;</td>
</tr>
<tr>
<td align="center">12</td>
<td align="center">10</td>
<td align="center">89692837</td>
<td align="center">89692842</td>
<td align="center">TCTTGA</td>
<td align="center">&#x2014;</td>
<td align="center">PTEN</td>
<td align="center">Deletion</td>
<td align="center">2</td>
<td align="center">&#x221a;</td>
<td align="center">&#x2014;</td>
<td align="center">&#x221a;</td>
<td align="center">&#x221a;</td>
</tr>
<tr>
<td align="center">13</td>
<td align="center">10</td>
<td align="center">123247618</td>
<td align="center">123247620</td>
<td align="center">GAT</td>
<td align="center">&#x2014;</td>
<td align="center">FGFR2</td>
<td align="center">Deletion</td>
<td align="center">3</td>
<td align="center">&#x221a;</td>
<td align="center">&#x221a;</td>
<td align="center">&#x221a;</td>
<td align="center">&#x221a;</td>
</tr>
<tr>
<td align="center">14</td>
<td align="center">11</td>
<td align="center">32417910</td>
<td align="center">&#x2014;</td>
<td align="center">&#x2014;</td>
<td align="center">ACCGT</td>
<td align="center">WT1</td>
<td align="center">Insertion</td>
<td align="center">5</td>
<td align="center">&#x221a;</td>
<td align="center">&#x221a;</td>
<td align="center">&#x221a;</td>
<td align="center">&#x221a;</td>
</tr>
<tr>
<td align="center">15</td>
<td align="center">11</td>
<td align="center">64575435</td>
<td align="center">&#x2014;</td>
<td align="center">&#x2014;</td>
<td align="center">CTGT</td>
<td align="center">MEN1</td>
<td align="center">Insertion</td>
<td align="center">5</td>
<td align="center">&#x221a;</td>
<td align="center">&#x221a;</td>
<td align="center">&#x221a;</td>
<td align="center">&#x221a;</td>
</tr>
<tr>
<td align="center">16</td>
<td align="center">11</td>
<td align="center">108170483</td>
<td align="center">108170487</td>
<td align="center">TCTCT</td>
<td align="center">&#x2014;</td>
<td align="center">ATM</td>
<td align="center">Deletion</td>
<td align="center">8</td>
<td align="center">&#x221a;</td>
<td align="center">&#x221a;</td>
<td align="center">&#x221a;</td>
<td align="center">&#x221a;</td>
</tr>
<tr>
<td align="center">17</td>
<td align="center">12</td>
<td align="center">25380259</td>
<td align="center">&#x2014;</td>
<td align="center">&#x2014;</td>
<td align="center">TGCACTGTACTCCTC</td>
<td align="center">KRAS</td>
<td align="center">Insertion</td>
<td align="center">3</td>
<td align="center">&#x221a;</td>
<td align="center">&#x221a;</td>
<td align="center">&#x221a;</td>
<td align="center">&#x221a;</td>
</tr>
<tr>
<td align="center">18</td>
<td align="center">13</td>
<td align="center">28608104</td>
<td align="center">&#x2014;</td>
<td align="center">&#x2014;</td>
<td align="center">AAGCACCTGATCCTAG TACCT</td>
<td align="center">FLT3</td>
<td align="center">Insertion</td>
<td align="center">7</td>
<td align="center">&#x221a;</td>
<td align="center">&#x221a;</td>
<td align="center">&#x221a;</td>
<td align="center">&#x221a;</td>
</tr>
<tr>
<td align="center">19</td>
<td align="center">17</td>
<td align="center">7577105</td>
<td align="center">&#x2014;</td>
<td align="center">&#x2014;</td>
<td align="center">GA</td>
<td align="center">TP53</td>
<td align="center">Insertion</td>
<td align="center">13</td>
<td align="center">&#x221a;</td>
<td align="center">&#x221a;</td>
<td align="center">&#x221a;</td>
<td align="center">&#x221a;</td>
</tr>
<tr>
<td align="center">20</td>
<td align="center">22</td>
<td align="center">30032780</td>
<td align="center">30032801</td>
<td align="center">GGA&#x200b;CTC&#x200b;TGG&#x200b;GGC&#x200b;TCC&#x200b;GAG&#x200b;AAA&#x200b;C</td>
<td align="center">&#x2014;</td>
<td align="center">NF2</td>
<td align="center">Deletion</td>
<td align="center">3</td>
<td align="center">&#x2014;</td>
<td align="center">&#x2014;</td>
<td align="center">&#x221a;</td>
<td align="center">&#x221a;</td>
</tr>
<tr>
<td rowspan="5" align="center">201911</td>
<td align="center">1</td>
<td align="center">2</td>
<td align="center">148683693</td>
<td align="center">148683693</td>
<td align="center">A</td>
<td align="center">&#x2014;</td>
<td align="center">ACVR2A</td>
<td align="center">Deletion</td>
<td align="center">16</td>
<td align="center">&#x221a;</td>
<td align="center">&#x221a;</td>
<td align="center">&#x221a;</td>
<td align="center">&#x221a;</td>
</tr>
<tr>
<td align="center">2</td>
<td align="center">2</td>
<td align="center">209113112</td>
<td align="center">209113113</td>
<td align="center">CG</td>
<td align="center">GA</td>
<td align="center">IDH1</td>
<td align="center">Complex</td>
<td align="center">14</td>
<td align="center">&#xd7;</td>
<td align="center">&#xd7;</td>
<td align="center">&#x221a;</td>
<td align="center">&#x221a;</td>
</tr>
<tr>
<td align="center">3</td>
<td align="center">7</td>
<td align="center">55242470</td>
<td align="center">55242495</td>
<td align="center">TAA&#x200b;GAG&#x200b;AAG&#x200b;CAA&#x200b;CAT&#x200b;CTC&#x200b;CGA&#x200b;AAG&#x200b;CC</td>
<td align="center">CGAAAGG</td>
<td align="center">EGFR</td>
<td align="center">Complex</td>
<td align="center">15</td>
<td align="center">&#xd7;</td>
<td align="center">&#xd7;</td>
<td align="center">&#x221a;</td>
<td align="center">&#xd7;</td>
</tr>
<tr>
<td align="center">4</td>
<td align="center">7</td>
<td align="center">140453135</td>
<td align="center">140453136</td>
<td align="center">CA</td>
<td align="center">GT</td>
<td align="center">BRAF</td>
<td align="center">Complex</td>
<td align="center">24</td>
<td align="center">&#xd7;</td>
<td align="center">&#xd7;</td>
<td align="center">&#x221a;</td>
<td align="center">&#x221a;</td>
</tr>
<tr>
<td align="center">5</td>
<td align="center">20</td>
<td align="center">31022449</td>
<td align="center">&#x2014;</td>
<td align="center">&#x2014;</td>
<td align="center">G</td>
<td align="center">ASXL1</td>
<td align="center">Insertion</td>
<td align="center">27</td>
<td align="center">&#x2014;</td>
<td align="center">&#x221a;</td>
<td align="center">&#x221a;</td>
<td align="center">&#x221a;</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>&#x201c;&#x221a;&#x201d; indicates a correct result, &#x201c;&#x2014;&#x201d; indicates no result, and &#x201c;&#xd7;&#x201d; indicates a partial result.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>The read mapping of one complex mutation. The sites in the red box represent positions 55242466 to 55242481 of chr7. &#x201c;&#x3d;&#x201d; indicates that it is identical to the reference base in the positive strand. &#x201c;&#x7c;&#x201d; indicates that it is identical to the reference base in the negative strand. &#x201c;c&#x201d; indicates negative strand mismatch. &#x201c;C&#x201d; indicates positive strand mismatch. Blank indicates deletion.</p>
</caption>
<graphic xlink:href="fgene-13-1118183-g004.tif"/>
</fig>
</sec>
<sec id="s4-4">
<title>Single sample testing</title>
<p>We analyzed the SNV and InDel results of DeteX and the HaplotypeCaller (<xref ref-type="bibr" rid="B6">DePristo et al., 2011</xref>) program in Gatk for the tumor sample from the 2017 NCCL-EQA external quality assessment (<xref ref-type="table" rid="T5">Table 5</xref>). DeteX detected all the 94 variants in the standard variant set, but only 29 variants were detected by HaplotypeCaller, most of which had a variant frequency of more than 10%. This indicates that DeteX is more sensitive than HaplotypeCaller in detecting somatic variants in a single sample.</p>
<table-wrap id="T5" position="float">
<label>TABLE 5</label>
<caption>
<p>The results of DeteX and HaplotypeCaller to detect variants in tumor sample from the 2017 NCCL-EQA.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Chr</th>
<th align="left">Start</th>
<th align="left">End</th>
<th align="left">Ref</th>
<th align="left">Obs</th>
<th align="left">Gene_Symbol</th>
<th align="left">MType</th>
<th align="left">Freq (%)</th>
<th align="left">Gatk</th>
<th align="left">DeteX</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">chr1</td>
<td align="left">10412784</td>
<td align="left">10412785</td>
<td align="left">A</td>
<td align="left">G</td>
<td align="left">KIF1B</td>
<td align="left">SNV</td>
<td align="left">3.08</td>
<td align="left">&#x2014;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr1</td>
<td align="left">11190665</td>
<td align="left">11190666</td>
<td align="left">C</td>
<td align="left">T</td>
<td align="left">MTOR</td>
<td align="left">SNV</td>
<td align="left">5.94</td>
<td align="left">&#x2014;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr1</td>
<td align="left">43812575</td>
<td align="left">43812576</td>
<td align="left">T</td>
<td align="left">C</td>
<td align="left">MPL</td>
<td align="left">SNV</td>
<td align="left">6.8</td>
<td align="left">&#x2014;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr1</td>
<td align="left">115256520</td>
<td align="left">&#x2014;</td>
<td align="left">&#x2014;</td>
<td align="left">AGGCCAGG CCCGGCAC TG</td>
<td align="left">NRAS</td>
<td align="left">Insertion</td>
<td align="left">1.7</td>
<td align="left">&#x2014;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr2</td>
<td align="left">29445271</td>
<td align="left">&#x2014;</td>
<td align="left">&#x2014;</td>
<td align="left">CGT</td>
<td align="left">ALK</td>
<td align="left">Insertion</td>
<td align="left">13.72</td>
<td align="left">&#x221a;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr2</td>
<td align="left">29519890</td>
<td align="left">29519891</td>
<td align="left">C</td>
<td align="left">G</td>
<td align="left">ALK</td>
<td align="left">SNV</td>
<td align="left">6.16</td>
<td align="left">&#x2014;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr2</td>
<td align="left">29541234</td>
<td align="left">29541235</td>
<td align="left">C</td>
<td align="left">T</td>
<td align="left">ALK</td>
<td align="left">SNV</td>
<td align="left">6.12</td>
<td align="left">&#x2014;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr2</td>
<td align="left">209108165</td>
<td align="left">209108166</td>
<td align="left">T</td>
<td align="left">C</td>
<td align="left">IDH1</td>
<td align="left">SNV</td>
<td align="left">3.29</td>
<td align="left">&#x2014;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr2</td>
<td align="left">212566836</td>
<td align="left">212566837</td>
<td align="left">C</td>
<td align="left">A</td>
<td align="left">ERBB4</td>
<td align="left">SNV</td>
<td align="left">3.48</td>
<td align="left">&#x2014;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr3</td>
<td align="left">37089070</td>
<td align="left">37089072</td>
<td align="left">ACA</td>
<td align="left">&#x2014;</td>
<td align="left">MLH1</td>
<td align="left">Deletion</td>
<td align="left">4.38</td>
<td align="left">&#x2014;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr3</td>
<td align="left">41266107</td>
<td align="left">41266108</td>
<td align="left">TC</td>
<td align="left">AA</td>
<td align="left">CTNNB1</td>
<td align="left">Complex</td>
<td align="left">1.82</td>
<td align="left">&#x2014;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr3</td>
<td align="left">47164278</td>
<td align="left">47164279</td>
<td align="left">G</td>
<td align="left">A</td>
<td align="left">SETD2</td>
<td align="left">SNV</td>
<td align="left">7.49</td>
<td align="left">&#x221a;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr3</td>
<td align="left">142224005</td>
<td align="left">142224006</td>
<td align="left">C</td>
<td align="left">A</td>
<td align="left">ATR</td>
<td align="left">SNV</td>
<td align="left">9.17</td>
<td align="left">&#x221a;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr3</td>
<td align="left">142232475</td>
<td align="left">142232476</td>
<td align="left">C</td>
<td align="left">T</td>
<td align="left">ATR</td>
<td align="left">SNV</td>
<td align="left">3.99</td>
<td align="left">&#x2014;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr3</td>
<td align="left">178916946</td>
<td align="left">178916948</td>
<td align="left">GAT</td>
<td align="left">&#x2014;</td>
<td align="left">PIK3CA</td>
<td align="left">Deletion</td>
<td align="left">7.59</td>
<td align="left">&#x221a;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr3</td>
<td align="left">187447267</td>
<td align="left">187447268</td>
<td align="left">C</td>
<td align="left">T</td>
<td align="left">BCL6</td>
<td align="left">SNV</td>
<td align="left">14.39</td>
<td align="left">&#x221a;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr4</td>
<td align="left">1805528</td>
<td align="left">1805529</td>
<td align="left">T</td>
<td align="left">G</td>
<td align="left">FGFR3</td>
<td align="left">SNV</td>
<td align="left">5.01</td>
<td align="left">&#x2014;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr4</td>
<td align="left">1808929</td>
<td align="left">1808930</td>
<td align="left">G</td>
<td align="left">A</td>
<td align="left">FGFR3</td>
<td align="left">SNV</td>
<td align="left">3.79</td>
<td align="left">&#x2014;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr4</td>
<td align="left">55131212</td>
<td align="left">55131213</td>
<td align="left">A</td>
<td align="left">T</td>
<td align="left">PDGFRA</td>
<td align="left">SNV</td>
<td align="left">17.99</td>
<td align="left">&#x221a;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr4</td>
<td align="left">55152092</td>
<td align="left">55152100</td>
<td align="left">GACATCATG</td>
<td align="left">&#x2014;</td>
<td align="left">PDGFRA</td>
<td align="left">Deletion</td>
<td align="left">5.61</td>
<td align="left">&#x2014;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr4</td>
<td align="left">68610385</td>
<td align="left">68610386</td>
<td align="left">G</td>
<td align="left">T</td>
<td align="left">GNRHR</td>
<td align="left">SNV</td>
<td align="left">13</td>
<td align="left">&#x221a;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr4</td>
<td align="left">106180857</td>
<td align="left">106180857</td>
<td align="left">C</td>
<td align="left">&#x2014;</td>
<td align="left">TET2</td>
<td align="left">Deletion</td>
<td align="left">9.39</td>
<td align="left">&#x221a;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr4</td>
<td align="left">106190823</td>
<td align="left">106190824</td>
<td align="left">T</td>
<td align="left">C</td>
<td align="left">TET2</td>
<td align="left">SNV</td>
<td align="left">11.83</td>
<td align="left">&#x221a;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr4</td>
<td align="left">106197175</td>
<td align="left">106197176</td>
<td align="left">G</td>
<td align="left">A</td>
<td align="left">TET2</td>
<td align="left">SNV</td>
<td align="left">11.3</td>
<td align="left">&#x221a;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr5</td>
<td align="left">112175210</td>
<td align="left">&#x2014;</td>
<td align="left">&#x2014;</td>
<td align="left">A</td>
<td align="left">APC</td>
<td align="left">Insertion</td>
<td align="left">5.7</td>
<td align="left">&#x2014;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr5</td>
<td align="left">112176361</td>
<td align="left">112176362</td>
<td align="left">C</td>
<td align="left">T</td>
<td align="left">APC</td>
<td align="left">SNV</td>
<td align="left">5.23</td>
<td align="left">&#x2014;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr5</td>
<td align="left">149501448</td>
<td align="left">149501449</td>
<td align="left">G</td>
<td align="left">A</td>
<td align="left">PDGFRB</td>
<td align="left">SNV</td>
<td align="left">4.31</td>
<td align="left">&#x2014;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr5</td>
<td align="left">176517792</td>
<td align="left">176517793</td>
<td align="left">G</td>
<td align="left">A</td>
<td align="left">FGFR4</td>
<td align="left">SNV</td>
<td align="left">6.2</td>
<td align="left">&#x2014;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr5</td>
<td align="left">180057074</td>
<td align="left">180057075</td>
<td align="left">C</td>
<td align="left">T</td>
<td align="left">FLT4</td>
<td align="left">SNV</td>
<td align="left">13.05</td>
<td align="left">&#x221a;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr6</td>
<td align="left">30862397</td>
<td align="left">30862398</td>
<td align="left">G</td>
<td align="left">A</td>
<td align="left">DDR1</td>
<td align="left">SNV</td>
<td align="left">2.17</td>
<td align="left">&#x2014;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr6</td>
<td align="left">117700257</td>
<td align="left">117700258</td>
<td align="left">C</td>
<td align="left">T</td>
<td align="left">ROS1</td>
<td align="left">SNV</td>
<td align="left">5.66</td>
<td align="left">&#x2014;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr6</td>
<td align="left">117714444</td>
<td align="left">117714445</td>
<td align="left">C</td>
<td align="left">T</td>
<td align="left">ROS1</td>
<td align="left">SNV</td>
<td align="left">11.47</td>
<td align="left">&#x221a;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr7</td>
<td align="left">55242467</td>
<td align="left">55242481</td>
<td align="left">AATTAAGAGAAGCAA</td>
<td align="left">TTC</td>
<td align="left">EGFR</td>
<td align="left">Complex</td>
<td align="left">1.2</td>
<td align="left">&#x2014;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr7</td>
<td align="left">55249037</td>
<td align="left">55249038</td>
<td align="left">G</td>
<td align="left">A</td>
<td align="left">EGFR</td>
<td align="left">SNV</td>
<td align="left">2.46</td>
<td align="left">&#x2014;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr7</td>
<td align="left">55249091</td>
<td align="left">55249092</td>
<td align="left">G</td>
<td align="left">C</td>
<td align="left">EGFR</td>
<td align="left">SNV</td>
<td align="left">3.53</td>
<td align="left">&#x2014;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr7</td>
<td align="left">55259514</td>
<td align="left">55259515</td>
<td align="left">T</td>
<td align="left">G</td>
<td align="left">EGFR</td>
<td align="left">SNV</td>
<td align="left">2.16</td>
<td align="left">&#x2014;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr7</td>
<td align="left">116414979</td>
<td align="left">116414980</td>
<td align="left">A</td>
<td align="left">T</td>
<td align="left">MET</td>
<td align="left">SNV</td>
<td align="left">7.79</td>
<td align="left">&#x2014;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr7</td>
<td align="left">140453132</td>
<td align="left">140453136</td>
<td align="left">TTTCA</td>
<td align="left">AT</td>
<td align="left">BRAF</td>
<td align="left">Complex</td>
<td align="left">4.93</td>
<td align="left">&#x2014;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr7</td>
<td align="left">140494237</td>
<td align="left">140494238</td>
<td align="left">G</td>
<td align="left">A</td>
<td align="left">BRAF</td>
<td align="left">SNV</td>
<td align="left">1.47</td>
<td align="left">&#x2014;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr8</td>
<td align="left">92982976</td>
<td align="left">92982977</td>
<td align="left">C</td>
<td align="left">T</td>
<td align="left">RUNX1T1</td>
<td align="left">SNV</td>
<td align="left">21.35</td>
<td align="left">&#x221a;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr8</td>
<td align="left">128753093</td>
<td align="left">128753094</td>
<td align="left">C</td>
<td align="left">A</td>
<td align="left">MYC</td>
<td align="left">SNV</td>
<td align="left">4.48</td>
<td align="left">&#x2014;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr9</td>
<td align="left">5070022</td>
<td align="left">5070027</td>
<td align="left">TCACAA</td>
<td align="left">&#x2014;</td>
<td align="left">JAK2</td>
<td align="left">Deletion</td>
<td align="left">4.07</td>
<td align="left">&#x2014;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr9</td>
<td align="left">8500789</td>
<td align="left">8500790</td>
<td align="left">C</td>
<td align="left">T</td>
<td align="left">PTPRD</td>
<td align="left">SNV</td>
<td align="left">4.07</td>
<td align="left">&#x2014;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr9</td>
<td align="left">21970965</td>
<td align="left">21970966</td>
<td align="left">C</td>
<td align="left">A</td>
<td align="left">CDKN2A</td>
<td align="left">SNV</td>
<td align="left">9.2</td>
<td align="left">&#x221a;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr9</td>
<td align="left">133760366</td>
<td align="left">133760367</td>
<td align="left">C</td>
<td align="left">A</td>
<td align="left">ABL1</td>
<td align="left">SNV</td>
<td align="left">3.07</td>
<td align="left">&#x2014;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr9</td>
<td align="left">133760951</td>
<td align="left">133760952</td>
<td align="left">A</td>
<td align="left">G</td>
<td align="left">ABL1</td>
<td align="left">SNV</td>
<td align="left">2.16</td>
<td align="left">&#x2014;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr9</td>
<td align="left">139391525</td>
<td align="left">139391526</td>
<td align="left">G</td>
<td align="left">A</td>
<td align="left">NOTCH1</td>
<td align="left">SNV</td>
<td align="left">6.15</td>
<td align="left">&#x2014;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr9</td>
<td align="left">139396741</td>
<td align="left">139396742</td>
<td align="left">T</td>
<td align="left">C</td>
<td align="left">NOTCH1</td>
<td align="left">SNV</td>
<td align="left">10.74</td>
<td align="left">&#x221a;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr9</td>
<td align="left">139409810</td>
<td align="left">139409811</td>
<td align="left">G</td>
<td align="left">T</td>
<td align="left">NOTCH1</td>
<td align="left">SNV</td>
<td align="left">8.93</td>
<td align="left">&#x2014;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr10</td>
<td align="left">43596167</td>
<td align="left">43596168</td>
<td align="left">G</td>
<td align="left">A</td>
<td align="left">RET</td>
<td align="left">SNV</td>
<td align="left">4.26</td>
<td align="left">&#x2014;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr10</td>
<td align="left">63851815</td>
<td align="left">63851816</td>
<td align="left">C</td>
<td align="left">T</td>
<td align="left">ARID5B</td>
<td align="left">SNV</td>
<td align="left">3.65</td>
<td align="left">&#x2014;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr10</td>
<td align="left">89692837</td>
<td align="left">89692842</td>
<td align="left">TCTTGA</td>
<td align="left">&#x2014;</td>
<td align="left">PTEN</td>
<td align="left">Deletion</td>
<td align="left">2.46</td>
<td align="left">&#x2014;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr10</td>
<td align="left">89717612</td>
<td align="left">89717613</td>
<td align="left">C</td>
<td align="left">A</td>
<td align="left">PTEN</td>
<td align="left">SNV</td>
<td align="left">2.61</td>
<td align="left">&#x2014;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr10</td>
<td align="left">123247618</td>
<td align="left">123247620</td>
<td align="left">GAT</td>
<td align="left">&#x2014;</td>
<td align="left">FGFR2</td>
<td align="left">Deletion</td>
<td align="left">2.67</td>
<td align="left">&#x2014;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr11</td>
<td align="left">32417910</td>
<td align="left">&#x2014;</td>
<td align="left">&#x2014;</td>
<td align="left">ACCGT</td>
<td align="left">WT1</td>
<td align="left">Insertion</td>
<td align="left">5.26</td>
<td align="left">&#x2014;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr11</td>
<td align="left">32439125</td>
<td align="left">32439126</td>
<td align="left">T</td>
<td align="left">G</td>
<td align="left">WT1</td>
<td align="left">SNV</td>
<td align="left">5.93</td>
<td align="left">&#x2014;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr11</td>
<td align="left">64573739</td>
<td align="left">64573740</td>
<td align="left">A</td>
<td align="left">G</td>
<td align="left">MEN1</td>
<td align="left">SNV</td>
<td align="left">4.52</td>
<td align="left">&#x2014;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr11</td>
<td align="left">64575435</td>
<td align="left">&#x2014;</td>
<td align="left">&#x2014;</td>
<td align="left">CTGT</td>
<td align="left">MEN1</td>
<td align="left">Insertion</td>
<td align="left">3.63</td>
<td align="left">&#x2014;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr11</td>
<td align="left">108114802</td>
<td align="left">108114803</td>
<td align="left">C</td>
<td align="left">G</td>
<td align="left">ATM</td>
<td align="left">SNV</td>
<td align="left">5.82</td>
<td align="left">&#x2014;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr11</td>
<td align="left">108170483</td>
<td align="left">108170487</td>
<td align="left">TCTCT</td>
<td align="left">&#x2014;</td>
<td align="left">ATM</td>
<td align="left">Deletion</td>
<td align="left">7.41</td>
<td align="left">&#x2014;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr11</td>
<td align="left">108206581</td>
<td align="left">108206582</td>
<td align="left">A</td>
<td align="left">T</td>
<td align="left">ATM</td>
<td align="left">SNV</td>
<td align="left">2.45</td>
<td align="left">&#x2014;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr12</td>
<td align="left">6704522</td>
<td align="left">6704523</td>
<td align="left">G</td>
<td align="left">A</td>
<td align="left">CHD4</td>
<td align="left">SNV</td>
<td align="left">1.47</td>
<td align="left">&#x2014;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr12</td>
<td align="left">25378590</td>
<td align="left">25378591</td>
<td align="left">C</td>
<td align="left">T</td>
<td align="left">KRAS</td>
<td align="left">SNV</td>
<td align="left">11.68</td>
<td align="left">&#x221a;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr12</td>
<td align="left">25380259</td>
<td align="left">&#x2014;</td>
<td align="left">&#x2014;</td>
<td align="left">TGCACTGTACTCCTC</td>
<td align="left">KRAS</td>
<td align="left">Insertion</td>
<td align="left">2.53</td>
<td align="left">&#x2014;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr12</td>
<td align="left">58144504</td>
<td align="left">58144505</td>
<td align="left">G</td>
<td align="left">A</td>
<td align="left">CDK4</td>
<td align="left">SNV</td>
<td align="left">10.31</td>
<td align="left">&#x221a;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr12</td>
<td align="left">115109751</td>
<td align="left">115109752</td>
<td align="left">T</td>
<td align="left">A</td>
<td align="left">TBX3</td>
<td align="left">SNV</td>
<td align="left">9.39</td>
<td align="left">&#x221a;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr13</td>
<td align="left">28599039</td>
<td align="left">28599040</td>
<td align="left">C</td>
<td align="left">T</td>
<td align="left">FLT3</td>
<td align="left">SNV</td>
<td align="left">9.13</td>
<td align="left">&#x221a;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr13</td>
<td align="left">28608104</td>
<td align="left">&#x2014;</td>
<td align="left">&#x2014;</td>
<td align="left">AAGCACCTGATCCTAGTACCT</td>
<td align="left">FLT3</td>
<td align="left">Insertion</td>
<td align="left">4.49</td>
<td align="left">&#x221a;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr13</td>
<td align="left">28895609</td>
<td align="left">28895610</td>
<td align="left">C</td>
<td align="left">T</td>
<td align="left">FLT1</td>
<td align="left">SNV</td>
<td align="left">7.31</td>
<td align="left">&#x2014;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr13</td>
<td align="left">28897044</td>
<td align="left">28897045</td>
<td align="left">C</td>
<td align="left">T</td>
<td align="left">FLT1</td>
<td align="left">SNV</td>
<td align="left">7.75</td>
<td align="left">&#x2014;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr13</td>
<td align="left">48934220</td>
<td align="left">48934221</td>
<td align="left">T</td>
<td align="left">C</td>
<td align="left">RB1</td>
<td align="left">SNV</td>
<td align="left">10.32</td>
<td align="left">&#x221a;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr13</td>
<td align="left">48937054</td>
<td align="left">48937055</td>
<td align="left">G</td>
<td align="left">A</td>
<td align="left">RB1</td>
<td align="left">SNV</td>
<td align="left">12.29</td>
<td align="left">&#x221a;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr14</td>
<td align="left">75514889</td>
<td align="left">75514890</td>
<td align="left">G</td>
<td align="left">T</td>
<td align="left">MLH3</td>
<td align="left">SNV</td>
<td align="left">18.81</td>
<td align="left">&#x221a;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr14</td>
<td align="left">105246489</td>
<td align="left">105246490</td>
<td align="left">C</td>
<td align="left">T</td>
<td align="left">AKT1</td>
<td align="left">SNV</td>
<td align="left">2.07</td>
<td align="left">&#x2014;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr15</td>
<td align="left">67358628</td>
<td align="left">67358629</td>
<td align="left">G</td>
<td align="left">A</td>
<td align="left">SMAD3</td>
<td align="left">SNV</td>
<td align="left">16.51</td>
<td align="left">&#x221a;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr15</td>
<td align="left">67457294</td>
<td align="left">67457295</td>
<td align="left">G</td>
<td align="left">T</td>
<td align="left">SMAD3</td>
<td align="left">SNV</td>
<td align="left">17.73</td>
<td align="left">&#x221a;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr16</td>
<td align="left">2126135</td>
<td align="left">2126136</td>
<td align="left">C</td>
<td align="left">T</td>
<td align="left">TSC2</td>
<td align="left">SNV</td>
<td align="left">5.86</td>
<td align="left">&#x2014;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr16</td>
<td align="left">3789660</td>
<td align="left">3789661</td>
<td align="left">C</td>
<td align="left">T</td>
<td align="left">CREBBP</td>
<td align="left">SNV</td>
<td align="left">1.33</td>
<td align="left">&#x2014;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr16</td>
<td align="left">3828109</td>
<td align="left">3828110</td>
<td align="left">C</td>
<td align="left">T</td>
<td align="left">CREBBP</td>
<td align="left">SNV</td>
<td align="left">4.95</td>
<td align="left">&#x2014;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr17</td>
<td align="left">7574001</td>
<td align="left">7574002</td>
<td align="left">C</td>
<td align="left">T</td>
<td align="left">TP53</td>
<td align="left">SNV</td>
<td align="left">2.84</td>
<td align="left">&#x2014;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr17</td>
<td align="left">7577105</td>
<td align="left">&#x2014;</td>
<td align="left">&#x2014;</td>
<td align="left">GA</td>
<td align="left">TP53</td>
<td align="left">Insertion</td>
<td align="left">11.64</td>
<td align="left">&#x221a;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr17</td>
<td align="left">29562778</td>
<td align="left">29562779</td>
<td align="left">T</td>
<td align="left">C</td>
<td align="left">NF1</td>
<td align="left">SNV</td>
<td align="left">8.23</td>
<td align="left">&#x2014;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr17</td>
<td align="left">29576056</td>
<td align="left">29576057</td>
<td align="left">G</td>
<td align="left">A</td>
<td align="left">NF1</td>
<td align="left">SNV</td>
<td align="left">3.05</td>
<td align="left">&#x2014;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr17</td>
<td align="left">42327859</td>
<td align="left">42327860</td>
<td align="left">C</td>
<td align="left">T</td>
<td align="left">SLC4A1</td>
<td align="left">SNV</td>
<td align="left">4.22</td>
<td align="left">&#x2014;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr18</td>
<td align="left">48581260</td>
<td align="left">48581261</td>
<td align="left">C</td>
<td align="left">T</td>
<td align="left">SMAD4</td>
<td align="left">SNV</td>
<td align="left">8.08</td>
<td align="left">&#x2014;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr18</td>
<td align="left">60985848</td>
<td align="left">60985849</td>
<td align="left">C</td>
<td align="left">G</td>
<td align="left">BCL2</td>
<td align="left">SNV</td>
<td align="left">6.8</td>
<td align="left">&#x2014;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr19</td>
<td align="left">17943615</td>
<td align="left">17943616</td>
<td align="left">T</td>
<td align="left">A</td>
<td align="left">JAK3</td>
<td align="left">SNV</td>
<td align="left">7.35</td>
<td align="left">&#x2014;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr19</td>
<td align="left">17945516</td>
<td align="left">17945517</td>
<td align="left">T</td>
<td align="left">C</td>
<td align="left">JAK3</td>
<td align="left">SNV</td>
<td align="left">9.5</td>
<td align="left">&#x2014;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr22</td>
<td align="left">22142671</td>
<td align="left">22142672</td>
<td align="left">G</td>
<td align="left">T</td>
<td align="left">MAPK1</td>
<td align="left">SNV</td>
<td align="left">8.87</td>
<td align="left">&#x221a;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr22</td>
<td align="left">23523943</td>
<td align="left">23523944</td>
<td align="left">G</td>
<td align="left">A</td>
<td align="left">BCR</td>
<td align="left">SNV</td>
<td align="left">10.45</td>
<td align="left">&#x221a;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr22</td>
<td align="left">29695597</td>
<td align="left">29695598</td>
<td align="left">G</td>
<td align="left">A</td>
<td align="left">EWSR1</td>
<td align="left">SNV</td>
<td align="left">4.19</td>
<td align="left">&#x2014;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr22</td>
<td align="left">30032780</td>
<td align="left">30032801</td>
<td align="left">GGA&#x200b;CTC&#x200b;TGG&#x200b;GGC&#x200b;TCC&#x200b;GAG&#x200b;AAA&#x200b;C</td>
<td align="left">&#x2014;</td>
<td align="left">NF2</td>
<td align="left">Deletion</td>
<td align="left">1.17</td>
<td align="left">&#x2014;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chr22</td>
<td align="left">41564512</td>
<td align="left">41564513</td>
<td align="left">G</td>
<td align="left">A</td>
<td align="left">EP300</td>
<td align="left">SNV</td>
<td align="left">4.87</td>
<td align="left">&#x2014;</td>
<td align="left">&#x221a;</td>
</tr>
<tr>
<td align="left">chrX</td>
<td align="left">53440074</td>
<td align="left">53440075</td>
<td align="left">T</td>
<td align="left">G</td>
<td align="left">SMC1A</td>
<td align="left">SNV</td>
<td align="left">15.15</td>
<td align="left">&#x221a;</td>
<td align="left">&#x221a;</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>&#x201c;&#x221a;&#x201d; indicates a correct result and &#x201c;&#x2014;&#x201d; indicates no result.</p>
</fn>
</table-wrap-foot>
</table-wrap>
</sec>
<sec id="s4-5">
<title>Running speed</title>
<p>Good accuracy often comes at the expense of computing time. DeteX increases computing speed by splitting the targeted capture interval across the specified number of threads. We calculated the time taken by the four software tools to detect SNVs and InDels in 28 pairs of clinical samples (<xref ref-type="fig" rid="F5">Figure 5A</xref>). The samples in <xref ref-type="fig" rid="F5">Figure 5A</xref> are arranged from shortest to longest according to VarDict&#x2019;s running time. DeteX takes much less time than VarDict. Due to the high complexity of its algorithm, it takes slightly longer than VarScan and MuTect. DeteX processes the bam data linearly first and then it outputs the sequencing reads of the variant site in the case sample. Therefore, the more variations exist and the deeper the sequencing depth, the longer it takes. The number of variants is not only related to the sample itself, but also to the sequencing quality. The higher the quality, the faster the speed is. <xref ref-type="fig" rid="F5">Figure 5B</xref> shows the relationship between the running time of DeteX and the average sequencing depth. It can be seen that there is a strong linear relationship between them, and the correlation coefficient reached 0.8.</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>Computational performance of DeteX. <bold>(A)</bold> Time consumption of four callers in 28 clinical samples. <bold>(B)</bold> The relationship between running time of DeteX and average sequencing depth.</p>
</caption>
<graphic xlink:href="fgene-13-1118183-g005.tif"/>
</fig>
</sec>
</sec>
<sec sec-type="discussion" id="s5">
<title>Discussion</title>
<p>The performance of the Bayesian classification model depends on the probability of the variables, although this distribution model is usually used to detect all kinds of variations, and the actual NGS data is not ideally distributed. Furthermore, when there are many attributes or the attributes are highly correlated, the Bayesian classifier is also effective. This is because in NGS data, with the increase of subclone types, the statistical method of determining mutations based on likelihood ratio tests becomes unstable. Sequencing errors and true variants cannot be absolutely distinguished. The read and base anomalies in non-reference sequences may be caused by both sequencing background noise and true variants. The Bayesian model is used in MuTect, so false-positive variations exist in the control samples. By filtering the control variation, the real tumor variation is deleted, and its sensitivity is reduced. The Bayesian model used in MuTect could filter out possible variants due to the low requirement resulting in non-variant loci in normal samples being considered as variant loci. MuTect can detect low-frequency variation, and its precision is high enough, but its sensitivity is not enough. Software that detects variants using Fisher&#x2019;s exact test, such as VarDict and VarScan, is sufficient in sensitivity but not enough in precision. If both types of software are used to detect variants together, the number of false positive variants increases, which is a great challenge for clinical testing cycle time and accuracy. First, we used the BC model to ensure the correct detection of high-frequency variation, then used the FET algorithm to ensure the detection of low-frequency variation, and finally reduced false positives by various filtering conditions, thus ensuring high sensitivity and specificity of the test results.</p>
<p>When detecting substitution and complex variants, DeteX fully considers whether multiple adjacent variants occur simultaneously, which is expressed in the data as whether they occur in the same reads. If two adjacent single nucleotide mutations occur simultaneously, they are considered as a substitution mutation which is written in the form of one variation, such as variation one in <xref ref-type="table" rid="T3">Table 3</xref>. If two mutations do not occur simultaneously, they are considered as two variants which are written in the form of two variants such as variation two or four in <xref ref-type="table" rid="T3">Table 3</xref>. Thus, this ensures the correctness of detection of substitution mutations. For complex mutations, the situation is similar. The adjacent gap, ins and SNV in the same reads are fully considered. They are combined into one variation, which ensures the accuracy of this mutation detection. Due to the lack of local realignment and over-reliance on the comparison results, some of the variants that occur at the end of reads may be soft-clipped instead of being accurately compared, so the variant frequency may be slightly lower, which needs to be improved.</p>
<p>The DeteX method for detecting single sample mutations is the same as that for detecting somatic mutations in paired samples, except that there is no normal sample as a control. Therefore, it can detect low-frequency variation very well. Compared with the HaplotypeCaller in GATK, which is commonly used to detect single sample mutations, DeteX is much more sensitive. This greatly improves the efficiency of detecting somatic mutations in tumor samples without control samples. It offers the possibility of reducing costs and providing testing for more patients.</p>
</sec>
<sec sec-type="conclusion" id="s6">
<title>Conclusion</title>
<p>Compared to other software, DeteX has higher sensitivity and precision in detecting systemic SNV and InDel mutations in tumor samples, especially in detecting substitution and complex variants. This enhances our confidence in detecting sparse driver mutations in tumor samples, reduces the workload of relevant staff and improves detection efficiency. Clinically applicable mutation detection software with high sensitivity and specificity is very important for patient therapy and clinical research. The advent of our software has led to significant advances in clinical genetic testing.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s7">
<title>Data availability statement</title>
<p>The datasets presented in this study can be found in online repositories. The names of the repository/repositories and accession number(s) can be found in the article/<xref ref-type="sec" rid="s12">Supplementary Material</xref>.</p>
</sec>
<sec id="s8">
<title>Author contributions</title>
<p>XF and CT conceived the idea and designed the study. YC, HL, and PL wrote the main text. HW and ZZ contributed to data analysis and prepared figures. HQ validated the study. All authors read and approved the final manuscript.</p>
</sec>
<sec id="s9">
<title>Funding</title>
<p>This work was supported by the National Key Research and Development Program of China (2022YFC2406803 and 2020YFC2003400).</p>
</sec>
<sec sec-type="COI-statement" id="s10">
<title>Conflict of interest</title>
<p>Authors ZZ and CT are employed by the Tianjin Marvel Medical Laboratory, Tianjin Marvelbio Technology Co., Ltd.</p>
<p>The remaining authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s11">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec id="s12">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fgene.2022.1118183/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fgene.2022.1118183/full&#x23;supplementary-material</ext-link>
</p>
<supplementary-material xlink:href="Table2.xlsx" id="SM1" mimetype="application/xlsx" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Table3.xlsx" id="SM2" mimetype="application/xlsx" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Table6.xlsx" id="SM3" mimetype="application/xlsx" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Table4.xlsx" id="SM4" mimetype="application/xlsx" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Table1.xlsx" id="SM5" mimetype="application/xlsx" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Table5.xlsx" id="SM6" mimetype="application/xlsx" xmlns:xlink="http://www.w3.org/1999/xlink"/>
</sec>
<sec id="s13">
<title>Abbreviations</title>
<p>NGS, next-generation sequencing; SNVs, single nucleotide variants; MNVs, multi-nucleotide variants; InDels, insertions and deletions; FET, Fisher&#x2019;s Exact Test; BC, Bayesian Classifier; VAFs, variant allele frequencies; PCR, polymerase chain reaction; BED, browser extensible data; AD, allele depth; AF, allele frequency; cfDNA, cell-free DNA.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Arifuzzaman</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Mitra</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Das</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Hamza</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Absar</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Dash</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>
<italic>In silico</italic> analysis of nonsynonymous single-nucleotide polymorphisms (nsSNPs) of the SMPX gene</article-title>. <source>Ann. Hum. Genet.</source> <volume>84</volume> (<issue>1</issue>), <fpage>54</fpage>&#x2013;<lpage>71</lpage>. <pub-id pub-id-type="doi">10.1111/ahg.12350</pub-id>
</citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Banerji</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Cibulskis</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Rangel-Escareno</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Brown</surname>
<given-names>K. K.</given-names>
</name>
<name>
<surname>Carter</surname>
<given-names>S. L.</given-names>
</name>
<name>
<surname>Frederick</surname>
<given-names>A. M.</given-names>
</name>
<etal/>
</person-group> (<year>2012</year>). <article-title>Sequence analysis of mutations and translocations across breast cancer subtypes</article-title>. <source>Nature</source> <volume>486</volume> (<issue>7403</issue>), <fpage>405</fpage>&#x2013;<lpage>409</lpage>. <pub-id pub-id-type="doi">10.1038/nature11154</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Carter</surname>
<given-names>S. L.</given-names>
</name>
<name>
<surname>Cibulskis</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Helman</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>McKenna</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Shen</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Zack</surname>
<given-names>T.</given-names>
</name>
<etal/>
</person-group> (<year>2012</year>). <article-title>Absolute quantification of somatic DNA alterations in human cancer</article-title>. <source>Nat. Biotechnol.</source> <volume>30</volume> (<issue>5</issue>), <fpage>413</fpage>&#x2013;<lpage>421</lpage>. <pub-id pub-id-type="doi">10.1038/nbt.2203</pub-id>
</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chiang</surname>
<given-names>H. L.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>J. Y.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Y. T.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Identification of functional single nucleotide polymorphisms in the branchpoint site</article-title>. <source>Hum. Genomics</source> <volume>11</volume> (<issue>1</issue>), <fpage>27</fpage>. <pub-id pub-id-type="doi">10.1186/s40246-017-0122-6</pub-id>
</citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cibulskis</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Lawrence</surname>
<given-names>M. S.</given-names>
</name>
<name>
<surname>Carter</surname>
<given-names>S. L.</given-names>
</name>
<name>
<surname>Sivachenko</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Jaffe</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Sougnez</surname>
<given-names>C.</given-names>
</name>
<etal/>
</person-group> (<year>2013</year>). <article-title>Sensitive detection of somatic point mutations in impure and heterogeneous cancer samples</article-title>. <source>Nat. Biotechnol.</source> <volume>31</volume> (<issue>3</issue>), <fpage>213</fpage>&#x2013;<lpage>219</lpage>. <pub-id pub-id-type="doi">10.1038/nbt.2514</pub-id>
</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>DePristo</surname>
<given-names>M. A.</given-names>
</name>
<name>
<surname>Banks</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Poplin</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Garimella</surname>
<given-names>K. V.</given-names>
</name>
<name>
<surname>Maguire</surname>
<given-names>J. R.</given-names>
</name>
<name>
<surname>Hartl</surname>
<given-names>C.</given-names>
</name>
<etal/>
</person-group> (<year>2011</year>). <article-title>A framework for variation discovery and genotyping using next-generation DNA sequencing data</article-title>. <source>Nat. Genet.</source> <volume>43</volume> (<issue>5</issue>), <fpage>491</fpage>&#x2013;<lpage>498</lpage>. <pub-id pub-id-type="doi">10.1038/ng.806</pub-id>
</citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ding</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Ley</surname>
<given-names>T. J.</given-names>
</name>
<name>
<surname>Larson</surname>
<given-names>D. E.</given-names>
</name>
<name>
<surname>Miller</surname>
<given-names>C. A.</given-names>
</name>
<name>
<surname>Koboldt</surname>
<given-names>D. C.</given-names>
</name>
<name>
<surname>Welch</surname>
<given-names>J. S.</given-names>
</name>
<etal/>
</person-group> (<year>2012</year>). <article-title>Clonal evolution in relapsed acute myeloid leukaemia revealed by whole-genome sequencing</article-title>. <source>Nature</source> <volume>481</volume> (<issue>7382</issue>), <fpage>506</fpage>&#x2013;<lpage>510</lpage>. <pub-id pub-id-type="doi">10.1038/nature10738</pub-id>
</citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ernst</surname>
<given-names>E. H.</given-names>
</name>
<name>
<surname>Grondahl</surname>
<given-names>M. L.</given-names>
</name>
<name>
<surname>Grund</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Hardy</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Heuck</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Sunde</surname>
<given-names>L.</given-names>
</name>
<etal/>
</person-group> (<year>2017</year>). <article-title>Dormancy and activation of human oocytes from primordial and primary follicles: Molecular clues to oocyte regulation</article-title>. <source>Hum. Reprod.</source> <volume>32</volume> (<issue>8</issue>), <fpage>1684</fpage>&#x2013;<lpage>1700</lpage>. <pub-id pub-id-type="doi">10.1093/humrep/dex238</pub-id>
</citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Forbes</surname>
<given-names>S. A.</given-names>
</name>
<name>
<surname>Beare</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Boutselakis</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Bamford</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Bindal</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Tate</surname>
<given-names>J.</given-names>
</name>
<etal/>
</person-group> (<year>2017</year>). <article-title>COSMIC: Somatic cancer genetics at high-resolution</article-title>. <source>Nucleic Acids Res.</source> <volume>45</volume> (<issue>D1</issue>), <fpage>D777</fpage>&#x2013;<lpage>D783</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkw1121</pub-id>
</citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Frampton</surname>
<given-names>G. M.</given-names>
</name>
<name>
<surname>Fichtenholtz</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Otto</surname>
<given-names>G. A.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Downing</surname>
<given-names>S. R.</given-names>
</name>
<name>
<surname>He</surname>
<given-names>J.</given-names>
</name>
<etal/>
</person-group> (<year>2013</year>). <article-title>Development and validation of a clinical cancer genomic profiling test based on massively parallel DNA sequencing</article-title>. <source>Nat. Biotechnol.</source> <volume>31</volume> (<issue>11</issue>), <fpage>1023</fpage>&#x2013;<lpage>1031</lpage>. <pub-id pub-id-type="doi">10.1038/nbt.2696</pub-id>
</citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kandoth</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Schultz</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Cherniack</surname>
<given-names>A. D.</given-names>
</name>
<name>
<surname>Akbani</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>Y. X.</given-names>
</name>
<name>
<surname>Shen</surname>
<given-names>H.</given-names>
</name>
<etal/>
</person-group> (<year>2013</year>). <article-title>Integrated genomic characterization of endometrial carcinoma</article-title>. <source>Nature</source> <volume>497</volume> (<issue>7447</issue>), <fpage>67</fpage>&#x2013;<lpage>73</lpage>. <pub-id pub-id-type="doi">10.1038/nature12113</pub-id>
</citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kircher</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Heyn</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Kelso</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>Addressing challenges in the production and analysis of Illumina sequencing data</article-title>. <source>BMC Genomics</source> <volume>12</volume>, <fpage>382</fpage>. <pub-id pub-id-type="doi">10.1186/1471-2164-12-382</pub-id>
</citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Koboldt</surname>
<given-names>D. C.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Q. Y.</given-names>
</name>
<name>
<surname>Larson</surname>
<given-names>D. E.</given-names>
</name>
<name>
<surname>Shen</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>McLellan</surname>
<given-names>M. D.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>L.</given-names>
</name>
<etal/>
</person-group> (<year>2012</year>). <article-title>VarScan 2: Somatic mutation and copy number alteration discovery in cancer by exome sequencing</article-title>. <source>Genome Res.</source> <volume>22</volume> (<issue>3</issue>), <fpage>568</fpage>&#x2013;<lpage>576</lpage>. <pub-id pub-id-type="doi">10.1101/gr.129684.111</pub-id>
</citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lai</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Markovets</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Ahdesmaki</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Chapman</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Hofmann</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>McEwen</surname>
<given-names>R.</given-names>
</name>
<etal/>
</person-group> (<year>2016</year>). <article-title>VarDict: A novel and versatile variant caller for next-generation sequencing in cancer research</article-title>. <source>Nucleic Acids Res.</source> <volume>44</volume> (<issue>11</issue>), <fpage>e108</fpage>. <pub-id pub-id-type="doi">10.1093/nar/gkw227</pub-id>
</citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Landau</surname>
<given-names>D. A.</given-names>
</name>
<name>
<surname>Carter</surname>
<given-names>S. L.</given-names>
</name>
<name>
<surname>Stojanov</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>McKenna</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Stevenson</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Lawrence</surname>
<given-names>M. S.</given-names>
</name>
<etal/>
</person-group> (<year>2013</year>). <article-title>Evolution and impact of subclonal mutations in chronic lymphocytic leukemia</article-title>. <source>Cell</source> <volume>152</volume> (<issue>4</issue>), <fpage>714</fpage>&#x2013;<lpage>726</lpage>. <pub-id pub-id-type="doi">10.1016/j.cell.2013.01.019</pub-id>
</citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Langmead</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Salzberg</surname>
<given-names>S. L.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Fast gapped-read alignment with Bowtie 2</article-title>. <source>Nat. Methods</source> <volume>9</volume> (<issue>4</issue>), <fpage>357</fpage>&#x2013;<lpage>359</lpage>. <pub-id pub-id-type="doi">10.1038/nmeth.1923</pub-id>
</citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Langmead</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Trapnell</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Pop</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Salzberg</surname>
<given-names>S. L.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>Ultrafast and memory-efficient alignment of short DNA sequences to the human genome</article-title>. <source>Genome Biol.</source> <volume>10</volume> (<issue>3</issue>), <fpage>R25</fpage>. <pub-id pub-id-type="doi">10.1186/gb-2009-10-3-r25</pub-id>
</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Langmead</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Wilks</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Antonescu</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Charles</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Scaling read aligners to hundreds of threads on general-purpose processors</article-title>. <source>Bioinformatics</source> <volume>35</volume> (<issue>3</issue>), <fpage>421</fpage>&#x2013;<lpage>432</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/bty648</pub-id>
</citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Durbin</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>Fast and accurate short read alignment with Burrows-Wheeler Transform</article-title>. <source>Bioinformatics</source> <volume>25</volume> (<issue>14</issue>), <fpage>1754</fpage>&#x2013;<lpage>1760</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btp324</pub-id>
</citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>McKenna</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Hanna</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Banks</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Sivachenko</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Cibulskis</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Kernytsky</surname>
<given-names>A.</given-names>
</name>
<etal/>
</person-group> (<year>2010</year>). <article-title>The genome analysis toolkit: A MapReduce framework for analyzing next-generation DNA sequencing data</article-title>. <source>Genome Res.</source> <volume>20</volume> (<issue>9</issue>), <fpage>1297</fpage>&#x2013;<lpage>1303</lpage>. <pub-id pub-id-type="doi">10.1101/gr.107524.110</pub-id>
</citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Metzker</surname>
<given-names>M. L.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>Sequencing technologies &#x2014; The next generation</article-title>. <source>Nat. Rev. Genet.</source> <volume>11</volume> (<issue>1</issue>), <fpage>31</fpage>&#x2013;<lpage>46</lpage>. <pub-id pub-id-type="doi">10.1038/nrg2626</pub-id>
</citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>O&#x27;Rawe</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Jiang</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>G. Q.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>Y. Y.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Hu</surname>
<given-names>J. C.</given-names>
</name>
<etal/>
</person-group> (<year>2013</year>). <article-title>Low concordance of multiple variant-calling pipelines: Practical implications for exome and genome sequencing</article-title>. <source>Genome Med.</source> <volume>5</volume> (<issue>3</issue>), <fpage>28</fpage>. <pub-id pub-id-type="doi">10.1186/gm432</pub-id>
</citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Robasky</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Lewis</surname>
<given-names>N. E.</given-names>
</name>
<name>
<surname>Church</surname>
<given-names>G. M.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>The role of replicates for error mitigation in next-generation sequencing</article-title>. <source>Nat. Rev. Genet.</source> <volume>15</volume> (<issue>1</issue>), <fpage>56</fpage>&#x2013;<lpage>62</lpage>. <pub-id pub-id-type="doi">10.1038/nrg3655</pub-id>
</citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sherry</surname>
<given-names>S. T.</given-names>
</name>
<name>
<surname>Ward</surname>
<given-names>M. H.</given-names>
</name>
<name>
<surname>Kholodov</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Baker</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Phan</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Smigielski</surname>
<given-names>E. M.</given-names>
</name>
<etal/>
</person-group> (<year>2001</year>). <article-title>dbSNP: the NCBI database of genetic variation</article-title>. <source>Nucleic Acids Res.</source> <volume>29</volume> (<issue>1</issue>), <fpage>308</fpage>&#x2013;<lpage>311</lpage>. <pub-id pub-id-type="doi">10.1093/nar/29.1.308</pub-id>
</citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Smigielski</surname>
<given-names>E. M.</given-names>
</name>
<name>
<surname>Sirotkin</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Ward</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Sherry</surname>
<given-names>S. T.</given-names>
</name>
</person-group> (<year>2000</year>). <article-title>dbSNP: a database of single nucleotide polymorphisms</article-title>. <source>Nucleic Acids Res.</source> <volume>28</volume> (<issue>1</issue>), <fpage>352</fpage>&#x2013;<lpage>355</lpage>. <pub-id pub-id-type="doi">10.1093/nar/28.1.352</pub-id>
</citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Song</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Gardner</surname>
<given-names>S. A.</given-names>
</name>
<name>
<surname>Hovhannisyan</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Natalizio</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Weymouth</surname>
<given-names>K. S.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>W.</given-names>
</name>
<etal/>
</person-group> (<year>2016</year>). <article-title>Exploring the landscape of pathogenic genetic variation in the ExAC population database: Insights of relevance to variant classification</article-title>. <source>Genet. Med.</source> <volume>18</volume> (<issue>8</issue>), <fpage>850</fpage>&#x2013;<lpage>854</lpage>. <pub-id pub-id-type="doi">10.1038/gim.2015.180</pub-id>
</citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Stransky</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Egloff</surname>
<given-names>A. M.</given-names>
</name>
<name>
<surname>Tward</surname>
<given-names>A. D.</given-names>
</name>
<name>
<surname>Kostic</surname>
<given-names>A. D.</given-names>
</name>
<name>
<surname>Cibulskis</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Sivachenko</surname>
<given-names>A.</given-names>
</name>
<etal/>
</person-group> (<year>2011</year>). <article-title>The mutational landscape of head and neck squamous cell carcinoma</article-title>. <source>Science</source> <volume>333</volume> (<issue>6046</issue>), <fpage>1157</fpage>&#x2013;<lpage>1160</lpage>. <pub-id pub-id-type="doi">10.1126/science.1208130</pub-id>
</citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Stratton</surname>
<given-names>M. R.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>Exploring the genomes of cancer cells: Progress and promise</article-title>. <source>Science</source> <volume>331</volume> (<issue>6024</issue>), <fpage>1553</fpage>&#x2013;<lpage>1558</lpage>. <pub-id pub-id-type="doi">10.1126/science.1204040</pub-id>
</citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Trapnell</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Pachter</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Salzberg</surname>
<given-names>S. L.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>TopHat: Discovering splice junctions with RNA-seq</article-title>. <source>Bioinformatics</source> <volume>25</volume> (<issue>9</issue>), <fpage>1105</fpage>&#x2013;<lpage>1111</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btp120</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>