<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Archiving and Interchange DTD v2.3 20070202//EN" "archivearticle.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="methods-article">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Immunol.</journal-id>
<journal-title>Frontiers in Immunology</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Immunol.</abbrev-journal-title>
<issn pub-type="epub">1664-3224</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fimmu.2016.00372</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Immunology</subject>
<subj-group>
<subject>Methods</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>SONAR: A High-Throughput Pipeline for Inferring Antibody Ontogenies from Longitudinal Sequencing of B Cell Transcripts</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name><surname>Schramm</surname> <given-names>Chaim A.</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<xref ref-type="corresp" rid="cor1">&#x0002A;</xref>
<xref ref-type="author-notes" rid="fn001"><sup>&#x02020;</sup></xref>
<uri xlink:href="http://frontiersin.org/people/u/375370"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Sheng</surname> <given-names>Zizhang</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="author-notes" rid="fn001"><sup>&#x02020;</sup></xref>
<uri xlink:href="http://frontiersin.org/people/u/371221"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Zhang</surname> <given-names>Zhenhai</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="author-notes" rid="fn001"><sup>&#x02020;</sup></xref>
<xref ref-type="author-notes" rid="fn002"><sup>&#x02021;</sup></xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Mascola</surname> <given-names>John R.</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Kwong</surname> <given-names>Peter D.</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<xref ref-type="corresp" rid="cor1">&#x0002A;</xref>
<uri xlink:href="http://frontiersin.org/people/u/15899"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Shapiro</surname> <given-names>Lawrence</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<xref ref-type="corresp" rid="cor1">&#x0002A;</xref>
<uri xlink:href="http://frontiersin.org/people/u/354152"/>
</contrib>
</contrib-group>
<aff id="aff1"><sup>1</sup><institution>Department of Biochemistry and Molecular Biophysics, Columbia University</institution>, <addr-line>New York, NY</addr-line>, <country>USA</country></aff>
<aff id="aff2"><sup>2</sup><institution>Department of Systems Biology, Columbia University</institution>, <addr-line>New York, NY</addr-line>, <country>USA</country></aff>
<aff id="aff3"><sup>3</sup><institution>Vaccine Research Center, National Institute of Allergy and Infectious Diseases, National Institutes of Health</institution>, <addr-line>Bethesda, MD</addr-line>, <country>USA</country></aff>
<author-notes>
<fn fn-type="edited-by"><p>Edited by: Ignacio Sanz, University of Rochester, USA</p></fn>
<fn fn-type="edited-by"><p>Reviewed by: Gregory C. Ippolito, University of Texas at Austin, USA; Felix Breden, Simon Fraser University, Canada</p></fn>
<corresp content-type="corresp" id="cor1">&#x0002A;Correspondence: Chaim A. Schramm, <email>chaim.schramm&#x00040;nih.gov</email>; Peter D. Kwong, <email>pdkwong&#x00040;nih.gov</email>; Lawrence Shapiro, <email>shapiro&#x00040;convex.hhmi.columbia.edu</email></corresp>
<fn fn-type="equal" id="fn001"><p><sup>&#x02020;</sup>Chaim A. Schramm, Zizhang Sheng, and Zhenhai Zhang contributed equally.</p></fn>
<fn fn-type="present-address" id="fn002"><p><sup>&#x02021;</sup>Present address: Zhenhai Zhang, National Clinical Research Center for Kidney Disease, Ministry of Education, Nanfang Hospital, Southern Medical University, Guangzhou, Guangdong, China; Key Laboratory of Organ Failure Research, Ministry of Education, Nanfang Hospital, Southern Medical University, Guangzhou, Guangdong, China</p></fn>
<fn fn-type="other" id="fn003"><p>Specialty section: This article was submitted to B Cell Biology, a section of the journal Frontiers in Immunology</p></fn>
</author-notes>
<pub-date pub-type="epub">
<day>21</day>
<month>09</month>
<year>2016</year>
</pub-date>
<pub-date pub-type="collection">
<year>2016</year>
</pub-date>
<volume>7</volume>
<elocation-id>372</elocation-id>
<history>
<date date-type="received">
<day>08</day>
<month>06</month>
<year>2016</year>
</date>
<date date-type="accepted">
<day>07</day>
<month>09</month>
<year>2016</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x000A9; 2016 Schramm, Sheng, Zhang, Mascola, Kwong and Shapiro.</copyright-statement>
<copyright-year>2016</copyright-year>
<copyright-holder>Schramm, Sheng, Zhang, Mascola, Kwong and Shapiro</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) or licensor are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p></license>
</permissions>
<abstract>
<p>The rapid advance of massively parallel or next-generation sequencing technologies has made possible the characterization of B cell receptor repertoires in ever greater detail, and these developments have triggered a proliferation of software tools for processing and annotating these data. Of especial interest, however, is the capability to track the development of specific antibody lineages across time, which remains beyond the scope of most current programs. We have previously reported on the use of techniques such as inter- and intradonor analysis and CDR3 tracing to identify transcripts related to an antibody of interest. Here, we present Software for the Ontogenic aNalysis of Antibody Repertoires (SONAR), capable of automating both general repertoire analysis and specialized techniques for investigating specific lineages. SONAR annotates next-generation sequencing data, identifies transcripts in a lineage of interest, and tracks lineage development across multiple time points. SONAR also generates figures, such as identity&#x02013;divergence plots and longitudinal phylogenetic &#x0201C;birthday&#x0201D; trees, and provides interfaces to other programs such as DNAML and BEAST. SONAR can be downloaded as a ready-to-run Docker image or manually installed on a local machine. In the latter case, it can also be configured to take advantage of a high-performance computing cluster for the most computationally intensive steps, if available. In summary, this software provides a useful new tool for the processing of large next-generation sequencing datasets and the ontogenic analysis of neutralizing antibody lineages. SONAR can be found at <uri xlink:href="https://github.com/scharch/SONAR">https://github.com/scharch/SONAR</uri>, and the Docker image can be obtained from <uri xlink:href="https://hub.docker.com/r/scharch/sonar/">https://hub.docker.com/r/scharch/sonar/</uri>.</p>
</abstract>
<kwd-group>
<kwd>antibody repertoire</kwd>
<kwd>antibody lineage</kwd>
<kwd>antibody maturation</kwd>
<kwd>B cell ontogeny</kwd>
<kwd>longitudinal analysis</kwd>
<kwd>next-generation sequencing</kwd>
</kwd-group>
<contract-num rid="cn01">P01 AI104722-3, U01 AI116086-01</contract-num>
<contract-sponsor id="cn01">National Institutes of Health<named-content content-type="fundref-id">10.13039/100000002</named-content></contract-sponsor>
<contract-sponsor id="cn02">Division of Intramural Research, National Institute of Allergy and Infectious Diseases<named-content content-type="fundref-id">10.13039/100006492</named-content></contract-sponsor>
<counts>
<fig-count count="4"/>
<table-count count="0"/>
<equation-count count="0"/>
<ref-count count="57"/>
<page-count count="10"/>
<word-count count="7085"/>
</counts>
</article-meta>
</front>
<body>
<sec id="S1" sec-type="introduction">
<title>Introduction</title>
<p>Antibodies, the soluble form of B cell receptors (BCRs), play a critical role in adaptive immunity. Approximately 50 million naive B cells are generated <italic>via</italic> V(D)J recombination in the bone marrow each day. Due to the combinatorial possibilities of recombination and the inclusion of non-templated &#x0201C;N&#x0201D; and &#x0201C;P&#x0201D; nucleotides, each naive B cell generally expresses a unique BCR (<xref ref-type="bibr" rid="B1">1</xref>). If a naive B cell encounters an antigen that can be bound by its receptor and is stimulated by a cognate T cell, it will begin proliferating. As B cells proliferate, they express activation-induced cytidine deaminase, which causes the rapid accumulation of somatic hypermutation in the BCR gene (<xref ref-type="bibr" rid="B2">2</xref>). Daughter cells descended from the same naive B cell form a B cell lineage. The typical human B cell repertoire has been estimated to contain &#x0007E;30,000 highly expanded IgM, IgG, and IgA lineages as well as &#x0007E;5 million low-expansion IgM lineages at any given time (<xref ref-type="bibr" rid="B3">3</xref>).</p>
<p>The mutated BCRs expressed by the cells of a B cell lineage are selected for binding to antigen. In this way, the adaptive immune system can produce antibodies capable of binding to and protecting against nearly any invading pathogen. Most effective vaccines work by eliciting neutralizing antibodies (<xref ref-type="bibr" rid="B4">4</xref>), and many recombinant antibodies are now being used as therapeutics (<xref ref-type="bibr" rid="B5">5</xref>). In addition, B cell dysfunction may result in autoimmune diseases, such as systemic lupus erythematosus (<xref ref-type="bibr" rid="B6">6</xref>), and various B cell lymphomas (<xref ref-type="bibr" rid="B7">7</xref>, <xref ref-type="bibr" rid="B8">8</xref>), among others. Understanding each of these B cell-related diseases requires knowledge of the properties and dynamics of natural antibody repertoires and how these properties change in response to factors such as age, vaccination, and disease.</p>
<p>A particularly important area of research is the generation and development (ontogeny) of individual B cell lineages and ontogeny-based vaccine design (<xref ref-type="bibr" rid="B9">9</xref>). These studies can reveal not only the mechanisms of modulating antibody-affinity maturation and neutralization breadth development (<xref ref-type="bibr" rid="B2">2</xref>, <xref ref-type="bibr" rid="B10">10</xref>&#x02013;<xref ref-type="bibr" rid="B12">12</xref>) but also help to find related antibodies that are more suitable for use as therapeutics (<xref ref-type="bibr" rid="B13">13</xref>&#x02013;<xref ref-type="bibr" rid="B15">15</xref>). However, several obstacles must be overcome to define the history and maturation of a single lineage. First, out of a total repertoire of millions of antibody lineages (<xref ref-type="bibr" rid="B3">3</xref>, <xref ref-type="bibr" rid="B16">16</xref>), even a highly expanded lineage may constitute at most 0.1% of the overall B cell population (<xref ref-type="bibr" rid="B16">16</xref>). Thus, careful selection procedures and/or extensive sampling are required in order to gain sufficient representation. The rapid development of next-generation sequencing technology (<xref ref-type="bibr" rid="B17">17</xref>&#x02013;<xref ref-type="bibr" rid="B19">19</xref>) has ameliorated the first of these problems. It is now possible to obtain millions of reads quickly and cheaply, making it possible to sample the antibody repertoire at great depth. 
To help manage and process these data, a wealth of software tools have been introduced, most notably IMGT-vQuest (<xref ref-type="bibr" rid="B20">20</xref>), JoinSolver (<xref ref-type="bibr" rid="B21">21</xref>, <xref ref-type="bibr" rid="B22">22</xref>), and IgBlast (<xref ref-type="bibr" rid="B23">23</xref>), as well as more recent tools such as VDJSeq-Solver (<xref ref-type="bibr" rid="B24">24</xref>), ImmunediveRsity (<xref ref-type="bibr" rid="B25">25</xref>), IMonitor (<xref ref-type="bibr" rid="B26">26</xref>), CloAnalyst (<xref ref-type="bibr" rid="B27">27</xref>, <xref ref-type="bibr" rid="B28">28</xref>), and partis (<xref ref-type="bibr" rid="B29">29</xref>).</p>
<p>Even with adequate sampling, it can be difficult to determine which antibodies are members of the same B cell lineage, as there will generally be multiple lineages which share the same V and J gene. The recombination region &#x02013; including 5&#x02032; and 3&#x02032; excisions, N and P added nucleotides, and (for heavy chains) the choice of D gene &#x02013; is generally regarded as a definitive signature of membership in a single B cell lineage [e.g., Ref. (<xref ref-type="bibr" rid="B3">3</xref>, <xref ref-type="bibr" rid="B25">25</xref>, <xref ref-type="bibr" rid="B30">30</xref>&#x02013;<xref ref-type="bibr" rid="B32">32</xref>)]. However, such signatures can be obscured by sequencing error and somatic hypermutation (<xref ref-type="bibr" rid="B12">12</xref>, <xref ref-type="bibr" rid="B33">33</xref>), unless patterns of mutations across the entire variable region are taken into account (<xref ref-type="bibr" rid="B34">34</xref>).<xref ref-type="fn" rid="fn1"><sup>1</sup></xref> The light chains of a lineage are even more difficult to assess, as they do not contain a D gene. A somewhat simpler problem than <italic>de novo</italic> or &#x0201C;unseeded&#x0201D; lineage identification is finding only those transcripts which are in the same lineage as a known &#x0201C;seed&#x0201D; antibody sequence, such as an antibody identified by cell sorting or culture. We have previously reported several methods for addressing this question, including identity&#x02013;divergence plots (<xref ref-type="bibr" rid="B35">35</xref>, <xref ref-type="bibr" rid="B36">36</xref>), inter- and intradonor phylogenetic analysis (<xref ref-type="bibr" rid="B11">11</xref>, <xref ref-type="bibr" rid="B12">12</xref>, <xref ref-type="bibr" rid="B35">35</xref>), and CDR3 clustering (<xref ref-type="bibr" rid="B12">12</xref>, <xref ref-type="bibr" rid="B35">35</xref>).</p>
<p>Once a group of transcripts in a lineage have been identified, phylogenetic analysis can be used to build a tree showing how the lineage developed and infer the sequence of unobserved ancestral sequences. While a few tools are available for this task (<xref ref-type="bibr" rid="B27">27</xref>, <xref ref-type="bibr" rid="B37">37</xref>, <xref ref-type="bibr" rid="B38">38</xref>), they do not distinguish transcripts from different time points or allow direct and explicit analysis of how a lineage evolves over time. Longitudinal information can be extremely important, however, for indicating whether a lineage is static or continuing to mature (<xref ref-type="bibr" rid="B12">12</xref>) and providing the ability to trace co-evolution with a viral pathogen (<xref ref-type="bibr" rid="B10">10</xref>, <xref ref-type="bibr" rid="B11">11</xref>, <xref ref-type="bibr" rid="B39">39</xref>, <xref ref-type="bibr" rid="B40">40</xref>).</p>
<p>Here, we present the Software for the Ontogenic aNalysis of Antibody Repertoires (SONAR), an integrated pipeline for performing all of these types of analyses in a single environment. SONAR focuses on the analysis of longitudinal data to understand the development of a single antibody lineage over time. Early versions of this pipeline were used to successfully trace the development of broadly neutralizing antibodies against HIV-1 such as CAP256-VRC26 (<xref ref-type="bibr" rid="B11">11</xref>, <xref ref-type="bibr" rid="B39">39</xref>, <xref ref-type="bibr" rid="B41">41</xref>) and VRC01 (<xref ref-type="bibr" rid="B12">12</xref>); it has now been extensively overhauled for efficiency and readability, and many new features have been added. Here, we release SONAR as open software under the GNU General Public License. SONAR source code is available from GitHub or as a platform-independent Docker image with all required dependencies already installed.</p>
</sec>
<sec id="S2" sec-type="materials|methods">
<title>Materials and Methods</title>
<sec id="S2-1">
<title>Computer Hardware and Software Requirements</title>
<p>The SONAR pipeline can be run on any operating system (OS) using the Docker image found at <uri xlink:href="https://hub.docker.com/r/scharch/sonar/">https://hub.docker.com/r/scharch/sonar/</uri>. Local installation is available for Unix-based operating systems and requires Python 2.7 with the BioPython package (<xref ref-type="bibr" rid="B42">42</xref>); Perl 5 or higher with the BioPerl module (<xref ref-type="bibr" rid="B43">43</xref>); R with the ggplot2, grid, and MASS libraries; and BLAST&#x0002B; (<xref ref-type="bibr" rid="B44">44</xref>). For full functionality, the following programs are also required: FASTX-Toolkit,<xref ref-type="fn" rid="fn2"><sup>2</sup></xref> USEARCH v8 (<xref ref-type="bibr" rid="B45">45</xref>), MUSCLE v3.8 (<xref ref-type="bibr" rid="B46">46</xref>), DNAML (<xref ref-type="bibr" rid="B47">47</xref>), BEAST2 (<xref ref-type="bibr" rid="B48">48</xref>), the ete2 Python package (<xref ref-type="bibr" rid="B49">49</xref>), and docopt for Python and R.<xref ref-type="fn" rid="fn3"><sup>3</sup></xref></p>
</sec>
<sec id="S2-2">
<title>License and Distribution</title>
<p>Software for the Ontogenic aNalysis of Antibody Repertoires is made available under the GNU General Public License, version 3. Permission is granted to modify and redistribute SONAR in any fashion so long as the original copyright notice remains intact and any changes are clearly marked. Source code can be downloaded from <uri xlink:href="https://github.com/scharch/SONAR">https://github.com/scharch/SONAR</uri>.</p>
</sec>
<sec id="S2-3">
<title>Reference Germline Gene Sequences</title>
<p>Reference human germline gene sequences were downloaded from the IMGT database (release 201631-4, August 4, 2016). Alleles marked by IMGT as &#x0201C;ORF&#x0201D; or &#x0201C;P&#x0201D; are excluded from the default databases; however, files with all IMGT alleles are included, as well.</p>
</sec>
<sec id="S2-4">
<title>Sample Deep-Sequencing Data</title>
<p>The examples shown here make use of previously published 454 data from donor CAP256 (<xref ref-type="bibr" rid="B11">11</xref>) and can be downloaded from the NCBI Sequence Read Archive under accession number SRP034555.</p>
</sec>
</sec>
<sec id="S3">
<title>Results</title>
<sec id="S3-1">
<title>Overview of SONAR</title>
<p>To run SONAR locally, download the source code from GitHub and run the setup.sh bash script. This script will ask for the installation paths of needed accessory programs and make this information available to the main SONAR programs. The setup.sh script also allows SONAR to be set up to use a Grid Engine-managed computing cluster, enabling parallel processing of large datasets.</p>
<p>The setup procedure only needs to be run the first time that SONAR is downloaded; updates to the source code can be downloaded without overwriting user-specific data. Alternatively, a ready-to-use Docker image can be obtained from Docker hub and run using the command:
<disp-quote>
<p><monospace>docker run -i -t -v /path/to/local/project:/project scharch/sonar</monospace></p>
</disp-quote></p>
<p>where &#x0003C;project&#x0003E; is the name of the project with data to be analyzed, and the path indicates its location on the local disk.</p>
<p>Because many different sequencing protocols are used to generate antibody repertoire data, SONAR expects transcripts that have already been preprocessed, if necessary. This can include separating different experiments based on barcodes and/or collapsing redundant transcripts using molecular ID tags. SONAR does offer a script to merge paired-end reads from the Illumina MiSeq platform and to remove transcripts with the expected number of errors above a chosen threshold using USEARCH (<xref ref-type="bibr" rid="B45">45</xref>), but other forms of quality control must be performed manually before running the SONAR pipeline.</p>
<p>Software for the Ontogenic aNalysis of Antibody Repertoires proceeds in three conceptual steps (Figure <xref ref-type="fig" rid="F1">1</xref>). First, it annotates the bulk transcripts using BLAST&#x0002B; (<xref ref-type="bibr" rid="B44">44</xref>), which produces a picture of the overall repertoire sampled by a single experiment. Second, SONAR attempts to classify transcripts into distinct lineages, using either seeded or unseeded techniques. Finally, SONAR combines related transcripts from multiple time points or experiments to conduct an ontogenic analysis.</p>
<fig id="F1" position="float">
<label>Figure 1</label>
<caption><p><bold>Overview of SONAR workflow</bold>. Green represents input data, blue indicates analysis steps, and red denotes graphical output.</p></caption>
<graphic xlink:href="fimmu-07-00372-g001.tif"/>
</fig>
<p>All SONAR scripts can be called with a -h or -help option to print detailed documentation and usage options at the command line. This documentation will also typically be produced if a script is called with insufficient or incorrectly formatted options.</p>
</sec>
<sec id="S3-2">
<title>Module 1: Annotation</title>
<p>This module characterizes the overall repertoire captured by sequencing. To do so, the germline V(D)J gene of each transcript is assigned using BLAST&#x0002B; with optimized parameters. Because IgBlast was not available as a stand-alone program that could be run locally when we began building SONAR, we developed separate scripts to find the V and J genes and assign the boundaries of CDR3 using the alignment boundaries output by BLAST. While a blunt tool, such as BLAST, cannot resolve uncertainty in the assignment of the exact allele of a particular germline gene used in recombination (<xref ref-type="bibr" rid="B29">29</xref>), SONAR is designed primarily for use with highly mutated neutralizing antibody sequences, for which a definitive assignment is often not possible. SONAR does report the top allele found by BLAST but only uses the gene for all phylogenetic analyses. In addition, the exact alleles carried can vary widely among different donors (<xref ref-type="bibr" rid="B50">50</xref>), and this information is typically not available. Similarly, SONAR currently makes no attempt to assign the exact boundaries of recombination, as this information is often obscured for highly mutated antibodies (<xref ref-type="bibr" rid="B29">29</xref>). In addition, the IMGT databases included in the distribution contain some alleles with identical sequences but multiple designators (e.g., IGHV3-30&#x0002A;18 and IGHV3-30-5&#x0002A;01 or IGKV1-12&#x0002A;02 and IGKV1D-12&#x0002A;02), which cannot be distinguished by BLAST, and SONAR shares this limitation. The output from this module includes a master table with the disposition of each input transcript and summary statistics for gene usage. This information can be passed to the plotting module to create figures describing the repertoire (Figure <xref ref-type="fig" rid="F2">2</xref>).</p>
<fig id="F2" position="float">
<label>Figure 2</label>
<caption><p><bold>Sample figures for Module 1 analyses</bold>. <bold>(A)</bold> The V gene usage for donor CAP256 at 59&#x02009;weeks post-infection (SRA accession SRX395942). cDNA was amplified with VH3 family-specific primers; the peak for VH3-30 is from the expansion of the CAP256-VRC26 lineage. <bold>(B)</bold> The J gene usage for the same dataset. The CAP256-VRC26 lineage uses JH3. <bold>(C)</bold> The status assigned to each transcript in this dataset by SONAR. Approximately 30% of the transcripts are identified as containing in-dels (light green), which is typical for uncorrected 454 data. Approximately 60% are high-quality transcripts (dark purple), but only &#x0007E;2% of these (&#x0007E;3% overall) are non-redundant at the 97% threshold (light purple). <bold>(D)</bold> CDR H3 length distribution (in amino acids, IMGT delineation) for all nine donor CAP256 time points. The CAP256-VRC26 lineage can be seen in the peak at 37 amino acids, which first appears at the 34&#x02009;weeks post-infection time point.</p></caption>
<graphic xlink:href="fimmu-07-00372-g002.tif"/>
</fig>
<sec id="S3-2-1">
<title>1.0-MiSeq_assembly.pl</title>
<p>This optional script merges paired-end reads from Illumina MiSeq (or HiSeq) and removes reads that cannot be merged or are of low quality. Trimming is done <italic>via</italic> the FastX Toolkit, and merging is done with USEARCH. Prior to merging, reads can be trimmed by a specific number of nucleotides or based on quality scores. Low quality reads can be discarded after merging using the number of expected miscalled bases (as calculated by USEARCH from the quality scores at each position).</p>
</sec>
<sec id="S3-2-2">
<title>1.1-blast_V.py</title>
<p>This script initiates the analysis for each project. The name of the current working folder is used as the project name, which is used as the stem for all output files. New directories are created for working files and processed output. If the work or output directories already exist, the script exits with an error unless the -f (force) flag has been specified. This prevents accidental overwriting of existing data.</p>
<p>By default, all fasta and fastq files in the work directory are processed, but a specific file or files can be stipulated. Reads which are too short or too long to correspond to an antibody variable region are discarded. Input sequences are broken into groups and blasted against a library of germline V genes. Human heavy, kappa, and lambda libraries are included with the source code, but a custom library can be specified using the -lib option. By default, BLAST&#x0002B; is run locally using one thread; however, multiple threads can be used or the individual blast jobs can be submitted to a cluster if one is present.</p>
</sec>
<sec id="S3-2-3">
<title>1.2-blast_J.py</title>
<p>This script parses the output of BLAST&#x0002B; from 1.1-blast_V.py to extract the assigned germline V gene and generates new BLAST&#x0002B; jobs to search for the germline J gene. To improve assignment efficiency, only the portion of the NGS transcript after the 3&#x02032; end of the V gene match is scanned; transcripts with no matched V gene are discarded. By default, this script also uses BLAST&#x0002B; to assign the constant region and D gene for heavy chain transcripts, but this functionality can be disabled to speed up processing time. Outputs from this script are text tables in output/tables with the top V gene hit for each transcript and a summary of how many times each V gene allele is observed in the dataset.</p>
</sec>
<sec id="S3-2-4">
<title>1.3-finalize_assignments.py</title>
<p>This script parses the output of BLAST&#x0002B; from 1.2-blast_J.py to extract the assigned germline J gene and uses the boundaries of the V and J gene alignment to extract CDR3. Each transcript is also checked for frameshifts and stop codons, and a final status is assigned. Outputs in output/tables include top assignments and summary tables for J genes (plus D genes and constant regions, if applicable). In addition, a master table is generated indicating the source, characteristics, and disposition of each transcript. In output/sequences are files with various subsets of the input sequences, including all transcripts with successful V and J assignments, successful CDR3 extraction, and transcripts with all of the above plus no detected frameshifts or stop codons. Data about the repertoire can be visualized using 4.1-setup_plots.pl (Figure <xref ref-type="fig" rid="F2">2</xref>).</p>
</sec>
<sec id="S3-2-5">
<title>1.4-dereplicate_sequences.pl</title>
<p>This script uses USEARCH to eliminate redundant transcripts and those below a given sequencing depth threshold. Clustering is also used to account for the introduction of error during PCR and sequencing, eliminating artificial diversity (<xref ref-type="bibr" rid="B36">36</xref>). The default identity threshold for clustering is 99%, and only clusters containing at least three transcripts are retained. Both parameters can be adjusted by the user.</p>
</sec>
</sec>
<sec id="S3-3">
<title>Module 2: Lineage Determination</title>
<p>The process of classifying a set of NGS transcripts into component lineages without any additional information is termed &#x0201C;unseeded lineage assignment.&#x0201D; By contrast, &#x0201C;seeded lineage assignment&#x0201D; uses the sequences of one or more known antibodies as seeds to find all transcripts in the dataset that are from the same lineage, while leaving the remainder of transcripts unclassified. Unseeded lineage assignment is typically accomplished by clustering transcripts based on sequence similarity in CDR3 (<xref ref-type="bibr" rid="B3">3</xref>, <xref ref-type="bibr" rid="B25">25</xref>, <xref ref-type="bibr" rid="B30">30</xref>&#x02013;<xref ref-type="bibr" rid="B32">32</xref>), though more sophisticated algorithms have recently been described (<xref ref-type="bibr" rid="B34">34</xref>, see footnote text 1). SONAR offers 2.4-cluster_into_groups.py to carry out unseeded lineage assignment, but the suite overall focuses more heavily on seeded lineage assignment, since phylogenetic analysis is carried out on specific lineages. We have previously demonstrated several techniques for effective and efficient seeded lineage assignment, which are included in Module 2 of SONAR (<xref ref-type="bibr" rid="B11">11</xref>, <xref ref-type="bibr" rid="B35">35</xref>, <xref ref-type="bibr" rid="B36">36</xref>, <xref ref-type="bibr" rid="B40">40</xref>, <xref ref-type="bibr" rid="B51">51</xref>).</p>
<sec id="S3-3-1">
<title>2.1-calculate_id-div.pl</title>
<p>This script carries out seeded lineage assignment, using MUSCLE (<xref ref-type="bibr" rid="B46">46</xref>) (the default), ClustalO (<xref ref-type="bibr" rid="B52">52</xref>), or MAFFT (<xref ref-type="bibr" rid="B53">53</xref>) to align each transcript to its assigned germline sequence and to known antibody sequences of interest. Output is a table with the percent identity of each transcript to each of the specified known antibody sequences and its percent divergence from germline V gene. These data can be visualized using 4.3-plot_identity_divergence.R (see below) to identify &#x0201C;islands&#x0201D; of transcripts that are likely to be in the same lineage as an antibody or antibodies of interest (Figure <xref ref-type="fig" rid="F3">3</xref>A).</p>
<fig id="F3" position="float">
<label>Figure 3</label>
<caption><p><bold>Sample figures for Module 2 analyses</bold>. <bold>(A)</bold> Identity&#x02013;divergence plot of transcripts assigned to VH3-30 for donor CAP256 at 59&#x02009;weeks post-infection. Bulk sequencing data are shown as a heat map with colors as indicated. The CAP256-VRC26 lineage is visible as a distinct island of transcripts at higher identity. <bold>(B)</bold> The same plot with transcripts identified as likely lineage members by intradonor analysis overlaid as orange points. Two thirds of these transcripts are found in the high-identity island; the remaining third in the main body of transcripts at &#x0007E;70% identity are false positives. This is a typical result, showing why multiple tools for lineage determination are included in SONAR and manual curation is strongly advised.</p></caption>
<graphic xlink:href="fimmu-07-00372-g003.tif"/>
</fig>
</sec>
<sec id="S3-3-2">
<title>2.2-get_island.py</title>
<p>Once an island of transcripts likely to be in the same lineage as the seed antibody has been identified on an identity&#x02013;divergence plot, this script can be used to extract the transcripts in the island and save them to a new file in output/sequences/nucleotide.</p>
</sec>
<sec id="S3-3-3">
<title>2.3-intradonor_analysis.py</title>
<p>This script offers a second method to perform seeded lineage assignment by using an iterative phylogenetic analysis to find transcripts that are in the same lineage as a set of known antibodies. Transcripts are randomly split into groups and used together with known antibody sequences to build neighbor-joining trees rooted on the germline V gene of the known antibodies. Transcripts in the minimum sub-tree spanning all of the known sequences are passed forward into the next iteration. The algorithm is considered to have converged when 95% of the input sequences in a round are in the minimum sub-tree, and these transcripts are deemed to be in the same lineage as the known antibodies. The algorithm is generally intended to find somatically related antibodies from a single lineage within a single donor. However, in the special case of VRC01 class antibodies (<xref ref-type="bibr" rid="B35">35</xref>), we have shown that exogenous VRC01 class heavy chains can be used for &#x0201C;cross-donor&#x0201D; analysis to identify a lineage of VRC01 class antibodies within a new donor (<xref ref-type="bibr" rid="B35">35</xref>, <xref ref-type="bibr" rid="B54">54</xref>). For both intradonor and cross-donor analysis, the accuracy and specificity of the algorithm depend on the number of seed sequences used and how closely related they are. Various filtering options are available for the transcripts before starting the analysis, and the tree-building steps of each iteration can be submitted to a high-performance computing cluster, if available. 4.3-plot_identity_divergence.R can be used to overlay the transcripts thus identified as in the same lineage on the visualization of the overall repertoire (Figure <xref ref-type="fig" rid="F3">3</xref>B).</p>
</sec>
<sec id="S3-3-4">
<title>2.4-cluster_into_groups.py</title>
<p>This script provides both a third technique for seeded lineage assignment and a basic approach for unseeded lineage assignment. Antibody transcripts are first separated into groups based on assigned V and J genes. The transcripts in each group are then clustered based on their CDR3 nucleotide identity using the UCLUST algorithm in USEARCH, and each cluster is identified as a distinct unseeded lineage. Known antibodies of interest can also be included among the transcripts to be clustered, allowing seeded lineage assignment for one or more lineages (<xref ref-type="bibr" rid="B12">12</xref>, <xref ref-type="bibr" rid="B35">35</xref>).</p>
</sec>
</sec>
<sec id="S3-4">
<title>Module 3: Phylogenetic Analysis</title>
<p>Once transcripts in the lineage of the seed antibodies have been identified from one or more cross-sectional samples, the overall phylogenetic structure of an antibody lineage can be examined and the ontogeny of the lineage can be inferred. This includes building and analyzing a phylogenetic tree, inferring intermediates along the maturation pathway of an antibody of interest, as well as estimating the evolutionary rate of the lineage over time.</p>
<sec id="S3-4-1">
<title>3.1-merge_timepoints.pl</title>
<p>This script collects transcripts in the lineage of the seed antibodies identified at multiple time points using Module 2 and renames them to indicate their temporal origins. A unique label may be specified for each file, such as a sample date or visit code. This script then identifies and collapses transcripts that appear at multiple time points and assigns a &#x0201C;birthday&#x0201D; based on the first observation.</p>
</sec>
<sec id="S3-4-2">
<title>3.2-run_DNAML.py</title>
<p>This is a wrapper script for using DNAML (<xref ref-type="bibr" rid="B47">47</xref>) to build a maximum likelihood tree representing the phylogenetic development of the lineage and to infer unobserved ancestral sequences. In most cases, the user should provide a manually verified, high-quality alignment in PHYLIP format, in order to allow for accurate inference of ancestor sequences. However, the program will call MUSCLE to align the collected transcripts if no alignment is provided. DNAML will be run three times on randomly ordered input, and outgroup rooted on the germline V gene sequence. All other options for DNAML are left at their default settings. The phylogenetic tree produced can be displayed using 4.4-display_tree.py (see below), and an example can be seen in Figure <xref ref-type="fig" rid="F4">4</xref>A.</p>
<fig id="F4" position="float">
<label>Figure 4</label>
<caption><p><bold>Sample figures for Module 3 analyses</bold>. <bold>(A)</bold> Longitudinal birthday tree of the CAP256-VRC26 lineage. This tree includes 384 NGS transcripts and the 33 isolated monoclonals. <bold>(B)</bold> The same tree displaying only 12 major branches derived from the NGS data and the 33 monoclonals. This allows the structure of the tree to be seen more clearly.</p></caption>
<graphic xlink:href="fimmu-07-00372-g004.tif"/>
</fig>
</sec>
<sec id="S3-4-3">
<title>3.3-pick_intermediates.pl</title>
<p>This script analyzes the phylogenetic tree and ancestral sequences inferred by DNAML to pick developmental intermediates that show how a known antibody of interest evolved from the inferred unmutated common ancestor. The user may either specify how many approximately equally spaced intermediates should be selected or the approximate number of amino acid changes between consecutive intermediates. The script can also identify the inferred sequence for the most recent common ancestor of multiple antibodies of interest.</p>
</sec>
<sec id="S3-4-4">
<title>3.4-collapse_minor_branches.pl</title>
<p>Often there are too many sequences (hundreds or thousands) to be clearly displayed on a phylogenetic tree. This script clusters lineage CDR3 sequences in a phylogenetically aware manner to produce a partially collapsed version of the phylogenetic tree emphasizing the major branches of the lineage. The identity threshold for clustering CDR3s and the minimum number of sequences required to define a &#x0201C;major&#x0201D; branch may be adjusted by the user. Known antibody sequences may be specified and will be displayed regardless of whether or not they are part of a major branch. The summary table will also indicate the temporal persistence of each major branch, where available. A collapsed version of the tree in Figure <xref ref-type="fig" rid="F4">4</xref>A is shown in Figure <xref ref-type="fig" rid="F4">4</xref>B.</p>
</sec>
<sec id="S3-4-5">
<title>3.5-evolutionary_rate.pl</title>
<p>This script generates an xml-formatted configuration file for BEAST2 (<xref ref-type="bibr" rid="B48">48</xref>) to calculate the evolutionary rate of an antibody lineage. DNA sequences from at least two time points are required to run this script. The script can separate antibody variable region sequences into different partitions and generate configuration files to calculate the evolutionary rates simultaneously for the V(D)J region, CDR regions, framework regions, and the first&#x02009;&#x0002B;&#x02009;second and third codon positions (<xref ref-type="bibr" rid="B2">2</xref>, <xref ref-type="bibr" rid="B12">12</xref>).</p>
</sec>
</sec>
<sec id="S3-5">
<title>Module 4: Figures and Output</title>
<p>The final module of SONAR produces figures visualizing the results of the analyses conducted by the other three modules.</p>
<sec id="S3-5-1">
<title>4.1-setup_plots.pl and 4.2-plot_histograms.R</title>
<p>These scripts plot histograms or bar charts to show the distributions of many different repertoire properties, such as transcript lengths, germline gene usage, SHM levels, and CDR3 net charge, among others. These properties may be calculated for all transcripts in the raw data, all functional transcripts (successful V and J assignment, in-frame junction, and no stop codons), unique transcripts only (as determined by the parameters provided to 2.1-calculate_id-div.pl), or a manually specified subset of transcripts. Multiple repertoire features or data from multiple samples may be plotted on a single figure, as well, and many options are provided for adjusting the appearance of the final figure. All options are provided by the user to 4.1-setup_plots.pl, which extracts and reformats the required data and then automatically calls 4.2-plot_histograms.R to plot the data and generate the final figure. Sample plots are shown in Figure <xref ref-type="fig" rid="F2">2</xref>.</p>
</sec>
<sec id="S3-5-2">
<title>4.3-plot_identity_divergence.R</title>
<p>This script uses the output of 2.1-calculate_id-div.pl to plot bulk NGS data as a heat map with the <italic>x</italic> axis corresponding to the divergence from the assigned germline V gene for each transcript and the <italic>y</italic> axis showing the full-length sequence identity to an antibody of interest. In these plots, transcripts from the same lineage as the antibody reference typically appear as clearly distinguishable islands separated from the main body of unrelated transcripts (<xref ref-type="bibr" rid="B11">11</xref>, <xref ref-type="bibr" rid="B12">12</xref>) (Figure <xref ref-type="fig" rid="F3">3</xref>A). In addition, markers can be used to indicate the positions of specific transcripts, such as those identified by Module 2 as members of the same lineage (Figure <xref ref-type="fig" rid="F3">3</xref>B). Finally, multiple longitudinal datasets can be provided to generate a single figure with a row of identity&#x02013;divergence plots showing the evolution of the repertoire over time.</p>
</sec>
<sec id="S3-5-3">
<title>4.4-display_tree.py</title>
<p>This script uses the ete2 library (<xref ref-type="bibr" rid="B49">49</xref>) to generate publication-quality images of the trees output by 3.2-run_DNAML.py or 3.4-collapse_minor_branches.pl. Each branch is colored by the birthday time point assigned by 3.1-merge_timepoints.pl. Options are provided to label both intermediates (internal nodes) and sequences (leaves/tips) of interest or to collapse specific branches of the tree. Additional options for adjusting various graphical parameters are also available. Sample trees are shown in Figure <xref ref-type="fig" rid="F4">4</xref>.</p>
</sec>
</sec>
<sec id="S3-6">
<title>Other Utility Scripts</title>
<p>A variety of additional stand-alone scripts are provided to help carry out common tasks. These include detecting frameshift mutations from pyrosequencing, subsetting sequence files, and manipulating phylogenetic trees in various ways.</p>
</sec>
<sec id="S3-7">
<title>Data Vignette</title>
<p>We have previously used earlier versions of the SONAR scripts to analyze several lineages of broadly neutralizing antibodies targeting HIV-1, including the CAP256-VRC26 lineage (<xref ref-type="bibr" rid="B11">11</xref>, <xref ref-type="bibr" rid="B39">39</xref>, <xref ref-type="bibr" rid="B41">41</xref>). The raw sequencing data for donor CAP256 are available from the NCBI Sequence Read Archive with accession number SRP034555. As a tutorial, SONAR includes the commands used to download these data and run the pipeline on them in the Docker container, along with the outputs produced.</p>
</sec>
</sec>
<sec id="S4" sec-type="discussion">
<title>Discussion</title>
<p>Here, we present an integrated pipeline for analyzing NGS data of BCR transcripts to identify and to trace the development of a specific antibody lineage across multiple time points. This pipeline has already been used successfully to investigate multiple broadly neutralizing antibody lineages against HIV-1 (<xref ref-type="bibr" rid="B11">11</xref>, <xref ref-type="bibr" rid="B12">12</xref>, <xref ref-type="bibr" rid="B39">39</xref>, <xref ref-type="bibr" rid="B41">41</xref>) and can easily be applied to other systems of interest, including antibodies against influenza virus and pathogenic autoantibodies.</p>
<p>Software for the Ontogenic aNalysis of Antibody Repertoires serves as an all-in-one solution, allowing a user to go from raw data to final analysis within a single ecosystem. With the recent proliferation of software for analyzing NGS data from BCR repertoires (<xref ref-type="bibr" rid="B55">55</xref>, <xref ref-type="bibr" rid="B56">56</xref>), several specialized programs are available for assigning exact allelic origins and recombination points (<xref ref-type="bibr" rid="B27">27</xref>, <xref ref-type="bibr" rid="B29">29</xref>). However, SONAR&#x02019;s unique strength lies in the ability to easily identify transcripts related to an antibody of interest and, especially, to integrate sequences from multiple time points. Therefore, while SONAR assigns a particular allele to each transcript based on the BLAST output, all downstream analyses group the alleles of each germline gene in order to be more inclusive. SONAR is also explicitly agnostic as to the exact recombination points and P- and N-insertions within a specific antibody sequence. Importantly, because SONAR is focused on finding transcripts related to a known antibody, this imprecision can yield better results in the description of a lineage&#x02019;s ontogeny. Moreover, by working with simple fasta-formatted sequence files, SONAR provides interoperability with these specialized tools, as well as with others devoted to dividing an entire repertoire into its component lineages [e.g., Ref. (<xref ref-type="bibr" rid="B57">57</xref>, see footnote text 1)].</p>
<p>Software for the Ontogenic aNalysis of Antibody Repertoires relies on a number of external programs and libraries, including BLAST&#x0002B;, MUSCLE, USEARCH, DNAML, and others. Because each of these may also have its own dependencies, setting up SONAR can be difficult. To increase the ease of use, we have created a fully set-up Docker image,<xref ref-type="fn" rid="fn4"><sup>4</sup></xref> which can be downloaded and run on any computer or operating system without need for installation of any additional software.</p>
<p>The current version of SONAR closely resembles that used to carry out previously described analyses (<xref ref-type="bibr" rid="B2">2</xref>, <xref ref-type="bibr" rid="B11">11</xref>, <xref ref-type="bibr" rid="B12">12</xref>) and provides a fully functional, integrated pipeline for the ontogenic analysis of antibody repertoires. In addition, SONAR remains under active development. Current focuses include a module to estimate functional selection pressure dynamics over time for antibody lineages (<xref ref-type="bibr" rid="B2">2</xref>). As we have shown that mutability and substitution bias modulate how somatic hypermutation occurs at each position in the antibody variable region (<xref ref-type="bibr" rid="B10">10</xref>), a module to characterize germline gene-specific mutational profiles from transcripts sampled by NGS would allow estimation of how likely certain mutation patterns are to be reproduced in either natural infection or vaccination. Other new functionalities are also being developed, and both bug fixes and new features will be added to the GitHub repository as they become available.</p>
</sec>
<sec id="S5">
<title>Author Contributions</title>
<p>CS, ZS, ZZ, JM, PK, and LS designed the analyses to be included in the SONAR suite. CS and ZZ built SONAR&#x02019;s underlying architecture. CS, ZS, and ZZ wrote the code. CS wrote the manuscript. All authors reviewed, commented on, and approved the manuscript.</p>
</sec>
<sec id="S6">
<title>Conflict of Interest Statement</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
</body>
<back>
<ack>
<p>We thank Batsirai Mabvakure and Dr. Cathrine Scheepers for help beta testing SONAR. Funding was provided in part by the intramural program of the Vaccine Research Center, National Institute of Allergy and Infectious Diseases, National Institutes of Health. Funding was also provided by HIVRAD grant AI104722-3 and U01 AI116086-01 to LS.</p>
</ack>
<ref-list>
<title>References</title>
<ref id="B1"><label>1</label><citation citation-type="book"><person-group person-group-type="author"><name><surname>Murphy</surname> <given-names>K</given-names></name> <name><surname>Travers</surname> <given-names>P</given-names></name> <name><surname>Walport</surname> <given-names>M</given-names></name> <name><surname>Janeway</surname> <given-names>C</given-names></name></person-group>. <source>Janeway&#x02019;s Immunobiology</source>. <publisher-loc>New York</publisher-loc>: <publisher-name>Garland Science</publisher-name> (<year>2012</year>).</citation></ref>
<ref id="B2"><label>2</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sheng</surname> <given-names>Z</given-names></name> <name><surname>Schramm</surname> <given-names>CA</given-names></name> <name><surname>Connors</surname> <given-names>M</given-names></name> <name><surname>Morris</surname> <given-names>L</given-names></name> <name><surname>Mascola</surname> <given-names>JR</given-names></name> <name><surname>Kwong</surname> <given-names>PD</given-names></name> <etal/></person-group> <article-title>Effects of Darwinian selection and mutability on rate of broadly neutralizing antibody evolution during HIV-1 infection</article-title>. <source>PLoS Comput Biol</source> (<year>2016</year>) <volume>12</volume>(<issue>5</issue>):<fpage>e1004940</fpage>.<pub-id pub-id-type="doi">10.1371/journal.pcbi.1004940</pub-id><pub-id pub-id-type="pmid">27191167</pub-id></citation></ref>
<ref id="B3"><label>3</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Vollmers</surname> <given-names>C</given-names></name> <name><surname>Sit</surname> <given-names>RV</given-names></name> <name><surname>Weinstein</surname> <given-names>JA</given-names></name> <name><surname>Dekker</surname> <given-names>CL</given-names></name> <name><surname>Quake</surname> <given-names>SR</given-names></name></person-group>. <article-title>Genetic measurement of memory B-cell recall using antibody repertoire sequencing</article-title>. <source>Proc Natl Acad Sci U S A</source> (<year>2013</year>) <volume>110</volume>(<issue>33</issue>):<fpage>13463</fpage>&#x02013;<lpage>8</lpage>.<pub-id pub-id-type="doi">10.1073/pnas.1312146110</pub-id><pub-id pub-id-type="pmid">23898164</pub-id></citation></ref>
<ref id="B4"><label>4</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Plotkin</surname> <given-names>SA</given-names></name></person-group>. <article-title>Correlates of protection induced by vaccination</article-title>. <source>Clin Vaccine Immunol</source> (<year>2010</year>) <volume>17</volume>(<issue>7</issue>):<fpage>1055</fpage>&#x02013;<lpage>65</lpage>.<pub-id pub-id-type="doi">10.1128/CVI.00131-10</pub-id><pub-id pub-id-type="pmid">20463105</pub-id></citation></ref>
<ref id="B5"><label>5</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Buss</surname> <given-names>NA</given-names></name> <name><surname>Henderson</surname> <given-names>SJ</given-names></name> <name><surname>McFarlane</surname> <given-names>M</given-names></name> <name><surname>Shenton</surname> <given-names>JM</given-names></name> <name><surname>de Haan</surname> <given-names>L</given-names></name></person-group>. <article-title>Monoclonal antibody therapeutics: history and future</article-title>. <source>Curr Opin Pharmacol</source> (<year>2012</year>) <volume>12</volume>(<issue>5</issue>):<fpage>615</fpage>&#x02013;<lpage>22</lpage>.<pub-id pub-id-type="doi">10.1016/j.coph.2012.08.001</pub-id><pub-id pub-id-type="pmid">22920732</pub-id></citation></ref>
<ref id="B6"><label>6</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Shlomchik</surname> <given-names>MJ</given-names></name> <name><surname>Craft</surname> <given-names>JE</given-names></name> <name><surname>Mamula</surname> <given-names>MJ</given-names></name></person-group>. <article-title>From T to B and back again: positive feedback in systemic autoimmune disease</article-title>. <source>Nat Rev Immunol</source> (<year>2001</year>) <volume>1</volume>(<issue>2</issue>):<fpage>147</fpage>&#x02013;<lpage>53</lpage>.<pub-id pub-id-type="doi">10.1038/35100573</pub-id><pub-id pub-id-type="pmid">11905822</pub-id></citation></ref>
<ref id="B7"><label>7</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Cheah</surname> <given-names>CY</given-names></name> <name><surname>Fowler</surname> <given-names>NH</given-names></name> <name><surname>Wang</surname> <given-names>ML</given-names></name></person-group>. <article-title>Breakthrough therapies in B-cell non-Hodgkin lymphoma</article-title>. <source>Ann Oncol</source> (<year>2016</year>) <volume>27</volume>(<issue>5</issue>):<fpage>778</fpage>&#x02013;<lpage>87</lpage>.<pub-id pub-id-type="doi">10.1093/annonc/mdw029</pub-id><pub-id pub-id-type="pmid">26802148</pub-id></citation></ref>
<ref id="B8"><label>8</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Young</surname> <given-names>RM</given-names></name> <name><surname>Shaffer</surname> <given-names>AL</given-names> <suffix>III</suffix></name> <name><surname>Phelan</surname> <given-names>JD</given-names></name> <name><surname>Staudt</surname> <given-names>LM</given-names></name></person-group>. <article-title>B-cell receptor signaling in diffuse large B-cell lymphoma</article-title>. <source>Semin Hematol</source> (<year>2015</year>) <volume>52</volume>(<issue>2</issue>):<fpage>77</fpage>&#x02013;<lpage>85</lpage>.<pub-id pub-id-type="doi">10.1053/j.seminhematol.2015.01.008</pub-id></citation></ref>
<ref id="B9"><label>9</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gorman</surname> <given-names>J</given-names></name> <name><surname>Soto</surname> <given-names>C</given-names></name> <name><surname>Yang</surname> <given-names>MM</given-names></name> <name><surname>Davenport</surname> <given-names>TM</given-names></name> <name><surname>Guttman</surname> <given-names>M</given-names></name> <name><surname>Bailer</surname> <given-names>RT</given-names></name> <etal/></person-group> <article-title>Structures of HIV-1 Env V1V2 with broadly neutralizing antibodies reveal commonalities that enable vaccine design</article-title>. <source>Nat Struct Mol Biol</source> (<year>2016</year>) <volume>23</volume>(<issue>1</issue>):<fpage>81</fpage>&#x02013;<lpage>90</lpage>.<pub-id pub-id-type="doi">10.1038/nsmb.3144</pub-id><pub-id pub-id-type="pmid">26689967</pub-id></citation></ref>
<ref id="B10"><label>10</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bonsignori</surname> <given-names>M</given-names></name> <name><surname>Zhou</surname> <given-names>T</given-names></name> <name><surname>Sheng</surname> <given-names>Z</given-names></name> <name><surname>Chen</surname> <given-names>L</given-names></name> <name><surname>Gao</surname> <given-names>F</given-names></name> <name><surname>Joyce</surname> <given-names>MG</given-names></name> <etal/></person-group> <article-title>Maturation pathway from germline to broad HIV-1 neutralizer of a CD4-mimic antibody</article-title>. <source>Cell</source> (<year>2016</year>) <volume>165</volume>(<issue>2</issue>):<fpage>449</fpage>&#x02013;<lpage>63</lpage>.<pub-id pub-id-type="doi">10.1016/j.cell.2016.02.022</pub-id><pub-id pub-id-type="pmid">26949186</pub-id></citation></ref>
<ref id="B11"><label>11</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Doria-Rose</surname> <given-names>NA</given-names></name> <name><surname>Schramm</surname> <given-names>CA</given-names></name> <name><surname>Gorman</surname> <given-names>J</given-names></name> <name><surname>Moore</surname> <given-names>PL</given-names></name> <name><surname>Bhiman</surname> <given-names>JN</given-names></name> <name><surname>DeKosky</surname> <given-names>BJ</given-names></name> <etal/></person-group> <article-title>Developmental pathway for potent V1V2-directed HIV-neutralizing antibodies</article-title>. <source>Nature</source> (<year>2014</year>) <volume>509</volume>(<issue>7498</issue>):<fpage>55</fpage>&#x02013;<lpage>62</lpage>.<pub-id pub-id-type="doi">10.1038/nature13036</pub-id><pub-id pub-id-type="pmid">24590074</pub-id></citation></ref>
<ref id="B12"><label>12</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wu</surname> <given-names>X</given-names></name> <name><surname>Zhang</surname> <given-names>Z</given-names></name> <name><surname>Schramm</surname> <given-names>CA</given-names></name> <name><surname>Joyce</surname> <given-names>MG</given-names></name> <name><surname>Kwon</surname> <given-names>YD</given-names></name> <name><surname>Zhou</surname> <given-names>T</given-names></name> <etal/></person-group> <article-title>Maturation and diversity of the VRC01-antibody lineage over 15 years of chronic HIV-1 infection</article-title>. <source>Cell</source> (<year>2015</year>) <volume>161</volume>(<issue>3</issue>):<fpage>470</fpage>&#x02013;<lpage>85</lpage>.<pub-id pub-id-type="doi">10.1016/j.cell.2015.03.004</pub-id><pub-id pub-id-type="pmid">25865483</pub-id></citation></ref>
<ref id="B13"><label>13</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rudicell</surname> <given-names>RS</given-names></name> <name><surname>Kwon</surname> <given-names>YD</given-names></name> <name><surname>Ko</surname> <given-names>SY</given-names></name> <name><surname>Pegu</surname> <given-names>A</given-names></name> <name><surname>Louder</surname> <given-names>MK</given-names></name> <name><surname>Georgiev</surname> <given-names>IS</given-names></name> <etal/></person-group> <article-title>Enhanced potency of a broadly neutralizing HIV-1 antibody in vitro improves protection against lentiviral infection in vivo</article-title>. <source>J Virol</source> (<year>2014</year>) <volume>88</volume>(<issue>21</issue>):<fpage>12669</fpage>&#x02013;<lpage>82</lpage>.<pub-id pub-id-type="doi">10.1128/JVI.02213-14</pub-id><pub-id pub-id-type="pmid">25142607</pub-id></citation></ref>
<ref id="B14"><label>14</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ledgerwood</surname> <given-names>JE</given-names></name> <name><surname>Coates</surname> <given-names>EE</given-names></name> <name><surname>Yamshchikov</surname> <given-names>G</given-names></name> <name><surname>Saunders</surname> <given-names>JG</given-names></name> <name><surname>Holman</surname> <given-names>L</given-names></name> <name><surname>Enama</surname> <given-names>ME</given-names></name> <etal/></person-group> <article-title>Safety, pharmacokinetics and neutralization of the broadly neutralizing HIV-1 human monoclonal antibody VRC01 in healthy adults</article-title>. <source>Clin Exp Immunol</source> (<year>2015</year>) <volume>182</volume>(<issue>3</issue>):<fpage>289</fpage>&#x02013;<lpage>301</lpage>.<pub-id pub-id-type="doi">10.1111/cei.12692</pub-id><pub-id pub-id-type="pmid">26332605</pub-id></citation></ref>
<ref id="B15"><label>15</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kwon</surname> <given-names>YD</given-names></name> <name><surname>Georgiev</surname> <given-names>IS</given-names></name> <name><surname>Ofek</surname> <given-names>G</given-names></name> <name><surname>Zhang</surname> <given-names>B</given-names></name> <name><surname>Asokan</surname> <given-names>M</given-names></name> <name><surname>Bailer</surname> <given-names>RT</given-names></name> <etal/></person-group> <article-title>Optimization of the solubility of HIV-1-neutralizing antibody 10E8 through somatic variation and structure-based design</article-title>. <source>J Virol</source> (<year>2016</year>) <volume>90</volume>(<issue>13</issue>):<fpage>5899</fpage>&#x02013;<lpage>914</lpage>.<pub-id pub-id-type="doi">10.1128/JVI.03246-15</pub-id><pub-id pub-id-type="pmid">27053554</pub-id></citation></ref>
<ref id="B16"><label>16</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Boyd</surname> <given-names>SD</given-names></name> <name><surname>Marshall</surname> <given-names>EL</given-names></name> <name><surname>Merker</surname> <given-names>JD</given-names></name> <name><surname>Maniar</surname> <given-names>JM</given-names></name> <name><surname>Zhang</surname> <given-names>LN</given-names></name> <name><surname>Sahaf</surname> <given-names>B</given-names></name> <etal/></person-group> <article-title>Measurement and clinical monitoring of human lymphocyte clonality by massively parallel VDJ pyrosequencing</article-title>. <source>Sci Transl Med</source> (<year>2009</year>) <volume>1</volume>(<issue>12</issue>):<fpage>12ra23</fpage>.<pub-id pub-id-type="doi">10.1126/scitranslmed.3000540</pub-id><pub-id pub-id-type="pmid">20161664</pub-id></citation></ref>
<ref id="B17"><label>17</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Shendure</surname> <given-names>J</given-names></name> <name><surname>Ji</surname> <given-names>H</given-names></name></person-group>. <article-title>Next-generation DNA sequencing</article-title>. <source>Nat Biotechnol</source> (<year>2008</year>) <volume>26</volume>(<issue>10</issue>):<fpage>1135</fpage>&#x02013;<lpage>45</lpage>.<pub-id pub-id-type="doi">10.1038/nbt1486</pub-id><pub-id pub-id-type="pmid">18846087</pub-id></citation></ref>
<ref id="B18"><label>18</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Metzker</surname> <given-names>ML</given-names></name></person-group>. <article-title>Sequencing technologies &#x02013; the next generation</article-title>. <source>Nat Rev Genet</source> (<year>2010</year>) <volume>11</volume>(<issue>1</issue>):<fpage>31</fpage>&#x02013;<lpage>46</lpage>.<pub-id pub-id-type="doi">10.1038/nrg2626</pub-id></citation></ref>
<ref id="B19"><label>19</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Six</surname> <given-names>A</given-names></name> <name><surname>Mariotti-Ferrandiz</surname> <given-names>ME</given-names></name> <name><surname>Chaara</surname> <given-names>W</given-names></name> <name><surname>Magadan</surname> <given-names>S</given-names></name> <name><surname>Pham</surname> <given-names>HP</given-names></name> <name><surname>Lefranc</surname> <given-names>MP</given-names></name> <etal/></person-group> <article-title>The past, present, and future of immune repertoire biology &#x02013; the rise of next-generation repertoire analysis</article-title>. <source>Front Immunol</source> (<year>2013</year>) <volume>4</volume>:<fpage>413</fpage>.<pub-id pub-id-type="doi">10.3389/fimmu.2013.00413</pub-id><pub-id pub-id-type="pmid">24348479</pub-id></citation></ref>
<ref id="B20"><label>20</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lefranc</surname> <given-names>MP</given-names></name> <name><surname>Giudicelli</surname> <given-names>V</given-names></name> <name><surname>Duroux</surname> <given-names>P</given-names></name> <name><surname>Jabado-Michaloud</surname> <given-names>J</given-names></name> <name><surname>Folch</surname> <given-names>G</given-names></name> <name><surname>Aouinti</surname> <given-names>S</given-names></name> <etal/></person-group> <article-title>IMGT(R), the international ImMunoGeneTics information system(R) 25 years on</article-title>. <source>Nucleic Acids Res</source> (<year>2015</year>) <volume>43</volume>(<issue>Database issue</issue>):<fpage>D413</fpage>&#x02013;<lpage>22</lpage>.<pub-id pub-id-type="doi">10.1093/nar/gku1056</pub-id></citation></ref>
<ref id="B21"><label>21</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Souto-Carneiro</surname> <given-names>MM</given-names></name> <name><surname>Longo</surname> <given-names>NS</given-names></name> <name><surname>Russ</surname> <given-names>DE</given-names></name> <name><surname>Sun</surname> <given-names>HW</given-names></name> <name><surname>Lipsky</surname> <given-names>PE</given-names></name></person-group>. <article-title>Characterization of the human Ig heavy chain antigen binding complementarity determining region 3 using a newly developed software algorithm, JOINSOLVER</article-title>. <source>J Immunol</source> (<year>2004</year>) <volume>172</volume>(<issue>11</issue>):<fpage>6790</fpage>&#x02013;<lpage>802</lpage>.<pub-id pub-id-type="doi">10.4049/jimmunol.172.11.6790</pub-id><pub-id pub-id-type="pmid">15153497</pub-id></citation></ref>
<ref id="B22"><label>22</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Russ</surname> <given-names>DE</given-names></name> <name><surname>Ho</surname> <given-names>KY</given-names></name> <name><surname>Longo</surname> <given-names>NS</given-names></name></person-group>. <article-title>HTJoinSolver: human immunoglobulin VDJ partitioning using approximate dynamic programming constrained by conserved motifs</article-title>. <source>BMC Bioinformatics</source> (<year>2015</year>) <volume>16</volume>:<fpage>170</fpage>.<pub-id pub-id-type="doi">10.1186/s12859-015-0589-x</pub-id><pub-id pub-id-type="pmid">26001675</pub-id></citation></ref>
<ref id="B23"><label>23</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ye</surname> <given-names>J</given-names></name> <name><surname>Ma</surname> <given-names>N</given-names></name> <name><surname>Madden</surname> <given-names>TL</given-names></name> <name><surname>Ostell</surname> <given-names>JM</given-names></name></person-group>. <article-title>IgBLAST: an immunoglobulin variable domain sequence analysis tool</article-title>. <source>Nucleic Acids Res</source> (<year>2013</year>) <volume>41</volume>(<issue>Web Server issue</issue>):<fpage>W34</fpage>&#x02013;<lpage>40</lpage>.<pub-id pub-id-type="doi">10.1093/nar/gkt382</pub-id><pub-id pub-id-type="pmid">23671333</pub-id></citation></ref>
<ref id="B24"><label>24</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Paciello</surname> <given-names>G</given-names></name> <name><surname>Acquaviva</surname> <given-names>A</given-names></name> <name><surname>Pighi</surname> <given-names>C</given-names></name> <name><surname>Ferrarini</surname> <given-names>A</given-names></name> <name><surname>Macii</surname> <given-names>E</given-names></name> <name><surname>Zamo</surname> <given-names>A</given-names></name> <etal/></person-group> <article-title>VDJSeq-solver: in silico V(D)J recombination detection tool</article-title>. <source>PLoS One</source> (<year>2015</year>) <volume>10</volume>(<issue>3</issue>):<fpage>e0118192</fpage>.<pub-id pub-id-type="doi">10.1371/journal.pone.0118192</pub-id><pub-id pub-id-type="pmid">25799103</pub-id></citation></ref>
<ref id="B25"><label>25</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Cortina-Ceballos</surname> <given-names>B</given-names></name> <name><surname>Godoy-Lozano</surname> <given-names>EE</given-names></name> <name><surname>Samano-Sanchez</surname> <given-names>H</given-names></name> <name><surname>Aguilar-Salgado</surname> <given-names>A</given-names></name> <name><surname>Velasco-Herrera</surname> <given-names>Mdel C</given-names></name> <name><surname>Vargas-Chavez</surname> <given-names>C</given-names></name> <etal/></person-group> <article-title>Reconstructing and mining the B cell repertoire with immune diversity</article-title>. <source>MAbs</source> (<year>2015</year>) <volume>7</volume>(<issue>3</issue>):<fpage>516</fpage>&#x02013;<lpage>24</lpage>.<pub-id pub-id-type="doi">10.1080/19420862.2015.1026502</pub-id><pub-id pub-id-type="pmid">25875140</pub-id></citation></ref>
<ref id="B26"><label>26</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>W</given-names></name> <name><surname>Du</surname> <given-names>Y</given-names></name> <name><surname>Su</surname> <given-names>Z</given-names></name> <name><surname>Wang</surname> <given-names>C</given-names></name> <name><surname>Zeng</surname> <given-names>X</given-names></name> <name><surname>Zhang</surname> <given-names>R</given-names></name> <etal/></person-group> <article-title>IMonitor: a robust pipeline for TCR and BCR repertoire analysis</article-title>. <source>Genetics</source> (<year>2015</year>) <volume>201</volume>(<issue>2</issue>):<fpage>459</fpage>&#x02013;<lpage>72</lpage>.<pub-id pub-id-type="doi">10.1534/genetics.115.176735</pub-id><pub-id pub-id-type="pmid">26297338</pub-id></citation></ref>
<ref id="B27"><label>27</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kepler</surname> <given-names>TB</given-names></name></person-group>. <article-title>Reconstructing a B-cell clonal lineage. I. Statistical inference of unobserved ancestors</article-title>. <source>F1000Res</source> (<year>2013</year>) <volume>2</volume>:<fpage>103</fpage>.<pub-id pub-id-type="doi">10.12688/f1000research.2-103.v1</pub-id><pub-id pub-id-type="pmid">24555054</pub-id></citation></ref>
<ref id="B28"><label>28</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kepler</surname> <given-names>TB</given-names></name> <name><surname>Munshaw</surname> <given-names>S</given-names></name> <name><surname>Wiehe</surname> <given-names>K</given-names></name> <name><surname>Zhang</surname> <given-names>R</given-names></name> <name><surname>Yu</surname> <given-names>JS</given-names></name> <name><surname>Woods</surname> <given-names>CW</given-names></name> <etal/></person-group> <article-title>Reconstructing a B-cell clonal lineage. II. Mutation, selection, and affinity maturation</article-title>. <source>Front Immunol</source> (<year>2014</year>) <volume>5</volume>:<fpage>170</fpage>.<pub-id pub-id-type="doi">10.3389/fimmu.2014.00170</pub-id><pub-id pub-id-type="pmid">24795717</pub-id></citation></ref>
<ref id="B29"><label>29</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ralph</surname> <given-names>DK</given-names></name> <name><surname>Matsen</surname> <given-names>FA</given-names></name></person-group>. <article-title>Consistency of VDJ rearrangement and substitution parameters enables accurate B cell receptor sequence annotation</article-title>. <source>PLoS Comput Biol</source> (<year>2016</year>) <volume>12</volume>(<issue>1</issue>):<fpage>e1004409</fpage>.<pub-id pub-id-type="doi">10.1371/journal.pcbi.1004409</pub-id><pub-id pub-id-type="pmid">26751373</pub-id></citation></ref>
<ref id="B30"><label>30</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Jackson</surname> <given-names>KJ</given-names></name> <name><surname>Liu</surname> <given-names>Y</given-names></name> <name><surname>Roskin</surname> <given-names>KM</given-names></name> <name><surname>Glanville</surname> <given-names>J</given-names></name> <name><surname>Hoh</surname> <given-names>RA</given-names></name> <name><surname>Seo</surname> <given-names>K</given-names></name> <etal/></person-group> <article-title>Human responses to influenza vaccination show seroconversion signatures and convergent antibody rearrangements</article-title>. <source>Cell Host Microbe</source> (<year>2014</year>) <volume>16</volume>(<issue>1</issue>):<fpage>105</fpage>&#x02013;<lpage>14</lpage>.<pub-id pub-id-type="doi">10.1016/j.chom.2014.05.013</pub-id><pub-id pub-id-type="pmid">24981332</pub-id></citation></ref>
<ref id="B31"><label>31</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Jiang</surname> <given-names>N</given-names></name> <name><surname>He</surname> <given-names>J</given-names></name> <name><surname>Weinstein</surname> <given-names>JA</given-names></name> <name><surname>Penland</surname> <given-names>L</given-names></name> <name><surname>Sasaki</surname> <given-names>S</given-names></name> <name><surname>He</surname> <given-names>XS</given-names></name> <etal/></person-group> <article-title>Lineage structure of the human antibody repertoire in response to influenza vaccination</article-title>. <source>Sci Transl Med</source> (<year>2013</year>) <volume>5</volume>(<issue>171</issue>):<fpage>171ra19</fpage>.<pub-id pub-id-type="doi">10.1126/scitranslmed.3004794</pub-id><pub-id pub-id-type="pmid">23390249</pub-id></citation></ref>
<ref id="B32"><label>32</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Laserson</surname> <given-names>U</given-names></name> <name><surname>Vigneault</surname> <given-names>F</given-names></name> <name><surname>Gadala-Maria</surname> <given-names>D</given-names></name> <name><surname>Yaari</surname> <given-names>G</given-names></name> <name><surname>Uduman</surname> <given-names>M</given-names></name> <name><surname>Vander Heiden</surname> <given-names>JA</given-names></name> <etal/></person-group> <article-title>High-resolution antibody dynamics of vaccine-induced immune responses</article-title>. <source>Proc Natl Acad Sci U S A</source> (<year>2014</year>) <volume>111</volume>(<issue>13</issue>):<fpage>4928</fpage>&#x02013;<lpage>33</lpage>.<pub-id pub-id-type="doi">10.1073/pnas.1323862111</pub-id><pub-id pub-id-type="pmid">24639495</pub-id></citation></ref>
<ref id="B33"><label>33</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hershberg</surname> <given-names>U</given-names></name> <name><surname>Luning Prak</surname> <given-names>ET</given-names></name></person-group>. <article-title>The analysis of clonal expansions in normal and autoimmune B cell repertoires</article-title>. <source>Philos Trans R Soc Lond B Biol Sci</source> (<year>2015</year>) <volume>370</volume>(<issue>1676</issue>):<fpage>20140239</fpage>.<pub-id pub-id-type="doi">10.1098/rstb.2014.0239</pub-id><pub-id pub-id-type="pmid">26194753</pub-id></citation></ref>
<ref id="B34"><label>34</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Briney</surname> <given-names>B</given-names></name> <name><surname>Le</surname> <given-names>K</given-names></name> <name><surname>Zhu</surname> <given-names>J</given-names></name> <name><surname>Burton</surname> <given-names>DR</given-names></name></person-group>. <article-title>Clonify: unseeded antibody lineage assignment from next-generation sequencing data</article-title>. <source>Sci Rep</source> (<year>2016</year>) <volume>6</volume>:<fpage>23901</fpage>.<pub-id pub-id-type="doi">10.1038/srep23901</pub-id><pub-id pub-id-type="pmid">27102563</pub-id></citation></ref>
<ref id="B35"><label>35</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wu</surname> <given-names>X</given-names></name> <name><surname>Zhou</surname> <given-names>T</given-names></name> <name><surname>Zhu</surname> <given-names>J</given-names></name> <name><surname>Zhang</surname> <given-names>B</given-names></name> <name><surname>Georgiev</surname> <given-names>I</given-names></name> <name><surname>Wang</surname> <given-names>C</given-names></name> <etal/></person-group> <article-title>Focused evolution of HIV-1 neutralizing antibodies revealed by structures and deep sequencing</article-title>. <source>Science</source> (<year>2011</year>) <volume>333</volume>(<issue>6049</issue>):<fpage>1593</fpage>&#x02013;<lpage>602</lpage>.<pub-id pub-id-type="doi">10.1126/science.1207532</pub-id><pub-id pub-id-type="pmid">21835983</pub-id></citation></ref>
<ref id="B36"><label>36</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhu</surname> <given-names>J</given-names></name> <name><surname>O&#x02019;Dell</surname> <given-names>S</given-names></name> <name><surname>Ofek</surname> <given-names>G</given-names></name> <name><surname>Pancera</surname> <given-names>M</given-names></name> <name><surname>Wu</surname> <given-names>X</given-names></name> <name><surname>Zhang</surname> <given-names>B</given-names></name> <etal/></person-group> <article-title>Somatic populations of PGT135-137 HIV-1-neutralizing antibodies identified by 454 pyrosequencing and bioinformatics</article-title>. <source>Front Microbiol</source> (<year>2012</year>) <volume>3</volume>:<fpage>315</fpage>.<pub-id pub-id-type="doi">10.3389/fmicb.2012.00315</pub-id><pub-id pub-id-type="pmid">23024643</pub-id></citation></ref>
<ref id="B37"><label>37</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Barak</surname> <given-names>M</given-names></name> <name><surname>Zuckerman</surname> <given-names>NS</given-names></name> <name><surname>Edelman</surname> <given-names>H</given-names></name> <name><surname>Unger</surname> <given-names>R</given-names></name> <name><surname>Mehr</surname> <given-names>R</given-names></name></person-group>. <article-title>IgTree: creating immunoglobulin variable region gene lineage trees</article-title>. <source>J Immunol Methods</source> (<year>2008</year>) <volume>338</volume>(<issue>1&#x02013;2</issue>):<fpage>67</fpage>&#x02013;<lpage>74</lpage>.<pub-id pub-id-type="doi">10.1016/j.jim.2008.06.006</pub-id><pub-id pub-id-type="pmid">18706908</pub-id></citation></ref>
<ref id="B38"><label>38</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lees</surname> <given-names>WD</given-names></name> <name><surname>Shepherd</surname> <given-names>AJ</given-names></name></person-group>. <article-title>Utilities for high-throughput analysis of B-cell clonal lineages</article-title>. <source>J Immunol Res</source> (<year>2015</year>) <volume>2015</volume>:<fpage>323506</fpage>.<pub-id pub-id-type="doi">10.1155/2015/323506</pub-id><pub-id pub-id-type="pmid">26527585</pub-id></citation></ref>
<ref id="B39"><label>39</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bhiman</surname> <given-names>JN</given-names></name> <name><surname>Anthony</surname> <given-names>C</given-names></name> <name><surname>Doria-Rose</surname> <given-names>NA</given-names></name> <name><surname>Karimanzira</surname> <given-names>O</given-names></name> <name><surname>Schramm</surname> <given-names>CA</given-names></name> <name><surname>Khoza</surname> <given-names>T</given-names></name> <etal/></person-group> <article-title>Viral variants that initiate and drive maturation of V1V2-directed HIV-1 broadly neutralizing antibodies</article-title>. <source>Nat Med</source> (<year>2015</year>) <volume>21</volume>(<issue>11</issue>):<fpage>1332</fpage>&#x02013;<lpage>6</lpage>.<pub-id pub-id-type="doi">10.1038/nm.3963</pub-id><pub-id pub-id-type="pmid">26457756</pub-id></citation></ref>
<ref id="B40"><label>40</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Liao</surname> <given-names>HX</given-names></name> <name><surname>Lynch</surname> <given-names>R</given-names></name> <name><surname>Zhou</surname> <given-names>T</given-names></name> <name><surname>Gao</surname> <given-names>F</given-names></name> <name><surname>Alam</surname> <given-names>SM</given-names></name> <name><surname>Boyd</surname> <given-names>SD</given-names></name> <etal/></person-group> <article-title>Co-evolution of a broadly neutralizing HIV-1 antibody and founder virus</article-title>. <source>Nature</source> (<year>2013</year>) <volume>496</volume>(<issue>7446</issue>):<fpage>469</fpage>&#x02013;<lpage>76</lpage>.<pub-id pub-id-type="doi">10.1038/nature12053</pub-id><pub-id pub-id-type="pmid">23552890</pub-id></citation></ref>
<ref id="B41"><label>41</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Doria-Rose</surname> <given-names>NA</given-names></name> <name><surname>Bhiman</surname> <given-names>JN</given-names></name> <name><surname>Roark</surname> <given-names>RS</given-names></name> <name><surname>Schramm</surname> <given-names>CA</given-names></name> <name><surname>Gorman</surname> <given-names>J</given-names></name> <name><surname>Chuang</surname> <given-names>GY</given-names></name> <etal/></person-group> <article-title>New member of the V1V2-directed CAP256-VRC26 lineage that shows increased breadth and exceptional potency</article-title>. <source>J Virol</source> (<year>2016</year>) <volume>90</volume>(<issue>1</issue>):<fpage>76</fpage>&#x02013;<lpage>91</lpage>.<pub-id pub-id-type="doi">10.1128/JVI.01791-15</pub-id></citation></ref>
<ref id="B42"><label>42</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Cock</surname> <given-names>PJ</given-names></name> <name><surname>Antao</surname> <given-names>T</given-names></name> <name><surname>Chang</surname> <given-names>JT</given-names></name> <name><surname>Chapman</surname> <given-names>BA</given-names></name> <name><surname>Cox</surname> <given-names>CJ</given-names></name> <name><surname>Dalke</surname> <given-names>A</given-names></name> <etal/></person-group> <article-title>BioPython: freely available Python tools for computational molecular biology and bioinformatics</article-title>. <source>Bioinformatics</source> (<year>2009</year>) <volume>25</volume>(<issue>11</issue>):<fpage>1422</fpage>&#x02013;<lpage>3</lpage>.<pub-id pub-id-type="doi">10.1093/bioinformatics/btp163</pub-id><pub-id pub-id-type="pmid">19304878</pub-id></citation></ref>
<ref id="B43"><label>43</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Stajich</surname> <given-names>JE</given-names></name> <name><surname>Block</surname> <given-names>D</given-names></name> <name><surname>Boulez</surname> <given-names>K</given-names></name> <name><surname>Brenner</surname> <given-names>SE</given-names></name> <name><surname>Chervitz</surname> <given-names>SA</given-names></name> <name><surname>Dagdigian</surname> <given-names>C</given-names></name> <etal/></person-group> <article-title>The BioPerl toolkit: Perl modules for the life sciences</article-title>. <source>Genome Res</source> (<year>2002</year>) <volume>12</volume>(<issue>10</issue>):<fpage>1611</fpage>&#x02013;<lpage>8</lpage>.<pub-id pub-id-type="doi">10.1101/gr.361602</pub-id><pub-id pub-id-type="pmid">12368254</pub-id></citation></ref>
<ref id="B44"><label>44</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Camacho</surname> <given-names>C</given-names></name> <name><surname>Coulouris</surname> <given-names>G</given-names></name> <name><surname>Avagyan</surname> <given-names>V</given-names></name> <name><surname>Ma</surname> <given-names>N</given-names></name> <name><surname>Papadopoulos</surname> <given-names>J</given-names></name> <name><surname>Bealer</surname> <given-names>K</given-names></name> <etal/></person-group> <article-title>BLAST&#x0002B;: architecture and applications</article-title>. <source>BMC Bioinformatics</source> (<year>2009</year>) <volume>10</volume>:<fpage>421</fpage>.<pub-id pub-id-type="doi">10.1186/1471-2105-10-421</pub-id><pub-id pub-id-type="pmid">20003500</pub-id></citation></ref>
<ref id="B45"><label>45</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Edgar</surname> <given-names>RC</given-names></name></person-group>. <article-title>Search and clustering orders of magnitude faster than BLAST</article-title>. <source>Bioinformatics</source> (<year>2010</year>) <volume>26</volume>(<issue>19</issue>):<fpage>2460</fpage>&#x02013;<lpage>1</lpage>.<pub-id pub-id-type="doi">10.1093/bioinformatics/btq461</pub-id><pub-id pub-id-type="pmid">20709691</pub-id></citation></ref>
<ref id="B46"><label>46</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Edgar</surname> <given-names>RC</given-names></name></person-group>. <article-title>MUSCLE: multiple sequence alignment with high accuracy and high throughput</article-title>. <source>Nucleic Acids Res</source> (<year>2004</year>) <volume>32</volume>(<issue>5</issue>):<fpage>1792</fpage>&#x02013;<lpage>7</lpage>.<pub-id pub-id-type="doi">10.1093/nar/gkh340</pub-id><pub-id pub-id-type="pmid">15034147</pub-id></citation></ref>
<ref id="B47"><label>47</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Felsenstein</surname> <given-names>J</given-names></name> <name><surname>Churchill</surname> <given-names>GA</given-names></name></person-group>. <article-title>A hidden Markov model approach to variation among sites in rate of evolution</article-title>. <source>Mol Biol Evol</source> (<year>1996</year>) <volume>13</volume>(<issue>1</issue>):<fpage>93</fpage>&#x02013;<lpage>104</lpage>.<pub-id pub-id-type="doi">10.1093/oxfordjournals.molbev.a025575</pub-id><pub-id pub-id-type="pmid">8583911</pub-id></citation></ref>
<ref id="B48"><label>48</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bouckaert</surname> <given-names>R</given-names></name> <name><surname>Heled</surname> <given-names>J</given-names></name> <name><surname>Kuhnert</surname> <given-names>D</given-names></name> <name><surname>Vaughan</surname> <given-names>T</given-names></name> <name><surname>Wu</surname> <given-names>CH</given-names></name> <name><surname>Xie</surname> <given-names>D</given-names></name> <etal/></person-group> <article-title>BEAST 2: a software platform for Bayesian evolutionary analysis</article-title>. <source>PLoS Comput Biol</source> (<year>2014</year>) <volume>10</volume>(<issue>4</issue>):<fpage>e1003537</fpage>.<pub-id pub-id-type="doi">10.1371/journal.pcbi.1003537</pub-id><pub-id pub-id-type="pmid">24722319</pub-id></citation></ref>
<ref id="B49"><label>49</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Huerta-Cepas</surname> <given-names>J</given-names></name> <name><surname>Dopazo</surname> <given-names>J</given-names></name> <name><surname>Gabaldon</surname> <given-names>T</given-names></name></person-group>. <article-title>ETE: a Python environment for tree exploration</article-title>. <source>BMC Bioinformatics</source> (<year>2010</year>) <volume>11</volume>:<fpage>24</fpage>.<pub-id pub-id-type="doi">10.1186/1471-2105-11-24</pub-id><pub-id pub-id-type="pmid">20070885</pub-id></citation></ref>
<ref id="B50"><label>50</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Scheepers</surname> <given-names>C</given-names></name> <name><surname>Shrestha</surname> <given-names>RK</given-names></name> <name><surname>Lambson</surname> <given-names>BE</given-names></name> <name><surname>Jackson</surname> <given-names>KJ</given-names></name> <name><surname>Wright</surname> <given-names>IA</given-names></name> <name><surname>Naicker</surname> <given-names>D</given-names></name> <etal/></person-group> <article-title>Ability to develop broadly neutralizing HIV-1 antibodies is not restricted by the germline Ig gene repertoire</article-title>. <source>J Immunol</source> (<year>2015</year>) <volume>194</volume>(<issue>9</issue>):<fpage>4371</fpage>&#x02013;<lpage>8</lpage>.<pub-id pub-id-type="doi">10.4049/jimmunol.1500118</pub-id><pub-id pub-id-type="pmid">25825450</pub-id></citation></ref>
<ref id="B51"><label>51</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhu</surname> <given-names>J</given-names></name> <name><surname>Ofek</surname> <given-names>G</given-names></name> <name><surname>Yang</surname> <given-names>Y</given-names></name> <name><surname>Zhang</surname> <given-names>B</given-names></name> <name><surname>Louder</surname> <given-names>MK</given-names></name> <name><surname>Lu</surname> <given-names>G</given-names></name> <etal/></person-group> <article-title>Mining the antibodyome for HIV-1-neutralizing antibodies with next-generation sequencing and phylogenetic pairing of heavy/light chains</article-title>. <source>Proc Natl Acad Sci U S A</source> (<year>2013</year>) <volume>110</volume>(<issue>16</issue>):<fpage>6470</fpage>&#x02013;<lpage>5</lpage>.<pub-id pub-id-type="doi">10.1073/pnas.1219320110</pub-id><pub-id pub-id-type="pmid">23536288</pub-id></citation></ref>
<ref id="B52"><label>52</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sievers</surname> <given-names>F</given-names></name> <name><surname>Wilm</surname> <given-names>A</given-names></name> <name><surname>Dineen</surname> <given-names>D</given-names></name> <name><surname>Gibson</surname> <given-names>TJ</given-names></name> <name><surname>Karplus</surname> <given-names>K</given-names></name> <name><surname>Li</surname> <given-names>W</given-names></name> <etal/></person-group> <article-title>Fast, scalable generation of high-quality protein multiple sequence alignments using Clustal Omega</article-title>. <source>Mol Syst Biol</source> (<year>2011</year>) <volume>7</volume>:<fpage>539</fpage>.<pub-id pub-id-type="doi">10.1038/msb.2011.75</pub-id><pub-id pub-id-type="pmid">21988835</pub-id></citation></ref>
<ref id="B53"><label>53</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Katoh</surname> <given-names>K</given-names></name> <name><surname>Standley</surname> <given-names>DM</given-names></name></person-group>. <article-title>MAFFT multiple sequence alignment software version 7: improvements in performance and usability</article-title>. <source>Mol Biol Evol</source> (<year>2013</year>) <volume>30</volume>(<issue>4</issue>):<fpage>772</fpage>&#x02013;<lpage>80</lpage>.<pub-id pub-id-type="doi">10.1093/molbev/mst010</pub-id><pub-id pub-id-type="pmid">23329690</pub-id></citation></ref>
<ref id="B54"><label>54</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhu</surname> <given-names>J</given-names></name> <name><surname>Wu</surname> <given-names>X</given-names></name> <name><surname>Zhang</surname> <given-names>B</given-names></name> <name><surname>McKee</surname> <given-names>K</given-names></name> <name><surname>O&#x02019;Dell</surname> <given-names>S</given-names></name> <name><surname>Soto</surname> <given-names>C</given-names></name> <etal/></person-group> <article-title>De novo identification of VRC01 class HIV-1-neutralizing antibodies by next-generation sequencing of B-cell transcripts</article-title>. <source>Proc Natl Acad Sci U S A</source> (<year>2013</year>) <volume>110</volume>(<issue>43</issue>):<fpage>E4088</fpage>&#x02013;<lpage>97</lpage>.<pub-id pub-id-type="doi">10.1073/pnas.1306262110</pub-id><pub-id pub-id-type="pmid">24106303</pub-id></citation></ref>
<ref id="B55"><label>55</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Georgiou</surname> <given-names>G</given-names></name> <name><surname>Ippolito</surname> <given-names>GC</given-names></name> <name><surname>Beausang</surname> <given-names>J</given-names></name> <name><surname>Busse</surname> <given-names>CE</given-names></name> <name><surname>Wardemann</surname> <given-names>H</given-names></name> <name><surname>Quake</surname> <given-names>SR</given-names></name></person-group>. <article-title>The promise and challenge of high-throughput sequencing of the antibody repertoire</article-title>. <source>Nat Biotechnol</source> (<year>2014</year>) <volume>32</volume>(<issue>2</issue>):<fpage>158</fpage>&#x02013;<lpage>68</lpage>.<pub-id pub-id-type="doi">10.1038/nbt.2782</pub-id><pub-id pub-id-type="pmid">24441474</pub-id></citation></ref>
<ref id="B56"><label>56</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yaari</surname> <given-names>G</given-names></name> <name><surname>Kleinstein</surname> <given-names>SH</given-names></name></person-group>. <article-title>Practical guidelines for B-cell receptor repertoire sequencing analysis</article-title>. <source>Genome Med</source> (<year>2015</year>) <volume>7</volume>(<issue>1</issue>):<fpage>121</fpage>.<pub-id pub-id-type="doi">10.1186/s13073-015-0243-2</pub-id><pub-id pub-id-type="pmid">26589402</pub-id></citation></ref>
<ref id="B57"><label>57</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Jardine</surname> <given-names>JG</given-names></name> <name><surname>Kulp</surname> <given-names>DW</given-names></name> <name><surname>Havenar-Daughton</surname> <given-names>C</given-names></name> <name><surname>Sarkar</surname> <given-names>A</given-names></name> <name><surname>Briney</surname> <given-names>B</given-names></name> <name><surname>Sok</surname> <given-names>D</given-names></name> <etal/></person-group> <article-title>HIV-1 broadly neutralizing antibody precursor B cells revealed by germline-targeting immunogen</article-title>. <source>Science</source> (<year>2016</year>) <volume>351</volume>(<issue>6280</issue>):<fpage>1458</fpage>&#x02013;<lpage>63</lpage>.<pub-id pub-id-type="doi">10.1126/science.aad9195</pub-id><pub-id pub-id-type="pmid">27013733</pub-id></citation></ref>
</ref-list>
<fn-group>
<fn id="fn1"><p><sup>1</sup>Ralph DK, Matsen FA. <italic>Likelihood-Based Inference of B-Cell Clonal Families</italic>. (2016). In press. Available from: <uri xlink:href="https://arxiv.org/abs/1603.08127">https://arxiv.org/abs/1603.08127</uri></p></fn>
<fn id="fn2"><p><sup>2</sup><uri xlink:href="http://hannonlab.cshl.edu/fastx_toolkit/">http://hannonlab.cshl.edu/fastx_toolkit/</uri></p></fn>
<fn id="fn3"><p><sup>3</sup><uri xlink:href="http://www.docopt.org">http://www.docopt.org</uri></p></fn>
<fn id="fn4"><p><sup>4</sup><uri xlink:href="https://hub.docker.com/r/scharch/sonar/">https://hub.docker.com/r/scharch/sonar/</uri></p></fn>
</fn-group>
</back>
</article>
