<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.3" xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Bioinform.</journal-id>
<journal-title>Frontiers in Bioinformatics</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Bioinform.</abbrev-journal-title>
<issn pub-type="epub">2673-7647</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">764619</article-id>
<article-id pub-id-type="doi">10.3389/fbinf.2022.764619</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Bioinformatics</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>BioViz <italic>Connect</italic>: Web Application Linking CyVerse Cloud Resources to Genomic Visualization in the Integrated Genome Browser</article-title>
<alt-title alt-title-type="left-running-head">Raveendran et al.</alt-title>
<alt-title alt-title-type="right-running-head">Linking IGB and CyVerse</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Raveendran</surname>
<given-names>Karthik</given-names>
</name>
<xref ref-type="fn" rid="fn1">
<sup>&#x2020;</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1505819/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Freese</surname>
<given-names>Nowlan H.</given-names>
</name>
<xref ref-type="fn" rid="fn1">
<sup>&#x2020;</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1354564/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Kintali</surname>
<given-names>Chaitanya</given-names>
</name>
<uri xlink:href="https://loop.frontiersin.org/people/1808217/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Tiwari</surname>
<given-names>Srishti</given-names>
</name>
<uri xlink:href="https://loop.frontiersin.org/people/1803998/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Bole</surname>
<given-names>Pawan</given-names>
</name>
<uri xlink:href="https://loop.frontiersin.org/people/1813503/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Dias</surname>
<given-names>Chester</given-names>
</name>
<uri xlink:href="https://loop.frontiersin.org/people/1805199/overview"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Loraine</surname>
<given-names>Ann E.</given-names>
</name>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/42986/overview"/>
</contrib>
</contrib-group>
<aff>
<institution>Department of Bioinformatics and Genomics</institution>, <institution>University of North Carolina at Charlotte</institution>, <addr-line>Charlotte</addr-line>, <addr-line>NC</addr-line>, <country>United States</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1272840/overview">Jim Procter</ext-link>, University of Dundee, United Kingdom</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1139072/overview">William C. Ray</ext-link>, Nationwide Children&#x2019;s Hospital, United States</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/541194/overview">Ram Vinay Pandey</ext-link>, Karolinska University Hospital, Sweden</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Ann E. Loraine, <email>aloraine@uncc.edu</email>
</corresp>
<fn fn-type="equal" id="fn1">
<label>
<sup>&#x2020;</sup>
</label>
<p>These authors have contributed equally to this work</p>
</fn>
<fn fn-type="other">
<p>This article was submitted to Data Visualization, a section of the journal Frontiers in Bioinformatics</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>23</day>
<month>05</month>
<year>2022</year>
</pub-date>
<pub-date pub-type="collection">
<year>2022</year>
</pub-date>
<volume>2</volume>
<elocation-id>764619</elocation-id>
<history>
<date date-type="received">
<day>25</day>
<month>08</month>
<year>2021</year>
</date>
<date date-type="accepted">
<day>28</day>
<month>04</month>
<year>2022</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2022 Raveendran, Freese, Kintali, Tiwari, Bole, Dias and Loraine.</copyright-statement>
<copyright-year>2022</copyright-year>
<copyright-holder>Raveendran, Freese, Kintali, Tiwari, Bole, Dias and Loraine</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>Genomics researchers do better work when they can interactively explore and visualize data. Due to the vast size of experimental datasets, researchers are increasingly using powerful, cloud-based systems to process and analyze data. These remote systems, called science gateways, offer user-friendly, Web-based access to high performance computing and storage resources, but typically lack interactive visualization capability. In this paper, we present BioViz <italic>Connect</italic>, a middleware Web application that links CyVerse science gateway resources to the Integrated Genome Browser (IGB), a highly interactive native application implemented in Java that runs on the user&#x2019;s personal computer. Using BioViz <italic>Connect</italic>, users can 1) stream data from the CyVerse data store into IGB for visualization, 2) improve the IGB user experience for themselves and others by adding IGB specific metadata to CyVerse data files, including genome version and track appearance, and 3) run compute-intensive visual analytics functions on CyVerse infrastructure to create new datasets for visualization in IGB or other applications. To demonstrate how BioViz <italic>Connect</italic> facilitates interactive data visualization, we describe an example RNA-Seq data analysis investigating how heat and desiccation stresses affect gene expression in the model plant <italic>Arabidopsis thaliana</italic>. The RNA-Seq use case illustrates how interactive visualization with IGB can help a user identify problematic experimental samples, sanity-check results using a positive control, and create new data files for interactive visualization in IGB (or other tools) using a Docker image deployed to CyVerse <italic>via</italic> the Terrain API. Lastly, we discuss limitations of the technologies used and suggest opportunities for future work. 
BioViz <italic>Connect</italic> is available from <ext-link ext-link-type="uri" xlink:href="https://bioviz.org/">https://bioviz.org</ext-link>.</p>
</abstract>
<kwd-group>
<kwd>SR45a</kwd>
<kwd>AT1G07350</kwd>
<kwd>Arabidopsis</kwd>
<kwd>abiotic stress</kwd>
<kwd>Integrated Genome Browser</kwd>
<kwd>CyVerse</kwd>
<kwd>visualization</kwd>
<kwd>Terrain API</kwd>
</kwd-group>
<contract-sponsor id="cn001">National Institute of General Medical Sciences<named-content content-type="fundref-id">10.13039/100000057</named-content>
</contract-sponsor>
</article-meta>
</front>
<body>
<sec id="s1">
<title>Introduction</title>
<p>Science gateways are Web sites that implement user-friendly interfaces to high performance computing and storage systems (<xref ref-type="bibr" rid="B30">Wilkins-Diehr et al., 2008</xref>). Science gateways typically assemble and curate discipline-specific, command-line, Unix-based tools within a single, easy-to-use interface, enabling users to run compute-intensive processing on datasets too large for a personal computer (<xref ref-type="bibr" rid="B7">Giardine et al., 2005</xref>; <xref ref-type="bibr" rid="B8">Goff et al., 2011</xref>; <xref ref-type="bibr" rid="B21">Merchant et al., 2016</xref>). In a typical use case, domain researchers upload their &#x201c;raw&#x201d; (unprocessed) data to the gateway site and then operate the gateway&#x2019;s Web-based interface to create custom processing and analysis pipelines, where a pipeline is defined as tasks performed in sequence by non-interactive tools which emit and consume well-understood file types and formats. Common pipeline tasks in genomics include aligning RNA-Seq sequences onto a reference genome to produce BAM (binary alignment) format files (<xref ref-type="bibr" rid="B18">Li et al., 2009</xref>), generating scaled RNA-Seq coverage graphs from the &#x201c;BAM&#x201d; files using tools such as deepTools bamCoverage (<xref ref-type="bibr" rid="B25">Ramirez et al., 2016</xref>), or searching promoter regions for sequence motifs common to sets of similarly regulated genes using tools such as DREME (<xref ref-type="bibr" rid="B1">Bailey, 2011</xref>).</p>
<p>A science gateway aims to provide a single point of access for tools needed to process and analyze data from a research project. However, native visualization tools with their own graphical user interfaces separate from a Web browser are difficult to use with Web-based science gateway systems. The Integrated Genome Browser from <ext-link ext-link-type="uri" xlink:href="http://BioViz.org">BioViz.org</ext-link> (<xref ref-type="bibr" rid="B22">Nicol et al., 2009</xref>; <xref ref-type="bibr" rid="B6">Freese et al., 2016</xref>) and the Broad Institute&#x2019;s Integrative Genomics Viewer (<xref ref-type="bibr" rid="B27">Robinson et al., 2011</xref>) exemplify this problem. Both tools require that data files reside on the user&#x2019;s local file system or that they be accessible <italic>via</italic> HTTP (hypertext transfer protocol) and addressable <italic>via</italic> a file-specific URL (Uniform Resource Locator). If the gateway system does not allow URL-based access to data, then users must download the data files onto their local computer file system, which may not be practical or allowed.</p>
<p>Related problems confront visualization systems implemented as Web applications, deployed on Web hosts and not the user&#x2019;s local computer. Using Web applications to visualize data can be even more challenging for users, because these applications often require hard-to-set-up data storage and delivery mechanisms specialized to the application. To view one&#x2019;s data using the Web-based UCSC Genome Browser software, for example, users can either deploy their own copy of the software, which is difficult, or they can instead set up a UCSC Track Hub server, which is less technically challenging but nonetheless requires Track Hub-specific meta-data files to be created and configured (<xref ref-type="bibr" rid="B26">Raney et al., 2014</xref>). Similarly, using the JBrowse Web-based genome browser requires deploying data in JBrowse-compatible formats (<xref ref-type="bibr" rid="B2">Buels et al., 2016</xref>).</p>
<p>Another typical requirement for science gateways is extensibility, meaning they require a way for gateway developers or users to add new tools to the system to accommodate or even potentiate new directions for research. The CyVerse science gateway, the focus of this article, supports extensibility by allowing developers to create and deploy CyVerse Apps, which are user-contributed container images that run within a CyVerse-provided container environment (<xref ref-type="bibr" rid="B3">Devisetty et al., 2016</xref>). Users create containers using Docker and then contribute their container image along with metadata specifying input parameters and accepted data types to CyVerse. Once accepted and deployed, the container is configured to run as an asynchronous &#x201c;job&#x201d; within the CyVerse infrastructure <italic>via</italic> a queuing system. Thus, Apps run non-interactively and therefore are not well-suited to providing interactive, exploratory visualization. However, these Apps do provide a means to create new input data for visualization, as we explore here.</p>
<p>In this paper, we introduce BioViz <italic>Connect</italic>, a Web application that overcomes limitations described above to add genome visualization capability to the CyVerse science gateway system. Previously called iPlant, the CyVerse science gateway is a United States National Science Foundation funded cyberinfrastructure project with the aim of providing computational resources for life sciences researchers (<xref ref-type="bibr" rid="B8">Goff et al., 2011</xref>; <xref ref-type="bibr" rid="B21">Merchant et al., 2016</xref>). We chose to work with CyVerse in this study because it features a rich Application Programming Interface (API), the Terrain REST API, that supports secure computational access to CyVerse data storage and analysis resources.</p>
<p>Using this API, we implemented a new visualization-focused interface to these resources, called BioViz <italic>Connect</italic>, using the Integrated Genome Browser (IGB) as the demonstration application. We selected IGB because it offers one of the richest feature sets for visual analysis in genomics [for descriptions of IGB functionality, see (<xref ref-type="bibr" rid="B22">Nicol et al., 2009</xref>; <xref ref-type="bibr" rid="B10">Gulledge et al., 2014</xref>; <xref ref-type="bibr" rid="B19">Loraine et al., 2015</xref>; <xref ref-type="bibr" rid="B6">Freese et al., 2016</xref>; <xref ref-type="bibr" rid="B20">Mall et al., 2016</xref>)] and because we are members of the core IGB development team. Therefore, we possessed insider&#x2019;s knowledge of the featured visualization application that allowed us to modify IGB as needed for the project.</p>
<p>BioViz <italic>Connect</italic> enables users of Integrated Genome Browser to visually analyze their CyVerse data without having to download entire files to their local computer or migrate their data into application-specific data stores. BioViz <italic>Connect</italic> lets users annotate their data sets with metadata, which control how the data will look when imported into IGB and also indicate the genome version referenced in the data. Finally, BioViz <italic>Connect</italic> lets users run compute-intensive visual analytics algorithms, implemented as CyVerse Apps.</p>
<p>In the following sections, we describe how BioViz <italic>Connect</italic> is implemented, explaining the technology stack used and how BioViz <italic>Connect</italic> interacts with the CyVerse science gateway resources <italic>via</italic> its Terrain API. Next, we describe how BioViz <italic>Connect</italic> enables flow of data into the IGB desktop software by activating a REST API endpoint residing in IGB itself. To illustrate the functionality, we describe an example use case scenario for BioViz <italic>Connect</italic> in which a hypothetical analyst uses visualization and visual analytics tools within IGB in conjunction with their CyVerse account to quality-check and analyze an RNA-Seq data set from <italic>Arabidopsis thaliana</italic> plants undergoing desiccation and heat stresses. Lastly, we discuss insights gained from implementing BioViz <italic>Connect</italic>, describe limitations of the technology used, and propose how these limitations might be overcome. BioViz <italic>Connect</italic> represents a next step toward building integrated, user-friendly computational environments that blend powerful local tools like IGB with even more powerful remote infrastructures like CyVerse, creating new possibilities for users to discover biologically meaningful features in data while avoiding artifacts.</p>
</sec>
<sec id="s2">
<title>Design and Implementation Details</title>
<p>In the following sections, we describe technical aspects of how BioViz <italic>Connect</italic> is implemented, while also describing user interface design choices intended to improve both usability and transparency for users.</p>
<sec id="s2-1">
<title>BioViz <italic>Connect</italic> Client and Server-Side Design</title>
<p>BioViz <italic>Connect</italic> consists of two parts: a JavaScript-based user interface that runs in a Web browser and a server-side application that manages authentication and communication with Terrain API endpoints. The user interface code on the client-side is implemented using HTML5, CSS, Bootstrap 4.3.1, JavaScript, and jQuery 1.10.2. The server-side code is implemented in python3 using the Django web application framework (<xref ref-type="fig" rid="F1">Figure 1</xref>). The currently available production instance of BioViz <italic>Connect</italic> is deployed on an Ubuntu 18.04 system and hosted using the apache2 Web server software as a reverse proxy. BioViz <italic>Connect</italic> code is open source and available from <ext-link ext-link-type="uri" xlink:href="https://bitbucket.org/lorainelab/bioviz-connect">https://bitbucket.org/lorainelab/bioviz-connect</ext-link>.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>Diagram illustrating local client and remote server-side design of BioViz <italic>Connect</italic>. After users log in using their CyVerse user credentials, the BioViz <italic>Connect</italic> Web interface appears in their Web browser. Clicking the &#x201c;View in IGB&#x201d; button causes the browser to make HTTP requests to a localhost REST endpoint within IGB. These HTTP requests include information IGB uses to issue new HTTP requests for data resources. The BioViz <italic>Connect</italic> Web interface is populated <italic>via</italic> HTTP requests made to a remote host running the Apache Web server, which forwards these requests (using a reverse proxy mechanism) to the BioViz <italic>Connect</italic> Web application, implemented using the Django Web application framework. BioViz <italic>Connect</italic> then translates and forwards these user requests <italic>via</italic> HTTP to REST API endpoints that are part of the Terrain API, which manages interactions with the CyVerse cloud.</p>
</caption>
<graphic xlink:href="fbinf-02-764619-g001.tif"/>
</fig>
</sec>
<sec id="s2-2">
<title>User and Password Management</title>
<p>To use BioViz <italic>Connect</italic>, users must first obtain a CyVerse Discovery Environment account by registering at <ext-link ext-link-type="uri" xlink:href="https://user.cyverse.org/register">https://user.cyverse.org/register</ext-link> (CyVerse, RRID: SCR_014531). At the time of this writing, there is no charge for this account. BioViz <italic>Connect</italic> delegates user management, including logging in and password management, to the Central Authentication Service (CAS) OAuth service hosted and maintained by CyVerse (<xref ref-type="fig" rid="F1">Figure 1</xref>). Thus, CyVerse infrastructure manages user accounts and information; no BioViz-specific accounts or passwords are required, and the BioViz <italic>Connect</italic> software never gains access to the user&#x2019;s CyVerse password. After a user has logged in to BioViz <italic>Connect</italic> using their CyVerse credentials, BioViz <italic>Connect</italic> uses its server-side Redis database to store a user-specific access token for the duration of the session, a token that allows BioViz <italic>Connect</italic> to access and modify user data stored in CyVerse infrastructure <italic>via</italic> the Terrain API on the user&#x2019;s behalf.</p>
</sec>
<sec id="s2-3">
<title>Integrated Genome Browser REST Endpoint</title>
<p>Integrated Genome Browser is a free, open-source desktop software program written in Java which users download and install on their local computer systems (IGB, RRID: SCR_011792) (<xref ref-type="bibr" rid="B22">Nicol et al., 2009</xref>; <xref ref-type="bibr" rid="B6">Freese et al., 2016</xref>). Installers for Linux, MacOS, and Windows platforms are available at <ext-link ext-link-type="uri" xlink:href="https://bioviz.org/">https://bioviz.org</ext-link>.</p>
<p>The IGB source code resides in a git repository hosted on Atlassian&#x2019;s <ext-link ext-link-type="uri" xlink:href="http://bitbucket.org">bitbucket.org</ext-link> site (<ext-link ext-link-type="uri" xlink:href="https://bitbucket.org/lorainelab/integrated-genome-browser">https://bitbucket.org/lorainelab/integrated-genome-browser</ext-link>). When viewed on the BitBucket git repository&#x2019;s Web site, changes to the code called &#x201c;commits&#x201d; link to pages on the project management Web site documenting the motivation for the change and/or technical challenges encountered, thus making the source code easier to manage and understand. The project management Web site uses Jira from Atlassian Software, with URL <ext-link ext-link-type="uri" xlink:href="https://jira.bioviz.org/">https://jira.bioviz.org</ext-link>. IGB version 9.1.4 or greater is required for IGB to connect to BioViz <italic>Connect</italic>.</p>
<p>IGB contains a simple Web server configured to respond to REST-style queries on an IGB-specific port on the user&#x2019;s local computer. JavaScript code downloaded into the Web browser when users visit BioViz <italic>Connect</italic> pages enables requesting URLs addressed to &#x201c;localhost&#x201d;, the user&#x2019;s computer, using the IGB-specific port. IGB intercepts these requests and performs actions dictated by parameters embedded in the URL text. This mechanism repurposes a REST endpoint dating from the earliest releases of IGB from the early 2000s. The IGB Users&#x2019; Guide hosted at <ext-link ext-link-type="uri" xlink:href="https://wiki.bioviz.org/confluence">https://wiki.bioviz.org/confluence</ext-link> describes these and other features.</p>
</sec>
<sec id="s2-4">
<title>BioViz <italic>Connect</italic> Metadata</title>
<p>BioViz <italic>Connect</italic> uses the Terrain Metadata API to manage and obtain IGB-specific metadata for files and folders. The Terrain API represents metadata items as triplets containing Attribute, Value, and Unit. A metadata item&#x2019;s Attribute attaches meaning to what the metadata contains, and application developers can create their own custom Attributes to support diverse purposes. For example, since BioViz <italic>Connect</italic> is concerned with genomic data visualization, we created custom Attributes signaling genome assembly version, visual style information such as foreground color and background color, and free text comments on the data provided by the user, which are displayed in BioViz <italic>Connect</italic>&#x2019;s Web interface. A metadata item&#x2019;s Value is specific to the file or folder being tagged. BioViz <italic>Connect</italic> uses the Unit value to indicate that the metadata element concerns IGB and the BioViz <italic>Connect</italic> application.</p>
<p>The genome identifier attribute requires further explanation, as matching genome version names across systems has caused many problems for genome browsers and their users. Integrated Genome Browser, like many other systems, uses an application-specific scheme for naming genome versions, and contains a listing of synonyms matching these IGB-specific names onto genome version names from other systems. For example, the IGB genome version named H_sapiens_May_2004 is the same as UCSC genome version name hg17, which is the same as NCBI version 35. The BioViz <italic>Connect</italic> user interface includes components for users to view, designate, or change the genome version metadata associated with individual files. To ensure compatibility with IGB, BioViz <italic>Connect</italic> uses a list of IGB-formatted genome identifiers hosted on the IGB Quickload site (<ext-link ext-link-type="uri" xlink:href="http://igbquickload.org/quickload/">http://igbquickload.org/quickload/</ext-link>) to configure the genome version selection components, implemented as menus. When users operate the interface to view data within IGB, the genome version metadata, along with style metadata, are passed to IGB <italic>via</italic> its localhost REST endpoint. This ensures that the data appear in the context of the correct genome assembly, alongside other data already loaded from BioViz <italic>Connect</italic> or other sources, while also enabling the user to specify in advance how the data will look once they appear in IGB. In addition, if other users load the same files, the data will look the same.</p>
</sec>
<sec id="s2-5">
<title>Enabling Access to Data <italic>via</italic> Public URLs</title>
<p>The flow of data from CyVerse into IGB depends on two key technical features of the CyVerse data storage and hosting system. First, the Terrain API enables users to create publicly accessible URLs for data files in their accounts, and these URLs can be enabled or disabled at will. In the current implementation, URLs created in this way are accessible to any internet user. Second, the CyVerse infrastructure supports HTTP range requests for these URLs, enabling clients such as IGB to request subsets of data, thus avoiding having to download or transfer an entire data file.</p>
<p>The BioViz <italic>Connect</italic> interface is designed to make the process of managing these URLs as easy as possible, similar to commercial cloud storage systems such as Dropbox and Google Drive that let users create, destroy, and manage public links to individual files and folders. Within the BioViz <italic>Connect</italic> interface, users create URLs for individual files by right-clicking the file and selecting the &#x201c;Manage Link&#x201d; option. Selecting this option opens a right panel display in which the current status of the file is shown, and users can toggle between making the file public or private (<xref ref-type="fig" rid="F2">Figures 2A,B</xref>).</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>BioViz <italic>Connect</italic> interface and IGB data visualization. <bold>(A,B)</bold> BioViz <italic>Connect</italic> &#x201c;Manage Link&#x201d; interface, from the right panel display. By default, files are not publicly accessible, and the interface appears as in <bold>(A)</bold>. Clicking the button labeled &#x201c;Create Public Link&#x201d; creates a public link, switching the display to the image shown in <bold>(B)</bold>. <bold>(C)</bold> BioViz <italic>Connect</italic> main page. The left panel shows shortcuts to home, shared, and community folders. The middle panel lists files and folders. The right panel shows the selected file&#x2019;s metadata. <bold>(D)</bold> SRR10060893.bam and SRR10060894.bam files viewed in IGB overlapping the SR45a gene of <italic>A. thaliana</italic>. The track labeled TAIR10 mRNA shows SR45a gene models AT1G07350.1 and AT1G07350.2.</p>
</caption>
<graphic xlink:href="fbinf-02-764619-g002.tif"/>
</fig>
<p>As shown in <xref ref-type="fig" rid="F2">Figure 2B</xref>, the text of this public URL is visible to the user, and users can copy it to their system clipboard by clicking the &#x201c;copy&#x201d; icon. The Terrain API determines the link text, and currently, it always contains the user&#x2019;s chosen name for the file and the path to the file within the virtual file system, preceded by the prefix shown in <xref ref-type="fig" rid="F2">Figure 2B</xref>. We expose this detail to users because increasingly many researchers are using their CyVerse accounts to host files, and the current transparency and predictability of these URLs seem important for them to know about. Likewise, if the pattern ever changes, they will need to know this, as well.</p>
</sec>
<sec id="s2-6">
<title>BioViz <italic>Connect</italic> Deployment</title>
<p>BioViz <italic>Connect</italic> is managed using ansible roles and playbooks publicly available in a git repository from <ext-link ext-link-type="uri" xlink:href="https://bitbucket.org/lorainelab/bioviz-connect-playbooks">https://bitbucket.org/lorainelab/bioviz-connect-playbooks</ext-link>. The playbooks contain two sets of tasks. One set of tasks creates a virtual machine using the Amazon EC2 Web service. Once the host is created and running, a second set of ansible tasks installs and configures software on the host, including an Apache2 Web server, a MySQL database, and the BioViz <italic>Connect</italic> code base. Playbook users can specify the BioViz <italic>Connect</italic> repository and branch they wish to deploy, which facilitates rapid testing of proposed new code. During the provisioning process, a call is made to a Terrain endpoint that provides a list of all CyVerse asynchronous analysis apps that can produce output visible to IGB. These data are then used to construct the &#x201c;analysis&#x201d; sections of the user interface, and are stored in the BioViz <italic>Connect</italic> relational database, co-located on the same host.</p>
</sec>
<sec id="s2-7">
<title>BioViz <italic>Connect</italic> Interface for Running Visual Analysis Apps</title>
<p>When users right-click a file name in BioViz <italic>Connect</italic>, a context menu appears with an option labeled &#x201c;Analyse.&#x201d; Information about IGB-compatible Apps, the file types they can accept, and App parameters are stored in the relational database configured during deployment as described above. When a user selects this option, BioViz <italic>Connect</italic> queries the database to identify IGB Community Apps that accept the file as input, and these are then displayed to the user. Once the user has selected an App, another query retrieves additional information about it, such as a user-friendly description of what the App does, which is then displayed to the user.</p>
<p>The CyVerse ecosystem contains many hundreds of Apps, many of which are redundant or obsolete, and so the BioViz Team controls which ones are shown to users by adding them to the IGB Community, a CyVerse organizing concept that groups resources (such as Apps) according to which users can use or modify them. BioViz <italic>Connect</italic> only shows Apps that have been added to the IGB Community.</p>
</sec>
<sec id="s2-8">
<title>RNA-Seq Data</title>
<p>RNA-Seq data presented in the use case scenario are from Sequence Read Archive Bioproject PRJNA509437 (<xref ref-type="bibr" rid="B17">Leinonen et al., 2011</xref>), an experiment in which Arabidopsis plants underwent either a 3-h, non-lethal heat stress or a multi-day desiccation stress. Two post-treatment sample time points were collected for treated plants and their untreated control counterparts, with two to four replicates per sample type and 23 samples in total. Sample libraries were sequenced in single-end runs of the Illumina platform and are identified by their run identifiers. BAM files were generated by aligning sequence reads to the Arabidopsis June 2009 reference genome assembly using TopHat2 (TopHat, RRID: SCR_013035) (<xref ref-type="bibr" rid="B16">Kim et al., 2013</xref>). The data are available in the Community folder of publicly accessible datasets, represented as a folder in the left-side panel of the BioViz <italic>Connect</italic> display.</p>
</sec>
</sec>
<sec sec-type="results" id="s3">
<title>Results</title>
<sec id="s3-1">
<title>Understanding and Navigating the BioViz <italic>Connect</italic> Interface</title>
<p>Our design goal in creating BioViz <italic>Connect</italic> was to give the user a feeling of almost limitless computational power and space by integrating seamlessly with the CyVerse &#x201c;cloud.&#x201d; Doing so requires that users identify themselves to the system by entering a username and password, but how this process takes place can easily destroy the illusion of seamless access. To avoid this, we used an OAuth-style Terrain API endpoint that delegates logins to CyVerse infrastructure, preventing BioViz <italic>Connect</italic> from learning the user&#x2019;s password.</p>
<p>To begin a session with BioViz <italic>Connect</italic>, the user opens the <ext-link ext-link-type="uri" xlink:href="http://BioViz.org">BioViz.org</ext-link> website in a Web browser, selects the link labeled BioViz <italic>Connect</italic>, and then clicks the link labeled &#x201c;Sign in with your CyVerse ID&#x201d;. This action opens a Central Authentication Service (CAS) page, hosted by CyVerse, where users enter their CyVerse username and password, or sign up for a new account if they do not already have one. The Web browser then returns to a &#x201c;call-back&#x201d; URL on the <ext-link ext-link-type="uri" xlink:href="http://BioViz.org">BioViz.org</ext-link> site, which displays the BioViz <italic>Connect</italic> user interface, a browsable, sortable, paginated view of the user&#x2019;s CyVerse home directory and its contents (<xref ref-type="fig" rid="F2">Figure 2C</xref>).</p>
<p>This view of files and data resembles the interface for commercial, consumer-focused cloud storage systems, a deliberate design choice aimed at building on many users&#x2019; familiarity with Google Drive, the Dropbox Web interface, and others. This interface displays a sortable, table-based view of the user&#x2019;s home directory within the CyVerse file storage system, displaying a listing of files and folders the user has uploaded to their account or created using CyVerse Apps, including BioViz <italic>Connect</italic> Apps described in later sections. Single-clicking a file or folder selects it, double-clicking a folder opens it and displays the contents, and double-clicking a file opens a metadata display showing information about the file (<xref ref-type="fig" rid="F2">Figure 2C</xref>). A breadcrumb display at the top of the page shows the path from the root folder to the currently opened folder, and a copy icon next to the breadcrumb allows the user to copy the folder name and path. The browser forward and back buttons work as expected, and users can bookmark individual screens for faster navigation. The URLs displayed in the browser&#x2019;s URL bar match the currently opened folder&#x2019;s location, making the interface feel more polished and user-friendly by ensuring that every user-facing detail, including the URL, mimics and reinforces how the user has organized their data within the CyVerse virtual file system.</p>
<p>The top part of every BioViz <italic>Connect</italic> page also features a search bar that can be used to find files and folders with names matching a user-entered query string. Matches are returned in a list view similar to the original table view, and users can sort the results list by name, size, or date modified. Only files for which the user has read access and that reside in the currently visible section (Home, Community, or Shared with me) are returned. On the left side of every page, BioViz <italic>Connect</italic> displays icons representing shortcut links to the user&#x2019;s home directory, a publicly available community data folder, and other destinations. The &#x201c;Community&#x201d; folder contains data published for all CyVerse users, including the example RNA-Seq data set for the use case scenario described in the next section.</p>
</sec>
<sec id="s3-2">
<title>Using BioViz <italic>Connect</italic> to View Data in Integrated Genome Browser</title>
<p>To demonstrate BioViz <italic>Connect</italic> functionality, we next describe an example use case scenario in which a hypothetical researcher visually analyzes data from a typical RNA-Seq experiment. The use case focuses on two main tasks: visually checking data quality and then confirming differential expression of a control gene known to be regulated by the treatment.</p>
<p>The experimental design included two treatments, heat and desiccation stress, their controls, and two time points, totaling six sample types, each with two to four replicates. The RNA-Seq sequences are available in the Sequence Read Archive, and the researcher has obtained the data, aligned it to the reference genome, and then contributed the files to the Community folder. Alignment files are stored in the file path &#x201c;<italic>BioViz/rnaseq/A_thaliana_Jun_2009/SRP220157/reads</italic>&#x201d;. The user has also annotated each file using the BioViz <italic>Connect</italic> interface, adding the genome version, visual style information, and notes describing each sample.</p>
<p>Now that the data are organized and annotated, the researcher uses the BioViz <italic>Connect</italic> interface to import the data into Integrated Genome Browser for visualization and proceeds to look at each file, one by one, to check the quality of the alignments and confirm file identity. BioViz <italic>Connect</italic> makes this task easy to perform. To illustrate, we discuss RNA-Seq alignment files SRR10060893.bam and SRR10060894.bam, replicate control samples from time point one of the heat stress treatment. A quick scan of files listed in the BioViz <italic>Connect</italic> table view shows that SRR10060893.bam has size 1.61 GB, about twice the size of SRR10060894.bam, which is 0.669&#xa0;GB. The user has annotated the files with the number of sequence reads obtained per sample, around 37 million for each. Because the samples were sequenced to about the same depth, their resulting alignment files ought to have similar sizes. Visualizing the sequence read alignments will help explain the discrepancy.</p>
<p>To visualize the alignments, the user launches Integrated Genome Browser, which is already installed on the local computer, downloaded from the <ext-link ext-link-type="uri" xlink:href="http://BioViz.org">BioViz.org</ext-link> Web site. Once IGB is running, the user clicks the &#x201c;View in IGB&#x201d; button available in the &#x201c;Visualization Tools&#x201d; column in the BioViz <italic>Connect</italic> table view, repeating this action for each file (<xref ref-type="fig" rid="F2">Figure 2C</xref>). This action causes JavaScript code running within the Web browser to request data from a local URL (domain &#x201c;localhost&#x201d;) corresponding to a REST endpoint implemented within IGB. The URL includes parameters such as the publicly accessible URL for the data file, the IGB name of its reference genome, and visual style information indicating how the file should look once loaded into IGB. In response, IGB opens the requested genome version associated with the file and adds the file as a new track to the display.</p>
<p>To check assumptions about a new data set, it is useful to visualize a gene of known behavior, such as a gene already known to be regulated by the experimental treatment. Prior work from our lab and others has shown that SR45a, encoding an RNA-binding protein, is upregulated by heat and desiccation stresses, making it a good choice for this purpose (<xref ref-type="bibr" rid="B31">Yoshimura et al., 2011</xref>; <xref ref-type="bibr" rid="B9">Gulledge et al., 2012</xref>). To find the gene, the analyst enters SR45a into IGB&#x2019;s search interface at the top left of the IGB window, which zooms and pans the display to the gene&#x2019;s position in the genome. Next, the user loads the alignments into the display by clicking the &#x201c;Load Data&#x201d; button at the top right of the IGB window. Once the data load, the user customizes track appearance by modifying the vertical zoom setting and changing the number of sequences that can be shown individually in a track (stack height), creating the view shown in <xref ref-type="fig" rid="F2">Figure 2D</xref>.</p>
<p>This customized view makes problems with SRR10060894 obvious at a glance. The alignments for this sample appear to stack on top of each other in orderly, uniform towers covering only 30% of the gene&#x2019;s exonic sequence. By contrast, the alignments for sample SRR10060893 cover most of the exonic sequence and also include many spliced reads split across introns. The sparser pattern observed in SRR10060894 typically arises when the library synthesis process included too many polymerase chain reaction amplification cycles, reducing the diversity of resulting sequence data. This pattern indicates that the user should exclude SRR10060894 from further analysis, but the other file appears to be fine.</p>
</sec>
<sec id="s3-3">
<title>Comparing Sequencing Depth and Complexity Using Integrated Genome Browser Visual Analytics</title>
<p>Repeating the preceding process with other samples in the dataset, the user identifies another problematic pair of files. The files are replicates, but like the previous example, the file sizes differ. The alignment file SRR10060911.bam is 1.83&#xa0;GB, but its replicate SRR10060912.bam is only 0.454&#xa0;GB. Opening and viewing the alignment files in IGB, the user confirms that one file appears to contain more data than the other (<xref ref-type="fig" rid="F3">Figure 3A</xref>). To quantify this observation, the user takes advantage of a simple, interactive visual analytics feature within IGB: selection-based counting. As with PowerPoint and many other graphical applications, IGB users can click-drag the mouse over graphical elements to select a group of items and then single-click while pressing SHIFT or CTRL-SHIFT keys to add or remove items from the selection group. IGB reports the number of currently selected items in the Selection Info box at the top right of the IGB window. Using this feature, the researcher finds that sample SRR10060912 contains 1,925 alignments covering SR45a, and sample SRR10060911 has 10,867 alignments, more than five times as many.</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>Heat treated samples viewed in IGB. <bold>(A)</bold> Vertical dimension is compressed to show all alignments. <bold>(B)</bold> SRR10060911 and <bold>(C)</bold> SRR10060912 tracks stretched vertically to reveal alignment patterns in more detail. <bold>(D)</bold> Alignment coverage graphs calculated within IGB using alignments from <bold>(A)</bold>. The y-axis values represent the number of aligned sequences per base pair position indicated on the coordinates track. The track labeled TAIR10 mRNA shows SR45a gene models AT1G07350.1 and AT1G07350.2.</p>
</caption>
<graphic xlink:href="fbinf-02-764619-g003.tif"/>
</fig>
<p>By further configuring track height and appearance settings, and operating IGB&#x2019;s dynamic vertical and horizontal zoom controls, the user can stretch the display in each dimension independently to reveal more detail about the alignments (<xref ref-type="fig" rid="F3">Figures 3B,C</xref>). From this new view of the data, the user can tentatively conclude that alignment pattern diversity is similar in each sample, but the depth of sequencing was greater in SRR10060911. To confirm the finding, the user then applies a visual analytics function (called a &#x201c;Track Operation&#x201d; within IGB) that creates coverage graphs, also called depth graphs, using data from the read alignment tracks (<xref ref-type="fig" rid="F3">Figure 3D</xref>). To make a coverage graph, the user right-clicks a track label for a read alignment track and chooses option &#x201c;Track Operations &#x3e; Depth Graph (All).&#x201d; This generates a new track showing a graph in which the y-axis indicates the number of sequences aligned per x-axis position, corresponding to base pair positions. After modifying the y-axis lower and upper boundary values (using controls in IGB&#x2019;s Graph tab), the user again can observe that the pattern of alignments is similar between the two samples, but the overall level of sequencing was different. Thus, the file size difference most likely is due to a difference in sequencing depth rather than a problem with the library synthesis, as was the case in the previous example.</p>
</sec>
<sec id="s3-4">
<title>Normalizing Coverage Graphs to Compare Gene Expression Visually</title>
<p>Coverage graphs set to the same scale allow comparing gene expression across sample types, but only if the libraries were sequenced to approximately the same depth. If not, then coverage graphs need to be normalized before comparing them. Scaling coverage graphs within IGB is impractical, however, as it would require downloading, reading, and processing the entire bam-format alignments file. A better approach is to off-load computationally intensive visual analytics tasks to CyVerse cloud computing resources. To demonstrate the value of this strategy, we deployed the deepTools genomeCoverage command line tool from the deepTools suite (Deeptools, RRID: SCR_016366) as a new IGB-friendly CyVerse App (<xref ref-type="bibr" rid="B25">Ramirez et al., 2016</xref>).</p>
<p>To create a scaled coverage graph, the user returns to BioViz <italic>Connect</italic>, right-clicks a bam format file, and chooses &#x201c;Analyse.&#x201d; This opens the Analysis right-panel display, which lists all IGB-compatible CyVerse Apps that can accept the selected file type as input (<xref ref-type="fig" rid="F4">Figure 4A</xref>). Selecting &#x201c;Make scaled coverage graph&#x201d; opens a form with options for creating the graph using the genomeCoverage algorithm (<xref ref-type="fig" rid="F4">Figure 4B</xref>). The interface includes a place for the user to enter names for the analysis and for the output file that will be produced. The user then clicks the &#x201c;Run Analysis&#x201d; button, which calls upon the CyVerse analysis API to run the App with specified parameters using CyVerse computing resources. The request to run the App and the work it performs are called &#x201c;jobs,&#x201d; and jobs are carried out asynchronously, running and completing only when resources they require become available, as with other systems set up for high-performance computing. Users can check job status by using the Analyses History in the BioViz <italic>Connect</italic> interface (<xref ref-type="fig" rid="F4">Figure 4C</xref>), where Analyses are listed as Queued (waiting to run), Running, Failed, or Completed. The time needed to complete a job depends on the size of the queue, the analysis being carried out, and the size of the file. When we ran these analyses ourselves, the &#x201c;Make scaled coverage graph&#x201d; job took 7&#xa0;min and 12&#xa0;s for SRR10060911.bam, which is 1.83&#xa0;GB in size, whereas SRR10060912.bam took only 5&#xa0;min and 52&#xa0;s, most likely due to its smaller file size of 455&#xa0;MB. Larger files may take longer; for example, an 8.89&#xa0;GB file took 38&#xa0;min and 54&#xa0;s to complete. Independent of BioViz <italic>Connect</italic>, the CyVerse infrastructure sends an email to users when jobs finish.
When a job finishes, any files or folders it creates appear in the analyses folder in the user&#x2019;s home directory, or in the same location as the input files, if those are stored in a location where the user has permission to modify or add to the folder. To quickly navigate to results, users can click the analysis name in the Analyses History, opening the folder where the output data files are stored.</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>Example analysis in BioViz <italic>Connect</italic> with output visualized in IGB. <bold>(A)</bold> BioViz <italic>Connect</italic> main page with analysis right panel open. <bold>(B)</bold> Scaled coverage graph analysis options for naming the analysis, selecting input file, output file name, and index file selection. <bold>(C)</bold> Analyses History showing the status of current and previous jobs. <bold>(D)</bold> SRR10060904 (control), SRR10060911 (heat treated), and SRR10060912 (heat treated) scaled coverage graphs viewed in IGB overlapping the SR45a gene of <italic>A. thaliana</italic>.</p>
</caption>
<graphic xlink:href="fbinf-02-764619-g004.tif"/>
</fig>
<p>
<xref ref-type="fig" rid="F4">Figure 4D</xref> shows sample App output, a visualization of the SR45a region with three scaled coverage graphs loaded from bigwig data files, a compact binary format for representing numeric values associated with base pairs in a genome map. Two heat-treated and one control sample are shown. The three coverage graphs have been configured to use the same y-axis scale, making it obvious that the heat treatment elevated SR45a gene expression, consistent with previously published reports. The image presents a clear visual argument in favor of this conclusion, and it also shows the user how much the expression level measurement varies across the gene body, something a single summary statistic cannot provide.</p>
</sec>
</sec>
<sec sec-type="discussion" id="s4">
<title>Discussion</title>
<p>BioViz <italic>Connect</italic> introduces and demonstrates innovations in the field of science gateway development and research, while providing useful functionality for researchers seeking to understand and visualize genomic data. BioViz <italic>Connect</italic> enables users of the CyVerse science gateway to visualize genomic data files from their CyVerse accounts using Integrated Genome Browser, a desktop application. To our knowledge, BioViz <italic>Connect</italic> is the first and only resource that integrates remote CyVerse file storage and computational resources with a genome browser native to the local computer, achieving this cross-application communication <italic>via</italic> localhost REST endpoints.</p>
<p>We implemented BioViz <italic>Connect</italic> using the CyVerse Terrain API, a collection of remote REST endpoints that form a comprehensive computational interface to CyVerse resources. However, CyVerse and its Terrain API were not the first cloud system we considered. Our larger goal was to expand users&#x2019; experience of genome browsing by connecting the interactivity and speed of a native, desktop genome browser (such as IGB) with the vast resources of cloud-based, remote storage and computing systems, making it easier for users to store and share their data with others and also run compute-intensive visual analytics algorithms that would never be possible using just the user&#x2019;s personal computer. To achieve this, we considered several commercial and public-sector systems, but selected CyVerse because of its focus on supporting scientific research, its free cost for users, and its early support for computational interfaces <italic>via</italic> APIs (<xref ref-type="bibr" rid="B4">Dooley et al., 2012</xref>).</p>
<p>At first, we proposed to use the CyVerse Agave API, which was well-documented and well-supported at the time. Since then, at least two other groups have published workflow management sites that use Agave, justifying our original choice (<xref ref-type="bibr" rid="B29">Wang et al., 2018</xref>; <xref ref-type="bibr" rid="B12">Hubbard et al., 2020</xref>). However, several months after launching our project, we discovered that Agave&#x2019;s manipulation of user data conflicted with CyVerse&#x2019;s own Discovery Environment interface, then a Web interface resembling a personal computer desktop. We also learned that Agave lacked support for HTTP range requests against data files, an essential feature from our perspective, and that this feature was unlikely to be added, as Agave&#x2019;s maintainers were in the process of migrating to a new version to be called &#x201c;Tapis.&#x201d; Realizing our problem, they recommended we instead use Terrain, the API that powers the Discovery Environment interface. After consulting with developer teams working on Terrain and Discovery Environment, both based at University of Arizona, we decided to use Terrain.</p>
<p>We chose to use Integrated Genome Browser as the visualization component of BioViz <italic>Connect</italic> for several reasons. The first was that we wanted to demonstrate and explore a connection between cloud-based resources and a pre-existing, native, desktop application already in wide use, and IGB satisfied this requirement. The second major reason was convenience. As the core development group for IGB, we understand its architecture and capabilities, reducing our learning curve when connecting this local application to the cloud. IGB already contained a localhost REST interface that we could repurpose for BioViz <italic>Connect</italic>, an endpoint first developed in the early 2000s to enable a connection between the Affymetrix NetAffx Web site and IGB. Since then, we used this same endpoint to implement IGB&#x2019;s internal region and data bookmarking system. IGV, the only other native genome browser application in wide use, has a similar REST endpoint used to trigger loading of data files from the Galaxy Web site and others, but this endpoint lacks features such as the ability to specify track appearance. The third reason was that the IGB interface decouples navigation and data loading, thus making it easier for users to control when data are requested from the remote host. We surmised that this would make possible delays in data loading less onerous than for other browsers, such as IGV (<xref ref-type="bibr" rid="B27">Robinson et al., 2011</xref>), UCSC Genome Browser (<xref ref-type="bibr" rid="B15">Kent et al., 2002</xref>), JBrowse (<xref ref-type="bibr" rid="B2">Buels et al., 2016</xref>), and Ensembl (<xref ref-type="bibr" rid="B11">Howe et al., 2021</xref>), all of which load data automatically when users navigate to a new region. However, since we first released BioViz <italic>Connect</italic>, the CyVerse development team has improved data throughput, making those tools&#x2019; design less problematic.</p>
<p>Our success in linking IGB to the cloud, along with the abovementioned improvement in CyVerse infrastructure, suggests an interesting next step for BioViz <italic>Connect</italic>: adding other genome browser systems to the interface. Anticipating this possibility, the first column in the BioViz <italic>Connect</italic> file browser table is labeled &#x201c;Visualization Tools,&#x201d; a generic heading that suggests adding other tools. Doing this would be valuable because although genome browsers often recapitulate each other&#x2019;s features, all have capabilities unique to them, and users who prefer them. For example, IGB offers fast navigation through a genome, the ability to interact directly with data, access to shared data <italic>via</italic> IGB Quickload sites, and visual analytics functions called &#x201c;Operations&#x201d; that aid exploratory analysis. Unique features of the Broad Institute&#x2019;s IGV include a sashimi plot view for detecting differential splicing (<xref ref-type="bibr" rid="B14">Katz et al., 2015</xref>) and a bisulfite sequencing view for understanding DNA methylation. The UCSC Genome Browser excels at offering a multitude of data sets in distinct tracks, while the Ensembl browser and associated informatics system famously support nearly every reference assembly known to science, including many plant genomes not supported by UCSC. And the Jalview system provides a host of features for examining the deep details of alignments, the heart of genomic analysis (<xref ref-type="bibr" rid="B24">Procter et al., 2021</xref>). BioViz <italic>Connect</italic> could make these systems easier to use and compare, allowing us to study how different approaches to visualization affect understanding.</p>
<p>To our knowledge, BioViz <italic>Connect</italic> is the first application developed using the Terrain API by a group outside the CyVerse development team. Because our work is open source, developed entirely in public, other groups can use our implementation as a guide or inspiration for their own work. BioViz <italic>Connect</italic> further demonstrates to the larger community of biologists, developers, and funders that modern, feature-rich REST interfaces to powerful computational resources stimulate and enable innovation and progress.</p>
<p>The scaled coverage graphs described in the use case scenario offer a useful, practical example of how remote resources can power interactive visual analytics on the desktop, an idea that has been explored in diverse fields and settings, but not often applied to genome visualization as was done here. The example we presented used a pre-existing algorithm, developed by others, but it shows how developers can harness a more powerful gateway system to develop and deploy all-new interactive genome data visualizations. Offloading compute-intensive visual analytics functions to science gateway systems will likely become more appealing and important as the size and complexity of genomic data continue to increase.</p>
</sec>
<sec id="s5">
<title>Limitations and Ways to Overcome Them</title>
<p>However, at least two important technical limitations remain, providing opportunities for future work. The first technical limitation has to do with how data flows from the CyVerse back end data store and into the desktop genome browser application. Integrated Genome Browser as currently implemented can display data from users&#x2019; CyVerse accounts because the Terrain API can assign publicly accessible URLs to individual data files, which makes them available for visualization but exposes them to everyone on the internet. This problem of public accessibility could perhaps be addressed by adding password protection to these URLs, using Basic Authentication headers defined by the HTTP protocol. IGB already supports logging into password-protected Web servers, and so this solution would require few or no changes on the client side.</p>
<p>Another problem has to do with the data file formats themselves and how they can sometimes expose more information than anticipated. IGB, along with every other genome visualization system we are aware of, uses random access, indexed file formats to retrieve subsets of data corresponding to genomic regions. For example, BAM (binary alignment) files are typically large and impractical to download in their entirety. The data stored in these files are sorted by genomic location and therefore can be indexed by genomic location. When retrieving data for a desired genomic region, IGB and other programs use the BAM file&#x2019;s index, stored separately in a smaller &#x201c;bai&#x201d; file, to look up the range of bytes where those data reside in the target file, and then read and process only the data for that region, ignoring the rest. This idea of mapping genomic coordinates to physical file coordinates has been in heavy use for decades, for as long as IGB has existed. Indeed, the original IGB development team at Affymetrix implemented one of the first indexed file formats, called &#x201c;bar&#x201d; for &#x201c;binary array format&#x201d;, used for storing and accessing data from Affymetrix genome tiling arrays, one of the first technologies invented to survey transcription across an entire genome in an unbiased way. However, in some situations, the index can serve as a genomic map, providing an overview of an entire dataset that could identify an individual. For example, as shown in (<xref ref-type="bibr" rid="B23">Pedersen et al., 2017</xref>), one can use the BAM index to detect chromosome abnormalities from whole genome sequencing data, exposing more information about a person or an experiment than anticipated.</p>
<p>The second technical limitation concerns how to flow data from remote sites, <italic>via</italic> a Web browser, into other programs running natively on the desktop, such as Integrated Genome Browser. Web browser development communities are constantly changing and improving their security models, essential to keeping users and their data safe in an increasingly adversarial and dangerous digital environment. Most Web pages are now loaded over encrypted channels, using HTTPS, the secure version of HTTP, and this includes BioViz <italic>Connect</italic>. This means that the JavaScript code responsible for interacting with IGB&#x2019;s localhost endpoint is also loaded <italic>via</italic> HTTPS. However, when this code interacts with IGB <italic>via</italic> its localhost endpoint, it does so <italic>via</italic> unencrypted HTTP, because there is currently no robust way to support HTTPS for the localhost domain. The Chrome and Firefox browsers allow BioViz <italic>Connect</italic> code to access the localhost IGB endpoint using HTTP because the communication channel is limited to the user&#x2019;s own computer, presumed to be secure. The macOS Safari Web browser does not allow it, however. This means that BioViz <italic>Connect&#x2019;s</italic> &#x201c;View in IGB&#x201d; feature fails for Safari users. We handle this by advising the user to switch to a different browser on macOS. This issue exemplifies a more general problem with connecting the desktop to the cloud. The methods used to communicate with remote computers are always changing, usually becoming more restrictive, which means that developers need to constantly test, revise, and update their software, more so perhaps than developers who create stand-alone, independent applications that rarely need to interoperate with anything other than the host computer&#x2019;s operating system.</p>
<p>Architectures using Web-based REST APIs may help solve these problems. For example, CyVerse or BioViz <italic>Connect</italic> could add new endpoints that themselves support region-based retrieval of genomic data, as with the XML-based Distributed Annotation Service (<xref ref-type="bibr" rid="B5">Dowell et al., 2001</xref>; <xref ref-type="bibr" rid="B13">Jenkinson et al., 2008</xref>), the newer JSON-based University of California, Santa Cruz Genome Informatics REST interface (<xref ref-type="bibr" rid="B28">UCSC, 2021</xref>), or the BEACONS network API, which supports multiple layers of user authentication (<ext-link ext-link-type="uri" xlink:href="https://beacon-project.io/">https://beacon-project.io/</ext-link>). Rather than deliver data in new JSON or XML formats that would require modifying the client software, these new endpoints could simply stream the data in their native formats, requiring minimal or no change to the client software. Another way to achieve this would be to design APIs using the facade design pattern, in which an application translates an incompatible interface to a compatible one, expanding the range of clients able to access a resource. For example, developers could create a novel API that provides all the services required for accessing BAM files and their indexes, by creating and destroying secure URLs as users open and load data file resources during a session. Many variations are possible, and as cloud computing infrastructures become easier and cheaper to build upon, more bioinformatics groups will attempt even more daring and exciting innovations, amplifying their users&#x2019; ability to investigate biological systems.</p>
<p>Finally, we highlight aspects of the BioViz <italic>Connect</italic> interface and functionality that could be further developed to help users find useful tools and help developers find users for their tools. First, we note that the &#x201c;View in IGB&#x201d; button in the BioViz <italic>Connect</italic> table view occupies a column labeled &#x201c;Visualization Tools,&#x201d; a space where links to other visualization tools could also be added, based on the input data they accept. To make space for these other tools, we could replace the button with an IGB logo, and use tooltips to provide documentation or link to videos describing how to use the tools. Second, we could enhance BioViz <italic>Connect</italic> search capabilities to query MetaData tags or other file properties and attributes. Third, we could collaborate with the CyVerse team and other users to design and implement data registries, which data providers and users could use to publish, publicize, and locate data sets relevant to their work. As we hope the name suggests, BioViz <italic>Connect</italic> will connect researchers with data and tools, and will help tool developers connect with their intended audience, improving scientific practice for everyone.</p>
</sec>
</body>
<back>
<sec id="s6">
<title>Data Availability Statement</title>
<p>Publicly available datasets were analyzed in this study. These data can be found here: <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/sra/?term=PRJNA509437">https://www.ncbi.nlm.nih.gov/sra/?term&#x3d;PRJNA509437</ext-link>.</p>
</sec>
<sec id="s7">
<title>Author Contributions</title>
<p>NF and AL conceived of and supervised the project. KR, CK, ST, and PB planned and developed BioViz <italic>Connect</italic>. NF, AL, KR, CK, ST, PB, and CD tested and debugged BioViz <italic>Connect</italic>. NF, KR, CK, and AL wrote the draft manuscript. All authors read and approved the final manuscript.</p>
</sec>
<sec id="s8">
<title>Funding</title>
<p>Research reported in this publication was supported by the National Institute of General Medical Sciences of the National Institutes of Health under award numbers 5R01GM121927 and R35GM139609. Funding was used to plan, design, and develop the software reported in the article.</p>
</sec>
<sec sec-type="COI-statement" id="s9">
<title>Conflict of Interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s10">
<title>Publisher&#x2019;s Note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ack>
<p>We thank Paul Sarando, Sarah Roberts, Sriram Srinivasan, Ian McEwen, Ramona Walls, and Reetu Tuteja for their assistance with the Terrain API and publishing CyVerse apps, which was made possible through CyVerse&#x2019;s External Collaborative Partnership program.</p>
</ack>
<sec id="s11">
<title>Abbreviations</title>
<p>API, Application Programming Interface; CSS, Cascading Style Sheets; HTML, HyperText Markup Language; IGB, Integrated Genome Browser; REST, REpresentational State Transfer.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bailey</surname>
<given-names>T. L.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>DREME: Motif Discovery in Transcription Factor ChIP-Seq Data</article-title>. <source>Bioinformatics</source> <volume>27</volume> (<issue>12</issue>), <fpage>1653</fpage>&#x2013;<lpage>1659</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btr261</pub-id> </citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Buels</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Yao</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Diesh</surname>
<given-names>C. M.</given-names>
</name>
<name>
<surname>Hayes</surname>
<given-names>R. D.</given-names>
</name>
<name>
<surname>Munoz-Torres</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Helt</surname>
<given-names>G.</given-names>
</name>
<etal/>
</person-group> (<year>2016</year>). <article-title>JBrowse: A Dynamic Web Platform for Genome Visualization and Analysis</article-title>. <source>Genome Biol.</source> <volume>17</volume> (<issue>1</issue>), <fpage>66</fpage>. <pub-id pub-id-type="doi">10.1186/s13059-016-0924-1</pub-id> </citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Devisetty</surname>
<given-names>U. K.</given-names>
</name>
<name>
<surname>Kennedy</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Sarando</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Merchant</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Lyons</surname>
<given-names>E.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Bringing Your Tools to CyVerse Discovery Environment Using Docker</article-title>. <source>F1000Res</source> <volume>5</volume>, <fpage>1442</fpage>. <pub-id pub-id-type="doi">10.12688/f1000research.8935.1</pub-id> </citation>
</ref>
<ref id="B4">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Dooley</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Vaughn</surname>
<given-names>M. W.</given-names>
</name>
<name>
<surname>Stanzione</surname>
<given-names>D. C.</given-names>
</name>
<name>
<surname>Terry</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2012</year>). &#x201c;<article-title>Software-as-a-Service: The iPlant Foundation API</article-title>,&#x201d; in <conf-name>5th IEEE Workshop on Many-Task Computing on Grids and Supercomputers (MTAGS)</conf-name>, <conf-loc>Salt Lake City, Utah, USA</conf-loc>. </citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dowell</surname>
<given-names>R. D.</given-names>
</name>
<name>
<surname>Jokerst</surname>
<given-names>R. M.</given-names>
</name>
<name>
<surname>Day</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Eddy</surname>
<given-names>S. R.</given-names>
</name>
<name>
<surname>Stein</surname>
<given-names>L.</given-names>
</name>
</person-group> (<year>2001</year>). <article-title>The Distributed Annotation System</article-title>. <source>BMC Bioinform.</source> <volume>2</volume>, <fpage>7</fpage>. <pub-id pub-id-type="doi">10.1186/1471-2105-2-7</pub-id> </citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Freese</surname>
<given-names>N. H.</given-names>
</name>
<name>
<surname>Norris</surname>
<given-names>D. C.</given-names>
</name>
<name>
<surname>Loraine</surname>
<given-names>A. E.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Integrated Genome Browser: Visual Analytics Platform for Genomics</article-title>. <source>Bioinformatics</source> <volume>32</volume> (<issue>14</issue>), <fpage>2089</fpage>&#x2013;<lpage>2095</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btw069</pub-id> </citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Giardine</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Riemer</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Hardison</surname>
<given-names>R. C.</given-names>
</name>
<name>
<surname>Burhans</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Elnitski</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Shah</surname>
<given-names>P.</given-names>
</name>
<etal/>
</person-group> (<year>2005</year>). <article-title>Galaxy: A Platform for Interactive Large-Scale Genome Analysis</article-title>. <source>Genome Res.</source> <volume>15</volume> (<issue>10</issue>), <fpage>1451</fpage>&#x2013;<lpage>1455</lpage>. <pub-id pub-id-type="doi">10.1101/gr.4086505</pub-id> </citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Goff</surname>
<given-names>S. A.</given-names>
</name>
<name>
<surname>Vaughn</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>McKay</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Lyons</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Stapleton</surname>
<given-names>A. E.</given-names>
</name>
<name>
<surname>Gessler</surname>
<given-names>D.</given-names>
</name>
<etal/>
</person-group> (<year>2011</year>). <article-title>The iPlant Collaborative: Cyberinfrastructure for Plant Biology</article-title>. <source>Front. Plant Sci.</source> <volume>2</volume>, <fpage>34</fpage>. <pub-id pub-id-type="doi">10.3389/fpls.2011.00034</pub-id> </citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gulledge</surname>
<given-names>A. A.</given-names>
</name>
<name>
<surname>Roberts</surname>
<given-names>A. D.</given-names>
</name>
<name>
<surname>Vora</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Patel</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Loraine</surname>
<given-names>A. E.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Mining <italic>Arabidopsis thaliana</italic> RNA-Seq Data with Integrated Genome Browser Reveals Stress-Induced Alternative Splicing of the Putative Splicing Regulator SR45a</article-title>. <source>Am. J. Bot.</source> <volume>99</volume> (<issue>2</issue>), <fpage>219</fpage>&#x2013;<lpage>231</lpage>. <pub-id pub-id-type="doi">10.3732/ajb.1100355</pub-id> </citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gulledge</surname>
<given-names>A. A.</given-names>
</name>
<name>
<surname>Vora</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Patel</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Loraine</surname>
<given-names>A. E.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>A Protocol for Visual Analysis of Alternative Splicing in RNA-Seq Data Using Integrated Genome Browser</article-title>. <source>Methods Mol. Biol.</source> <volume>1158</volume>, <fpage>123</fpage>&#x2013;<lpage>137</lpage>. <pub-id pub-id-type="doi">10.1007/978-1-4939-0700-7_8</pub-id> </citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Howe</surname>
<given-names>K. L.</given-names>
</name>
<name>
<surname>Achuthan</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Allen</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Allen</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Alvarez-Jarreta</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Amode</surname>
<given-names>M. R.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Ensembl 2021</article-title>. <source>Nucleic Acids Res.</source> <volume>49</volume> (<issue>D1</issue>), <fpage>D884</fpage>&#x2013;<lpage>D891</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkaa942</pub-id> </citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hubbard</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Bomhoff</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Schmidt</surname>
<given-names>C. J.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>fRNAkenseq: A Fully Powered-By-CyVerse Cloud Integrated RNA-Sequencing Analysis Tool</article-title>. <source>PeerJ</source> <volume>8</volume>, <fpage>e8592</fpage>. <pub-id pub-id-type="doi">10.7717/peerj.8592</pub-id> </citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jenkinson</surname>
<given-names>A. M.</given-names>
</name>
<name>
<surname>Albrecht</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Birney</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Blankenburg</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Down</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Finn</surname>
<given-names>R. D.</given-names>
</name>
<etal/>
</person-group> (<year>2008</year>). <article-title>Integrating Biological Data &#x2013; the Distributed Annotation System</article-title>. <source>BMC Bioinform.</source> <volume>9</volume> (<issue>Suppl. 8</issue>), <fpage>S3</fpage>. <pub-id pub-id-type="doi">10.1186/1471-2105-9-S8-S3</pub-id> </citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Katz</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>E. T.</given-names>
</name>
<name>
<surname>Silterra</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Schwartz</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Wong</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Thorvaldsd&#xf3;ttir</surname>
<given-names>H.</given-names>
</name>
<etal/>
</person-group> (<year>2015</year>). <article-title>Quantitative Visualization of Alternative Exon Expression from RNA-Seq Data</article-title>. <source>Bioinformatics</source> <volume>31</volume> (<issue>14</issue>), <fpage>2400</fpage>&#x2013;<lpage>2402</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btv034</pub-id> </citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kent</surname>
<given-names>W. J.</given-names>
</name>
<name>
<surname>Sugnet</surname>
<given-names>C. W.</given-names>
</name>
<name>
<surname>Furey</surname>
<given-names>T. S.</given-names>
</name>
<name>
<surname>Roskin</surname>
<given-names>K. M.</given-names>
</name>
<name>
<surname>Pringle</surname>
<given-names>T. H.</given-names>
</name>
<name>
<surname>Zahler</surname>
<given-names>A. M.</given-names>
</name>
<etal/>
</person-group> (<year>2002</year>). <article-title>The Human Genome Browser at UCSC</article-title>. <source>Genome Res.</source> <volume>12</volume> (<issue>6</issue>), <fpage>996</fpage>&#x2013;<lpage>1006</lpage>. <pub-id pub-id-type="doi">10.1101/gr.229102</pub-id> </citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kim</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Pertea</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Trapnell</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Pimentel</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Kelley</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Salzberg</surname>
<given-names>S. L.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>TopHat2: Accurate Alignment of Transcriptomes in the Presence of Insertions, Deletions and Gene Fusions</article-title>. <source>Genome Biol.</source> <volume>14</volume> (<issue>4</issue>), <fpage>R36</fpage>. <pub-id pub-id-type="doi">10.1186/gb-2013-14-4-r36</pub-id> </citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Leinonen</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Sugawara</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Shumway</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>The Sequence Read Archive</article-title>. <source>Nucleic Acids Res.</source> <volume>39</volume>, <fpage>D19</fpage>&#x2013;<lpage>D21</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkq1019</pub-id> </citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Handsaker</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Wysoker</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Fennell</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Ruan</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Homer</surname>
<given-names>N.</given-names>
</name>
<etal/>
</person-group> (<year>2009</year>). <article-title>The Sequence Alignment/Map Format and SAMtools</article-title>. <source>Bioinformatics</source> <volume>25</volume> (<issue>16</issue>), <fpage>2078</fpage>&#x2013;<lpage>2079</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btp352</pub-id> </citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Loraine</surname>
<given-names>A. E.</given-names>
</name>
<name>
<surname>Blakley</surname>
<given-names>I. C.</given-names>
</name>
<name>
<surname>Jagadeesan</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Harper</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Miller</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Firon</surname>
<given-names>N.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Analysis and Visualization of RNA-Seq Expression Data Using RStudio, Bioconductor, and Integrated Genome Browser</article-title>. <source>Methods Mol. Biol.</source> <volume>1284</volume>, <fpage>481</fpage>&#x2013;<lpage>501</lpage>. <pub-id pub-id-type="doi">10.1007/978-1-4939-2444-8_24</pub-id> </citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mall</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Eckstein</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Norris</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Vora</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Freese</surname>
<given-names>N. H.</given-names>
</name>
<name>
<surname>Loraine</surname>
<given-names>A. E.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>ProtAnnot: An App for Integrated Genome Browser to Display How Alternative Splicing and Transcription Affect Proteins</article-title>. <source>Bioinformatics</source> <volume>32</volume> (<issue>16</issue>), <fpage>2499</fpage>&#x2013;<lpage>2501</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btw068</pub-id> </citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Merchant</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Lyons</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Goff</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Vaughn</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Ware</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Micklos</surname>
<given-names>D.</given-names>
</name>
<etal/>
</person-group> (<year>2016</year>). <article-title>The iPlant Collaborative: Cyberinfrastructure for Enabling Data to Discovery for the Life Sciences</article-title>. <source>PLoS Biol.</source> <volume>14</volume> (<issue>1</issue>), <fpage>e1002342</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pbio.1002342</pub-id> </citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Nicol</surname>
<given-names>J. W.</given-names>
</name>
<name>
<surname>Helt</surname>
<given-names>G. A.</given-names>
</name>
<name>
<surname>Blanchard</surname>
<given-names>S. G.</given-names>
<suffix>Jr.</suffix>
</name>
<name>
<surname>Raja</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Loraine</surname>
<given-names>A. E.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>The Integrated Genome Browser: Free Software for Distribution and Exploration of Genome-Scale Datasets</article-title>. <source>Bioinformatics</source> <volume>25</volume> (<issue>20</issue>), <fpage>2730</fpage>&#x2013;<lpage>2731</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btp472</pub-id> </citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pedersen</surname>
<given-names>B. S.</given-names>
</name>
<name>
<surname>Collins</surname>
<given-names>R. L.</given-names>
</name>
<name>
<surname>Talkowski</surname>
<given-names>M. E.</given-names>
</name>
<name>
<surname>Quinlan</surname>
<given-names>A. R.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Indexcov: Fast Coverage Quality Control for Whole-Genome Sequencing</article-title>. <source>Gigascience</source> <volume>6</volume> (<issue>11</issue>), <fpage>1</fpage>&#x2013;<lpage>6</lpage>. <pub-id pub-id-type="doi">10.1093/gigascience/gix090</pub-id> </citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Procter</surname>
<given-names>J. B.</given-names>
</name>
<name>
<surname>Carstairs</surname>
<given-names>G. M.</given-names>
</name>
<name>
<surname>Soares</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Mour&#xe3;o</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Ofoegbu</surname>
<given-names>T. C.</given-names>
</name>
<name>
<surname>Barton</surname>
<given-names>D.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Alignment of Biological Sequences with Jalview</article-title>. <source>Methods Mol. Biol.</source> <volume>2231</volume>, <fpage>203</fpage>&#x2013;<lpage>224</lpage>. <pub-id pub-id-type="doi">10.1007/978-1-0716-1036-7_13</pub-id> </citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ram&#xed;rez</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Ryan</surname>
<given-names>D. P.</given-names>
</name>
<name>
<surname>Gr&#xfc;ning</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Bhardwaj</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Kilpert</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Richter</surname>
<given-names>A. S.</given-names>
</name>
<etal/>
</person-group> (<year>2016</year>). <article-title>deepTools2: a Next Generation Web Server for Deep-Sequencing Data Analysis</article-title>. <source>Nucleic Acids Res.</source> <volume>44</volume> (<issue>W1</issue>), <fpage>W160</fpage>&#x2013;<lpage>W165</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkw257</pub-id> </citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Raney</surname>
<given-names>B. J.</given-names>
</name>
<name>
<surname>Dreszer</surname>
<given-names>T. R.</given-names>
</name>
<name>
<surname>Barber</surname>
<given-names>G. P.</given-names>
</name>
<name>
<surname>Clawson</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Fujita</surname>
<given-names>P. A.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>T.</given-names>
</name>
<etal/>
</person-group> (<year>2014</year>). <article-title>Track Data Hubs Enable Visualization of User-Defined Genome-Wide Annotations on the UCSC Genome Browser</article-title>. <source>Bioinformatics</source> <volume>30</volume> (<issue>7</issue>), <fpage>1003</fpage>&#x2013;<lpage>1005</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btt637</pub-id> </citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Robinson</surname>
<given-names>J. T.</given-names>
</name>
<name>
<surname>Thorvaldsd&#xf3;ttir</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Winckler</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Guttman</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Lander</surname>
<given-names>E. S.</given-names>
</name>
<name>
<surname>Getz</surname>
<given-names>G.</given-names>
</name>
<etal/>
</person-group> (<year>2011</year>). <article-title>Integrative Genomics Viewer</article-title>. <source>Nat. Biotechnol.</source> <volume>29</volume> (<issue>1</issue>), <fpage>24</fpage>&#x2013;<lpage>26</lpage>. <pub-id pub-id-type="doi">10.1038/nbt.1754</pub-id> </citation>
</ref>
<ref id="B28">
<citation citation-type="web">
<collab>UCSC</collab> (<year>2021</year>). <article-title>REST API Data Interface [Online]</article-title>. <comment>Available: <ext-link ext-link-type="uri" xlink:href="https://genome.ucsc.edu/goldenPath/help/api.html">https://genome.ucsc.edu/goldenPath/help/api.html</ext-link> (Accessed August 20, 2021)</comment>. </citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Lu</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Van Buren</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Ware</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>SciApps: a Cloud-Based Platform for Reproducible Bioinformatics Workflows</article-title>. <source>Bioinformatics</source> <volume>34</volume> (<issue>22</issue>), <fpage>3917</fpage>&#x2013;<lpage>3920</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/bty439</pub-id> </citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wilkins-Diehr</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Gannon</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Klimeck</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Oster</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Pamidighantam</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>TeraGrid Science Gateways and Their Impact on Science</article-title>. <source>Computer</source> <volume>41</volume> (<issue>11</issue>), <fpage>32</fpage>&#x2013;<lpage>41</lpage>. <pub-id pub-id-type="doi">10.1109/MC.2008.470</pub-id> </citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yoshimura</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Mori</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Yokoyama</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Koike</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Tanabe</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Sato</surname>
<given-names>N.</given-names>
</name>
<etal/>
</person-group> (<year>2011</year>). <article-title>Identification of Alternative Splicing Events Regulated by an Arabidopsis Serine/arginine-Like Protein, atSR45a, in Response to High-Light Stress Using a Tiling Array</article-title>. <source>Plant Cell Physiol.</source> <volume>52</volume> (<issue>10</issue>), <fpage>1786</fpage>&#x2013;<lpage>1805</lpage>. <pub-id pub-id-type="doi">10.1093/pcp/pcr115</pub-id> </citation>
</ref>
</ref-list>
</back>
</article>