<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.3" xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Toxicol.</journal-id>
<journal-title>Frontiers in Toxicology</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Toxicol.</abbrev-journal-title>
<issn pub-type="epub">2673-3080</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">893924</article-id>
<article-id pub-id-type="doi">10.3389/ftox.2022.893924</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Toxicology</subject>
<subj-group>
<subject>Technology and Code</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Development of the InTelligence And Machine LEarning (TAME) Toolkit for Introductory Data Science, Chemical-Biological Analyses, Predictive Modeling, and Database Mining for Environmental Health Research</article-title>
<alt-title alt-title-type="left-running-head">Roell et al.</alt-title>
<alt-title alt-title-type="right-running-head">Environmental Health Data Science Training</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Roell</surname>
<given-names>Kyle</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="fn" rid="fn1">
<sup>&#x2020;</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/404125/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Koval</surname>
<given-names>Lauren E.</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="fn" rid="fn1">
<sup>&#x2020;</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Boyles</surname>
<given-names>Rebecca</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/607253/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Patlewicz</surname>
<given-names>Grace</given-names>
</name>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/581692/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Ring</surname>
<given-names>Caroline</given-names>
</name>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Rider</surname>
<given-names>Cynthia V.</given-names>
</name>
<xref ref-type="aff" rid="aff5">
<sup>5</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/784249/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Ward-Caviness</surname>
<given-names>Cavin</given-names>
</name>
<xref ref-type="aff" rid="aff6">
<sup>6</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Reif</surname>
<given-names>David M.</given-names>
</name>
<xref ref-type="aff" rid="aff7">
<sup>7</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/275221/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Jaspers</surname>
<given-names>Ilona</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="aff" rid="aff8">
<sup>8</sup>
</xref>
<xref ref-type="aff" rid="aff9">
<sup>9</sup>
</xref>
<xref ref-type="aff" rid="aff10">
<sup>10</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Fry</surname>
<given-names>Rebecca C.</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="aff" rid="aff8">
<sup>8</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/66056/overview"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Rager</surname>
<given-names>Julia E.</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="aff" rid="aff8">
<sup>8</sup>
</xref>
<xref ref-type="aff" rid="aff9">
<sup>9</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/612830/overview"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>The Institute for Environmental Health Solutions</institution>, <institution>Gillings School of Global Public Health</institution>, <institution>The University of North Carolina at Chapel Hill</institution>, <addr-line>Chapel Hill</addr-line>, <addr-line>NC</addr-line>, <country>United States</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Department of Environmental Sciences and Engineering</institution>, <institution>Gillings School of Global Public Health</institution>, <institution>The University of North Carolina at Chapel Hill</institution>, <addr-line>Chapel Hill</addr-line>, <addr-line>NC</addr-line>, <country>United States</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>Research Computing</institution>, <institution>RTI International</institution>, <addr-line>Durham</addr-line>, <addr-line>NC</addr-line>, <country>United States</country>
</aff>
<aff id="aff4">
<sup>4</sup>
<institution>Center for Computational Toxicology and Exposure</institution>, <institution>US Environmental Protection Agency</institution>, <addr-line>Durham</addr-line>, <addr-line>NC</addr-line>, <country>United States</country>
</aff>
<aff id="aff5">
<sup>5</sup>
<institution>Division of the National Toxicology Program</institution>, <institution>National Institute of Environmental Health Sciences</institution>, <addr-line>Durham</addr-line>, <addr-line>NC</addr-line>, <country>United States</country>
</aff>
<aff id="aff6">
<sup>6</sup>
<institution>Center for Public Health and Environmental Assessment</institution>, <institution>US Environmental Protection Agency</institution>, <addr-line>Chapel Hill</addr-line>, <addr-line>NC</addr-line>, <country>United States</country>
</aff>
<aff id="aff7">
<sup>7</sup>
<institution>Bioinformatics Research Center</institution>, <institution>Department of Biological Sciences</institution>, <institution>North Carolina State University</institution>, <addr-line>Raleigh</addr-line>, <addr-line>NC</addr-line>, <country>United States</country>
</aff>
<aff id="aff8">
<sup>8</sup>
<institution>Curriculum in Toxicology and Environmental Medicine</institution>, <institution>School of Medicine</institution>, <institution>University of North Carolina</institution>, <addr-line>Chapel Hill</addr-line>, <addr-line>NC</addr-line>, <country>United States</country>
</aff>
<aff id="aff9">
<sup>9</sup>
<institution>Center for Environmental Medicine, Asthma and Lung Biology</institution>, <institution>School of Medicine</institution>, <institution>University of North Carolina</institution>, <addr-line>Chapel Hill</addr-line>, <addr-line>NC</addr-line>, <country>United States</country>
</aff>
<aff id="aff10">
<sup>10</sup>
<institution>Department of Pediatrics, Microbiology and Immunology</institution>, <institution>School of Medicine</institution>, <institution>University of North Carolina</institution>, <addr-line>Chapel Hill</addr-line>, <addr-line>NC</addr-line>, <country>United States</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/691767/overview">Susan Tilton</ext-link>, Oregon State University, United States</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1603580/overview">Shannon Bell</ext-link>, Integrated Laboratory Systems, Inc., United States</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1807290/overview">James Auman</ext-link>, Inotiv, United States</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Julia E. Rager, <email>jrager@unc.edu</email>
</corresp>
<fn fn-type="equal" id="fn1">
<label>
<sup>&#x2020;</sup>
</label>
<p>These authors have contributed equally to this work and share first authorship</p>
</fn>
<fn fn-type="other">
<p>This article was submitted to Computational Toxicology and Informatics, a section of the journal Frontiers in Toxicology</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>22</day>
<month>06</month>
<year>2022</year>
</pub-date>
<pub-date pub-type="collection">
<year>2022</year>
</pub-date>
<volume>4</volume>
<elocation-id>893924</elocation-id>
<history>
<date date-type="received">
<day>11</day>
<month>03</month>
<year>2022</year>
</date>
<date date-type="accepted">
<day>30</day>
<month>05</month>
<year>2022</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2022 Roell, Koval, Boyles, Patlewicz, Ring, Rider, Ward-Caviness, Reif, Jaspers, Fry and Rager.</copyright-statement>
<copyright-year>2022</copyright-year>
<copyright-holder>Roell, Koval, Boyles, Patlewicz, Ring, Rider, Ward-Caviness, Reif, Jaspers, Fry and Rager</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>Research in environmental health is becoming increasingly reliant upon data science and computational methods that can more efficiently extract information from complex datasets. Data science and computational methods can be leveraged to better identify relationships between exposures to stressors in the environment and human disease outcomes, representing critical information needed to protect and improve global public health. Still, there remains a critical gap surrounding the training of researchers on these <italic>in silico</italic> methods. We aimed to address this gap by developing the inTelligence And Machine lEarning (TAME) Toolkit, promoting trainee-driven data generation, management, and analysis methods to &#x201c;TAME&#x201d; data in environmental health studies. Training modules were developed to provide applications-driven examples of data organization and analysis methods that can be used to address environmental health questions. Target audiences for these modules include students, post-baccalaureate and post-doctorate trainees, and professionals that are interested in expanding their skillset to include recent advances in data analysis methods relevant to environmental health, toxicology, exposure science, epidemiology, and bioinformatics/cheminformatics. Modules were developed by study coauthors using annotated script and were organized into three chapters within a GitHub Bookdown site. The first chapter of modules focuses on introductory data science, which includes the following topics: setting up R/RStudio and coding in the R environment; data organization basics; finding and visualizing data trends; high-dimensional data visualizations; and Findability, Accessibility, Interoperability, and Reusability (FAIR) data management practices. 
The second chapter of modules incorporates chemical-biological analyses and predictive modeling, spanning the following methods: dose-response modeling; machine learning and predictive modeling; mixtures analyses; -omics analyses; toxicokinetic modeling; and read-across toxicity predictions. The last chapter of modules was organized to provide examples on environmental health database mining and integration, including chemical exposure, health outcome, and environmental justice indicators. Training modules and associated data are publicly available online (<ext-link ext-link-type="uri" xlink:href="https://uncsrp.github.io/Data-Analysis-Training-Modules/">https://uncsrp.github.io/Data-Analysis-Training-Modules/</ext-link>). Together, this resource provides unique opportunities to obtain introductory-level training on current data analysis methods applicable to 21st century science and environmental health.</p>
</abstract>
<abstract abstract-type="graphical">
<title>Graphical Abstract</title>
<p>
<graphic xlink:href="FTOX_ftox-2022-893924_wc_abs.tif" position="anchor"/>
</p>
</abstract>
<kwd-group>
<kwd>bioinformatics and computational biology</kwd>
<kwd>cheminformatics</kwd>
<kwd>data science</kwd>
<kwd>epidemiology</kwd>
<kwd>exposure science</kwd>
<kwd>machine learning</kwd>
<kwd>public health</kwd>
<kwd>toxicology</kwd>
</kwd-group>
<contract-num rid="cn001">P42ES031007 T32ES007126 UH3OD023348 ZIA ES103316-05</contract-num>
<contract-sponsor id="cn001">National Institutes of Health<named-content content-type="fundref-id">10.13039/100000002</named-content>
</contract-sponsor>
</article-meta>
</front>
<body>
<sec id="s1">
<title>Highlights</title>
<p>
<list list-type="simple">
<list-item>
<p>&#x2022; Training that translates data science into environmental health research is needed</p>
</list-item>
<list-item>
<p>&#x2022; Modules were developed to teach coding basics and introductory data science</p>
</list-item>
<list-item>
<p>&#x2022; Also cover chemical-biological modeling, machine learning, and database mining</p>
</list-item>
<list-item>
<p>&#x2022; Modules exemplify methods to uniquely address environmental health issues</p>
</list-item>
<list-item>
<p>&#x2022; Modules allow for improved training towards current data analysis methods</p>
</list-item>
</list>
</p>
</sec>
<sec id="s2">
<title>1 Introduction</title>
<p>The field of environmental health is rapidly expanding efforts aimed at improving data science methods and data integration. Data produced in environmental health studies are becoming larger, with increased resolution and expanded variable coverage paralleling technological advancements and improved record keeping. These data now serve as critical resources to increase the understanding of relationships between chemicals in the environment and disease outcomes. Multiple organizations have recently advocated for increased reliance and proficiency surrounding <italic>in silico</italic> approaches to advance the science of toxicity testing, improve chemical exposure assessments, and increase data sharing and associated analysis tools (<xref ref-type="bibr" rid="B51">NAS, 2007</xref>; <xref ref-type="bibr" rid="B52">NAS, 2017</xref>; <xref ref-type="bibr" rid="B27">EU, 2019</xref>; <xref ref-type="bibr" rid="B29">Florance, 2020</xref>; <xref ref-type="bibr" rid="B84">Sim et al., 2020</xref>; <xref ref-type="bibr" rid="B26">EPA, U.S, 2021b</xref>). However, there remains high demand for personnel that are adequately trained to analyze and manage large datasets to address environmental health issues, representing a timely concern that requires updated resources and training opportunities. We therefore aimed to help address this critical gap through the development of an online toolkit, titled the inTelligence And Machine lEarning (TAME) Toolkit, to promote didactic data generation, management, and analysis methods to &#x201c;TAME&#x201d; data in environmental health studies.</p>
<p>The TAME Toolkit was developed to provide a publicly available, self-guided tour on topics spanning introduction to computer programming, chemical-biological analyses, predictive modeling, and environmental health database mining. The majority of computer programming information and examples provided within the TAME Toolkit were based in the R coding language, since this coding environment is publicly available, widely used, and well-documented (<xref ref-type="bibr" rid="B87">The R Project for Statistical Computing, 2021</xref>). R is specifically available as Free Software under the Free Software Foundation&#x2019;s GNU General Public License and can be run across all major platforms and operating systems, including Unix, Windows, and MacOS. Because of this open licensing format, R has emerged as an avenue for world-wide collaboration, benefiting from the continual expansion through thousands of user-developed packages that aid in improved data analyses and methods sharing. Packages have varying utilities, spanning basic organization and manipulation of data to cutting-edge approaches to parse and analyze data through artificial intelligence (AI) and/or machine learning (ML) (<xref ref-type="bibr" rid="B21">CRAN, 2021a</xref>; <xref ref-type="bibr" rid="B6">Bioconductor, 2021</xref>).</p>
<p>Data analysis examples were included in the TAME Toolkit to span topics relevant to environmental health, which is notably multi-disciplinary and includes exposure science, epidemiology, toxicology, bioinformatics/cheminformatics, and related disciplines. Examples were developed by the team of authors, pulling from their real-world datasets and expertise in environmental health data analytics. Training modules were organized to include examples of each author&#x2019;s area of expertise, to provide a broad foundation in data science methods relevant to environmental health. Modules contained within the TAME Toolkit were organized into three chapters spanning 1) introductory data science; 2) chemical-biological analyses and predictive modeling; and 3) environmental health database mining. Modules were designed to aid in the training of students, post-baccalaureate and post-doctorate trainees, and professionals that are interested in expanding their skillsets surrounding data analysis techniques relevant to environmental health, toxicology, exposure science, epidemiology, and bioinformatics/cheminformatics. These modules will continue to be expanded and improved upon in the coming years, to continue the expanded use of data management and analysis tools to address timely environmental health research topics and promote meaningful collaborations across this multi-disciplinary field of study.</p>
</sec>
<sec id="s3">
<title>2 Methods</title>
<sec id="s2-1">
<title>2.1 Overall Approach to Organizing the inTelligence And Machine lEarning Toolkit</title>
<p>The TAME Toolkit was developed with the goal of guiding participants with various backgrounds through data organization and analysis methods that are useful towards evaluating big data in exposure science, epidemiology, toxicology, and environmental health studies. Modules were developed to cover three primary focuses (organized into chapters): 1) introductory data science; 2) chemical-biological analyses and predictive modeling; and 3) environmental health database mining (<xref ref-type="fig" rid="F1">Figure 1</xref>). Applications-based environmental health questions are posed to keep participants actively engaged. These questions also aid in the translation of methods towards real-world exposure science, toxicology, and public health issues. These modules were developed based on examples from our ongoing research efforts using environmental health datasets and/or related data generated for these training purposes.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>Overall organization of the TAME Toolkit, developed to promote trainee-driven data generation, management, and analysis methods to &#x201c;TAME&#x201d; data in exposure science, toxicology, and environmental health research. Individual training modules were developed in R coding language to provide applications-based training in the broad categories of data science, chemical-biological analyses and predictive modeling, and environmental health database mining.</p>
</caption>
<graphic xlink:href="ftox-04-893924-g001.tif"/>
</fig>
<p>The TAME Toolkit was developed to provide an overview of example approaches to analyzing data that are highly relevant to exposure science, toxicology, and environmental health research applications. Rather than including an exhaustive list of techniques and tutorials that covers all potentially relevant methods, we instead highlight example methods and datasets that are tangible and cover important aspects of data organization, visualization, and analysis within the environmental health research field. Topics of training modules were selected to include current approaches that are of high interest in 21st century toxicology and exposure science and new approach methodologies (<xref ref-type="bibr" rid="B51">NAS, 2007</xref>; <xref ref-type="bibr" rid="B52">NAS, 2017</xref>; <xref ref-type="bibr" rid="B92">Wambaugh et al., 2019</xref>; <xref ref-type="bibr" rid="B99">Zavala et al., 2020</xref>) that also align with the coauthors&#x2019; areas of expertise. These examples are provided in the TAME Toolkit through organized training modules that are purposely stand-alone and discrete, as opposed to organizing modules that depend on participants having successfully completed all preceding modules and associated analyses. This format was selected to allow participants to engage in specific analysis topics they are interested in learning in achievable spans of time.</p>
<p>Modules were developed to include helpful resources throughout the code, such that users aiming to further their education/development have access to additional learning opportunities and analysis methodologies to explore. Notably, each module was not designed as a complete guide to conduct research on a specific topic; rather, modules were designed as a starting point for a data analysis technique. Additional guidance and resources were incorporated throughout each of the training modules, particularly within the introduction sections as well as the final concluding remarks sections, to point participants to additional examples and guidance when interested. These additional resources spanned book chapters and guidance documents dedicated to the specific data analysis topic, as well as example peer-reviewed, published literature. All concepts within the TAME Toolkit were selected to include important techniques that can be incorporated into environmental health studies, and thus, provide a cohesive set of skills that trainees can leverage within their current research studies, real-world applications, and/or future job marketability, depending on their career stage.</p>
</sec>
<sec id="s2-2">
<title>2.2 Target Audience of the TAME Toolkit</title>
<p>Data training modules were designed for the following target audiences: 1) academic students obtaining degrees in environmental health, toxicology, exposure science, epidemiology, bioinformatics, and related disciplines; 2) post-baccalaureate and post-doctorate trainees that are working in the environmental health research arena; and 3) professionals in academia, government, or industry that are interested in expanding their skillset to include recent advances in data analysis methods relevant to environmental health, toxicology, exposure science, epidemiology, and bioinformatics/cheminformatics. These participants would ideally have some level of training in basic biology, chemistry, environmental science, toxicology, and/or epidemiology; however, training modules were organized to provide background information and helpful reading/training resources for content that participants may benefit from if lacking a certain background across the multi-disciplinary field of environmental health.</p>
</sec>
<sec id="s2-3">
<title>2.3 Data Training Module Development and Underlying Technologies</title>
<p>The TAME training modules were developed and made publicly available through the UNC Superfund Research Program (UNC-SRP) GitHub website, specifically through a Bookdown website available at: <ext-link ext-link-type="uri" xlink:href="https://uncsrp.github.io/Data-Analysis-Training-Modules/">https://uncsrp.github.io/Data-Analysis-Training-Modules/</ext-link>. This interface was selected as the primary landing site for these modules because of its smooth communication between R/RStudio/Markdown. Furthermore, all module example datasets and script files could be easily organized and posted to the parent GitHub webpage, publicly available at: <ext-link ext-link-type="uri" xlink:href="https://github.com/UNCSRP">https://github.com/UNCSRP</ext-link>. GitHub was selected as the primary warehouse of these data and associated script, as it currently represents the most commonly used platform for storing, tracking, and collaborating on software/computing projects spanning over 73 million developers, 4 million organizations, and 200 million repositories (<xref ref-type="bibr" rid="B32">Github, 2022</xref>). We organized the TAME Toolkit script and underlying data to be available to participants through both Bookdown and GitHub to meet the learning preferences of each participant. This structure specifically allows participants to follow through finalized training modules online that are published through Bookdown, and it also allows participants to download the raw data and script from the parent GitHub webpage and run the modules on their own computers through their local computing systems and preferred programming structure.</p>
<p>Each training module was specifically developed in R Markdown, which is a LaTeX-like documentation format that allows developers to draft comprehensive documentation throughout R-based scripts (<xref ref-type="bibr" rid="B5">Baumer and Udwin, 2015</xref>). R Markdown is also advantageous in that programmers can run their code in its entirety on their computer and save a &#x201c;knitted&#x201d; version of the code that also displays messages, results, and graphics that are produced when running each line of code. These R Markdown files (with .Rmd extensions) represent the specific script documents that were uploaded to GitHub, alongside README files and associated input/output data files used during the scripted activities. The final knitted HTML files of each training module&#x2019;s R Markdown script represent the actual files that were used in the online posting to Bookdown.</p>
</sec>
<sec id="s2-4">
<title>2.4 Data Training Module Evaluation</title>
<p>Modules were beta tested through the delivery of teaching materials within Dr. Julia Rager&#x2019;s new course at UNC, titled &#x201c;Computational Toxicology and Exposure Science.&#x201d; UNC students, largely consisting of graduate-level students in environmental science, toxicology, and public health, provided feedback on each module&#x2019;s content <italic>via</italic> anonymous course surveys and classroom-led conversations. These suggestions were incorporated into the final versions of the training modules with the goal of achieving the TAME Toolkit objectives and broadening target audiences and dissemination into the greater scientific community. Select training modules have also been disseminated <italic>via</italic> hands-on training workshops. Feedback surrounding training module content was similarly collected <italic>via</italic> anonymous course surveys and incorporated into revised training materials.</p>
</sec>
<sec id="s2-5">
<title>2.5 TAME Toolkit Contributors</title>
<p>TAME Toolkit content and associated training modules were developed by experts in environmental health research and data science. These experts were selected to contribute to the TAME Toolkit based on the following qualifications: Dr. Kyle Roell is the lead Data Analyst for the Institute for Environmental Health Solutions at the University of North Carolina at Chapel Hill (UNC) and is an experienced software developer and programmer with expertise in bioinformatics and statistical genetics (<xref ref-type="bibr" rid="B100">Zhang et al., 2017</xref>; <xref ref-type="bibr" rid="B80">Roell et al., 2019</xref>; <xref ref-type="bibr" rid="B79">Roell et al., 2021</xref>). Ms. Koval is a graduate student in the UNC Department of Environmental Sciences and Engineering and is contributing towards ongoing studies on human health and the environment (<xref ref-type="bibr" rid="B76">Ring et al., 2021</xref>; <xref ref-type="bibr" rid="B9">Carberry et al., 2022</xref>). Rebecca Boyles is a Director of the Center for Data Modernization Solutions at RTI International. Ms. Boyles is an expert at data-driven research collaborations and the implementation of computational approaches to ensure research data are Findable, Accessible, Interoperable, and Reusable (FAIR) (<xref ref-type="bibr" rid="B7">Boyles et al., 2019</xref>; <xref ref-type="bibr" rid="B78">Robasky et al., 2020</xref>; <xref ref-type="bibr" rid="B36">Holmgren et al., 2021</xref>). Dr. Grace Patlewicz is a Chemist at the U.S. Environmental Protection Agency (U.S. EPA) and leader of chemical read-across applications towards chemical safety assessments (<xref ref-type="bibr" rid="B35">Helman et al., 2019b</xref>; <xref ref-type="bibr" rid="B54">Nelms et al., 2020</xref>; <xref ref-type="bibr" rid="B83">Shah et al., 2021</xref>). Dr. Caroline Ring is a Principal Investigator at the U.S. 
EPA and leader in computational exposure science and toxicology approaches for chemical regulatory safety assessments, with particular expertise in toxicokinetics (<xref ref-type="bibr" rid="B75">Ring et al., 2017</xref>; <xref ref-type="bibr" rid="B74">Ring et al., 2019</xref>; <xref ref-type="bibr" rid="B76">Ring et al., 2021</xref>). Dr. Cynthia Rider is a Toxicologist at the National Institute of Environmental Health Sciences and a leading expert in the chemical safety and risk assessment of chemical mixtures (<xref ref-type="bibr" rid="B10">Catlin et al., 2018</xref>; <xref ref-type="bibr" rid="B81">Ryan et al., 2019</xref>; <xref ref-type="bibr" rid="B73">Rider et al., 2021</xref>). Dr. Cavin Ward-Caviness is a Computational Biologist and Principal Investigator at the U.S. EPA, and he leads studies integrating geospatial exposure measures with molecular biomarkers and health outcome data to understand the impacts of chemical pollutants and social determinants of health (<xref ref-type="bibr" rid="B95">Ward-Caviness et al., 2020</xref>; <xref ref-type="bibr" rid="B48">Martin et al., 2021</xref>; <xref ref-type="bibr" rid="B94">Ward-Caviness et al., 2021</xref>). Dr. David Reif is a Professor in the Department of Biological Sciences at North Carolina State University (NCSU) and Director of the NCSU Bioinformatics Consulting and Services Core. Dr. Reif leads studies implementing computational modeling approaches to leverage big data in predicting exposure and disease outcomes (<xref ref-type="bibr" rid="B43">Kosnik and Reif, 2019</xref>; <xref ref-type="bibr" rid="B33">Green et al., 2021</xref>; <xref ref-type="bibr" rid="B49">Marvel et al., 2021</xref>). Dr. Jaspers is a Professor in the Department of Pediatrics, Microbiology and Immunology at UNC, and is the Director of the Curriculum in Toxicology and Environmental Medicine and Director of the Center for Environmental Medicine, Asthma and Lung Biology. Dr. 
Jaspers leads studies integrating medicine with environmental health research, combining data from clinical, toxicological, and molecular biology study designs (<xref ref-type="bibr" rid="B37">Jaspers et al., 1997</xref>; <xref ref-type="bibr" rid="B63">Rager et al., 2013</xref>; <xref ref-type="bibr" rid="B70">Rebuli et al., 2021</xref>). Dr. Fry is a Professor of Environmental Sciences and Engineering and is the Director of the UNC-Chapel Hill Superfund Research Program and the Director of the Institute for Environmental Health Solutions. Dr. Fry leads studies integrating genomic and epigenomic approaches within epidemiological, toxicological, and clinical study designs to identify mechanisms of environmental exposure-induced disease and organize solution-oriented intervention (<xref ref-type="bibr" rid="B30">Fry et al., 2007</xref>; <xref ref-type="bibr" rid="B85">Smeester et al., 2011</xref>; <xref ref-type="bibr" rid="B31">Fry et al., 2012</xref>; <xref ref-type="bibr" rid="B46">Manuck et al., 2021a</xref>). Dr. Julia Rager is an Assistant Professor in the UNC Department of Environmental Sciences and Engineering, and she leads studies evaluating the health impacts of environmental exposures through bioinformatic approaches aimed at integrating chemical-biological signatures to elucidate primary disease drivers and their underlying biological mechanisms (<xref ref-type="bibr" rid="B69">Rager et al., 2015</xref>; <xref ref-type="bibr" rid="B59">Rager et al., 2017</xref>; <xref ref-type="bibr" rid="B16">Clark et al., 2021</xref>; <xref ref-type="bibr" rid="B64">Rager et al., 2021</xref>). Collectively, this team of environmental health research experts were well-equipped to develop training materials within the TAME Toolkit.</p>
</sec>
</sec>
<sec id="s4">
<title>3 Results</title>
<p>TAME Toolkit training modules are now publicly available, promoting trainee-driven data generation, management, and analysis methods to &#x201c;TAME&#x201d; data in environmental health studies. These modules are publicly accessible (<ext-link ext-link-type="uri" xlink:href="https://uncsrp.github.io/Data-Analysis-Training-Modules/">https://uncsrp.github.io/Data-Analysis-Training-Modules/</ext-link>), with underlying code and datasets available in the parent UNC-SRP GitHub website (<ext-link ext-link-type="uri" xlink:href="https://github.com/UNCSRP">https://github.com/UNCSRP</ext-link>). Descriptions of each training module are provided below alongside their associated datasets and primary analysis findings. Collectively, training modules serve as representative examples that address research questions relevant to environmental health including topics of toxicology, exposure science, epidemiology, bioinformatics, and related fields of study.</p>
<sec id="s3-1">
<title>3.1 Introductory Data Science</title>
<p>This series of TAME Toolkit training modules begins with introductory-level training on setting up R/RStudio, coding, data organization basics, basic methods to identify and visualize trends in data, and visualize high-dimensional data (modules 1.1&#x2013;1.4). Introductory data science materials have previously been covered by other groups/online resources (<xref ref-type="bibr" rid="B96">Wickham and Grolemund, 2017</xref>; <xref ref-type="bibr" rid="B1">Adair et al., 2021</xref>; <xref ref-type="bibr" rid="B20">Coursera, 2021</xref>), and we therefore provide a high-level overview of these introductory modules below. A more focused description begins with the next training module, which serves as a novel introduction to FAIR data management practices (module 1.5). This module is also the first to incorporate questions specific to environmental health, which are included throughout the remaining training modules.</p>
<sec id="s3-1-1">
<title>3.1.1 Introduction to Coding in R</title>
<p>The objective of this module is to provide an introduction to coding through the R language and its associated environment, RStudio. This objective is met by first detailing instructions with corresponding screenshots describing how to download/install both of these programs. An introduction on installing and loading packages in R is then provided. Scripting basics are detailed, including setting a working directory, importing and exporting files, and viewing data within the R console/RStudio environment. The importance of this module is that it provides the foundation needed for participants to become acclimated and set-up for running R programming on their computing systems.</p>
</sec>
<sec id="s3-1-2">
<title>3.1.2 Data Organization Basics</title>
<p>The objective of this module is to provide an introduction on data organization methods. This objective is met by presenting basic data organization methods using an example environmentally relevant human cohort dataset. This cohort was generated by creating data distributions randomly pulled from our previous publications (<xref ref-type="bibr" rid="B60">Rager et al., 2014a</xref>; <xref ref-type="bibr" rid="B17">Clark et al., 2019</xref>; <xref ref-type="bibr" rid="B57">Payton et al., 2020</xref>; <xref ref-type="bibr" rid="B16">Clark et al., 2021</xref>), resulting in a bespoke dataset for these training purposes. Data include subject information/demographic data, as well as environmental exposure data, focusing on metals concentrations in drinking water and human urine samples. Data organization methods that are demonstrated in this training module include merging, filtering, subsetting, melting, and casting. These important methods are demonstrated using base R functions, as well as the commonly implemented package, <italic>Tidyverse</italic>, that allows users to more efficiently organize and manipulate datasets in R (<xref ref-type="bibr" rid="B22">CRAN, 2021b</xref>). The importance of this module is that it provides basic skills needed to organize data, in general, within the coding environment, representing a foundational skill that must be acquired prior to running any scripted analysis.</p>
</sec>
<sec id="s3-1-3">
<title>3.1.3 Finding and Visualizing Data Trends</title>
<p>The objective of this module is to provide an overview of basic statistical tests and data visualizations. This objective is met leveraging the same example cohort with environmentally relevant data introduced in <xref ref-type="sec" rid="s3-1-2">Section 3.1.2</xref>. Tests for normality are first presented, alongside methods to plot histograms and boxplots to view data distributions. Basic statistical tests are then presented, including the <italic>t</italic>-test, analysis of variance, regression modeling, chi-squared test, and Fisher&#x2019;s exact test. Additional example visualizations are provided alongside these statistical tests, including boxplots, scatterplots, and regression lines. These statistical tests are introductory-level, with more extensive examples and associated descriptions of statistical models in the subsequent applications-based training modules (e.g., modules 2.4, 2.5, 2.6, 3.2, and 3.3). The importance of this module is that it provides an overview of statistical methods that are very routinely employed within environmental health studies, and thus learning how to carry out these basic statistics represents a foundational skillset for anyone in this field of study.</p>
</sec>
<sec id="s3-1-4">
<title>3.1.4 High-Dimensional Data Visualizations</title>
<p>The objective of this module is to provide an introduction to methods that can be used to visualize high dimensional data. Approaches described in this training module include data formatting, data scaling, and the visualization of prepared datasets through density plots, GGally plots, boxplots, correlation plots, hierarchical clustering, and heatmaps. Visualization approaches are demonstrated using a large environmental chemistry dataset, based off a chemical analysis of smoke samples collected during lab-based simulations of wildfire events. These data have been previously published (<xref ref-type="bibr" rid="B40">Kim et al., 2018</xref>; <xref ref-type="bibr" rid="B64">Rager et al., 2021</xref>) and are used here as an example of an environmental dataset relevant to environmental health. These visualization methods are provided here at an introductory-level, with many other examples detailed throughout the majority of the next training modules. The importance of this module is that it provides ideas and techniques that can be used to visualize data relevant to environmental health, which are becoming increasingly high dimensional and thus, require these more sophisticated methods to adequately illustrate important data trends.</p>
</sec>
<sec id="s3-1-5">
<title>3.1.5 Findability, Accessibility, Interoperability, and Reusability Data Management Practices</title>
<p>The objective of this module is to introduce trainees to best data management practices in environmental health research. A method to ensure proper data management is the implementation of Findability, Accessibility, Interoperability, and Reusability (FAIR) practices (<xref ref-type="bibr" rid="B98">Wilkinson et al., 2016</xref>). This topic has received much attention in recent years through workshops, government reports, and publications, which are provided within the online training module. The following questions are addressed throughout this training module:<list list-type="simple">
<list-item>
<p>1) What is FAIR?</p>
</list-item>
<list-item>
<p>2) When was FAIR first developed?</p>
</list-item>
<list-item>
<p>3) When making data &#x201c;Findable,&#x201d; who and what should be able to find your data?</p>
</list-item>
<list-item>
<p>4) When saving/formatting your data, which of the following formats is preferred to meet FAIR principles: .pdf, .csv, or a proprietary output file from your instrument?</p>
</list-item>
<list-item>
<p>5) How can I find a suitable data repository for my data?</p>
</list-item>
</list>
</p>
<p>This module first provides an introduction to FAIR (<xref ref-type="fig" rid="F2">Figure 2</xref>), including a history of how this term was first developed and implemented. Trainees are then guided through each component of FAIR, organized by letter. To detail, the F in FAIR identifies components needed to make the (meta)data findable. These components include unique persistent identifiers and descriptive information (i.e., metadata) that can be searched by both humans and computer systems. The A components are designed to ensure that (meta)data are available long-term and can be accessed by humans and machines using standard communication protocols with clearly described limitations on reuse. The I components of the principles address needs for data exchange and interpretation by humans and machines, which includes the use of controlled vocabularies or ontologies to describe (meta)data and to describe provenance relationships through appropriate data citation. The R components highlight needs for (meta)data to be reused and support integration such as sufficient description of the data and data use limitations. The training module then reviews different types of data repositories that can be used to publish datasets in exposure science, toxicology, and environmental health research. Lastly, this module provides participants with additional training resources, workshops, government reports, and example publications surrounding the use of FAIR data management practices. The importance of this module is that effective data management, organization, and longevity are becoming increasingly critical in ensuring studies are scientifically sound and reproducible, and thus, all scientists that are involved in the analysis of data for a project should be aware of these issues and implement them within their ongoing studies. 
Research funding agencies are additionally requiring increased attention surrounding data sharing and FAIR practices (<xref ref-type="bibr" rid="B55">NIH, 2022</xref>).</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>An overview of the individual components of FAIR data management practices, resulting in the effective release of data products from exposure science, epidemiology, toxicology, and environmental health research.</p>
</caption>
<graphic xlink:href="ftox-04-893924-g002.tif"/>
</fig>
</sec>
</sec>
<sec id="s3-2">
<title>3.2 Chemical-Biological Analyses and Predictive Modeling</title>
<p>This chapter of TAME Toolkit training modules covers approaches that can be used to carry out chemical and/or biological analyses and predictive modeling to better understand exposure-induced disease and underlying toxicological mechanisms. Modules span topics of dose-response modeling (module 2.1), machine learning and predictive modeling (2.2), mixtures analyses (2.3), -omics analyses and systems biology (2.4), toxicokinetic modeling (2.5), and read-across toxicity predictions (2.6). Environmental health questions are posed throughout these modules to maintain active engagement and provide tangibility on the use of the described methods towards environmental health applications. These training modules are detailed below.</p>
<sec id="s3-2-1">
<title>3.2.1 Dose-Response Modeling</title>
<p>The objective of this module is to provide an overview on analyzing toxicological response data in relation to exposure concentrations (or doses), resulting in the derivation of benchmark doses (BMDs). This topic is of high relevance to the field of environmental health, as BMDs represent values that are commonly used as the basis for evaluating risk in chemical safety evaluations, informing the levels at which chemicals may be regulated. This module specifically analyzes animal tumor incidence rates in response to exposure to a mock chemical tested across 12 different concentrations in drinking water. This dataset was generated for the specific purposes of this exercise, to allow for some interesting curve fits and a comparison between tissue site sensitivity to an example chemical insult. Several environmental health questions are posed throughout this module, including:<list list-type="simple">
<list-item>
<p>1) Which target tissue demonstrated the highest incidence of tumor formation from any single exposure dose?</p>
</list-item>
<list-item>
<p>2) Which target tissue&#x2019;s tumor incidence seems to not be related to dose?</p>
</list-item>
<list-item>
<p>3) Upon visual inspection of example log-logistic vs. Weibull model curve fits, can we confidently determine which of these two models best fits these data?</p>
</list-item>
<list-item>
<p>4) For the liver tumor response data, which model curve fits the resulting dose-response data the best? What are the final resulting BMD and BMDL estimates from this model?</p>
</list-item>
<list-item>
<p>5) In comparing between the intestinal vs. liver datasets, which tissue is estimated to show tumor responses at a lower exposure dose?</p>
</list-item>
</list>
</p>
<p>This module first provides a high-level introduction to BMD modeling, and then guides trainees through the process of downloading/loading required packages and example data used in this exercise. These data are then viewed, such that trainees can see the four different tissue sites evaluated for carcinogenicity in response to exposure (i.e., kidney, liver, intestinal, and stomach tissues) and also obtain information on the overall distributions of tissue-specific tumor incidence. Then, data are plotted in dose-response using standard scatter plots with exposure concentrations along the <italic>x</italic>-axis and tumor incidence along the <italic>y</italic>-axis. With these foundation plots generated, trainees are then guided through methods to fit various model curves to these dose-response data, spanning log-logistic, Weibull, and asymptotic regression models as core examples available through the <italic>drc</italic> package (<xref ref-type="bibr" rid="B77">Ritz et al., 2015</xref>). The best fitting curves are then identified through 1) visual inspection of curve fits, and 2) calculation of Akaike Information Criterion (AIC) values. These examples highlight the importance of evaluating model fit to ultimately determine which model should be used to derive final BMD estimates (<xref ref-type="fig" rid="F3">Figure 3</xref>). Trainees are lastly pointed to example dose-response publications that have addressed environmental health questions (<xref ref-type="bibr" rid="B59">Rager et al., 2017</xref>; <xref ref-type="bibr" rid="B3">Auerbach and Paules, 2018</xref>; <xref ref-type="bibr" rid="B88">Thompson et al., 2018</xref>; <xref ref-type="bibr" rid="B38">Johnson et al., 2020</xref>), as well as additional modeling tools and guidance documents surrounding dose-response assessments. 
The importance of this module is that BMD modeling represents a foundational topic in environmental health, where methods can be used to better understand which exposure concentrations/doses are required to elicit toxicity by evaluating trends in datasets.</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>Example model curves that are fit to liver tumor incidence data in this training module. This training module guides trainees through the plotting of dose-response data and the fitting of different types of models to describe dose-response trends in these data. The fit of each resulting curve is evaluated through visual inspection and evaluation of AIC values, and then the training module focuses on the model with the lowest AIC to derive benchmark dose estimates.</p>
</caption>
<graphic xlink:href="ftox-04-893924-g003.tif"/>
</fig>
</sec>
<sec id="s3-2-2">
<title>3.2.2 Machine Learning and Predictive Modeling</title>
<p>The objective of this module is to provide an overview of machine learning (ML) approaches to evaluate high dimensional data relevant to environmental health applications. This module begins by introducing the need for predictive modeling, defining its use in the context of toxicology and environmental health, then establishing a working distinction between ML and traditional statistical methods. Recognizing the wide variety of machine learning methods currently available to researchers, this training module presents introductory-level information on two commonly employed methods, principal component analysis (PCA) and k-means clustering. Their use is illustrated on real data obtained from the National Toxicology Program&#x2019;s Integrated Chemical Environment (ICE) resource (<ext-link ext-link-type="uri" xlink:href="https://ice.ntp.niehs.nih.gov/">https://ice.ntp.niehs.nih.gov/</ext-link>). This module analyzes an example dataset of physicochemical property information for chemicals spanning two classes: per- and polyfluoroalkyl substances (PFAS) and statins. PFAS represent a ubiquitous and pervasive class of man-made industrial chemicals of high environmental relevance due to their persistence in the environment after contamination events (<xref ref-type="bibr" rid="B28">Fenton et al., 2021</xref>). Statins represent a class of lipid-lowering pharmaceuticals used for patients at risk of cardiovascular disease, and statins have been identified as present in water/wastewater effluent (<xref ref-type="bibr" rid="B86">Tete et al., 2020</xref>). The applied data example in this training module was designed to illustrate the concept of using ML methods to differentiate chemical class and &#x201c;predict&#x201d; (in this case, the group membership is known) chemical groupings that can inform a variety of environmental and toxicological applications. The following environmental health questions are addressed throughout this training module:<list list-type="simple">
<list-item>
<p>1) Can we differentiate between PFAS and statin chemical classes, when considering just the raw physicochemical property variables without applying machine learning techniques?</p>
</list-item>
<list-item>
<p>2) What are some of the physicochemical properties that seem to be driving chemical clustering patterns derived through k-means?</p>
</list-item>
<list-item>
<p>3) Upon reducing the data dimensionality through PCA, which physicochemical property contributes the most towards informing data variance captured in the primary principal component?</p>
</list-item>
<list-item>
<p>4) How do the data compare when physicochemical properties are reduced using PCA?</p>
</list-item>
<list-item>
<p>5) If we did not have information telling us which chemical belonged to which class, could we use PCA and k-means to accurately predict whether a chemical is a PFAS vs. statin?</p>
</list-item>
<list-item>
<p>6) What kinds of applications/endpoints can be better understood and/or predicted, because of these derived chemical groupings?</p>
</list-item>
</list>
</p>
<p>This module first provides a high-level introduction to the topics of machine learning and predictive modeling, and then guides trainees through the process of downloading/loading required packages and example data used in this exercise. These data are then viewed by plotting chemicals along their native physicochemical scales (e.g., boiling point versus molecular weight) for all 144 different chemicals, colored according to the two classes of PFAS and statins. Visualizing these data through two bivariate plots demonstrates that there is signal in the data, but substantial overlap between classes for most properties, i.e., individual physicochemical properties may not clearly differentiate between chemical classes. This limitation substantiates the need to employ machine learning methods to better describe group-level trends in these data. K-means clustering is then performed across all physicochemical property data in their native scale and visualized using a heat map. Next, PCA is carried out across all physicochemical property data and visualized to illustrate the concept of dimensionality reduction. The code is provided to calculate results from this PCA, including eigenvalues, percent variance captured by each principal component, and loading scores for the input variables. Finally, PCA is combined with k-means to generate predictions of two chemical groupings that almost entirely capture real-world classifications (<xref ref-type="fig" rid="F4">Figure 4</xref>). Lastly, these methods are discussed in relation to additional applications, including the evaluation of other outcomes such as environmental fate and transport and disease outcome predictions. 
Trainees are provided additional resources including recent example studies that incorporate machine learning to address environmental health questions (<xref ref-type="bibr" rid="B89">To et al., 2019</xref>; <xref ref-type="bibr" rid="B16">Clark et al., 2021</xref>; <xref ref-type="bibr" rid="B33">Green et al., 2021</xref>; <xref ref-type="bibr" rid="B56">Odenkirk et al., 2021</xref>; <xref ref-type="bibr" rid="B76">Ring et al., 2021</xref>). The importance of this module is that it provides a helpful introduction to foundational ML concepts, and upon receiving this training, participants should be positioned to apply these methods to make predictions within their own high dimensional datasets.</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>Example results and visualizations produced through this machine learning and predictive modeling activity, based on an example dataset of 144 PFAS and statins. <bold>(A)</bold> Principal components were derived to capture the majority of variance amongst physicochemical properties across PFAS and statins, using PCA methods. <bold>(B)</bold> Chemical classes were predicted using k-means clustering across PCA-reduced components, demonstrating that the predicted chemical classes were almost identical to the actual chemical classes [as shown in <bold>(A)</bold>].</p>
</caption>
<graphic xlink:href="ftox-04-893924-g004.tif"/>
</fig>
</sec>
<sec id="s3-2-3">
<title>3.2.3 Mixtures Analyses</title>
<p>The objective of this module is to provide an overview of chemical composition signatures and toxicological responses that can be evaluated to inform whether complex mixtures are &#x201c;sufficiently similar.&#x201d; Results from these analyses, referred to as sufficient similarity analyses, can be used to inform data extrapolation from a data-rich mixture to a data-poor mixture during a chemical safety/risk assessment, to adequately protect human health. In this example, data are re-analyzed from a study evaluating the chemical composition and toxicological effects of <italic>Ginkgo biloba</italic> extract, a common dietary supplement ingredient that is commercially available in the U.S. (<xref ref-type="bibr" rid="B10">Catlin et al., 2018</xref>). Here, 29 different sample lots of <italic>G. biloba</italic> extract were collected from several suppliers and analyzed. The chemical components of these sample extracts were evaluated using targeted methods, and associated toxicity was evaluated using gene-specific <italic>in vitro</italic> response assays. These data are leveraged in this training module to inform which of the <italic>G. biloba</italic> samples are sufficiently similar (and thus could use the same toxicological data for risk evaluation), and which are different (and thus would require additional testing). Several questions are posed throughout this module, including:<list list-type="simple">
<list-item>
<p>1) When viewing the variability between chemical profiles, how many groupings of potentially &#x201c;sufficiently similar&#x201d; <italic>G. biloba</italic> samples do you see?</p>
</list-item>
<list-item>
<p>2) Which chemicals do you think are important in differentiating between the different <italic>G. biloba</italic> samples?</p>
</list-item>
<list-item>
<p>3) When viewing the variability between toxicity profiles, how many groupings of potentially &#x201c;sufficiently similar&#x201d; <italic>G. biloba</italic> samples do you see?</p>
</list-item>
<list-item>
<p>4) Were similar chemical groups identified when looking at just the chemistry vs. just the toxicity? How could this impact regulatory decisions?</p>
</list-item>
</list>
</p>
<p>This module specifically guides trainees through the loading of required packages and data, and then carries out an example sufficient similarity analysis first using the chemistry data. Trainees are guided through data processing and scaling, leading to two different grouping and visualization approaches: 1) PCA and associated scatter plot, and 2) hierarchical clustering and associated heat map visualization. Results are used to inform which <italic>G. biloba</italic> extracts display similar chemical composition profiles and which do not. Participants are also guided through the evaluation of potential outlier samples, gauging whether these impact overall data distributions. Similar methods are then used to evaluate the toxicological response data. This analysis concludes with a side-by-side comparison of the sample groupings that result when considering chemical composition vs. toxicity profile data (<xref ref-type="fig" rid="F5">Figure 5</xref>), highlighting the importance of considering both data streams when determining sufficient similarity in the evaluation of complex mixtures. Trainees are then provided additional resources and references for further information on sufficient similarity analyses in environmental health research (<xref ref-type="bibr" rid="B72">Rice et al., 2009</xref>; <xref ref-type="bibr" rid="B10">Catlin et al., 2018</xref>; <xref ref-type="bibr" rid="B81">Ryan et al., 2019</xref>; <xref ref-type="bibr" rid="B19">Collins et al., 2020</xref>). The importance of this module stems from real-world exposures to complex mixtures that often have incomplete toxicity data, which necessitate training to determine when data from a reference mixture can be extrapolated to a mixture-of-concern for risk evaluation.</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>PCA plots used to inform <italic>Ginkgo biloba</italic> extract (GbE) groupings in mixtures-based sufficient similarity analyses. <bold>(A)</bold> This training module first guides participants through the derivation of chemical groups within GbE produced when reviewing only chemical composition data. <bold>(B)</bold> Then, chemical groups are derived by reviewing <italic>in vitro</italic> toxicity response profiles associated with GbE exposures. These grouping results are then compared to highlight that important patterns may be missed when evaluating just chemistry or just toxicity response profiles in a mixtures-based sufficient similarity analysis. Groups are derived across these examples using PCA, representing a very common data reduction/visualization method used to explain the variance across high dimensional datasets.</p>
</caption>
<graphic xlink:href="ftox-04-893924-g005.tif"/>
</fig>
</sec>
<sec id="s3-2-4">
<title>3.2.4 -Omics Analyses and Systems Biology</title>
<p>The objective of this module is to provide an overview of the -omics field and its relation to environmental health research, highlighting transcriptomics as an important -omics endpoint to analyze as a scripted example. The field of -omics initiated from genome-wide information obtained through the Human Genome Project, and has since then expanded to include -omic endpoints spanning the genome, epigenome, transcriptome, proteome, metabolome, microbiome, and the exposome (<xref ref-type="bibr" rid="B15">Cho and Blaser, 2012</xref>; <xref ref-type="bibr" rid="B97">Wild, 2012</xref>; <xref ref-type="bibr" rid="B66">Rager and Fry, 2013</xref>; <xref ref-type="bibr" rid="B18">Clark and Rager, 2020</xref>). Stressors within the environment have the potential to alter -omic signatures, impacting downstream biological processes, cellular function, tissue phenotypes, and overall health (<xref ref-type="bibr" rid="B66">Rager and Fry, 2013</xref>; <xref ref-type="bibr" rid="B18">Clark and Rager, 2020</xref>; <xref ref-type="bibr" rid="B62">Rager et al., 2020</xref>). When interpreting the potential consequences of -omic alterations, it is often helpful to place findings into the context of systems biology. In these systems-level analyses, molecules can be overlaid onto molecular networks to uncover biological pathways and cellular functions that are altered under the condition being tested (<xref ref-type="bibr" rid="B66">Rager and Fry, 2013</xref>; <xref ref-type="bibr" rid="B50">Meisner and Reif, 2015</xref>). This training module provides an overview of these strategies, using an example transcriptomics dataset acquired from lung tissues of mice exposed to biomass burn conditions indicative of potential wildfire exposure scenarios (<xref ref-type="bibr" rid="B40">Kim et al., 2018</xref>; <xref ref-type="bibr" rid="B64">Rager et al., 2021</xref>). Several questions are posed throughout this module, including:<list list-type="simple">
<list-item>
<p>1) What two input data files are commonly needed in the analysis of -omics (e.g., transcriptomics) data?</p>
</list-item>
<list-item>
<p>2) When preparing transcriptomics data for statistical analyses, what are common data filtering steps that are completed during the data QA/QC process?</p>
</list-item>
<list-item>
<p>3) How many genes showed significant differential expression in the mouse lung associated with flaming pine needles, smoldering pine needles, and lipopolysaccharide (LPS)?</p>
</list-item>
<list-item>
<p>4) What biological pathways are disrupted in association with flaming/smoldering pine needles exposure in the lung, identified through systems level analyses?</p>
</list-item>
</list>
</p>
<p>This training module specifically guides users through the loading, viewing, and formatting of the example transcriptomics datasets and associated metadata. Methods to carry out QA/QC of the transcriptomics data are then detailed, including background filtering, sample filtering, and identification of potential sample outliers. Data are adjusted for potential sources of heterogeneity, including mixed cell population distributions that are commonly present when analyzing bulk tissue samples. Statistical models are then designed and implemented to identify genes that were significantly differentially expressed by the evaluated biomass burn scenarios, as enabled through the commonly implemented DESeq2 statistical pipeline (<xref ref-type="bibr" rid="B44">Love et al., 2014</xref>). We find that exposure to both flaming and smoldering of pine needles caused substantial disruptions in gene expression profiles. LPS serves as a positive control for inflammation and produced the greatest transcriptomic response. Gene expression alterations are then summarized <italic>via</italic> visualizations using MA and volcano plots (<xref ref-type="fig" rid="F6">Figure 6</xref>). Resulting lists of differentially expressed genes are lastly evaluated in the context of systems biology, through pathway enrichment analysis based off relationships to KEGG pathways (<xref ref-type="bibr" rid="B39">KEGG, 2021</xref>) using gene set analysis enabled through the PIANO package (<xref ref-type="bibr" rid="B91">Varemo et al., 2013</xref>). We find that pathways involved in cardiopulmonary function, carcinogenesis, and hormone signaling were altered in response to these wildfire-relevant exposure scenarios. 
Trainees are lastly pointed to additional resources, including further information on -omics and systems biology, as well as additional research examples that have evaluated -omic alterations occurring in relation to the environment and involved in disease (<xref ref-type="bibr" rid="B85">Smeester et al., 2011</xref>; <xref ref-type="bibr" rid="B45">Lu et al., 2014</xref>; <xref ref-type="bibr" rid="B67">Rager et al., 2016</xref>; <xref ref-type="bibr" rid="B13">Chappell and Rager, 2017</xref>; <xref ref-type="bibr" rid="B4">Balik-Meisner et al., 2018</xref>; <xref ref-type="bibr" rid="B14">Chappell et al., 2019</xref>; <xref ref-type="bibr" rid="B47">Manuck et al., 2021b</xref>; <xref ref-type="bibr" rid="B12">Chang et al., 2021</xref>). The importance of this module lies in the training of systems biology concepts and analysis of -omics data, including RNA sequencing data, which are becoming increasingly standard molecular endpoints used in the evaluation of exposure-induced biological responses and disease etiologies.</p>
<fig id="F6" position="float">
<label>FIGURE 6</label>
<caption>
<p>Example <bold>(A)</bold> MA plot and <bold>(B)</bold> volcano plot illustrating changes to the transcriptome occurring from exposure to the wildfire-relevant exposure condition of flaming pine needles, identified through this training module. Here, each individual dot represents a gene that was queried for <italic>via</italic> transcriptome technologies, color-coded according to level of significance (multiple test corrected <italic>p</italic>-values) in association with exposure vs. control conditions within the mouse lung. Grey dots indicate genes that were not significant (<italic>p</italic> &#x3e; 0.10), and colored dots indicate genes that were significant (<italic>p</italic> &#x3c; 0.10). Dots of significant genes were further colored according to fold change (ratios of average exposed/unexposed samples), with red indicating positive fold change values (i.e., exposure-associated increased expression) and blue indicating negative fold change values (i.e., exposure-associated decreased expression) for the MA plot. For the volcano plot, colors indicate different filters that were implemented to identify levels of gene expression changes. Expression levels, fold change, and <italic>p</italic>-values are used to visualize the distribution of these statistical results produced from this example -omics analysis.</p>
</caption>
<graphic xlink:href="ftox-04-893924-g006.tif"/>
</fig>
</sec>
<sec id="s3-2-5">
<title>3.2.5 Toxicokinetic Modeling</title>
<p>The objective of this module is to provide an overview of the basics of toxicokinetic (TK) modeling and how this type of modeling can be used in the high-throughput setting for environmental health research applications. TK modeling refers to the evaluation of the uptake and disposition of a chemical in the body. In this activity, the capabilities of the high-throughput TK modeling package, &#x201c;httk,&#x201d; are demonstrated on a suite of environmentally relevant chemicals. The httk R package implements high-throughput TK modeling, including a generic physiologically based toxicokinetic model, as well as chemical-specific parameters needed to solve the model for hundreds of chemicals (<xref ref-type="bibr" rid="B58">Pearce et al., 2017</xref>). Several questions are posed through this module, including:<list list-type="simple">
<list-item>
<p>1) What is the maximum concentration of bisphenol-A estimated to occur in human plasma, after one exposure dose of 1&#xa0;mg/kg/day?</p>
</list-item>
<list-item>
<p>2) What is the estimated range of benzo(a)pyrene concentrations in plasma that can occur in a human population, assuming single doses of 1&#xa0;mg/kg/day and steady-state conditions?</p>
</list-item>
<list-item>
<p>3) How many chemicals have available AC50 values to evaluate in the current ToxCast/Tox21 high-throughput screening database?</p>
</list-item>
<list-item>
<p>4) Based on httk modeling estimates, are chemicals with higher bioactivity exposure ratios always less toxic than chemicals with lower bioactivity exposure ratios?</p>
</list-item>
<list-item>
<p>5) How are chemical risk prioritization results different when using only toxicity information vs. only exposure information vs. bioactivity exposure ratios?</p>
</list-item>
</list>
</p>
<p>This module specifically guides trainees through a general introduction to TK, TK modeling, and the types of TK modeling that can be employed to understand how chemicals travel throughout the body. The module provides scripted examples of TK modeling, starting with the estimation of plasma concentrations over time for a human exposed to bisphenol-A. Then, population variability is considered using information from CDC National Health and Nutrition Examination Survey (NHANES) to inform a distribution of possible plasma concentrations resulting from daily exposure to benzo(a)pyrene. Then, an example high-throughput analysis is carried out over &#x223c;1,000 chemicals, in which population variability is captured to derive estimated quantile distributions of chemical plasma concentrations during steady-state conditions of 1&#xa0;mg/kg/day exposures. Trainees are then guided through the process of deriving administered equivalent doses that associate with concentrations eliciting toxicity derived through toxicity testing. Equivalent doses are specifically derived across &#x223c;1,000 chemicals that are estimated to elicit toxicity in humans, based on <italic>in vitro</italic> data, through &#x201c;reverse TK&#x201d; calculations. The <italic>in vitro</italic> dataset used in these derivations is from the ToxCast high-throughput screening program. ToxCast activity concentrations that elicit 50% maximal bioactivity (AC<sub>50</sub>) are uploaded and organized as inputs, and the 10th percentile ToxCast AC<sub>50</sub> is calculated for each chemical and carried forward in the analysis as concentration estimates for potency. Bioactivity exposure ratios (BERs) are then calculated to place findings into the context of risk assessment. Here, previously generated exposure estimates that have been inferred from CDC NHANES urinary biomonitoring data are used as estimates of chemical exposures. 
These hazard and exposure estimates are then visualized (<xref ref-type="fig" rid="F7">Figure 7</xref>). The final BERs are calculated as the ratio of the lower-end hazard equivalent dose (for the most-sensitive 5% of the population) divided by the upper-end estimated exposure (here, the upper bound on the inferred population median exposure). The importance of these BERs in chemical prioritization efforts is lastly discussed in relation to environmental health research and corresponding government regulatory decisions. Trainees are provided additional resources and case studies that have incorporated TK/httk to address environmental health issues (<xref ref-type="bibr" rid="B93">Wambaugh et al., 2015</xref>; <xref ref-type="bibr" rid="B75">Ring et al., 2017</xref>; <xref ref-type="bibr" rid="B41">Klaren et al., 2019</xref>; <xref ref-type="bibr" rid="B8">Breen et al., 2021</xref>; <xref ref-type="bibr" rid="B76">Ring et al., 2021</xref>). The importance of this module is that how chemicals travel throughout the body and elicit different toxicities based upon target organs significantly depends upon toxicokinetics, and being able to model these relationships is therefore critical towards understanding chemical-induced impacts throughout the body.</p>
<fig id="F7" position="float">
<label>FIGURE 7</label>
<caption>
<p>Chemicals that were analyzed using high-throughput toxicokinetic (httk) modeling in this training module. Shown here is an example visualization produced in this module illustrating how doses estimated to cause toxicity (&#x201c;Equiv. dose&#x201d;), that were produced through httk, compare against doses estimated as human exposures (&#x201c;Exposure&#x201d;). Chemicals are ranked according to bioactivity exposure ratios (BERs), indicating high potential risk (left) to low potential risk (right) to human health.</p>
</caption>
<graphic xlink:href="ftox-04-893924-g007.tif"/>
</fig>
</sec>
<sec id="s3-2-6">
<title>3.2.6 Read-Across Toxicity Predictions</title>
<p>The objective of this module is to provide an overview of read-across methods to computationally predict chemical toxicity based on molecular structure information. This training module represents a timely topic, paralleling increased impetus for reducing reliance upon animal testing (<xref ref-type="bibr" rid="B51">NAS, 2007</xref>; <xref ref-type="bibr" rid="B26">EPA, U.S, 2021b</xref>). In this example, data are analyzed spanning approximately 7,000 chemicals that have known structure and acute toxicity data. The specific acute toxicity endpoint that is analyzed is LD<sub>50</sub>, reflecting the dose required to cause lethality in 50% of animals, collected through historical animal testing. These data have been previously summarized and analyzed (<xref ref-type="bibr" rid="B34">Helman et al., 2019a</xref>). In this activity, we aimed to estimate an LD<sub>50</sub> value for an example target chemical of interest that is commonly used in the production of industrial compounds, 1-chloro-4-nitrobenzene. To achieve this aim, we explore ways in which we can search for structurally similar chemicals that have LD<sub>50</sub> data already available. Data on these structurally similar chemicals, termed &#x201c;analogues,&#x201d; are then used to predict acute toxicity for the target chemical. The following questions are addressed throughout this module:<list list-type="simple">
<list-item>
<p>1) How many chemicals with acute toxicity data are structurally similar to 1-chloro-4-nitrobenzene?</p>
</list-item>
<list-item>
<p>2) What is the predicted LD<sub>50</sub> for 1-chloro-4-nitrobenzene, derived from read-across?</p>
</list-item>
<list-item>
<p>3) How different is the predicted vs. experimentally observed LD<sub>50</sub> for 1-chloro-4-nitrobenzene?</p>
</list-item>
</list>
</p>
<p>This module specifically guides trainees through the loading of required packages and example data, and then carries out an example read-across analysis specifically using the generalized read-across method (GenRA) (<xref ref-type="bibr" rid="B82">Shah et al., 2016</xref>). Trainees are guided through viewing the distribution of LD<sub>50</sub> values across all evaluated chemicals. Steps are then detailed to convert SMILES nomenclature into computed molecular fingerprint data. Using these molecular fingerprint data, the degree to which each chemical is structurally similar to another chemical is evaluated based on the Tanimoto similarity index. This structural similarity analysis yields an overall similarity matrix, containing all possible pairwise similarity values. Data are then filtered to focus on chemicals with Tanimoto similarity values &#x3e;0.75 to the target chemical, 1-chloro-4-nitrobenzene, resulting in a list of 11 chemical analogues that could then be used to predict toxicity for the target chemical (<xref ref-type="fig" rid="F8">Figure 8</xref>). Finally, generalized read-across was carried out by calculating a similarity-weighted activity score (<xref ref-type="bibr" rid="B82">Shah et al., 2016</xref>), using information from the 11 analogues to predict an LD<sub>50</sub> for 1-chloro-4-nitrobenzene. This <italic>in silico</italic> prediction was then compared to the experimentally observed LD<sub>50</sub> value for this chemical, and the two values were very similar, highlighting the utility of read-across models to inform and predict toxicity for chemicals lacking data. The importance of this module is that predicting chemical-induced toxicity using entirely <italic>in silico</italic> approaches represents a highly efficient skillset that scientists can leverage to better understand chemical-toxicity relationships and predict which chemicals may induce harm to public health.</p>
<fig id="F8" position="float">
<label>FIGURE 8</label>
<caption>
<p>Overall schematic summarizing the steps employed in this example read-across analysis to predict chemical toxicity. This training module guides trainees through the generation of chemical structure fingerprint data and use of these data to identify analogues that can be used to predict toxicity for chemicals lacking data. This example uses chemicals with acute toxicity data (LD<sub>50</sub> values) to predict the acute toxicity of a structurally similar example target chemical.</p>
</caption>
<graphic xlink:href="ftox-04-893924-g008.tif"/>
</fig>
</sec>
</sec>
<sec id="s3-3">
<title>3.3 Environmental Health Database Mining</title>
<p>This series of TAME Toolkit training modules covers introductory-level approaches to mining and analyzing data that can be accessed through publicly available environmental health databases. Modules span topics of mining the Comparative Toxicogenomics Database (CTD) (module 3.1), Gene Expression Omnibus (GEO) (3.2), and database integration across Air Quality, Mortality, and Environmental Justice data (3.3). These training modules also include applications-based environmental health questions and are described below.</p>
<sec id="s3-3-1">
<title>3.3.1 Comparative Toxicogenomics Database</title>
<p>The objective of this module is to provide an exercise on organizing and analyzing chemical-gene lists aggregated through the Comparative Toxicogenomics Database (CTD) (<xref ref-type="bibr" rid="B23">CTD, 2021</xref>; <xref ref-type="bibr" rid="B24">Davis et al., 2021</xref>). Data were specifically pulled for published chemical-gene relationships mapping to the example environmental contaminant, arsenic. The following environmental health questions were addressed through this training module:<list list-type="simple">
<list-item>
<p>1) Which genes show altered expression in response to arsenic exposure?</p>
</list-item>
<list-item>
<p>2) Of the genes showing altered expression, which may be under epigenetic control?</p>
</list-item>
</list>
</p>
<p>This module specifically guides trainees through steps used to query CTD, including the specific selections used in this training dataset to organize chemical-gene interaction data for arsenic. These data are then uploaded into the training module R environment and used as an example for trainees to learn how to view file content and overall dimensions. Then data are filtered to include chemical-gene interactions that map specifically to changes in expression levels, yielding a list of genes that show arsenic-associated expression changes compiled from published literature. Data are additionally filtered using a different approach to yield genes that also show arsenic-associated gene methylation changes. These gene lists are then compared to result in the final elucidation of arsenic-altered genes that have published evidence for epigenetic modifications. Resulting genes represent critical mediators of inflammation and oxidative stress, among other important cellular processes. A visualization of these gene list comparison results is also scripted for using Venn diagram illustrations (<xref ref-type="fig" rid="F9">Figure 9</xref>). Trainees are then provided additional resources, including reference to additional case examples that leveraged data from CTD to identify new mechanisms of environmental exposure-induced disease (<xref ref-type="bibr" rid="B2">Ahir et al., 2013</xref>), fill gaps on data poor chemicals to elucidate environmental influences on disease pathways (<xref ref-type="bibr" rid="B42">Kosnik et al., 2019</xref>), and derive new chemical risk values for prioritizing links between environmental factors, genetic variants, and human diseases (<xref ref-type="bibr" rid="B43">Kosnik and Reif, 2019</xref>). 
Together, this training module serves as an applications-based example to learn basic data manipulation, filtering, and organization steps in R, while highlighting the utility of CTD to identify novel genomic/epigenomic relationships to environmental exposures. The importance of this module is that analyzing data within CTD represents a powerful skillset within the environmental health field, which can be leveraged to improve the understanding of environmental influences on disease outcomes.</p>
<fig id="F9" position="float">
<label>FIGURE 9</label>
<caption>
<p>CTD findings from this training module highlight genes that have shown differential CpG methylation (left) and differential expression (right) in association with arsenic exposure. Results are visualized here using an example Venn diagram, highlighting a group of 315 genes with altered expression that may be influenced <italic>via</italic> epigenetic regulators through CpG methylation alterations.</p>
</caption>
<graphic xlink:href="ftox-04-893924-g009.tif"/>
</fig>
</sec>
<sec id="s3-3-2">
<title>3.3.2 Gene Expression Omnibus</title>
<p>The objective of this module is to provide an overview of pulling, organizing, visualizing, and analyzing -omics data from the GEO database (<xref ref-type="bibr" rid="B53">NCBI, 2021</xref>). Data were specifically pulled from an example GEO dataset (accession number GSE42394) representing gene expression data originally used in a publication evaluating the genomic effects of formaldehyde inhalation exposure in the rat (<xref ref-type="bibr" rid="B61">Rager et al., 2014b</xref>). The following environmental health questions were addressed through this training module:<list list-type="simple">
<list-item>
<p>1) What kind of molecular identifiers are commonly used in microarray-based -omics technologies?</p>
</list-item>
<list-item>
<p>2) How can we convert platform-specific molecular identifiers used in -omics study designs to gene-level information?</p>
</list-item>
<list-item>
<p>3) Why do we often scale gene expression signatures prior to heat map visualizations?</p>
</list-item>
<list-item>
<p>4) What genes are altered in expression by formaldehyde inhalation exposure?</p>
</list-item>
<list-item>
<p>5) What are the potential biological consequences of these gene-level perturbations?</p>
</list-item>
</list>
</p>
<p>This module specifically guides trainees through the loading of required packages and data, including the manual upload of GEO data as well as the automated upload of data leveraging the GEOquery package. Data are then further organized for downstream analyses. Trainees are then provided an overview of the types of molecular identifiers used in this example dataset, originally centered around microarray-based probeset identifiers. To increase interpretability of analysis findings, methods to merge platform-specific identifiers with gene-level annotation information are carried out. Example visualizations are then produced, including boxplots to evaluate the overall distribution of expression data across samples, as well as heat map visualizations that compare unscaled versus scaled gene expression values to emphasize the utility of scaled values for improved visualization of patterns between samples (<xref ref-type="fig" rid="F10">Figure 10</xref>). Statistical analyses are then included to identify which genes are the most significantly altered in expression upon exposure to formaldehyde. The gene identified with the most significantly increased expression in the rat nose is olfactory receptor 633 (<italic>Olr633</italic>), demonstrating that formaldehyde inhalation exposure induced olfactory-related signaling. Together, this training module serves as an important example on how scientists can efficiently leverage existing genome-wide datasets to address new environmental health questions. Trainees are also pointed to previous publications applying these methods to existing GEO datasets that address additional environmental health questions (<xref ref-type="bibr" rid="B65">Rager and Fry, 2012</xref>; <xref ref-type="bibr" rid="B68">Rager et al., 2019</xref>). 
The importance of this module is that online -omics databases, such as GEO, represent robust resources that can be mined to better understand mechanisms of disease and biological responses to insults, and becoming familiar with such resources will expand data reusability and interpretation in future environmental health studies.</p>
<fig id="F10" position="float">
<label>FIGURE 10</label>
<caption>
<p>Heat map visualizations of gene expression data that are produced as examples within the GEO training module. This training module guides trainees through visualizing normalized gene expression data and highlights the differences between plotting <bold>(A)</bold> unscaled versus <bold>(B)</bold> scaled values. This example shows the utility of scaling data prior to visualizations, allowing for improved visualizations of patterns between samples.</p>
</caption>
<graphic xlink:href="ftox-04-893924-g010.tif"/>
</fig>
</sec>
<sec id="s3-3-3">
<title>3.3.3 Database Integration: Air Quality, Mortality, and Environmental Justice Data</title>
<p>The objective of this module is to provide an example analysis based on the integration of data across multiple environmental health databases. Specifically, air quality monitoring data from the U.S. EPA&#x2019;s Air Quality System (AQS) (<xref ref-type="bibr" rid="B25">EPA, U.S, 2021a</xref>) were analyzed, focusing on the 2016 EPA Monitoring Data Annual Average database. These data included average measures of particles &#x2264;2.5&#xa0;&#x3bc;m in diameter (PM<sub>2.5</sub>), nitrogen dioxide (NO<sub>2</sub>), and sulfur dioxide (SO<sub>2</sub>). Health outcome data were also analyzed, specifically from the Centers for Disease Control and Prevention (CDC) Wide-ranging ONline Data for Epidemiologic Research (WONDER) database (<xref ref-type="bibr" rid="B11">CDC, 2021</xref>). These data included the 2016 all-cause mortality rates. Population-level variables were additionally analyzed, including race, and included in the statistical modeling as well as the evaluation of population-level information that can be used to examine Environmental Justice issues. All data were pulled and summarized at the county-level across the entire U.S. for the year 2016 (<xref ref-type="bibr" rid="B71">Remington et al., 2015</xref>; <xref ref-type="bibr" rid="B90">UWPHI, 2021</xref>). The following environmental health questions were addressed through this training module:<list list-type="simple">
<list-item>
<p>1) What areas of the U.S. are most heavily monitored for air quality?</p>
</list-item>
<list-item>
<p>2) Is there an association between long-term, ambient PM<sub>2.5</sub> concentrations and mortality at the county level?</p>
</list-item>
<list-item>
<p>3) What is the difference when running crude statistical models vs. statistical models that adjust for potential confounding, when evaluating the relationship between PM<sub>2.5</sub> and mortality?</p>
</list-item>
<list-item>
<p>4) Do observed associations differ when comparing between counties with a higher vs. lower percentage of African-Americans, which can indicate Environmental Justice concerns?</p>
</list-item>
</list>
</p>
<p>This module specifically guides trainees through an explanation of how the data were downloaded and organized, and then details the loading of required packages and datasets. Then, this module provides code for visualizing county-level air pollution measures obtained through U.S. EPA monitoring stations throughout the U.S. Air pollution measures include PM<sub>2.5</sub>, NO<sub>2</sub>, and SO<sub>2</sub>, and are visualized here as the yearly average (<xref ref-type="fig" rid="F11">Figure 11A</xref>). Air pollution concentrations are then evaluated for potential relationship to the health outcome, mortality (<xref ref-type="fig" rid="F11">Figure 11B</xref>). Specifically, age adjusted mortality rates are organized and associated with PM<sub>2.5</sub> concentrations through linear regression modeling. Crude (univariate) statistical models are first provided that do not take into account the influence of potential confounders. Then, statistical models are used that adjust for potential county-level confounders, including adult smoking, obesity, food environment indicators, physical activity, employment status, rural vs. urban living percentages, sex, ethnicity, and race. Results from these models point to the preliminary finding that PM<sub>2.5</sub> is associated with elevated county-level mortality rates. Previous studies have shown that minority populations reside closer to air pollution sources and as a result are exposed to poorer air quality. This is also seen in these data, with measured air quality differing by percent African-American race in each county. Race is then evaluated further in this analysis as a potential differentiating factor in the models. Here, data distributions are pulled for counties with the highest percentage of African-Americans (top 25%) as well as those with the lowest percentage of African-Americans (bottom 25%). 
Models associating PM<sub>2.5</sub> with all-cause mortality rates are then re-run in these groups and the PM<sub>2.5</sub>-mortality associations are compared. Counties with the highest percentages of African-American race had a significant association with mortality, with magnitudes substantially greater than counties with the lowest percentages of African-American race. This result corresponds with known Environmental Justice concerns, and demonstrates how even a cross-sectional, ecological analysis can highlight differences in environmental health risks. The importance of this module is that it demonstrates ways to integrate disparate health and environmental exposure databases in order to study key questions in environmental public health, including examinations of important Environmental Justice issues.</p>
<fig id="F11" position="float">
<label>FIGURE 11</label>
<caption>
<p>Example visualizations of <bold>(A)</bold> air quality data, highlighting the 2016 annual PM<sub>2.5</sub> concentrations across U.S. counties, and <bold>(B)</bold> potential relationships against mortality rates. Code that supports the generation of these visualizations and example statistics evaluating trends between air pollution and mortality rates, and analyses including potential confounders, is described within this training module.</p>
</caption>
<graphic xlink:href="ftox-04-893924-g011.tif"/>
</fig>
</sec>
<sec id="s3-3-4">
<title>3.3.4 Additional Resources</title>
<p>A final module is included that lists additional resources to aid in the continued training of users on data management and analysis strategies. We specifically include online websites and other training resources that we have found to be useful towards programming and data analysis approaches. These resources are sorted into the following five categories: 1) R programming resources; 2) R packages resources; 3) community discussions on R and R packages; 4) R interfaces; and 5) data science and statistical analysis resources.</p>
</sec>
</sec>
</sec>
<sec id="s5">
<title>4 Discussion and Conclusion</title>
<p>Together, this TAME Toolkit aims to serve as a helpful resource to promote trainee-driven data generation, management, and analysis methods to address the growing demands of 21st century environmental health concerns. Training modules are provided to serve as timely data analysis examples, all describing methods used to extract meaningful results to inform environmental health research applications. R was selected as the example coding platform, leveraging R Markdown and Bookdown formatting; though we recognize that additional training across other computing platforms could expand trainee data analysis skills and capabilities. The training modules are not designed as an exhaustive list of all resources and techniques available to analyze data relevant to environmental health. Rather, these modules highlight examples of methods and databases that can be leveraged in this research field, such that trainees can effectively navigate their way through each training lesson and translate methods learned to future questions. Modules were designed as a starting point for a data analysis technique, where additional resources are provided for further learning opportunities and technical guidance. The content within each module was selected to highlight important methods that can enhance environmental health studies, and thus, these modules collectively provide a cohesive set of skills that participants can leverage within their current research studies, real-world applications, and/or future job marketability, depending on their career stage. In conclusion, this resource serves as a unique training opportunity for future data analysts to learn timely data science and analysis methodologies in an applications-driven manner relevant to environmental health research.</p>
</sec>
</body>
<back>
<sec id="s6">
<title>Data Availability Statement</title>
<p>The original contributions presented in the study are included in the article and online through the TAME Toolkit, available at: <ext-link ext-link-type="uri" xlink:href="https://uncsrp.github.io/Data-Analysis-Training-Modules/">https://uncsrp.github.io/Data-Analysis-Training-Modules/</ext-link>, with underlying code and datasets available in the parent UNC-SRP GitHub website (<ext-link ext-link-type="uri" xlink:href="https://github.com/UNCSRP">https://github.com/UNCSRP</ext-link>).</p>
</sec>
<sec id="s7">
<title>Author Contributions</title>
<p>All authors contributed to the development and QC of the scripted activities, as detailed herein. JR coordinated the overall organization of the training modules and descriptive manuscript text, with contributions and content review from all study coauthors.</p>
</sec>
<sec id="s8">
<title>Funding</title>
<p>This study was supported by the National Institutes of Health (NIH) from the National Institute of Environmental Health Sciences (NIEHS), including grant funds (P42ES031007, T32ES007126, and UH3OD023348) and in part by the Intramural Research Program of the NIH, NIEHS, Intramural Research project ZIA ES103316-05. Support was additionally provided through the Institute for Environmental Health Solutions at the University of North Carolina Gillings School of Global Public Health.</p>
</sec>
<sec sec-type="COI-statement" id="s9">
<title>Conflict of Interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
<p>The handling editor ST declared a past collaboration with the author JER.</p>
</sec>
<sec sec-type="disclaimer" id="s10">
<title>Publisher&#x2019;s Note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ack>
<p>The research described in this manuscript has been reviewed by the Center for Public Health and Environmental Assessment, U.S. EPA, and the National Institute of Environmental Health Sciences, NIH, and approved for publication. Approval does not signify that contents necessarily reflect the views and policies of the agency, nor does the mention of trade names or commercial products constitute endorsement or recommendation for use. The authors would like to thank Dr. Richard Judson (U.S. EPA), Dr. Jason Sacks (U.S. EPA), Dr. Kristin Eccles (NIH), and Dr. Jui-Hua Hsieh (NIH) for providing internal technical reviews of this manuscript.</p>
</ack>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="web">
<person-group person-group-type="author">
<name>
<surname>Adair</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Braun</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Cohn</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Dubernet</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Towards Data Science</article-title>. <comment>[Online]. Available at: <ext-link ext-link-type="uri" xlink:href="https://towardsdatascience.com/">https://towardsdatascience.com/</ext-link>
</comment>(<comment>Accessed Nov 15, 2021)</comment>. </citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ahir</surname>
<given-names>B. K.</given-names>
</name>
<name>
<surname>Sanders</surname>
<given-names>A. P.</given-names>
</name>
<name>
<surname>Rager</surname>
<given-names>J. E.</given-names>
</name>
<name>
<surname>Fry</surname>
<given-names>R. C.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Systems Biology and Birth Defects Prevention: Blockade of the Glucocorticoid Receptor Prevents Arsenic-Induced Birth Defects</article-title>. <source>Environ. Health Perspect.</source> <volume>121</volume> (<issue>3</issue>), <fpage>332</fpage>&#x2013;<lpage>338</lpage>. <pub-id pub-id-type="doi">10.1289/ehp.1205659</pub-id> </citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Auerbach</surname>
<given-names>S. S.</given-names>
</name>
<name>
<surname>Paules</surname>
<given-names>R. S.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Genomic Dose Response: Successes, Challenges, and Next Steps</article-title>. <source>Curr. Opin. Toxicol.</source> <volume>11-12</volume>, <fpage>84</fpage>&#x2013;<lpage>92</lpage>. <pub-id pub-id-type="doi">10.1016/j.cotox.2019.04.002</pub-id> </citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Balik-Meisner</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Truong</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Scholl</surname>
<given-names>E. H.</given-names>
</name>
<name>
<surname>La Du</surname>
<given-names>J. K.</given-names>
</name>
<name>
<surname>Tanguay</surname>
<given-names>R. L.</given-names>
</name>
<name>
<surname>Reif</surname>
<given-names>D. M.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Elucidating Gene-By-Environment Interactions Associated with Differential Susceptibility to Chemical Exposure</article-title>. <source>Environ. Health Perspect.</source> <volume>126</volume> (<issue>6</issue>), <fpage>067010</fpage>. <pub-id pub-id-type="doi">10.1289/EHP2662</pub-id> </citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Baumer</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Udwin</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>R Markdown</article-title>. <source>WIREs Comput. Stat.</source> <volume>7</volume> (<issue>3</issue>), <fpage>167</fpage>&#x2013;<lpage>177</lpage>. <pub-id pub-id-type="doi">10.1002/wics.1348</pub-id> </citation>
</ref>
<ref id="B6">
<citation citation-type="web">
<collab>Bioconductor</collab> (<year>2021</year>). <article-title>Bioconductor Open Source Software for Bioinformatics</article-title>. <comment>[Online]. Available at: <ext-link ext-link-type="uri" xlink:href="https://www.bioconductor.org/">https://www.bioconductor.org/</ext-link> (Accessed Aug 31, 2021)</comment>. </citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Boyles</surname>
<given-names>R. R.</given-names>
</name>
<name>
<surname>Thessen</surname>
<given-names>A. E.</given-names>
</name>
<name>
<surname>Waldrop</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Haendel</surname>
<given-names>M. A.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Ontology-Based Data Integration for Advancing Toxicological Knowledge</article-title>. <source>Curr. Opin. Toxicol.</source> <volume>16</volume>, <fpage>67</fpage>&#x2013;<lpage>74</lpage>. <pub-id pub-id-type="doi">10.1016/j.cotox.2019.05.005</pub-id> </citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Breen</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Ring</surname>
<given-names>C. L.</given-names>
</name>
<name>
<surname>Kreutz</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Goldsmith</surname>
<given-names>M.-R.</given-names>
</name>
<name>
<surname>Wambaugh</surname>
<given-names>J. F.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>High-throughput PBTK Models for <italic>In Vitro</italic> to <italic>In Vivo</italic> Extrapolation</article-title>. <source>Expert Opin. Drug Metabolism Toxicol.</source> <volume>17</volume> (<issue>8</issue>), <fpage>903</fpage>&#x2013;<lpage>921</lpage>. <pub-id pub-id-type="doi">10.1080/17425255.2021.1935867</pub-id> </citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Carberry</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Turla</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Koval</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Hartwell</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Fry</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Rager</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Chemical Mixtures in Household Environments: In Silico Predictions and <italic>In Vitro</italic> Testing of Potential Joint Action on PPAR&#x3b3; in Human Liver Cells</article-title>. <source>Toxics</source> <volume>10</volume> (<issue>5</issue>), <fpage>199</fpage>. <pub-id pub-id-type="doi">10.3390/toxics10050199</pub-id> </citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Catlin</surname>
<given-names>N. R.</given-names>
</name>
<name>
<surname>Collins</surname>
<given-names>B. J.</given-names>
</name>
<name>
<surname>Auerbach</surname>
<given-names>S. S.</given-names>
</name>
<name>
<surname>Ferguson</surname>
<given-names>S. S.</given-names>
</name>
<name>
<surname>Harnly</surname>
<given-names>J. M.</given-names>
</name>
<name>
<surname>Gennings</surname>
<given-names>C.</given-names>
</name>
<etal/>
</person-group> (<year>2018</year>). <article-title>How Similar Is Similar Enough? A Sufficient Similarity Case Study with Ginkgo Biloba Extract</article-title>. <source>Food Chem. Toxicol.</source> <volume>118</volume>, <fpage>328</fpage>&#x2013;<lpage>339</lpage>. <pub-id pub-id-type="doi">10.1016/j.fct.2018.05.013</pub-id> </citation>
</ref>
<ref id="B11">
<citation citation-type="web">
<collab>CDC</collab> (<year>2021</year>). <article-title>CDC About Underlying Cause of Death, 1999&#x2013;2019</article-title>. <comment>[Online]. Available at: <ext-link ext-link-type="uri" xlink:href="https://wonder.cdc.gov/ucd-icd10.html">https://wonder.cdc.gov/ucd-icd10.html</ext-link> (Accessed Aug 1, 2021)</comment>. </citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Rager</surname>
<given-names>J. E.</given-names>
</name>
<name>
<surname>Tilton</surname>
<given-names>S. C.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Linking Coregulated Gene Modules with Polycyclic Aromatic Hydrocarbon-Related Cancer Risk in the 3D Human Bronchial Epithelium</article-title>. <source>Chem. Res. Toxicol.</source> <volume>34</volume> (<issue>6</issue>), <fpage>1445</fpage>&#x2013;<lpage>1455</lpage>. <pub-id pub-id-type="doi">10.1021/acs.chemrestox.0c00333</pub-id> </citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chappell</surname>
<given-names>G. A.</given-names>
</name>
<name>
<surname>Rager</surname>
<given-names>J. E.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Epigenetics in Chemical-Induced Genotoxic Carcinogenesis</article-title>. <source>Curr. Opin. Toxicol.</source> <volume>6</volume>, <fpage>10</fpage>&#x2013;<lpage>17</lpage>. <pub-id pub-id-type="doi">10.1016/j.cotox.2017.06.007</pub-id> </citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chappell</surname>
<given-names>G. A.</given-names>
</name>
<name>
<surname>Rager</surname>
<given-names>J. E.</given-names>
</name>
<name>
<surname>Wolf</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Babic</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>LeBlanc</surname>
<given-names>K. J.</given-names>
</name>
<name>
<surname>Ring</surname>
<given-names>C. L.</given-names>
</name>
<etal/>
</person-group> (<year>2019</year>). <article-title>Comparison of Gene Expression Responses in the Small Intestine of Mice Following Exposure to 3 Carcinogens Using the S1500&#x2b; Gene Set Informs a Potential Common Adverse Outcome Pathway</article-title>. <source>Toxicol. Pathol.</source> <volume>47</volume> (<issue>7</issue>), <fpage>851</fpage>&#x2013;<lpage>864</lpage>. <pub-id pub-id-type="doi">10.1177/0192623319873882</pub-id> </citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cho</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Blaser</surname>
<given-names>M. J.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>The Human Microbiome: at the Interface of Health and Disease</article-title>. <source>Nat. Rev. Genet.</source> <volume>13</volume> (<issue>4</issue>), <fpage>260</fpage>&#x2013;<lpage>270</lpage>. <pub-id pub-id-type="doi">10.1038/nrg3182</pub-id> </citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Clark</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Avula</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Ring</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Eaves</surname>
<given-names>L. A.</given-names>
</name>
<name>
<surname>Howard</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Santos</surname>
<given-names>H. P.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Comparing the Predictivity of Human Placental Gene, microRNA, and CpG Methylation Signatures in Relation to Perinatal Outcomes</article-title>. <source>Toxicol. Sci.</source> <volume>183</volume>, <fpage>269</fpage>&#x2013;<lpage>284</lpage>. <pub-id pub-id-type="doi">10.1093/toxsci/kfab089</pub-id> </citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Clark</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Martin</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Bulka</surname>
<given-names>C. M.</given-names>
</name>
<name>
<surname>Smeester</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Santos</surname>
<given-names>H. P.</given-names>
</name>
<name>
<surname>O&#x2019;Shea</surname>
<given-names>T. M.</given-names>
</name>
<etal/>
</person-group> (<year>2019</year>). <article-title>Associations between Placental CpG Methylation of Metastable Epialleles and Childhood Body Mass Index across Ages One, Two and Ten in the Extremely Low Gestational Age Newborns (ELGAN) Cohort</article-title>. <source>Epigenetics</source> <volume>14</volume> (<issue>11</issue>), <fpage>1102</fpage>&#x2013;<lpage>1111</lpage>. <pub-id pub-id-type="doi">10.1080/15592294.2019.1633865</pub-id> </citation>
</ref>
<ref id="B18">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Clark</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Rager</surname>
<given-names>J. E.</given-names>
</name>
</person-group> (<year>2020</year>). &#x201c;<article-title>Chapter 1 - Epigenetics: An Overview of CpG Methylation, Chromatin Remodeling, and Regulatory/Noncoding RNAs</article-title>,&#x201d; in <source>Environmental Epigenetics in Toxicology and Public Health</source>. Editor <person-group person-group-type="editor">
<name>
<surname>Fry</surname>
<given-names>R. C.</given-names>
</name>
</person-group> (<publisher-name>Elsevier</publisher-name>), <fpage>3</fpage>&#x2013;<lpage>32</lpage>. <pub-id pub-id-type="doi">10.1016/b978-0-12-819968-8.00001-9</pub-id> </citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Collins</surname>
<given-names>B. J.</given-names>
</name>
<name>
<surname>Kerns</surname>
<given-names>S. P.</given-names>
</name>
<name>
<surname>Aillon</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Mueller</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Rider</surname>
<given-names>C. V.</given-names>
</name>
<name>
<surname>DeRose</surname>
<given-names>E. F.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Comparison of Phytochemical Composition of Ginkgo Biloba Extracts Using a Combination of Non-Targeted and Targeted Analytical Approaches</article-title>. <source>Anal. Bioanal. Chem.</source> <volume>412</volume> (<issue>25</issue>), <fpage>6789</fpage>&#x2013;<lpage>6809</lpage>. <pub-id pub-id-type="doi">10.1007/s00216-020-02839-7</pub-id> </citation>
</ref>
<ref id="B20">
<citation citation-type="web">
<collab>Coursera</collab> (<year>2021</year>). <article-title>Coursera R Courses</article-title>. <comment>[Online]. Available at: <ext-link ext-link-type="uri" xlink:href="https://www.coursera.org/courses?query=r">https://www.coursera.org/courses?query&#x3d;r</ext-link> (Accessed Nov 15, 2021)</comment>. </citation>
</ref>
<ref id="B21">
<citation citation-type="web">
<collab>CRAN</collab> (<year>2021a</year>). <article-title>CRAN Contributed Packages</article-title>. <comment>[Online]. Available at: <ext-link ext-link-type="uri" xlink:href="https://cran.r-project.org/web/packages/">https://cran.r-project.org/web/packages/</ext-link> (Accessed Aug 31, 2021)</comment>. </citation>
</ref>
<ref id="B22">
<citation citation-type="web">
<collab>CRAN</collab> (<year>2021b</year>). <article-title>Tidyverse: Easily Install and Load the &#x27;Tidyverse&#x27;</article-title>. <comment>[Online]. Available at: <ext-link ext-link-type="uri" xlink:href="https://cran.r-project.org/web/packages/tidyverse/index.html">https://cran.r-project.org/web/packages/tidyverse/index.html</ext-link> (Accessed Sept 23, 2021)</comment>. </citation>
</ref>
<ref id="B23">
<citation citation-type="web">
<collab>CTD</collab> (<year>2021</year>). <article-title>Comparative Toxicogenomics Database (CTD): Illuminating How Chemicals Affect Human Health</article-title>. <comment>[Online]. Available at: <ext-link ext-link-type="uri" xlink:href="http://ctdbase.org/">http://ctdbase.org/</ext-link> (Accessed Aug 1, 2021)</comment>. </citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Davis</surname>
<given-names>A. P.</given-names>
</name>
<name>
<surname>Grondin</surname>
<given-names>C. J.</given-names>
</name>
<name>
<surname>Johnson</surname>
<given-names>R. J.</given-names>
</name>
<name>
<surname>Sciaky</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Wiegers</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Wiegers</surname>
<given-names>T. C.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Comparative Toxicogenomics Database (CTD): Update 2021</article-title>. <source>Nucleic Acids Res.</source> <volume>49</volume> (<issue>D1</issue>), <fpage>D1138</fpage>&#x2013;<lpage>D1143</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkaa891</pub-id> </citation>
</ref>
<ref id="B25">
<citation citation-type="web">
<collab>EPA, U.S.</collab> (<year>2021a</year>). <article-title>Air Data Pre-generated Data Files</article-title>. <comment>[Online]. Available at: <ext-link ext-link-type="uri" xlink:href="https://aqs.epa.gov/aqsweb/airdata/download_files.html">https://aqs.epa.gov/aqsweb/airdata/download_files.html</ext-link> (Accessed Aug 1, 2021)</comment>. </citation>
</ref>
<ref id="B26">
<citation citation-type="web">
<collab>EPA, U.S.</collab> (<year>2021b</year>). <article-title>EPA New Approach Methods Work Plan: Reducing Use of Animals in Chemical Testing</article-title>. <comment>[Online]. Available at: <ext-link ext-link-type="uri" xlink:href="https://www.epa.gov/chemical-research/epa-new-approach-methods-work-plan-reducing-use-animals-chemical-testing">https://www.epa.gov/chemical-research/epa-new-approach-methods-work-plan-reducing-use-animals-chemical-testing</ext-link> (Accessed Aug 15, 2021)</comment>. </citation>
</ref>
<ref id="B27">
<citation citation-type="web">
<collab>EU</collab> (<year>2019</year>). <article-title>European Commission Ban on Animal Testing</article-title>. <comment>[Online]. Available at: <ext-link ext-link-type="uri" xlink:href="https://ec.europa.eu/growth/sectors/cosmetics/animal-testing_en">https://ec.europa.eu/growth/sectors/cosmetics/animal-testing_en</ext-link> (Accessed Jan 3, 2020)</comment>. </citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fenton</surname>
<given-names>S. E.</given-names>
</name>
<name>
<surname>Ducatman</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Boobis</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>DeWitt</surname>
<given-names>J. C.</given-names>
</name>
<name>
<surname>Lau</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Ng</surname>
<given-names>C.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Per- and Polyfluoroalkyl Substance Toxicity and Human Health Review: Current State of Knowledge and Strategies for Informing Future Research</article-title>. <source>Environ. Toxicol. Chem.</source> <volume>40</volume> (<issue>3</issue>), <fpage>606</fpage>&#x2013;<lpage>630</lpage>. <pub-id pub-id-type="doi">10.1002/etc.4890</pub-id> </citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Florance</surname>
<given-names>V.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>NIH Training and Education for Biomedical Data Science</article-title>. <source>Inf. Educ. Healthc.</source>, <fpage>125</fpage>&#x2013;<lpage>133</lpage>. <pub-id pub-id-type="doi">10.1007/978-3-030-53813-2_10</pub-id> </citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fry</surname>
<given-names>R. C.</given-names>
</name>
<name>
<surname>Navasumrit</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Valiathan</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Svensson</surname>
<given-names>J. P.</given-names>
</name>
<name>
<surname>Hogan</surname>
<given-names>B. J.</given-names>
</name>
<name>
<surname>Luo</surname>
<given-names>M.</given-names>
</name>
<etal/>
</person-group> (<year>2007</year>). <article-title>Activation of Inflammation/NF-&#x3ba;B Signaling in Infants Born to Arsenic-Exposed Mothers</article-title>. <source>PLoS Genet.</source> <volume>3</volume> (<issue>11</issue>), <fpage>e207</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pgen.0030207</pub-id> </citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fry</surname>
<given-names>R. C.</given-names>
</name>
<name>
<surname>Rager</surname>
<given-names>J. E.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Zou</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Brickey</surname>
<given-names>J. W.</given-names>
</name>
<name>
<surname>Ting</surname>
<given-names>J.</given-names>
</name>
<etal/>
</person-group> (<year>2012</year>). <article-title>Individuals with Increased Inflammatory Response to Ozone Demonstrate Muted Signaling of Immune Cell Trafficking Pathways</article-title>. <source>Respir. Res.</source> <volume>13</volume>, <fpage>89</fpage>. <pub-id pub-id-type="doi">10.1186/1465-9921-13-89</pub-id> </citation>
</ref>
<ref id="B32">
<citation citation-type="web">
<collab>Github</collab> (<year>2022</year>). <article-title>Github: Where the World Builds Software</article-title>. <comment>[Online]. Available at: <ext-link ext-link-type="uri" xlink:href="https://github.com/about">https://github.com/about</ext-link> (Accessed Apr 26, 2022)</comment>. </citation>
</ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Green</surname>
<given-names>A. J.</given-names>
</name>
<name>
<surname>Mohlenkamp</surname>
<given-names>M. J.</given-names>
</name>
<name>
<surname>Das</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Chaudhari</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Truong</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Tanguay</surname>
<given-names>R. L.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Leveraging High-Throughput Screening Data, Deep Neural Networks, and Conditional Generative Adversarial Networks to Advance Predictive Toxicology</article-title>. <source>PLoS Comput. Biol.</source> <volume>17</volume> (<issue>7</issue>), <fpage>e1009135</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pcbi.1009135</pub-id> </citation>
</ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Helman</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Shah</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Patlewicz</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2019a</year>). <article-title>Transitioning the Generalised Read-Across Approach (GenRA) to Quantitative Predictions: A Case Study Using Acute Oral Toxicity Data</article-title>. <source>Comput. Toxicol.</source> <volume>12</volume>, <fpage>100097</fpage>. <pub-id pub-id-type="doi">10.1016/j.comtox.2019.100097</pub-id> </citation>
</ref>
<ref id="B35">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Helman</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Shah</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Williams</surname>
<given-names>A. J.</given-names>
</name>
<name>
<surname>Edwards</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Dunne</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Patlewicz</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2019b</year>). <article-title>Generalized Read-Across (GenRA): A Workflow Implemented into the EPA CompTox Chemicals Dashboard</article-title>. <source>ALTEX</source> <volume>36</volume> (<issue>3</issue>), <fpage>462</fpage>&#x2013;<lpage>465</lpage>. <pub-id pub-id-type="doi">10.14573/altex.1811292</pub-id> </citation>
</ref>
<ref id="B36">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Holmgren</surname>
<given-names>S. D.</given-names>
</name>
<name>
<surname>Boyles</surname>
<given-names>R. R.</given-names>
</name>
<name>
<surname>Cronk</surname>
<given-names>R. D.</given-names>
</name>
<name>
<surname>Duncan</surname>
<given-names>C. G.</given-names>
</name>
<name>
<surname>Kwok</surname>
<given-names>R. K.</given-names>
</name>
<name>
<surname>Lunn</surname>
<given-names>R. M.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Catalyzing Knowledge-Driven Discovery in Environmental Health Sciences through a Community-Driven Harmonized Language</article-title>. <source>Int. J. Environ. Res. Public Health</source> <volume>18</volume> (<issue>17</issue>), <fpage>8985</fpage>. <pub-id pub-id-type="doi">10.3390/ijerph18178985</pub-id> </citation>
</ref>
<ref id="B37">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jaspers</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Flescher</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>L. C.</given-names>
</name>
</person-group> (<year>1997</year>). <article-title>Respiratory Epithelial Cells Display Polarity in Their Release of the Chemokine IL-8 after Exposure to Ozone</article-title>. <source>Inflamm. Res.</source> <volume>46</volume> (<issue>Suppl. 2</issue>), <fpage>173</fpage>&#x2013;<lpage>174</lpage>. <pub-id pub-id-type="doi">10.1007/s000110050166</pub-id> </citation>
</ref>
<ref id="B38">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Johnson</surname>
<given-names>K. J.</given-names>
</name>
<name>
<surname>Auerbach</surname>
<given-names>S. S.</given-names>
</name>
<name>
<surname>Costa</surname>
<given-names>E.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>A Rat Liver Transcriptomic Point of Departure Predicts a Prospective Liver or Non-liver Apical Point of Departure</article-title>. <source>Toxicol. Sci.</source> <volume>176</volume> (<issue>1</issue>), <fpage>86</fpage>&#x2013;<lpage>102</lpage>. <pub-id pub-id-type="doi">10.1093/toxsci/kfaa062</pub-id> </citation>
</ref>
<ref id="B39">
<citation citation-type="web">
<collab>KEGG</collab> (<year>2021</year>). <article-title>KEGG PATHWAY Database: Wiring Diagrams of Molecular Interactions, Reactions and Relations</article-title>. <comment>[Online]. Available at: <ext-link ext-link-type="uri" xlink:href="https://www.genome.jp/kegg/pathway.html">https://www.genome.jp/kegg/pathway.html</ext-link> (Accessed Dec 15, 2021)</comment>. </citation>
</ref>
<ref id="B40">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kim</surname>
<given-names>Y. H.</given-names>
</name>
<name>
<surname>Warren</surname>
<given-names>S. H.</given-names>
</name>
<name>
<surname>Krantz</surname>
<given-names>Q. T.</given-names>
</name>
<name>
<surname>King</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Jaskot</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Preston</surname>
<given-names>W. T.</given-names>
</name>
<etal/>
</person-group> (<year>2018</year>). <article-title>Mutagenicity and Lung Toxicity of Smoldering vs. Flaming Emissions from Various Biomass Fuels: Implications for Health Effects from Wildland Fires</article-title>. <source>Environ. Health Perspect.</source> <volume>126</volume> (<issue>1</issue>), <fpage>017011</fpage>. <pub-id pub-id-type="doi">10.1289/EHP2200</pub-id> </citation>
</ref>
<ref id="B41">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Klaren</surname>
<given-names>W. D.</given-names>
</name>
<name>
<surname>Ring</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Harris</surname>
<given-names>M. A.</given-names>
</name>
<name>
<surname>Thompson</surname>
<given-names>C. M.</given-names>
</name>
<name>
<surname>Borghoff</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Sipes</surname>
<given-names>N. S.</given-names>
</name>
<etal/>
</person-group> (<year>2019</year>). <article-title>Identifying Attributes That Influence <italic>In Vitro</italic>-To-<italic>In Vivo</italic> Concordance by Comparing <italic>In Vitro</italic> Tox21 Bioactivity Versus <italic>In Vivo</italic> DrugMatrix Transcriptomic Responses Across 130 Chemicals</article-title>. <source>Toxicol. Sci.</source> <volume>167</volume> (<issue>1</issue>), <fpage>157</fpage>&#x2013;<lpage>171</lpage>. <pub-id pub-id-type="doi">10.1093/toxsci/kfy220</pub-id> </citation>
</ref>
<ref id="B42">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kosnik</surname>
<given-names>M. B.</given-names>
</name>
<name>
<surname>Planchart</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Marvel</surname>
<given-names>S. W.</given-names>
</name>
<name>
<surname>Reif</surname>
<given-names>D. M.</given-names>
</name>
<name>
<surname>Mattingly</surname>
<given-names>C. J.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Integration of Curated and High-Throughput Screening Data to Elucidate Environmental Influences on Disease Pathways</article-title>. <source>Comput. Toxicol.</source> <volume>12</volume>, <fpage>100094</fpage>. <pub-id pub-id-type="doi">10.1016/j.comtox.2019.100094</pub-id> </citation>
</ref>
<ref id="B43">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kosnik</surname>
<given-names>M. B.</given-names>
</name>
<name>
<surname>Reif</surname>
<given-names>D. M.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Determination of Chemical-Disease Risk Values to Prioritize Connections between Environmental Factors, Genetic Variants, and Human Diseases</article-title>. <source>Toxicol. Appl. Pharmacol.</source> <volume>379</volume>, <fpage>114674</fpage>. <pub-id pub-id-type="doi">10.1016/j.taap.2019.114674</pub-id> </citation>
</ref>
<ref id="B44">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Love</surname>
<given-names>M. I.</given-names>
</name>
<name>
<surname>Huber</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Anders</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Moderated Estimation of Fold Change and Dispersion for RNA-Seq Data with DESeq2</article-title>. <source>Genome Biol.</source> <volume>15</volume> (<issue>12</issue>), <fpage>550</fpage>. <pub-id pub-id-type="doi">10.1186/s13059-014-0550-8</pub-id> </citation>
</ref>
<ref id="B45">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lu</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Abo</surname>
<given-names>R. P.</given-names>
</name>
<name>
<surname>Schlieper</surname>
<given-names>K. A.</given-names>
</name>
<name>
<surname>Graffam</surname>
<given-names>M. E.</given-names>
</name>
<name>
<surname>Levine</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Wishnok</surname>
<given-names>J. S.</given-names>
</name>
<etal/>
</person-group> (<year>2014</year>). <article-title>Arsenic Exposure Perturbs the Gut Microbiome and its Metabolic Profile in Mice: an Integrated Metagenomics and Metabolomics Analysis</article-title>. <source>Environ. Health Perspect.</source> <volume>122</volume> (<issue>3</issue>), <fpage>284</fpage>&#x2013;<lpage>291</lpage>. <pub-id pub-id-type="doi">10.1289/ehp.1307429</pub-id> </citation>
</ref>
<ref id="B46">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Manuck</surname>
<given-names>T. A.</given-names>
</name>
<name>
<surname>Eaves</surname>
<given-names>L. A.</given-names>
</name>
<name>
<surname>Rager</surname>
<given-names>J. E.</given-names>
</name>
<name>
<surname>Sheffield-Abdullah</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Fry</surname>
<given-names>R. C.</given-names>
</name>
</person-group> (<year>2021a</year>). <article-title>Nitric Oxide-Related Gene and microRNA Expression in Peripheral Blood in Pregnancy Vary by Self-Reported Race</article-title>. <source>Epigenetics</source>, <fpage>1</fpage>&#x2013;<lpage>15</lpage>. <pub-id pub-id-type="doi">10.1080/15592294.2021.1957576</pub-id> </citation>
</ref>
<ref id="B47">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Manuck</surname>
<given-names>T. A.</given-names>
</name>
<name>
<surname>Lai</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Ru</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Glover</surname>
<given-names>A. V.</given-names>
</name>
<name>
<surname>Rager</surname>
<given-names>J. E.</given-names>
</name>
<name>
<surname>Fry</surname>
<given-names>R. C.</given-names>
</name>
<etal/>
</person-group> (<year>2021b</year>). <article-title>Metabolites from Midtrimester Plasma of Pregnant Patients at High Risk for Preterm Birth</article-title>. <source>Am. J. Obstetrics Gynecol. MFM</source> <volume>3</volume> (<issue>4</issue>), <fpage>100393</fpage>. <pub-id pub-id-type="doi">10.1016/j.ajogmf.2021.100393</pub-id> </citation>
</ref>
<ref id="B48">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Martin</surname>
<given-names>C. L.</given-names>
</name>
<name>
<surname>Ward-Caviness</surname>
<given-names>C. K.</given-names>
</name>
<name>
<surname>Dhingra</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Zikry</surname>
<given-names>T. M.</given-names>
</name>
<name>
<surname>Galea</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Wildman</surname>
<given-names>D. E.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Neighborhood Environment, Social Cohesion, and Epigenetic Aging</article-title>. <source>Aging</source> <volume>13</volume> (<issue>6</issue>), <fpage>7883</fpage>&#x2013;<lpage>7899</lpage>. <pub-id pub-id-type="doi">10.18632/aging.202814</pub-id> </citation>
</ref>
<ref id="B49">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Marvel</surname>
<given-names>S. W.</given-names>
</name>
<name>
<surname>House</surname>
<given-names>J. S.</given-names>
</name>
<name>
<surname>Wheeler</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Song</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>Y.-H.</given-names>
</name>
<name>
<surname>Wright</surname>
<given-names>F. A.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>The COVID-19 Pandemic Vulnerability Index (PVI) Dashboard: Monitoring County-Level Vulnerability Using Visualization, Statistical Modeling, and Machine Learning</article-title>. <source>Environ. Health Perspect.</source> <volume>129</volume> (<issue>1</issue>), <fpage>017701</fpage>. <pub-id pub-id-type="doi">10.1289/EHP8690</pub-id> </citation>
</ref>
<ref id="B50">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Meisner</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Reif</surname>
<given-names>D. M.</given-names>
</name>
</person-group> (<year>2015</year>). &#x201c;<article-title>Computational Methods Used in Systems Biology</article-title>,&#x201d; in <source>Chpt 5 of &#x2018;Systems Biology in Toxicology and Environmental Health&#x2019;</source>. Editor <person-group person-group-type="editor">
<name>
<surname>Fry</surname>
<given-names>R. C.</given-names>
</name>
</person-group>, <fpage>85</fpage>&#x2013;<lpage>115</lpage>. </citation>
</ref>
<ref id="B51">
<citation citation-type="book">
<collab>NAS</collab> (<year>2007</year>). &#x201c;<article-title>Toxicity Testing in the 21st Century: A Vision and A Strategy</article-title>,&#x201d; in <source>Committee on Toxicity Testing and Assessment of Environmental Agents</source> (<publisher-loc>Washington, DC</publisher-loc>: <publisher-name>National Research Council</publisher-name>). </citation>
</ref>
<ref id="B52">
<citation citation-type="book">
<collab>NAS</collab> (<year>2017</year>). &#x201c;<article-title>Using 21st Century Science to Improve Risk-Related Evaluations</article-title>,&#x201d; in <source>Committee on Incorporating 21st Century Science into Risk-Based Evaluations; Board on Environmental Studies and Toxicology; Division on Earth and Life Studies</source> (<publisher-loc>Washington, DC</publisher-loc>: <publisher-name>National Academies of Sciences, Engineering, and Medicine</publisher-name>). </citation>
</ref>
<ref id="B53">
<citation citation-type="web">
<collab>NCBI</collab> (<year>2021</year>). <article-title>Gene Expression Omnibus</article-title>. <comment>[Online]. Available at: <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/geo/">https://www.ncbi.nlm.nih.gov/geo/</ext-link> (Accessed Aug 1, 2021)</comment>. </citation>
</ref>
<ref id="B54">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Nelms</surname>
<given-names>M. D.</given-names>
</name>
<name>
<surname>Karmaus</surname>
<given-names>A. L.</given-names>
</name>
<name>
<surname>Patlewicz</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>An Evaluation of the Performance of Selected (Q)SARs/expert Systems for Predicting Acute Oral Toxicity</article-title>. <source>Comput. Toxicol.</source> <volume>16</volume>, <fpage>100135</fpage>. <pub-id pub-id-type="doi">10.1016/j.comtox.2020.100135</pub-id> </citation>
</ref>
<ref id="B55">
<citation citation-type="web">
<collab>NIH</collab> (<year>2022</year>). <article-title>Data Management &#x26; Sharing Policy Overview</article-title>. <comment>[Online]. Available at: <ext-link ext-link-type="uri" xlink:href="https://sharing.nih.gov/data-management-and-sharing-policy/about-data-management-sharing-policy/data-management-and-sharing-policy-overview">https://sharing.nih.gov/data-management-and-sharing-policy/about-data-management-sharing-policy/data-management-and-sharing-policy-overview</ext-link> (Accessed May 23, 2022)</comment>. </citation>
</ref>
<ref id="B56">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Odenkirk</surname>
<given-names>M. T.</given-names>
</name>
<name>
<surname>Reif</surname>
<given-names>D. M.</given-names>
</name>
<name>
<surname>Baker</surname>
<given-names>E. S.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Multiomic Big Data Analysis Challenges: Increasing Confidence in the Interpretation of Artificial Intelligence Assessments</article-title>. <source>Anal. Chem.</source> <volume>93</volume> (<issue>22</issue>), <fpage>7763</fpage>&#x2013;<lpage>7773</lpage>. <pub-id pub-id-type="doi">10.1021/acs.analchem.0c04850</pub-id> </citation>
</ref>
<ref id="B57">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Payton</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Clark</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Eaves</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Santos</surname>
<given-names>H. P.</given-names>
<suffix>Jr.</suffix>
</name>
<name>
<surname>Smeester</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Bangma</surname>
<given-names>J. T.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Placental Genomic and Epigenomic Signatures Associated with Infant Birth Weight Highlight Mechanisms Involved in Collagen and Growth Factor Signaling</article-title>. <source>Reprod. Toxicol.</source> <volume>96</volume>, <fpage>221</fpage>&#x2013;<lpage>230</lpage>. <pub-id pub-id-type="doi">10.1016/j.reprotox.2020.07.007</pub-id> </citation>
</ref>
<ref id="B58">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pearce</surname>
<given-names>R. G.</given-names>
</name>
<name>
<surname>Setzer</surname>
<given-names>R. W.</given-names>
</name>
<name>
<surname>Strope</surname>
<given-names>C. L.</given-names>
</name>
<name>
<surname>Sipes</surname>
<given-names>N. S.</given-names>
</name>
<name>
<surname>Wambaugh</surname>
<given-names>J. F.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Httk: R Package for High-Throughput Toxicokinetics</article-title>. <source>J. Stat. Soft.</source> <volume>79</volume> (<issue>4</issue>), <fpage>1</fpage>&#x2013;<lpage>26</lpage>. <pub-id pub-id-type="doi">10.18637/jss.v079.i04</pub-id> </citation>
</ref>
<ref id="B59">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rager</surname>
<given-names>J. E.</given-names>
</name>
<name>
<surname>Auerbach</surname>
<given-names>S. S.</given-names>
</name>
<name>
<surname>Chappell</surname>
<given-names>G. A.</given-names>
</name>
<name>
<surname>Martin</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Thompson</surname>
<given-names>C. M.</given-names>
</name>
<name>
<surname>Fry</surname>
<given-names>R. C.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Benchmark Dose Modeling Estimates of the Concentrations of Inorganic Arsenic That Induce Changes to the Neonatal Transcriptome, Proteome, and Epigenome in a Pregnancy Cohort</article-title>. <source>Chem. Res. Toxicol.</source> <volume>30</volume> (<issue>10</issue>), <fpage>1911</fpage>&#x2013;<lpage>1920</lpage>. <pub-id pub-id-type="doi">10.1021/acs.chemrestox.7b00221</pub-id> </citation>
</ref>
<ref id="B60">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rager</surname>
<given-names>J. E.</given-names>
</name>
<name>
<surname>Bailey</surname>
<given-names>K. A.</given-names>
</name>
<name>
<surname>Smeester</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Miller</surname>
<given-names>S. K.</given-names>
</name>
<name>
<surname>Parker</surname>
<given-names>J. S.</given-names>
</name>
<name>
<surname>Laine</surname>
<given-names>J. E.</given-names>
</name>
<etal/>
</person-group> (<year>2014a</year>). <article-title>Prenatal Arsenic Exposure and the Epigenome: Altered microRNAs Associated with Innate and Adaptive Immune Signaling in Newborn Cord Blood</article-title>. <source>Environ. Mol. Mutagen.</source> <volume>55</volume> (<issue>3</issue>), <fpage>196</fpage>&#x2013;<lpage>208</lpage>. <pub-id pub-id-type="doi">10.1002/em.21842</pub-id> </citation>
</ref>
<ref id="B61">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rager</surname>
<given-names>J. E.</given-names>
</name>
<name>
<surname>Moeller</surname>
<given-names>B. C.</given-names>
</name>
<name>
<surname>Miller</surname>
<given-names>S. K.</given-names>
</name>
<name>
<surname>Kracko</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Doyle-Eisele</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Swenberg</surname>
<given-names>J. A.</given-names>
</name>
<etal/>
</person-group> (<year>2014b</year>). <article-title>Formaldehyde-associated Changes in microRNAs: Tissue and Temporal Specificity in the Rat Nose, White Blood Cells, and Bone Marrow</article-title>. <source>Toxicol. Sci.</source> <volume>138</volume> (<issue>1</issue>), <fpage>36</fpage>&#x2013;<lpage>46</lpage>. <pub-id pub-id-type="doi">10.1093/toxsci/kft267</pub-id> </citation>
</ref>
<ref id="B62">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rager</surname>
<given-names>J. E.</given-names>
</name>
<name>
<surname>Bangma</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Carberry</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Chao</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Grossman</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Lu</surname>
<given-names>K.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Review of the Environmental Prenatal Exposome and its Relationship to Maternal and Fetal Health</article-title>. <source>Reprod. Toxicol.</source> <volume>98</volume>, <fpage>1</fpage>&#x2013;<lpage>12</lpage>. <pub-id pub-id-type="doi">10.1016/j.reprotox.2020.02.004</pub-id> </citation>
</ref>
<ref id="B63">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rager</surname>
<given-names>J. E.</given-names>
</name>
<name>
<surname>Bauer</surname>
<given-names>R. N.</given-names>
</name>
<name>
<surname>M&#xfc;ller</surname>
<given-names>L. L.</given-names>
</name>
<name>
<surname>Smeester</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Carson</surname>
<given-names>J. L.</given-names>
</name>
<name>
<surname>Brighton</surname>
<given-names>L. E.</given-names>
</name>
<etal/>
</person-group> (<year>2013</year>). <article-title>DNA Methylation in Nasal Epithelial Cells from Smokers: Identification of ULBP3-Related Effects</article-title>. <source>Am. J. Physiology-Lung Cell. Mol. Physiology</source> <volume>305</volume> (<issue>6</issue>), <fpage>L432</fpage>&#x2013;<lpage>L438</lpage>. <pub-id pub-id-type="doi">10.1152/ajplung.00116.2013</pub-id> </citation>
</ref>
<ref id="B64">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rager</surname>
<given-names>J. E.</given-names>
</name>
<name>
<surname>Clark</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Eaves</surname>
<given-names>L. A.</given-names>
</name>
<name>
<surname>Avula</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Niehoff</surname>
<given-names>N. M.</given-names>
</name>
<name>
<surname>Kim</surname>
<given-names>Y. H.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Mixtures Modeling Identifies Chemical Inducers versus Repressors of Toxicity Associated with Wildfire Smoke</article-title>. <source>Sci. Total Environ.</source> <volume>775</volume>, <fpage>145759</fpage>. <pub-id pub-id-type="doi">10.1016/j.scitotenv.2021.145759</pub-id> </citation>
</ref>
<ref id="B65">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rager</surname>
<given-names>J. E.</given-names>
</name>
<name>
<surname>Fry</surname>
<given-names>R. C.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>The Aryl Hydrocarbon Receptor Pathway: a Key Component of the microRNA-Mediated AML Signalisome</article-title>. <source>Int. J. Environ. Res. Public Health</source> <volume>9</volume> (<issue>5</issue>), <fpage>1939</fpage>&#x2013;<lpage>1953</lpage>. <pub-id pub-id-type="doi">10.3390/ijerph9051939</pub-id> </citation>
</ref>
<ref id="B66">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Rager</surname>
<given-names>J. E.</given-names>
</name>
<name>
<surname>Fry</surname>
<given-names>R. C.</given-names>
</name>
</person-group> (<year>2013</year>). <source>Systems Biology and Environmental Exposures. Chpt 4 of &#x2018;Network Biology&#x2019;</source>. <publisher-loc>Hauppauge, NY</publisher-loc>: <publisher-name>Nova Science Publishers, Inc.</publisher-name> (<comment>Accessed June 1, 2022</comment>) </citation>
</ref>
<ref id="B67">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rager</surname>
<given-names>J. E.</given-names>
</name>
<name>
<surname>Strynar</surname>
<given-names>M. J.</given-names>
</name>
<name>
<surname>Liang</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>McMahen</surname>
<given-names>R. L.</given-names>
</name>
<name>
<surname>Richard</surname>
<given-names>A. M.</given-names>
</name>
<name>
<surname>Grulke</surname>
<given-names>C. M.</given-names>
</name>
<etal/>
</person-group> (<year>2016</year>). <article-title>Linking High Resolution Mass Spectrometry Data with Exposure and Toxicity Forecasts to Advance High-Throughput Environmental Monitoring</article-title>. <source>Environ. Int.</source> <volume>88</volume>, <fpage>269</fpage>&#x2013;<lpage>280</lpage>. <pub-id pub-id-type="doi">10.1016/j.envint.2015.12.008</pub-id> </citation>
</ref>
<ref id="B68">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rager</surname>
<given-names>J. E.</given-names>
</name>
<name>
<surname>Suh</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Chappell</surname>
<given-names>G. A.</given-names>
</name>
<name>
<surname>Thompson</surname>
<given-names>C. M.</given-names>
</name>
<name>
<surname>Proctor</surname>
<given-names>D. M.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Review of Transcriptomic Responses to Hexavalent Chromium Exposure in Lung Cells Supports a Role of Epigenetic Mediators in Carcinogenesis</article-title>. <source>Toxicol. Lett.</source> <volume>305</volume>, <fpage>40</fpage>&#x2013;<lpage>50</lpage>. <pub-id pub-id-type="doi">10.1016/j.toxlet.2019.01.011</pub-id> </citation>
</ref>
<ref id="B69">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rager</surname>
<given-names>J. E.</given-names>
</name>
<name>
<surname>Tilley</surname>
<given-names>S. K.</given-names>
</name>
<name>
<surname>Tulenko</surname>
<given-names>S. E.</given-names>
</name>
<name>
<surname>Smeester</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Ray</surname>
<given-names>P. D.</given-names>
</name>
<name>
<surname>Yosim</surname>
<given-names>A.</given-names>
</name>
<etal/>
</person-group> (<year>2015</year>). <article-title>Identification of Novel Gene Targets and Putative Regulators of Arsenic-Associated DNA Methylation in Human Urothelial Cells and Bladder Cancer</article-title>. <source>Chem. Res. Toxicol.</source> <volume>28</volume> (<issue>6</issue>), <fpage>1144</fpage>&#x2013;<lpage>1155</lpage>. <pub-id pub-id-type="doi">10.1021/tx500393y</pub-id> </citation>
</ref>
<ref id="B70">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rebuli</surname>
<given-names>M. E.</given-names>
</name>
<name>
<surname>Brocke</surname>
<given-names>S. A.</given-names>
</name>
<name>
<surname>Jaspers</surname>
<given-names>I.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Impact of Inhaled Pollutants on Response to Viral Infection in Controlled Exposures</article-title>. <source>J. Allergy Clin. Immunol.</source> <volume>148</volume>, <fpage>1420</fpage>&#x2013;<lpage>1429</lpage>. <pub-id pub-id-type="doi">10.1016/j.jaci.2021.07.002</pub-id> </citation>
</ref>
<ref id="B71">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Remington</surname>
<given-names>P. L.</given-names>
</name>
<name>
<surname>Catlin</surname>
<given-names>B. B.</given-names>
</name>
<name>
<surname>Gennuso</surname>
<given-names>K. P.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>The County Health Rankings: Rationale and Methods</article-title>. <source>Popul. Health Metrics</source> <volume>13</volume>, <fpage>11</fpage>. <pub-id pub-id-type="doi">10.1186/s12963-015-0044-2</pub-id> </citation>
</ref>
<ref id="B72">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rice</surname>
<given-names>G. E.</given-names>
</name>
<name>
<surname>Teuschler</surname>
<given-names>L. K.</given-names>
</name>
<name>
<surname>Bull</surname>
<given-names>R. J.</given-names>
</name>
<name>
<surname>Simmons</surname>
<given-names>J. E.</given-names>
</name>
<name>
<surname>Feder</surname>
<given-names>P. I.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>Evaluating the Similarity of Complex Drinking-Water Disinfection By-Product Mixtures: Overview of the Issues</article-title>. <source>J. Toxicol. Environ. Health, Part A</source> <volume>72</volume> (<issue>7</issue>), <fpage>429</fpage>&#x2013;<lpage>436</lpage>. <pub-id pub-id-type="doi">10.1080/15287390802608890</pub-id> </citation>
</ref>
<ref id="B73">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rider</surname>
<given-names>C. V.</given-names>
</name>
<name>
<surname>McHale</surname>
<given-names>C. M.</given-names>
</name>
<name>
<surname>Webster</surname>
<given-names>T. F.</given-names>
</name>
<name>
<surname>Lowe</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Goodson</surname>
<given-names>W. H.</given-names>
<suffix>3rd</suffix>
</name>
<name>
<surname>La Merrill</surname>
<given-names>M. A.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Using the Key Characteristics of Carcinogens to Develop Research on Chemical Mixtures and Cancer</article-title>. <source>Environ. Health Perspect.</source> <volume>129</volume> (<issue>3</issue>), <fpage>035003</fpage>. <pub-id pub-id-type="doi">10.1289/EHP8525</pub-id> </citation>
</ref>
<ref id="B74">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ring</surname>
<given-names>C. L.</given-names>
</name>
<name>
<surname>Arnot</surname>
<given-names>J. A.</given-names>
</name>
<name>
<surname>Bennett</surname>
<given-names>D. H.</given-names>
</name>
<name>
<surname>Egeghy</surname>
<given-names>P. P.</given-names>
</name>
<name>
<surname>Fantke</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>L.</given-names>
</name>
<etal/>
</person-group> (<year>2019</year>). <article-title>Consensus Modeling of Median Chemical Intake for the U.S. Population Based on Predictions of Exposure Pathways</article-title>. <source>Environ. Sci. Technol.</source> <volume>53</volume> (<issue>2</issue>), <fpage>719</fpage>&#x2013;<lpage>732</lpage>. <pub-id pub-id-type="doi">10.1021/acs.est.8b04056</pub-id> </citation>
</ref>
<ref id="B75">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ring</surname>
<given-names>C. L.</given-names>
</name>
<name>
<surname>Pearce</surname>
<given-names>R. G.</given-names>
</name>
<name>
<surname>Setzer</surname>
<given-names>R. W.</given-names>
</name>
<name>
<surname>Wetmore</surname>
<given-names>B. A.</given-names>
</name>
<name>
<surname>Wambaugh</surname>
<given-names>J. F.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Identifying Populations Sensitive to Environmental Chemicals by Simulating Toxicokinetic Variability</article-title>. <source>Environ. Int.</source> <volume>106</volume>, <fpage>105</fpage>&#x2013;<lpage>118</lpage>. <pub-id pub-id-type="doi">10.1016/j.envint.2017.06.004</pub-id> </citation>
</ref>
<ref id="B76">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ring</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Sipes</surname>
<given-names>N. S.</given-names>
</name>
<name>
<surname>Hsieh</surname>
<given-names>J.-H.</given-names>
</name>
<name>
<surname>Carberry</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Koval</surname>
<given-names>L. E.</given-names>
</name>
<name>
<surname>Klaren</surname>
<given-names>W. D.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Predictive Modeling of Biological Responses in the Rat Liver Using <italic>In Vitro</italic> Tox21 Bioactivity: Benefits from High-Throughput Toxicokinetics</article-title>. <source>Comput. Toxicol.</source> <volume>18</volume>, <fpage>100166</fpage>. <pub-id pub-id-type="doi">10.1016/j.comtox.2021.100166</pub-id> </citation>
</ref>
<ref id="B77">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ritz</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Baty</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Streibig</surname>
<given-names>J. C.</given-names>
</name>
<name>
<surname>Gerhard</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Dose-Response Analysis Using R</article-title>. <source>PLoS One</source> <volume>10</volume> (<issue>12</issue>), <fpage>e0146021</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pone.0146021</pub-id> </citation>
</ref>
<ref id="B78">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Robasky</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Boyles</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Bradford</surname>
<given-names>K. C.</given-names>
</name>
<name>
<surname>Gold</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Lenhardt</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>McKeen</surname>
<given-names>S.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <source>How to Launch Transdisciplinary Research Communication</source>. <publisher-loc>Research Triangle Park, NC</publisher-loc>: <publisher-name>RTI Press</publisher-name>. </citation>
</ref>
<ref id="B79">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Roell</surname>
<given-names>K. R.</given-names>
</name>
<name>
<surname>Harmon</surname>
<given-names>Q. E.</given-names>
</name>
<name>
<surname>Klungs&#xf8;yr</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Bauer</surname>
<given-names>A. E.</given-names>
</name>
<name>
<surname>Magnus</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Engel</surname>
<given-names>S. M.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Clustering Longitudinal Blood Pressure Trajectories to Examine Heterogeneity in Outcomes Among Preeclampsia Cases and Controls</article-title>. <source>Hypertension</source> <volume>77</volume> (<issue>6</issue>), <fpage>2034</fpage>&#x2013;<lpage>2044</lpage>. <pub-id pub-id-type="doi">10.1161/HYPERTENSIONAHA.120.16239</pub-id> </citation>
</ref>
<ref id="B80">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Roell</surname>
<given-names>K. R.</given-names>
</name>
<name>
<surname>Havener</surname>
<given-names>T. M.</given-names>
</name>
<name>
<surname>Reif</surname>
<given-names>D. M.</given-names>
</name>
<name>
<surname>Jack</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>McLeod</surname>
<given-names>H. L.</given-names>
</name>
<name>
<surname>Wiltshire</surname>
<given-names>T.</given-names>
</name>
<etal/>
</person-group> (<year>2019</year>). <article-title>Synergistic Chemotherapy Drug Response Is a Genetic Trait in Lymphoblastoid Cell Lines</article-title>. <source>Front. Genet.</source> <volume>10</volume>, <fpage>829</fpage>. <pub-id pub-id-type="doi">10.3389/fgene.2019.00829</pub-id> </citation>
</ref>
<ref id="B81">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ryan</surname>
<given-names>K. R.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>M. C.</given-names>
</name>
<name>
<surname>Ferguson</surname>
<given-names>S. S.</given-names>
</name>
<name>
<surname>Waidyanatha</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Ramaiahgari</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Rice</surname>
<given-names>J. R.</given-names>
</name>
<etal/>
</person-group> (<year>2019</year>). <article-title>Evaluating Sufficient Similarity of Botanical Dietary Supplements: Combining Chemical and <italic>In Vitro</italic> Biological Data</article-title>. <source>Toxicol. Sci.</source> <volume>172</volume> (<issue>2</issue>), <fpage>316</fpage>&#x2013;<lpage>329</lpage>. <pub-id pub-id-type="doi">10.1093/toxsci/kfz189</pub-id> </citation>
</ref>
<ref id="B82">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shah</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Judson</surname>
<given-names>R. S.</given-names>
</name>
<name>
<surname>Thomas</surname>
<given-names>R. S.</given-names>
</name>
<name>
<surname>Patlewicz</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Systematically Evaluating Read-Across Prediction and Performance Using a Local Validity Approach Characterized by Chemical Structure and Bioactivity Information</article-title>. <source>Regul. Toxicol. Pharmacol.</source> <volume>79</volume>, <fpage>12</fpage>&#x2013;<lpage>24</lpage>. <pub-id pub-id-type="doi">10.1016/j.yrtph.2016.05.008</pub-id> </citation>
</ref>
<ref id="B83">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shah</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Tate</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Patlewicz</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Generalized Read-Across Prediction Using Genra-Py</article-title>. <source>Bioinformatics</source> <volume>37</volume>, <fpage>3380</fpage>&#x2013;<lpage>3381</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btab210</pub-id> </citation>
</ref>
<ref id="B84">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sim</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Stebbins</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Bierer</surname>
<given-names>B. E.</given-names>
</name>
<name>
<surname>Butte</surname>
<given-names>A. J.</given-names>
</name>
<name>
<surname>Drazen</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Dzau</surname>
<given-names>V.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Time for NIH to Lead on Data Sharing</article-title>. <source>Science</source> <volume>367</volume> (<issue>6484</issue>), <fpage>1308</fpage>&#x2013;<lpage>1309</lpage>. <pub-id pub-id-type="doi">10.1126/science.aba4456</pub-id> </citation>
</ref>
<ref id="B85">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Smeester</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Rager</surname>
<given-names>J. E.</given-names>
</name>
<name>
<surname>Bailey</surname>
<given-names>K. A.</given-names>
</name>
<name>
<surname>Guan</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Smith</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Garc&#xed;a-Vargas</surname>
<given-names>G.</given-names>
</name>
<etal/>
</person-group> (<year>2011</year>). <article-title>Epigenetic Changes in Individuals with Arsenicosis</article-title>. <source>Chem. Res. Toxicol.</source> <volume>24</volume> (<issue>2</issue>), <fpage>165</fpage>&#x2013;<lpage>167</lpage>. <pub-id pub-id-type="doi">10.1021/tx1004419</pub-id> </citation>
</ref>
<ref id="B86">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tete</surname>
<given-names>V. S.</given-names>
</name>
<name>
<surname>Nyoni</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Mamba</surname>
<given-names>B. B.</given-names>
</name>
<name>
<surname>Msagati</surname>
<given-names>T. A. M.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Occurrence and Spatial Distribution of Statins, Fibrates and Their Metabolites in Aquatic Environments</article-title>. <source>Arabian J. Chem.</source> <volume>13</volume> (<issue>2</issue>), <fpage>4358</fpage>&#x2013;<lpage>4373</lpage>. <pub-id pub-id-type="doi">10.1016/j.arabjc.2019.08.003</pub-id> </citation>
</ref>
<ref id="B87">
<citation citation-type="web">
<collab>The R Project for Statistical Computing</collab> (<year>2021</year>). <article-title>The R Project for Statistical Computing</article-title>. <comment>[Online]. Available at: <ext-link ext-link-type="uri" xlink:href="https://www.r-project.org/">https://www.r-project.org/</ext-link> (Accessed Aug 1, 2021)</comment>. </citation>
</ref>
<ref id="B88">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Thompson</surname>
<given-names>C. M.</given-names>
</name>
<name>
<surname>Kirman</surname>
<given-names>C. R.</given-names>
</name>
<name>
<surname>Hays</surname>
<given-names>S. M.</given-names>
</name>
<name>
<surname>Suh</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Harvey</surname>
<given-names>S. E.</given-names>
</name>
<name>
<surname>Proctor</surname>
<given-names>D. M.</given-names>
</name>
<etal/>
</person-group> (<year>2018</year>). <article-title>Integration of Mechanistic and Pharmacokinetic Information to Derive Oral Reference Dose and Margin-Of-Exposure Values for Hexavalent Chromium</article-title>. <source>J. Appl. Toxicol.</source> <volume>38</volume> (<issue>3</issue>), <fpage>351</fpage>&#x2013;<lpage>365</lpage>. <pub-id pub-id-type="doi">10.1002/jat.3545</pub-id> </citation>
</ref>
<ref id="B89">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>To</surname>
<given-names>K. T.</given-names>
</name>
<name>
<surname>Truong</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Edwards</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Tanguay</surname>
<given-names>R. L.</given-names>
</name>
<name>
<surname>Reif</surname>
<given-names>D. M.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Multivariate Modeling of Engineered Nanomaterial Features Associated with Developmental Toxicity</article-title>. <source>NanoImpact</source> <volume>16</volume>, <fpage>100185</fpage>. <pub-id pub-id-type="doi">10.1016/j.impact.2019.100185</pub-id> </citation>
</ref>
<ref id="B90">
<citation citation-type="web">
<collab>UWPHI</collab> (<year>2021</year>). <article-title>National Data &#x26; Documentation: 2010-2019</article-title>. <comment>[Online]. Available at: <ext-link ext-link-type="uri" xlink:href="https://www.countyhealthrankings.org/explore-health-rankings/rankings-data-documentation/national-data-documentation-2010-2019">https://www.countyhealthrankings.org/explore-health-rankings/rankings-data-documentation/national-data-documentation-2010-2019</ext-link> (Accessed Aug 1, 2021)</comment>. </citation>
</ref>
<ref id="B91">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>V&#xe4;remo</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Nielsen</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Nookaew</surname>
<given-names>I.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Enriching the Gene Set Analysis of Genome-wide Data by Incorporating Directionality of Gene Expression and Combining Statistical Hypotheses and Methods</article-title>. <source>Nucleic Acids Res.</source> <volume>41</volume> (<issue>8</issue>), <fpage>4378</fpage>&#x2013;<lpage>4391</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkt111</pub-id> </citation>
</ref>
<ref id="B92">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wambaugh</surname>
<given-names>J. F.</given-names>
</name>
<name>
<surname>Bare</surname>
<given-names>J. C.</given-names>
</name>
<name>
<surname>Carignan</surname>
<given-names>C. C.</given-names>
</name>
<name>
<surname>Dionisio</surname>
<given-names>K. L.</given-names>
</name>
<name>
<surname>Dodson</surname>
<given-names>R. E.</given-names>
</name>
<name>
<surname>Jolliet</surname>
<given-names>O.</given-names>
</name>
<etal/>
</person-group> (<year>2019</year>). <article-title>New Approach Methodologies for Exposure Science</article-title>. <source>Curr. Opin. Toxicol.</source> <volume>15</volume>, <fpage>76</fpage>&#x2013;<lpage>92</lpage>. <pub-id pub-id-type="doi">10.1016/j.cotox.2019.07.001</pub-id> </citation>
</ref>
<ref id="B93">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wambaugh</surname>
<given-names>J. F.</given-names>
</name>
<name>
<surname>Wetmore</surname>
<given-names>B. A.</given-names>
</name>
<name>
<surname>Pearce</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Strope</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Goldsmith</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Sluka</surname>
<given-names>J. P.</given-names>
</name>
<etal/>
</person-group> (<year>2015</year>). <article-title>Toxicokinetic Triage for Environmental Chemicals</article-title>. <source>Toxicol. Sci.</source> <volume>147</volume> (<issue>1</issue>), <fpage>55</fpage>&#x2013;<lpage>67</lpage>. <pub-id pub-id-type="doi">10.1093/toxsci/kfv118</pub-id> </citation>
</ref>
<ref id="B94">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ward-Caviness</surname>
<given-names>C. K.</given-names>
</name>
<name>
<surname>Danesh Yazdi</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Moyer</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Weaver</surname>
<given-names>A. M.</given-names>
</name>
<name>
<surname>Cascio</surname>
<given-names>W. E.</given-names>
</name>
<name>
<surname>Di</surname>
<given-names>Q.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Long&#x2010;Term Exposure to Particulate Air Pollution Is Associated With 30&#x2010;Day Readmissions and Hospital Visits Among Patients With Heart Failure</article-title>. <source>J. Am. Heart Assoc.</source> <volume>10</volume> (<issue>10</issue>), <fpage>e019430</fpage>. <pub-id pub-id-type="doi">10.1161/JAHA.120.019430</pub-id> </citation>
</ref>
<ref id="B95">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ward-Caviness</surname>
<given-names>C. K.</given-names>
</name>
<name>
<surname>Russell</surname>
<given-names>A. G.</given-names>
</name>
<name>
<surname>Weaver</surname>
<given-names>A. M.</given-names>
</name>
<name>
<surname>Slawsky</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Dhingra</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Kwee</surname>
<given-names>L. C.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Accelerated Epigenetic Age as a Biomarker of Cardiovascular Sensitivity to Traffic-Related Air Pollution</article-title>. <source>Aging</source> <volume>12</volume> (<issue>23</issue>), <fpage>24141</fpage>&#x2013;<lpage>24155</lpage>. <pub-id pub-id-type="doi">10.18632/aging.202341</pub-id> </citation>
</ref>
<ref id="B96">
<citation citation-type="web">
<person-group person-group-type="author">
<name>
<surname>Wickham</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Grolemund</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>R for Data Science</article-title>. <comment>[Online]. O&#x2019;Reilly. Available at: <ext-link ext-link-type="uri" xlink:href="https://r4ds.had.co.nz/">https://r4ds.had.co.nz/</ext-link> (Accessed Aug 1, 2021)</comment>. </citation>
</ref>
<ref id="B97">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wild</surname>
<given-names>C. P.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>The Exposome: from Concept to Utility</article-title>. <source>Int. J. Epidemiol.</source> <volume>41</volume> (<issue>1</issue>), <fpage>24</fpage>&#x2013;<lpage>32</lpage>. <pub-id pub-id-type="doi">10.1093/ije/dyr236</pub-id> </citation>
</ref>
<ref id="B98">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wilkinson</surname>
<given-names>M. D.</given-names>
</name>
<name>
<surname>Dumontier</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Aalbersberg</surname>
<given-names>I. J.</given-names>
</name>
<name>
<surname>Appleton</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Axton</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Baak</surname>
<given-names>A.</given-names>
</name>
<etal/>
</person-group> (<year>2016</year>). <article-title>The FAIR Guiding Principles for Scientific Data Management and Stewardship</article-title>. <source>Sci. Data</source> <volume>3</volume>, <fpage>160018</fpage>. <pub-id pub-id-type="doi">10.1038/sdata.2016.18</pub-id> </citation>
</ref>
<ref id="B99">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zavala</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Freedman</surname>
<given-names>A. N.</given-names>
</name>
<name>
<surname>Szilagyi</surname>
<given-names>J. T.</given-names>
</name>
<name>
<surname>Jaspers</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Wambaugh</surname>
<given-names>J. F.</given-names>
</name>
<name>
<surname>Higuchi</surname>
<given-names>M.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>New Approach Methods to Evaluate Health Risks of Air Pollutants: Critical Design Considerations for <italic>In Vitro</italic> Exposure Testing</article-title>. <source>Int. J. Environ. Res. Public Health</source> <volume>17</volume> (<issue>6</issue>), <fpage>2124</fpage>. <pub-id pub-id-type="doi">10.3390/ijerph17062124</pub-id> </citation>
</ref>
<ref id="B100">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Roell</surname>
<given-names>K. R.</given-names>
</name>
<name>
<surname>Truong</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Tanguay</surname>
<given-names>R. L.</given-names>
</name>
<name>
<surname>Reif</surname>
<given-names>D. M.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>A Data-Driven Weighting Scheme for Multivariate Phenotypic Endpoints Recapitulates Zebrafish Developmental Cascades</article-title>. <source>Toxicol. Appl. Pharmacol.</source> <volume>314</volume>, <fpage>109</fpage>&#x2013;<lpage>117</lpage>. <pub-id pub-id-type="doi">10.1016/j.taap.2016.11.010</pub-id> </citation>
</ref>
</ref-list>
</back>
</article>