<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="1.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Toxicol.</journal-id>
<journal-title-group>
<journal-title>Frontiers in Toxicology</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Toxicol.</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">2673-3080</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1778353</article-id>
<article-id pub-id-type="doi">10.3389/ftox.2026.1778353</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Research</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Perspectives on variability of <italic>in vivo</italic> toxicology studies: considerations for next-generation toxicology</article-title>
<alt-title alt-title-type="left-running-head">Karmaus et al.</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/ftox.2026.1778353">10.3389/ftox.2026.1778353</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Karmaus</surname>
<given-names>Agnes L.</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="author-notes" rid="fn001">
<sup>&#x2020;</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/691669"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x26; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing - review and editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing - original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal Analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Kreutz</surname>
<given-names>Anna L.</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/3262664"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing - original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal Analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x26; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing - review and editing</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Oyetade</surname>
<given-names>Oluwakemi</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x26; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing - review and editing</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Friedman</surname>
<given-names>Katie Paul</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing - original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x26; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing - review and editing</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Paparella</surname>
<given-names>Martin</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1166575"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x26; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing - review and editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing - original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Reinke</surname>
<given-names>Emily N.</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x26; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing - review and editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Allen</surname>
<given-names>David</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="author-notes" rid="fn001">
<sup>&#x2020;</sup>
</xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing - original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x26; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing - review and editing</role>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Hogberg</surname>
<given-names>Helena T.</given-names>
</name>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/53430"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x26; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing - review and editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Kleinstreuer</surname>
<given-names>Nicole C.</given-names>
</name>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
<xref ref-type="author-notes" rid="fn001">
<sup>&#x2020;</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/54869"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x26; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing - review and editing</role>
</contrib>
</contrib-group>
<aff id="aff1">
<label>1</label>
<institution>Inotiv, Research Triangle Park</institution>, <city>Durham</city>, <state>NC</state>, <country country="US">United States</country>
</aff>
<aff id="aff2">
<label>2</label>
<institution>Independent Expert</institution>, <city>Durham</city>, <state>NC</state>, <country country="US">United States</country>
</aff>
<aff id="aff3">
<label>3</label>
<institution>Institute for Medical Biochemistry, Medical University Innsbruck</institution>, <city>Innsbruck</city>, <country country="AT">Austria</country>
</aff>
<aff id="aff4">
<label>4</label>
<institution>National Toxicology Program Interagency Center for the Evaluation of Alternative Toxicological Methods, Division of Translational Toxicology, National Institute of Environmental Health Sciences</institution>, <city>Durham</city>, <state>NC</state>, <country country="US">United States</country>
</aff>
<author-notes>
<corresp id="c001">
<label>&#x2a;</label>Correspondence: Helena T. Hogberg, <email xlink:href="mailto:Helena.hogberg-durdock@nih.gov">Helena.hogberg-durdock@nih.gov</email>
</corresp>
<fn fn-type="present-address" id="fn001">
<label>&#x2020;</label>
<p>
<bold>Present Address:</bold> Agnes L. Karmaus, Syngenta Crop Protection, LLC, Greensboro, NC, United States; David Allen, International Collaboration on Cosmetics Safety, Research Triangle Park, NC, United States; Nicole C. Kleinstreuer, Division of Program Coordination, Planning, and Strategic Initiatives, Office of the Director, National Institutes of Health, Research Triangle Park, NC, United States</p>
</fn>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-03-02">
<day>02</day>
<month>03</month>
<year>2026</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2026</year>
</pub-date>
<volume>8</volume>
<elocation-id>1778353</elocation-id>
<history>
<date date-type="received">
<day>30</day>
<month>12</month>
<year>2025</year>
</date>
<date date-type="rev-recd">
<day>25</day>
<month>01</month>
<year>2026</year>
</date>
<date date-type="accepted">
<day>30</day>
<month>01</month>
<year>2026</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2026 Karmaus, Kreutz, Oyetade, Friedman, Paparella, Reinke, Allen, Hogberg and Kleinstreuer.</copyright-statement>
<copyright-year>2026</copyright-year>
<copyright-holder>Karmaus, Kreutz, Oyetade, Friedman, Paparella, Reinke, Allen, Hogberg and Kleinstreuer</copyright-holder>
<license>
<ali:license_ref start_date="2026-03-02">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<sec>
<title>Introduction</title>
<p>Animal studies have historically informed toxicological testing and safety assessments. However, assessment of the variability in both quantitative and qualitative results has been limited. Biological variability, experimental differences, interpretation of categorical endpoints, and data availability and curation approaches all contribute to the quantified variability.</p>
</sec>
<sec>
<title>Methods</title>
<p>A literature review was conducted to identify publications describing variability analyses for <italic>in vivo</italic> toxicology studies. Variability analyses were evaluated and summarized for a variety of toxicological endpoints: ocular irritation, dermal sensitization and irritation, acute oral and inhalation lethality, subchronic and chronic toxicity, carcinogenicity, neurotoxicity including DNT, endocrine, and genotoxicity.</p>
</sec>
<sec>
<title>Results</title>
<p>This review summarizes published investigations of variability within mammalian toxicological studies that have been largely conducted in accordance with health effects test guidelines. The results of this review suggest that replicability of <italic>in vivo</italic> toxicological guideline studies varies widely by study type, endpoint complexity, and classification approach.</p>
</sec>
<sec>
<title>Discussion</title>
<p>While any test system will have inherent variability, understanding its sources and impact on study interpretation will help ensure that appropriate confidence is applied when using the test method. Furthermore, such information aids in establishing relevant metrics to serve as baselines for informing performance characterization of new approach methodologies (NAMs). Future evaluation of NAMs should be contextualized using estimates of uncertainty and variance of the traditional study data to demonstrate &#x201c;better&#x201d; performance compared to traditional testing approaches. Robust understanding of guideline study performance is important for risk assessments, where it is important to find species-relevant NAMs that can perform at least as well as existing bioassays.</p>
</sec>
</abstract>
<kwd-group>
<kwd>generalizability</kwd>
<kwd>NAM evaluation</kwd>
<kwd>performance metrics</kwd>
<kwd>replicability</kwd>
<kwd>risk assessment</kwd>
<kwd>variability</kwd>
</kwd-group>
<funding-group>
<funding-statement>The author(s) declared that financial support was received for this work and/or its publication. This research was supported by the Intramural Research Program (ES&#x23; 103386) of the National Institute of Environmental Health Sciences, National Institutes of Health under contract HHSN273201500010C. This research was funded in part by PETA Science Consortium International, the Institute for <italic>In Vitro</italic> Science, and the International Collaboration on Cosmetics Safety. The work of MP at the Medical University of Innsbruck in Austria is funded by the Austrian Federal Ministry for Environment, Department V/5 - Chemicals Policy and Biocides.</funding-statement>
</funding-group>
<counts>
<fig-count count="0"/>
<table-count count="4"/>
<equation-count count="0"/>
<ref-count count="62"/>
<page-count count="12"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Regulatory Toxicology</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="s1">
<label>1</label>
<title>Introduction</title>
<p>Advancing the field of toxicology from animal models to new approach methodologies (NAMs) requires a robust understanding of animal study performance to ensure that the performance of NAMs is equivalent or better. &#x201c;Equivalent or better&#x201d; can be defined by many metrics, including but not limited to, more mechanistically insightful, more biologically relevant (e.g., human-based test systems for predicting human effects), and less variable as compared to the traditional animal-based test with respect to either replicate data themselves or the conclusions made for safety assessments. Herein we focus on characterizing variability of <italic>in vivo</italic> study outcomes by reviewing variability of replicate studies with quantitative and/or qualitative endpoints. Studies reviewed included different types of health effects guidelines, including ocular irritation, dermal sensitization and irritation, acute lethality, subchronic and chronic repeated dose toxicity, endocrine, carcinogenicity, neurotoxicity, developmental neurotoxicity (DNT), and genotoxicity tests. Characterizing the variability of data derived from guideline animal studies (or guideline-like, defined as having only minor variations from guideline study conduct) informs benchmarks for NAM performance with respect to NAM variability and replicability.</p>
<p>The concept of evaluating toxicological study variability is multifaceted, with different understandings and definitions dependent on where and how the concept of variability is applied. Evaluations of variability could consider differences in replicate study data, overall interpretation, and prediction of the toxicological outcome of interest. The National Academy of Sciences has defined the related term &#x201c;reproducibility&#x201d; in a programmatic manner, i.e., consistent results should be obtained when computational evaluations use the same input data, analysis steps, methods, code, and conditions of analysis (<xref ref-type="bibr" rid="B44">National Academies of Sciences E, 2019</xref>). Conversely, the National Academy of Sciences defines &#x201c;replicability&#x201d; as the concept of a repeated study outcome across multiple studies aimed at answering the same scientific question. The concept of data variability can also include &#x201c;generalizability,&#x201d; wherein a study outcome can be applied to other contexts or populations (e.g., species extrapolation for human health assessments and extrapolations from few model species to the diversity of species in the ecosystem) (<xref ref-type="bibr" rid="B38">Kukull and Ganguli, 2012</xref>). To clarify definitions used herein, we have summarized these terms in <xref ref-type="table" rid="T1">Table 1</xref>. The appropriate data and methods to evaluate variability and inform on whether a study is reproducible, replicable, and generalizable must be understood to gain insight into whether an existing method is reliable for a specific purpose and should be used to benchmark NAMs for the same or a similar endpoint. Our analysis primarily focused on replicability, as the available data included replicate studies derived from the same (or similar) guideline.</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>Terminology for the different types of variability, as used in this manuscript.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Term</th>
<th align="left">Explanation</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">Reproducibility</td>
<td align="left">Variability of data from identical methods (e.g., a computational model)</td>
</tr>
<tr>
<td align="left">Replicability</td>
<td align="left">Variability of data from similar methods (e.g., test-protocol variability allowed within OECD TGs, including performance-based TGs or key-event-based TGs)</td>
</tr>
<tr>
<td align="left">Generalizability</td>
<td align="left">Variability of data from methods addressing the same endpoint for different species, populations or ecosystems</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>The standard approach for conducting toxicity testing for regulatory application is to utilize test guidelines. These can include Organisation for Economic Co-operation and Development (OECD) harmonized test guidelines, US Environmental Protection Agency (<xref ref-type="bibr" rid="B20">EPA, 2026</xref>) Health Effects Series 870 test guidelines, the International Council for Harmonisation of Technical Requirements for Pharmaceuticals for Human Use (ICH) guidelines, or International Organization for Standardization (ISO) testing standards. Such standardized test guidelines (TGs) provide acceptable study designs for international regulatory submissions. OECD harmonized TGs are intended to be robust, with emphasis on standardization of species, study length, exposure routes, and endpoints. However, OECD TGs are not standard operating procedures; they deliberately allow some protocol flexibility in terms of animal strains, exposure vehicles, experimental methods to analyze the endpoints, and statistics to assess the data. Study reports are still required to contain descriptions of how a study was conducted and to provide records of all aspects of the study. Further, regulatory agencies for all OECD member countries require the application of Good Laboratory Practice (GLP), which ensures detailed documentation of the quality and integrity of the study conduct. Adherence to GLP ensures proper storage of reagents and test articles, maintenance and accuracy of laboratory equipment, housing of animals, analytical approaches, test article characterization, and more (<xref ref-type="bibr" rid="B20">EPA, 2026</xref>). Studies conducted using a TG under GLP may provide sufficient documentation of a testing method to facilitate replicability analysis given shared study design parameters. One primary practical challenge is to gain access to well-documented study data and to then harmonize and curate them into databases that support retrospective analyses.</p>
<p>While adhering to TGs and GLP can support methodological consistency of <italic>in vivo</italic> guideline studies, inherent variability in terms of limited reproducibility, replicability, and generalizability is still frequently observed within and across studies. Toxicology is now at a critical time where toxicological data generated over decades are being curated for evaluation of their ability to inform appropriate benchmarks for NAMs that are often required to demonstrate performance that is &#x201c;equivalent or better&#x201d; when compared to traditional approaches. The availability of curated data has supported several analyses of variability across <italic>in vivo</italic> study types, but to date, these analyses have not been compiled and reviewed in one place. Ultimately, better understanding of <italic>in vivo</italic> variability is critical in leveraging NAMs for regulatory assessments as this understanding informs more realistic data-driven expectations for NAM assay performance. Therefore, we sought to aggregate available characterizations of variability from <italic>in vivo</italic> guideline study types. Rather than attempting to assess what the lowest theoretical variability from TGs optimally conducted and following the most recent scientific standards could be, this review is focused on curating the calculated replicability of existing TG data as they were generated and used for toxicological decision-making.</p>
</sec>
<sec sec-type="materials|methods" id="s2">
<label>2</label>
<title>Materials and methods</title>
<sec id="s2-1">
<label>2.1</label>
<title>Literature review</title>
<p>A literature review was conducted to identify publications describing variability analyses for <italic>in vivo</italic> toxicology studies. Initially, a literature search was conducted using PubMed (including MEDLINE) and Causaly databases. Medical Subject Heading (MeSH) terms and text words included &#x201c;variability&#x201d;, &#x201c;reproducibility&#x201d; and their variants combined with &#x201c;<italic>in vivo</italic>&#x201d;, &#x201c;animal studies&#x201d;, &#x201c;experimental studies&#x201d; and other relevant alternatives. The literature search focused on publications from 1990 to April 2023 (the date when the original search was conducted). This broad search, without restriction to toxicology, TG, or type of variability/reproducibility analysis yielded approximately 8,500 publications, confirming that evaluation of variability in the life sciences is a widely addressed topic.</p>
<p>Limiting to only publications containing variability analyses, systematic reviews, and meta-analyses of <italic>in vivo</italic> studies dramatically reduced the literature set. Subsequent manual screening, first by title and abstract, then subsequently by reviewing full text for a prioritized subset, was conducted to exclude non-relevant articles and adhere to the defined scope. This tiered manual screening resulted in the identification of approximately 100 relevant manuscripts; this corpus of literature was reviewed for data extraction and inclusion based on either quantitative analyses or relevant discussion addressing sources of variability in bioassays. This provided a set of about 60 manuscripts that included state-of-the-science reports containing opinions about scientific reproducibility of mammalian <italic>in vivo</italic> studies (not necessarily toxicology) and background on potential sources of variability that were used to inform our discussion.</p>
<p>From this body of literature, we selected toxicologically relevant study evaluations, specifically summaries of studies following human health-relevant TGs (from OECD and EPA Series 870 Health Effects). These guideline-like studies adhered to a set of criteria that were either aligned with existing regulatory guidelines or followed a standardized protocol used in regulatory assessment. We prioritized evaluations of data from mammalian studies, including analyses of variability from <italic>in vivo</italic> studies not specific to toxicology. The final number of publications yielding quantitative variability analyses that were useful for understanding replicability of relevant toxicological studies (e.g., TG or guideline-like) was 27. This subset was identified by excluding studies such as Ames assays that did not directly measure <italic>in vivo</italic> outcomes and endpoints such as metabolomics and vaccine evaluation that were considered too complex to evaluate for replicability.</p>
</sec>
<sec id="s2-2">
<label>2.2</label>
<title>Analyses from literature</title>
<p>Within our literature set, some reports evaluated variability in studies that were generally similar but did have minor differences (e.g., used different dosing vehicles). For those, we evaluated replicability among all studies, regardless of minor study differences. This was done for consistency, as not all analyses made such distinctions to account for study design. In studies that included multiple analyses, the most broadly representative summary metric was retrieved. For example, when respective analyses were conducted for males only, for females only, and for all animals, we considered only the evaluation that included all animals. Thus, our data compilation contains disparate data formats not intended to draw comparisons across study types; the intention of this variability review was to compile data and present reference values that can serve as a resource for better understanding <italic>in vivo</italic> toxicological studies.</p>
<p>Summaries of reported replicability were compiled for both categorical and quantitative endpoints. Variability analyses that considered categorical classification schemes were limited to those using the EPA and the United Nations Globally Harmonized System of Classification and Labeling of Chemicals (GHS) schemes. The replicability of European-specific classification, labeling, and packaging (CLP) criteria was not included in our review. Categorical replicability was generally reported in the literature set in the form of conditional probabilities. A conditional probability represents the probability of a chemical being assigned to a category given its prior categorization, with consideration to the number of studies with which a chemical was categorized. Calculations were performed as previously described (<xref ref-type="bibr" rid="B33">Karmaus et al., 2022</xref>; <xref ref-type="bibr" rid="B40">Luechtefeld et al., 2016</xref>).</p>
<p>Replicability of continuous quantitative endpoint values was reported as described by the primary literature source. We considered statistical metrics that conveyed some aspect of variance in replicate studies or the variance in replicate studies explained by study metadata (e.g., standard deviation [SD], coefficient of variation [CV], coefficient of determination [<italic>R</italic><sup>2</sup>], and root mean squared error [RMSE]; see <xref ref-type="table" rid="T2">Table 2</xref>).</p>
<table-wrap id="T2" position="float">
<label>TABLE 2</label>
<caption>
<p>Summary of statistical metrics reviewed.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Statistical metric reported</th>
<th align="left">General calculation</th>
<th align="left">Relationship to replicability</th>
<th align="left">Working definition</th>
</tr>
</thead>
<tbody valign="top">
<tr style="background-color:#F2F2F2">
<td align="left">Percent replicable</td>
<td align="left">The number of times a chemical resulted in that categorization from the total number of times the chemical was tested</td>
<td align="left">100% indicates perfect replicability</td>
<td align="left">Percentage of successful reproduction of a study categorization</td>
</tr>
<tr>
<td align="left">Standard deviation (SD)</td>
<td align="left">Calculated as the square root of the sum of squared deviations from the mean divided by the number of data points</td>
<td align="left">Low SD indicates higher replicability</td>
<td align="left">Measure of data dispersion around the mean in the same units as the data</td>
</tr>
<tr style="background-color:#F2F2F2">
<td align="left">Root mean squared error (RMSE)</td>
<td align="left">Calculated as the square root of average squared distances between each actual and predicted value</td>
<td align="left">Low RMSE indicates high model accuracy and low variance between predicted and actual values</td>
<td align="left">Measure of the deviation of model predictions from the true values (i.e., prediction error) in the same units as the data; when mean model prediction equals the data mean, the RMSE equals the SD of the model residuals</td>
</tr>
<tr>
<td align="left">Coefficient of variation (CV)</td>
<td align="left">SD divided by the mean, multiplied by 100</td>
<td align="left">Low CV indicates low variability in the data with respect to the mean</td>
<td align="left">Unitless value to indicate variability with respect to the mean; can be compared across datasets with different means but cannot be compared directly to SD or RMSE</td>
</tr>
<tr style="background-color:#F2F2F2">
<td align="left">Coefficient of determination (<italic>R</italic><sup>2</sup>)</td>
<td align="left"><italic>R</italic><sup>2</sup> &#x3d; 1 - (SSE/TSS), where SSE is the sum of squared errors and TSS is the total sum of squares</td>
<td align="left">Ranges from 0 to 1, with 1 being perfect replicability</td>
<td align="left">Amount of variance in the data explained by a variable or model</td>
</tr>
<tr>
<td align="left">Margin of uncertainty around the median</td>
<td align="left">&#xb1;2.5 &#xd7; MAD (where MAD is the median absolute deviation)</td>
<td align="left">A narrower margin indicates lower variability</td>
<td align="left">A range of values in which the value of a parameter is expected to fall</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
</sec>
<sec sec-type="results" id="s3">
<label>3</label>
<title>Results</title>
<p>Published retrospective evaluations of <italic>in vivo</italic> toxicological guideline study replicability were retrieved and encompassed a variety of toxicological endpoints: ocular irritation, dermal sensitization and irritation, acute oral and inhalation lethality, subchronic and chronic toxicity, carcinogenicity, neurotoxicity including DNT, endocrine, and genotoxicity (<xref ref-type="table" rid="T3">Tables 3</xref>, <xref ref-type="table" rid="T4">4</xref>). While the retrieved literature set included assessments of variability for study types that can be considered &#x201c;complex&#x201d; (i.e., carcinogenicity and DNT), relevant assessments were not found for other complex endpoints, such as offspring generation from prenatal developmental toxicity and multi-generation reproductive toxicity studies, delayed neurotoxicity, or toxicokinetics.</p>
<table-wrap id="T3" position="float">
<label>TABLE 3</label>
<caption>
<p>Variability of measured categorical endpoints for <italic>in vivo</italic> toxicological studies.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Study type relevant TG</th>
<th align="left">Observation</th>
<th align="left">% replicable</th>
<th align="left">Number of test articles</th>
<th align="left">Number of studies</th>
<th align="left">References</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td rowspan="3" align="left">Ocular Irritation (Draize rabbit eye irritation test)<break/>OECD TG 405<xref ref-type="table-fn" rid="Tfn1">
<sup>a</sup>
</xref>
</td>
<td align="left">GHS Cat</td>
<td align="left">
<break/>1: 73%<break/>2A: 33%<break/>2B: 16%<break/>NC: 94%</td>
<td align="left">491</td>
<td align="left">46<break/>138<break/>86<break/>400</td>
<td align="left">
<xref ref-type="bibr" rid="B40">Luechtefeld et al. (2016)</xref> Table 3</td>
</tr>
<tr>
<td align="left">GHS Cat</td>
<td align="left">
<break/>1: 62.5%<break/>2A/2B: 71.4%<break/>NC: 90%</td>
<td align="left">42<break/>16<break/>7<break/>20</td>
<td align="left">89</td>
<td align="left">
<xref ref-type="bibr" rid="B5">Barroso et al. (2017)</xref> Table 4 and 5</td>
</tr>
<tr>
<td align="left">GHS Cat</td>
<td align="left">
<break/>1: 92.8%<xref ref-type="table-fn" rid="Tfn2">
<sup>b</sup>
</xref>
<break/>2A/2B: 88.2%<break/>NC: 99.9%</td>
<td align="left">NA</td>
<td align="left">1,860 studies, 582 animals<break/>128<break/>193<break/>1,536</td>
<td align="left">
<xref ref-type="bibr" rid="B1">Adriaens et al. (2014)</xref> Table 10NCD</td>
</tr>
<tr>
<td rowspan="2" align="left">Dermal Sensitization (LLNA)<break/>OECD TG 429<xref ref-type="table-fn" rid="Tfn1">
<sup>a</sup>
</xref>
</td>
<td align="left">EC3 Cat</td>
<td align="left">
<break/>NS: 80%<break/>Weak: 68%<break/>Moderate: 63%<break/>Strong: 58%<break/>Extreme: 92%</td>
<td align="left">38<break/>8<break/>8<break/>12<break/>4<break/>6</td>
<td align="left">333<break/>61<break/>38<break/>128<break/>57<break/>49</td>
<td align="left">
<xref ref-type="bibr" rid="B29">Hoffmann et al. (2005)</xref> Table 1</td>
</tr>
<tr>
<td align="left">GHS Cat</td>
<td align="left">
<break/>1A: 69%<break/>1B: 68%<break/>NC: 52%</td>
<td align="left">87<break/>36<break/>65<break/>35</td>
<td align="left">400</td>
<td align="left">
<xref ref-type="bibr" rid="B18">Dumont et al. (2016)</xref> Table 3</td>
</tr>
<tr>
<td align="left">Dermal Irritation/Corrosion<break/>OECD TG 404<xref ref-type="table-fn" rid="Tfn1">
<sup>a</sup>
</xref>
</td>
<td align="left">GHS Cat</td>
<td align="left">
<break/>
<break/>1: 86%<break/>2: 64%<break/>3: 45%<break/>4: 92%</td>
<td align="left">425</td>
<td align="left">1,065 endpoint study records<break/>207<break/>35<break/>133<break/>690</td>
<td align="left">
<xref ref-type="bibr" rid="B54">Rooney et al. (2021)</xref> Table 1D</td>
</tr>
<tr>
<td rowspan="2" align="left">Acute Lethality (oral LD50)<break/>OECD TG 420<xref ref-type="table-fn" rid="Tfn1">
<sup>a</sup>
</xref>
</td>
<td align="left">GHS Cat</td>
<td align="left">54% (based on modeled LELs)</td>
<td align="left">97</td>
<td align="left">1,060</td>
<td align="left">
<xref ref-type="bibr" rid="B30">Hoffmann et al. (2010)</xref> Table 2</td>
</tr>
<tr>
<td align="left">EPA Cat</td>
<td align="left">
<break/>1: 53%<break/>2: 49%<break/>3: 62%<break/>4: 66%<break/>5: 75%<break/>I: 58%<break/>II: 67%<break/>III: 80%<break/>IV: 55%</td>
<td align="left">2,241<break/>53<break/>183<break/>556<break/>1,663<break/>1,490<break/>236<break/>910<break/>2,341<break/>458</td>
<td align="left">7,574<break/>104<break/>342<break/>1,166<break/>395<break/>2,857<break/>446<break/>1,694<break/>4,648<break/>788</td>
<td align="left">
<xref ref-type="bibr" rid="B33">Karmaus et al. (2022)</xref> Table 4</td>
</tr>
<tr>
<td align="left">Acute Lethality (inhalation LC50)<break/>OECD TG 433<xref ref-type="table-fn" rid="Tfn1">
<sup>a</sup>
</xref>
</td>
<td align="left">EPA Office of Pesticide Programs categorization</td>
<td align="left">
<break/>I: 70%<break/>II: 68%<break/>III: 47%<break/>IV: 86%</td>
<td align="left">339</td>
<td align="left">75<break/>137<break/>100<break/>556</td>
<td align="left">Hull et al., <italic>in prep</italic>
</td>
</tr>
<tr>
<td align="left">Subchronic/Chronic Repeated Dose<break/>OECD TG 407<xref ref-type="table-fn" rid="Tfn1">
<sup>a</sup>
</xref>
</td>
<td align="left">% concordance of findings in Subchronic or Chronic</td>
<td align="left">38.5%&#x2013;90% (mean: 69%) depending on species, study type, and organ</td>
<td align="left">169&#x2013;538</td>
<td align="left">306&#x2013;2,170</td>
<td align="left">
<xref ref-type="bibr" rid="B47">Paul Friedman et al. (2023)</xref> Figure 2 and Supp Table 2</td>
</tr>
<tr>
<td rowspan="3" align="left">Carcinogenicity (chronic testing)<break/>OECD TG 451<xref ref-type="table-fn" rid="Tfn1">
<sup>a</sup>
</xref>
</td>
<td rowspan="2" align="left">Pos/Neg</td>
<td align="left">65% between rat sexes and 36% between species (rat and mouse)</td>
<td align="left">313</td>
<td align="left">379 (349 in rat, 339 mice)</td>
<td align="left">
<xref ref-type="bibr" rid="B27">Haseman and Lockhart (1993)</xref> Table 7 &#x26; 9</td>
</tr>
<tr>
<td align="left">86% between sexes<break/>74% between species (rat/mouse)</td>
<td align="left">&#x200b;</td>
<td align="left">379</td>
<td align="left">
<xref ref-type="bibr" rid="B31">Huff et al. (1991)</xref> Table 4</td>
</tr>
<tr>
<td align="left">GHS categorization</td>
<td align="left">&#x3c;50% for tumors in same GHS category</td>
<td align="left">121</td>
<td align="left">&#x200b;</td>
<td align="left">
<xref ref-type="bibr" rid="B25">Gottmann et al. (2001)</xref> Table 7</td>
</tr>
<tr>
<td align="left">Hershberger<break/>OECD TG 441<xref ref-type="table-fn" rid="Tfn1">
<sup>a</sup>
</xref>
</td>
<td align="left">Pos/Neg</td>
<td align="left">72%</td>
<td align="left">25</td>
<td align="left">&#x2265;2 studies per chemical</td>
<td align="left">
<xref ref-type="bibr" rid="B8">Browne et al. (2015)</xref> Supp Table 6</td>
</tr>
<tr>
<td align="left">Uterotrophic<break/>OECD TG 440<xref ref-type="table-fn" rid="Tfn1">
<sup>a</sup>
</xref>
</td>
<td align="left">Pos/Neg</td>
<td align="left">74%</td>
<td align="left">118</td>
<td align="left">458 studies</td>
<td align="left">
<xref ref-type="bibr" rid="B36">Kleinstreuer et al. (2016)</xref>, Table 2
</td>
</tr>
<tr>
<td align="left">Genotoxicity</td>
<td align="left">Pos/ambiguous/Neg</td>
<td align="left">23%&#x2013;78%, depending on TG</td>
<td align="left">13 to 78, depending on TG</td>
<td align="left">3 to 23 replicates per substance, depending on TG</td>
<td align="left">
<xref ref-type="bibr" rid="B52">Raitano et al. (2026)</xref>
</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn id="Tfn1">
<label>
<sup>a</sup>
</label>
<p>OECD test guideline numbers represent the assay focus; however, several of the references include aggregated datasets including &#x201c;guideline-like&#x201d; studies with minor variations from the TG.</p>
</fn>
<fn id="Tfn2">
<label>
<sup>b</sup>
</label>
<p>Values reflect modeled LELs, not direct experimental data.</p>
</fn>
<fn>
<p>Definitions&#x2013;Carc, carcinogen; Cat, category; EC3, effective concentration required to induce a three-fold upregulation of lymph node cell proliferation; NA, not available; NC, not categorized; Non-Carc, non-carcinogen; Neg, negative; Pos, positive; TG, test guideline.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<table-wrap id="T4" position="float">
<label>TABLE 4</label>
<caption>
<p>Replicability of measured continuous endpoints for <italic>in vivo</italic> toxicological studies.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Study type/Relevant TG</th>
<th align="left">Endpoint measure</th>
<th align="left">Variability<xref ref-type="table-fn" rid="Tfn3">
<sup>a</sup>
</xref>
</th>
<th align="left">Number of test articles</th>
<th align="left">Number of studies evaluated<xref ref-type="table-fn" rid="Tfn4">
<sup>b</sup>
</xref>
</th>
<th align="left">References</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td rowspan="3" align="left">Ocular Irritation (Draize rabbit eye irritation test)<break/>OECD TG 405<xref ref-type="table-fn" rid="Tfn5">
<sup>c</sup>
</xref>
</td>
<td align="left">MAS</td>
<td align="left">Interlaboratory CV: 42%&#x2013;59%</td>
<td align="left">9</td>
<td align="left">24 labs</td>
<td align="left">
<xref ref-type="bibr" rid="B60">Weil and Scala (1971)</xref>, <xref ref-type="bibr" rid="B19">Earl et al. (1997)</xref>
</td>
</tr>
<tr>
<td align="left">MAS</td>
<td align="left">Intralaboratory CV: 38%</td>
<td align="left">&#x200b;</td>
<td align="left">4 labs, 13 tests</td>
<td align="left">
<xref ref-type="bibr" rid="B19">Earl et al. (1997)</xref>, <xref ref-type="bibr" rid="B11">Cormier et al. (1996)</xref>
</td>
</tr>
<tr>
<td align="left">MAS</td>
<td align="left">Intralaboratory CV: 3%&#x2013;65%</td>
<td align="left">4</td>
<td align="left">2 labs, 5 occasions</td>
<td align="left">
<xref ref-type="bibr" rid="B19">Earl et al. (1997)</xref>, <xref ref-type="bibr" rid="B7">Blein et al. (1991)</xref>
</td>
</tr>
<tr>
<td align="left">Dermal Sensitization (LLNA)<break/>OECD TG 429<xref ref-type="table-fn" rid="Tfn5">
<sup>c</sup>
</xref>
</td>
<td align="left">LogEC3</td>
<td align="left">SD: 0.147 logEC3 values</td>
<td align="left">12</td>
<td align="left">94 assays</td>
<td align="left">
<xref ref-type="bibr" rid="B53">Roberts et al. (2016)</xref>
</td>
</tr>
<tr>
<td align="left">Acute Lethality (Oral LD50)</td>
<td align="left">LogLD50</td>
<td align="left">SD: &#x3c;0.42 log (mg/kg)<xref ref-type="table-fn" rid="Tfn6">
<sup>d</sup>
</xref>
<break/>Rat-Mouse Interspecies <italic>R</italic>
<sup>2</sup>: 0.80</td>
<td align="left">57<break/>40</td>
<td align="left">504 studies<break/>622 values</td>
<td align="left">
<xref ref-type="bibr" rid="B30">Hoffmann et al. (2010)</xref>
</td>
</tr>
<tr>
<td align="left">OECD TG 420<xref ref-type="table-fn" rid="Tfn5">
<sup>c</sup>
</xref>
</td>
<td align="left">LogLD50</td>
<td align="left">Margin of uncertainty<xref ref-type="table-fn" rid="Tfn7">
<sup>e</sup>
</xref>: 0.095 &#xb1; 0.24 log (mg/kg)</td>
<td align="left">1,885</td>
<td align="left">5,826 studies</td>
<td align="left">
<xref ref-type="bibr" rid="B33">Karmaus et al. (2022)</xref>
</td>
</tr>
<tr>
<td rowspan="2" align="left">Subchronic/Chronic Oral Repeated Dose<break/>OECD TG 407<xref ref-type="table-fn" rid="Tfn5">
<sup>c</sup>
</xref>
</td>
<td align="left">LEL (study-level)</td>
<td align="left">Full dataset LEL<xref ref-type="table-fn" rid="Tfn8">
<sup>f</sup>
</xref>: RMSE 0.589 log10-mg/kg/day</td>
<td align="left">563</td>
<td align="left">2,724</td>
<td align="left">
<xref ref-type="bibr" rid="B41">Ly Pham et al. (2020)</xref> Table 3
</td>
</tr>
<tr>
<td align="left">LEL (organ-level)</td>
<td align="left">RMSE: 0.41&#x2013;0.68 log10-(mg/kg/day) mean RMSE across organ-level LELs<xref ref-type="table-fn" rid="Tfn8">
<sup>f</sup>
</xref>: 0.59 &#xb1; 0.09 log10-mg/kg/day</td>
<td align="left">58&#x2013;364, depending on target organ</td>
<td align="left">151&#x2013;1,353 studies</td>
<td align="left">
<xref ref-type="bibr" rid="B47">Paul Friedman et al. (2023)</xref> Fig 3/Supp File 3</td>
</tr>
<tr>
<td align="left">Carcinogenicity (chronic testing)<break/>OECD TG 451<xref ref-type="table-fn" rid="Tfn5">
<sup>c</sup>
</xref>
</td>
<td align="left">TD50</td>
<td align="left">
<italic>R</italic>
<sup>2</sup>: 0.63<xref ref-type="table-fn" rid="Tfn9">
<sup>g</sup>
</xref>
</td>
<td align="left">121</td>
<td align="left">70 studies</td>
<td align="left">
<xref ref-type="bibr" rid="B25">Gottmann et al. (2001)</xref>
</td>
</tr>
<tr>
<td align="left">Neurotoxicity</td>
<td align="left">Motor activity<break/>Motor activity (across time)<break/>LOEL</td>
<td align="left">Intralaboratory between subject control CV: 18.9%&#x2013;30.7%<break/>Intralaboratory between subject control CV across time: 9.6%&#x2013;26.2%<break/>LOEL-ratio range motor activity &#x3d; 1&#x2013;6<xref ref-type="table-fn" rid="Tfn10">
<sup>h</sup>
</xref>
</td>
<td align="left">1 (vehicle)<break/>9</td>
<td align="left">variable methods (cage configurations, rat strains, sex, age, test duration, interval-duration, housing conditions) in six laboratories</td>
<td align="left">
<xref ref-type="bibr" rid="B13">Crofton et al. (1991)</xref> Table 3
</td>
</tr>
<tr>
<td align="left">(DNT)</td>
<td align="left">Motor activity (Negative control)<break/>Startle response (Negative control)</td>
<td align="left">Intralaboratory CV: 20%&#x2013;140%<break/>Intralaboratory CV: 20%&#x2013;110%</td>
<td align="left">NA</td>
<td align="left">NA</td>
<td align="left">
<xref ref-type="bibr" rid="B43">Moser et al. (2016)</xref>
</td>
</tr>
<tr>
<td align="left">(DNT)</td>
<td align="left">Brain morphometry<break/>Brain weight</td>
<td align="left">Interlaboratory CV: 5%&#x2013;30%<break/>Interlaboratory CV: 4%&#x2013;12%</td>
<td align="left">NA</td>
<td align="left">12 studies, 7 labs<break/>22 studies, 10 labs</td>
<td align="left">
<xref ref-type="bibr" rid="B12">Crofton (2001)</xref>
</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn id="Tfn3">
<label>
<sup>a</sup>
</label>
<p>Replicate of same chemical.</p>
</fn>
<fn id="Tfn4">
<label>
<sup>b</sup>
</label>
<p>Numbers indicate number of studies unless otherwise specified.</p>
</fn>
<fn id="Tfn5">
<label>
<sup>c</sup>
</label>
<p>OECD test guideline numbers represent the assay focus; however, several of the references include aggregated datasets including &#x201c;guideline-like&#x201d; studies with minor variations from the TG.</p>
</fn>
<fn id="Tfn6">
<label>
<sup>d</sup>
</label>
<p>Upon exclusion of the five most variable chemicals (SDs were reported per chemical; the highest SD is reported as an upper bound).</p>
</fn>
<fn id="Tfn7">
<label>
<sup>e</sup>
</label>
<p>Bootstrapping of all LD50 values 5,000 times yielded a representative MAD; the margin of uncertainty was then calculated as &#xb1;2.5 &#xd7; the representative MAD.</p>
</fn>
<fn id="Tfn8">
<label>
<sup>f</sup>
</label>
<p>Values based on multilinear regression modeling.</p>
</fn>
<fn id="Tfn9">
<label>
<sup>g</sup>
</label>
<p>Correlation in the TD50 between data sources in the Carcinogenic Potency Database&#x2014;the National Cancer Institute or National Toxicology Program <italic>versus</italic> the general literature.</p>
</fn>
<fn id="Tfn10">
<label>
<sup>h</sup>
</label>
<p>Work presented at the Society of Toxicology Annual Meeting in 2003 analyzed animal control data from 21 studies from 10 laboratories and concluded that &#x201c;Further consideration of how to reduce variability of these motor activity measurements is warranted&#x201d; (<xref ref-type="bibr" rid="B51">Raffaele et al., 2004</xref>).</p>
</fn>
<fn>
<p>Definitions&#x2013;CV, coefficient of variation; DNT, developmental neurotoxicity; LD50, dose resulting in lethality in half of test animals; LEL, lowest-effect level; LOEL, lowest observed effect level; LLNA, local lymph node assay; MAS, maximum average score; NA, not available; SD, standard deviation; TD50, dose resulting in tumors in half of test animals.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<p>Quantifying replicability was noted as a significant challenge in most retrieved publications. Animal-based guideline studies have largely not been subjected to a validation process, and have instead been adopted based on historical precedent, providing few points of reference for comparison (<xref ref-type="bibr" rid="B46">Oyetade et al., 2023</xref>). Over the years there have been updates to some guidelines and more stringent GLP requirements have been introduced but insufficient data are available at this time to robustly evaluate the impact of these changes on variability; as such all analyses summarized herein aggregate all historical results into one compendium for analysis. Furthermore, it should be noted that testing data for all chemicals tested were summarized together, as cheminformatics evaluations to dive into differences in chemical class variability per study have not been conducted herein (though some referenced studies have done cursory evaluations of chemical structure impact on variability for some endpoints (<xref ref-type="bibr" rid="B33">Karmaus et al., 2022</xref>; <xref ref-type="bibr" rid="B49">Pradeep et al., 2017</xref>; <xref ref-type="bibr" rid="B41">Ly Pham et al., 2020</xref>)), often due to insufficient data for robust evaluation. Thus, replicability evaluations summarized herein relied upon replicate testing of single test articles (no formulations or mixtures). As noted above, we focused our analysis on two major types of replicability: replicability of an outcome, i.e., categorical (<xref ref-type="table" rid="T3">Table 3</xref>) and replicability of continuous data (<xref ref-type="table" rid="T4">Table 4</xref>).</p>
<sec id="s3-1">
<label>3.1</label>
<title>Categorical replicability</title>
<p>Categorical replicability analyses focus on study types with a categorical interpretation, which can be either a binary outcome of positive or negative or assignment to a hazard classification. Study types with binary classification endpoints included in our review are endocrine assays (Hershberger and uterotrophic), genotoxicity studies, carcinogenicity studies, and DNT studies (<xref ref-type="table" rid="T3">Table 3</xref>). The Hershberger and uterotrophic assays gave similar degrees of replicability: 72% and 74%, respectively (<xref ref-type="bibr" rid="B36">Kleinstreuer et al., 2016</xref>; <xref ref-type="bibr" rid="B8">Browne et al., 2015</xref>). For carcinogenicity, the two prior variability evaluations yielded different outcomes: 65%&#x2013;86% replicability when comparing between sexes of a single species, or 36%&#x2013;74% replicability between different species (rat and mouse) (<xref ref-type="bibr" rid="B31">Huff et al., 1991</xref>; <xref ref-type="bibr" rid="B27">Haseman and Lockhart, 1993</xref>).</p>
<p>More complex categorical analyses were conducted for studies using multicategory hazard classification schema to assign chemicals categories based on quantitative or qualitative study results. As noted above, our evaluation of this aspect of replicability was limited to studies using the GHS (the most common classification scheme for chemical hazard categorization) and the EPA categorization scheme (specific to endpoints of interest for the EPA). These systems have different category cutoffs and varying numbers of categories.</p>
<p>Multicategorical replicability was evaluated using conditional probabilities where possible, gathered from retrospective variability analyses or calculated based on available study information. As a point of reference, assignment of categories by random chance would equate to 50% replicability for a binary categorization scheme or 33% and 25% for systems of three and four categories, respectively. Endpoints with such data included ocular irritation/corrosion, dermal sensitization, dermal irritation/corrosion, rat acute oral lethality, and rat acute inhalation lethality (<xref ref-type="table" rid="T3">Table 3</xref>). Replicability was generally higher for categorization systems with fewer categories. For genotoxicity studies, the replicability of three categories (positive, negative, or ambiguous outcomes) varied widely, ranging between 23% and 78%, depending on the TG and its protocol variants (<xref ref-type="bibr" rid="B52">Raitano et al., 2026</xref>). It was rare to observe replicability above 75% across many study and endpoint types, especially for complex or targeted endpoints such as organ-specific effects. The concordance of any target organ effects for subchronic and chronic repeated dose studies ranged from 38.5% to 90%, depending on the organ, the frequency of positive findings, and how replicate studies were aggregated across study type and species (<xref ref-type="bibr" rid="B47">Paul Friedman et al., 2023</xref>).</p>
<p>For dermal and ocular study types, increased replicability was observed for categories representing the lowest and highest toxicities, suggesting robust results when toxicity is either absent or overt. This is most notable in the Draize rabbit eye irritation test for which the GHS Not Categorized classification was replicated as much as 100%, while the GHS Category 2B classification, representing mild irritation, had a conditional probability of only 16% replicability (<xref ref-type="bibr" rid="B40">Luechtefeld et al., 2016</xref>; <xref ref-type="bibr" rid="B5">Barroso et al., 2017</xref>). Given the low replicability for such mid-categories (e.g., GHS categories representing hazard of mild to moderate ocular irritation), subsequent studies were conducted to evaluate the impact of combining these categories to determine whether the replication improved with grouping. When authors collapsed GHS Categories 2A and 2B for ocular irritation, replicability was improved to as high as 71% or 88% in two independently conducted analyses (<xref ref-type="bibr" rid="B5">Barroso et al., 2017</xref>; <xref ref-type="bibr" rid="B1">Adriaens et al., 2014</xref>). Two evaluations of dermal sensitization data conflicted with the increased replicability seen for fewer categories (<xref ref-type="bibr" rid="B18">Dumont et al., 2016</xref>; <xref ref-type="bibr" rid="B29">Hoffmann et al., 2005</xref>). One study that considered reproducibility of three GHS classifications found Not Classified outcomes to have 52% replicability, while another study that considered five classifications found these outcomes to have 80% replicability. The discrepancy may be explained based on the analysis approach used: the study finding 52% replicability was based on any chemical for which two or more studies were available, and all categories were assigned in a normalized approach with equal study weights. 
The study finding 80% replicability, in comparison, considered only chemicals with three or more studies and determined a single categorization for each chemical based on the majority outcome. These findings underscore the importance of methods and review assumptions as well as curation protocols for retrospective analyses.</p>
</sec>
<sec id="s3-2">
<label>3.2</label>
<title>Quantitative replicability</title>
<p>The replicability of continuous numeric endpoints was reported using different quantitative approaches. Retrospective consideration of all of these disparate reported metrics is particularly important because most of these studies were not subject to replicability analysis for a range of chemicals prior to acceptance of the relevant TGs. Variability among quantitative endpoints was evaluated for ocular irritation, dermal sensitization, acute oral lethality, subchronic/chronic repeated dose toxicity, and carcinogenicity studies (<xref ref-type="table" rid="T4">Table 4</xref>). For acute lethality, variability analyses were available for both rat and mouse studies. Studies using either species showed similar degrees of variability when evaluated by the same authors (<xref ref-type="bibr" rid="B30">Hoffmann et al., 2010</xref>). Where possible, CVs were provided, but these were not available for all study outcomes due to a lack of reported data. Other quantitative metrics describing dispersion of the data included SD and a margin of uncertainty based on the median absolute deviation (MAD). Some variability analyses constructed models using study metadata to quantify the variance in replicate studies; these studies reported RMSE as a measure of spread of the predicted values from the true values, and <italic>R</italic>
<sup>2</sup> as the amount of variance in the data explained by the model. When the model mean prediction is the same as the data mean, the RMSE is equal to the SD of the residuals. We have reported SD, margin of uncertainty, and RMSE in the same units as the data, whereas <italic>R</italic>
<sup>2</sup> is expressed as a proportion of the variance explained by the model or variable (<xref ref-type="table" rid="T4">Table 4</xref>). Importantly, maximal <italic>R</italic>
<sup>2</sup> attainable by using curated study metadata to explain variance is limited by unexplained variance due to undocumented parameters (i.e., experimental factors that were not collected with the study or not curated consistently) or inherent biological variance (<xref ref-type="bibr" rid="B41">Ly Pham et al., 2020</xref>; <xref ref-type="bibr" rid="B36">Kleinstreuer et al., 2016</xref>; <xref ref-type="bibr" rid="B18">Dumont et al., 2016</xref>; <xref ref-type="bibr" rid="B47">Paul Friedman et al., 2023</xref>; <xref ref-type="bibr" rid="B39">Lubet et al., 2018</xref>; <xref ref-type="bibr" rid="B2">Ashby, 2002</xref>; <xref ref-type="bibr" rid="B13">Crofton et al., 1991</xref>).</p>
<p>Although these different metrics measure different statistical observations of the data or models of the data, examining the set of statistical metrics revealed multiple high-level findings. Inter- and intralaboratory CV values were available for ocular irritation with comparable CV values ranging between 40% and 60% (<xref ref-type="bibr" rid="B60">Weil and Scala, 1971</xref>; <xref ref-type="bibr" rid="B19">Earl et al., 1997</xref>; <xref ref-type="bibr" rid="B11">Cormier et al., 1996</xref>; <xref ref-type="bibr" rid="B7">Blein et al., 1991</xref>). Single-dose acute studies demonstrated less dispersion of effect-level values, likely the product of study design (e.g., limit tests at 2000&#xa0;mg/kg), with the margin of uncertainty equal to 0.25&#xa0;log10-mg/kg in one evaluation (<xref ref-type="bibr" rid="B33">Karmaus et al., 2022</xref>) and the SD falling below 0.42&#xa0;log10-mg/kg for most studies in another evaluation (<xref ref-type="bibr" rid="B30">Hoffmann et al., 2010</xref>). Repeat-dose studies demonstrated greater dispersion of replicate values that typically approached &#xb1;0.5&#xa0;log10-mg/kg/day, depending on how the dispersion was quantified. The linear correlation between 50% tumorigenic doses between two separate sources of carcinogenicity studies showed an <italic>R</italic>
<sup>2</sup> of 0.63 (<xref ref-type="bibr" rid="B25">Gottmann et al., 2001</xref>), suggesting major differences in study conduct, curation, and/or biological observations. In the aggregate, estimates of variance suggested large amounts of spread in replicate study data for quantitative endpoints. Linear correlation of values related to carcinogenicity (<xref ref-type="bibr" rid="B25">Gottmann et al., 2001</xref>) and the amount of variance in replicate oral repeat-dose study toxicity values explained by multilinear regression modeling (<xref ref-type="bibr" rid="B41">Ly Pham et al., 2020</xref>; <xref ref-type="bibr" rid="B47">Paul Friedman et al., 2023</xref>) suggest some upper bound exists in the amount of variability in replicate toxicity values that can be explained by study metadata, which likely approaches 60%&#x2013;70%.</p>
<p>One evaluation of replicability for behavioral endpoints assessed as part of neurotoxicity studies indicated good replicability for motor activity studies, with a lowest-observed-effect level maximum-to-minimum ratio range of one to six for nine positive control compounds tested within six laboratories (<xref ref-type="bibr" rid="B13">Crofton et al., 1991</xref>). Replicability of DNT motor activity was reported for negative controls showing CVs ranging from 20% to 140% (<xref ref-type="bibr" rid="B43">Moser et al., 2016</xref>). Despite the broad variability, these are notable data as negative controls are rarely reported. No other studies were available that could provide replicability estimates for this or other behavioral methods. Several abstracts presented at Society of Toxicology Annual Meetings between 2001 and 2005 (which were not peer-reviewed) indicated incomplete reporting and incomplete positive control data in evaluations of startle response data (<xref ref-type="bibr" rid="B56">Sette et al., 2004</xref>) and learning and memory tests (<xref ref-type="bibr" rid="B51">Raffaele et al., 2004</xref>), such that the considerable within- and between-lab variability could not be adequately analyzed for these endpoints. One of these, a 2004 evaluation of motor activity measurements, indicated that, &#x201c;<italic>Further consideration of how to reduce variability &#x2026; is warranted&#x201d;</italic> (<xref ref-type="bibr" rid="B51">Raffaele et al., 2004</xref>). A qualitative retrospective analysis of DNT studies reported that few laboratories (3/16) provided usable positive control data (<xref ref-type="bibr" rid="B14">Crofton et al., 2004</xref>), with studies lacking such data thus being uninformative for replicability estimates. However, negative control CV values for parameters of motor activity or startle response may provide some indication of the replicability of study results. 
Typically, CV values for such readouts range between 20% and more than 100%, depending on the laboratory, test conditions, and animal age (<xref ref-type="bibr" rid="B43">Moser et al., 2016</xref>). These high and variable CV values indicate that significant differences between control and dose groups may be recognized in some labs and test conditions but not in others. In contrast, as reported in a 2001 Society of Toxicology Annual Meeting presentation, CVs among brain weights and brain morphometrics were reported to be significantly lower, ranging from 4% to 12% and 5%&#x2013;30%, respectively (<xref ref-type="bibr" rid="B12">Crofton, 2001</xref>).</p>
</sec>
</sec>
<sec sec-type="discussion" id="s4">
<label>4</label>
<title>Discussion</title>
<p>With the recent spotlight on variability in the biomedical sciences (<xref ref-type="bibr" rid="B44">National Academies of Sciences, Engineering, and Medicine, 2019</xref>; <xref ref-type="bibr" rid="B4">Baker, 2016</xref>; <xref ref-type="bibr" rid="B6">Begley and Ioannidis, 2015</xref>; <xref ref-type="bibr" rid="B48">Poland et al., 2014</xref>), and the shift to NAMs for which comprehensive performance metrics are being generated as part of method development and validation (<xref ref-type="bibr" rid="B58">van der Zalm et al., 2022</xref>; <xref ref-type="bibr" rid="B24">Gamble et al., 2025</xref>; <xref ref-type="bibr" rid="B23">Foley et al., 2024</xref>; <xref ref-type="bibr" rid="B42">Miedel et al., 2025</xref>), it is more important than ever to compile variability metrics for available <italic>in vivo</italic> data. Much has been written about the challenges of scientific experimental reproducibility and variability, with multiple analyses specifically quantifying bioassay replicability. A Nature survey of nearly 1,600 scientists across multiple fields found that more than 70% of respondents had tried and failed to reproduce a previously published study from another scientist (<xref ref-type="bibr" rid="B4">Baker, 2016</xref>). Recent assessments of studies in psychology have suggested a 40% rate of replicability for these studies, with other evaluations suggesting a dismal 10% rate of replicability for cancer biology studies (<xref ref-type="bibr" rid="B4">Baker, 2016</xref>). This proposed &#x201c;reproducibility crisis in science&#x201d; (or &#x201c;replicability crisis&#x201d; based on definition of terms used herein) has garnered the attention of the general public and can impact the scrutiny applied to evaluating new approaches (<xref ref-type="bibr" rid="B28">Haven and Ioannidis, 2025</xref>).</p>
<p>Our review suggests replicability of <italic>in vivo</italic> toxicological guideline studies varies widely by study type, endpoint complexity, and classification approach depending on whether study designs employ binary or multicategorical outcomes or continuous endpoints. While optimizing assays to achieve low variability is fundamental to scientific investigation, it should be noted that variability should not be equated with lack of validity. Biology is inherently variable, and therefore variability in <italic>in vivo</italic> assays is not necessarily indicative of a poorly performing test method. However, it is imperative to properly integrate this information as uncertainty into any interpretation of the study data and associated analysis pipelines. Retrospective quantification of assay variability can establish acceptable levels for varying contexts of use and help identify which sources of variability lack adequate controls to ensure that the most robust available science is applied for toxicological safety assessments. The variability assessments summarized herein highlight a particular point of emphasis associated with test method evaluations: since <italic>in vivo</italic> tests used as a benchmark are not fully replicable, we cannot expect NAMs to have greater precision than a replicate <italic>in vivo</italic> study would. Thus, characterizing traditional guideline toxicological study variability can help establish baseline expectations for the use of NAMs.</p>
<sec id="s4-1">
<label>4.1</label>
<title>Distinguishing sources of variability</title>
<p>It is important to distinguish between quantified variability, sources of variability, and uncertainty about variability. For evaluating traditional <italic>in vivo</italic> toxicological assays, we must consider the effects of study conduct on both derivation of quantitative points-of-departure (hazard characterization) and interpretation frameworks (hazard identification, e.g., classification) for replicability. It is also important to note that different group size and statistical power between studies may explain limited replicability of study outcomes. Previous investigations have revealed that variability can stem from numerous sources related to either the assay protocol or study design variations, including elements such as animal strain, diet, and vehicle used (<xref ref-type="bibr" rid="B39">Lubet et al., 2018</xref>; <xref ref-type="bibr" rid="B2">Ashby, 2002</xref>). For example, a review focusing on variability in genotoxicity studies conducted a multivariate analysis to identify drivers of variability for both OECD TGs 474 and 475 across 31 chemicals with replicate data identifying strain and species as having the greatest contribution to variability (<xref ref-type="bibr" rid="B52">Raitano et al., 2026</xref>). Administration <italic>via</italic> injection as compared to the oral route has also been shown to increase the likelihood of a positive response in uterotrophic assays (<xref ref-type="bibr" rid="B36">Kleinstreuer et al., 2016</xref>), and lower variability is observed among studies that use the same vehicle (<xref ref-type="bibr" rid="B18">Dumont et al., 2016</xref>). Study parameters that can be customized such as species, dose spacing, and substance purity were found to contribute to more than half the total variance in organ-level lowest-effect levels (LELs) (<xref ref-type="bibr" rid="B47">Paul Friedman et al., 2023</xref>). 
This is particularly well-characterized for underlying physiological and metabolic differences across species, strain, and sex. Examples include differences in tumor incidence in the rat reproductive tract (<xref ref-type="bibr" rid="B63">You et al., 2002</xref>; <xref ref-type="bibr" rid="B9">Buelke-Sam et al., 1998</xref>) and in the presence of thyroid tumors in male rats vs. female rats (<xref ref-type="bibr" rid="B10">Coperchini et al., 2025</xref>). In addition, one report indicated that endpoint selection in the Draize rabbit eye irritation test had a significant impact on the degree of variability: higher variability was seen when a GHS Category 2 classification was made based on conjunctiva effects without corneal involvement as compared to classification based on other drivers such as iritis (<xref ref-type="bibr" rid="B1">Adriaens et al., 2014</xref>). Protocol timepoints, such as the age of the animal, the timepoint following exposure, or timing of the response measurement, can also significantly impact study outcome (<xref ref-type="bibr" rid="B39">Lubet et al., 2018</xref>; <xref ref-type="bibr" rid="B2">Ashby, 2002</xref>). We must acknowledge that TGs are not strict standard operating procedures, recognizing that some flexibility in the study design is acceptable and within the scope of &#x201c;guideline-like&#x201d; study conduct. However, even where the source of the variability may be explained, the data variability may remain an uncertainty since it is difficult to understand which protocol variant is most relevant for the human population.</p>
<p>Addressing inherent biological variability is even more complex, but can be considered to be derived from four general factors: physiological, genetic, ontogenetic, and exposomic (<xref ref-type="bibr" rid="B37">Kreutz et al., 2024</xref>). These sources of variability have been quantified to some extent but require further characterization. For example, with regard to chemical toxicokinetics, studies have found that variability estimates usually, but not always, fall within the acceptable 10X uncertainty factor for some chemicals and lifestages, with many uncertainties remaining (<xref ref-type="bibr" rid="B50">Quignot et al., 2021</xref>; <xref ref-type="bibr" rid="B17">Dorne et al., 2001</xref>; <xref ref-type="bibr" rid="B34">Kasteel et al., 2020</xref>; <xref ref-type="bibr" rid="B15">Darney et al., 2020</xref>; <xref ref-type="bibr" rid="B16">Di Consiglio et al., 2021</xref>; <xref ref-type="bibr" rid="B62">Wetmore et al., 2015</xref>; <xref ref-type="bibr" rid="B57">Strikwold et al., 2017</xref>). There are also uncertainties regarding the generalizability of animal studies to human and ecosystem outcomes due to differences such as species, exposures, and endpoints. Both animal models and <italic>in vitro</italic> NAMs present challenges for generalizability of model outputs to humans, with neither paradigm fully representing the toxicokinetic and toxicodynamic processes present in humans. Rodent-based studies must be extrapolated to human equivalent doses for human health risk assessment using a set of assumptions regarding dosimetry, toxicokinetics, and applicability of targets in rodents for targets in humans. Human-based NAMs are typically isolated cell-based or protein-based assays that require extrapolation to the human body, including toxicokinetic and metabolic considerations, such as tissue-specific metabolism.</p>
</sec>
<sec id="s4-2">
<label>4.2</label>
<title>Considerations for addressing data gaps and future work</title>
<p>Notable data gaps in our retrospective analysis include reproductive and developmental toxicity study types and toxicokinetic studies as well as variability among controls for these studies, which are rarely reported. We also note that the different analyses summarized did not all represent unique data sets, as some of the larger replicability evaluations mined the same source databases, resulting in overlapping representation of primary data. However, as our goal was not to directly compare each variability study, we felt that some redundancy was acceptable as we sought to more comprehensively characterize variability. More specifically, even variability estimates reported from the same datasets may differ due to the filtering steps and statistical approaches applied. One such case is for the vehicle used: while some analyses provided separate estimates of variability for the same vehicle (<xref ref-type="bibr" rid="B1">Adriaens et al., 2014</xref>), most did not consider vehicle as a factor in their analyses.</p>
<p>Results summarized here are likely not unique for mammalian toxicology and are expected to be consistent across ecotoxicology studies as well. For example, 50% lethality or moribundity concentrations (EC50s) in acute fish toxicity studies performed according to OECD TG 203 may vary over several orders of magnitude, with variation only partly accounted for by the use of the 11 different fish species allowed in the TG. Similar levels of variability are observed in EC50 data from acute daphnid studies conducted according to TG 202 (<xref ref-type="bibr" rid="B55">Sch&#xfc;r et al., 2025</xref>). Extending this review to other such endpoints could prove useful to better characterize current toxicological test methods. However, it is necessary to acknowledge that performing literature searches and compiling comprehensive, harmonized, and robust datasets required for retrospective analyses is a laborious task that will never capture all existing data and important metadata.</p>
<p>To more readily access individual or collated study data for conducting retrospective evaluations, databases of <italic>in vivo</italic> results have been developed and are routinely being updated. Some of these from the United States include the EPA&#x2019;s ToxRefDB (<xref ref-type="bibr" rid="B21">Feshuk et al., 2023a</xref>) and ToxValDB (<xref ref-type="bibr" rid="B59">Wall et al., 2025</xref>), the National Toxicology Program&#x2019;s Chemical Effects in Biological Systems database (CEBS), and the National Institutes of Health&#x2019;s Integrated Chemical Environment (<ext-link ext-link-type="uri" xlink:href="https://ice.ntp.niehs.nih.gov/">https://ice.ntp.niehs.nih.gov/</ext-link>). Additional resources from Europe include the European Chemical Agency&#x2019;s (ECHA) IUCLID database (<ext-link ext-link-type="uri" xlink:href="https://iuclid6.echa.europa.eu/">https://iuclid6.echa.europa.eu/</ext-link>) and the European Food Safety Authority&#x2019;s (EFSA) Open FoodTox (<ext-link ext-link-type="uri" xlink:href="https://www.efsa.europa.eu/en/microstrategy/openfoodtox">https://www.efsa.europa.eu/en/microstrategy/openfoodtox</ext-link>). While each of these databases includes summary endpoint metrics per chemical (e.g., LEL, NOAEL, LD50, <italic>etc.</italic>), a few of the resources (namely, ToxRefDB and CEBS) also have prioritized supporting detailed concentration-response data (e.g., response values per testing concentration) which can further support more granular retrospective evaluations. Of course, each of these databases has varying numbers of chemicals and studies, and compilation of a dataset for robust retrospective evaluation requires careful characterization and curation. 
Future retrospectives using data from such databases should be performed to gain further insights into reproducibility of study types or routes of administration that we were unable to consider here, e.g., reproductive and developmental, inhalation, or dermal repeat-dose toxicity studies. Investigation of whether reproducibility has improved over time, e.g., due to more stringent GLP requirements and given TG revisions, could also be considered. The increasing requirements for findability, accessibility, interoperability, and reusability of data (so-called &#x201c;FAIR&#x201d; principles) should help to facilitate future evaluations of study variability.</p>
</sec>
<sec id="s4-3">
<label>4.3</label>
<title>Implications for risk assessment and NAMs</title>
<p>Both traditional animal models and human-based NAMs can be used to derive protective points-of-departure for human health risk assessment. In recent work examining pharmaceuticals, preclinical animal and NAM-based points-of-departure were compared for their ability to predict doses at which human toxicity was observed (<xref ref-type="bibr" rid="B61">Weitekamp et al., 2025</xref>). NAM-based values were consistently lower than rodent-based values. For values from rodent studies converted to a human equivalent dose to be protective of human adverse effect levels for at least 95% of pharmaceuticals in the dataset, they needed to be divided by a composite factor of at least 100. However, for human NAM-based values converted to human equivalent doses to provide a similar level of protectivity, they needed to be divided by a factor of only 10. Further work is required to integrate uncertainties and variability to account for biological factors that are critical for understanding how NAM-based values can inform toxicity values when compared to traditional methods.</p>
<p>It is possible that study variability is accounted for in the conservatism of uncertainty factors applied when deriving toxicity values, such as for extrapolation from subchronic to chronic values or database uncertainty. That said, it should be emphasized that study variability, and the specific uncertainty introduced by study variability, is not typically directly recognized. In recent work to rapidly derive database-calibrated oral toxicity values, study variability is included in quantification of uncertainty in deriving these toxicity values (<xref ref-type="bibr" rid="B3">Aurisano et al., 2023</xref>; <xref ref-type="bibr" rid="B26">Harrill et al., 2026</xref>). In many regulatory paradigms, regulators must develop a weight-of-evidence strategy for decision-making, and this can include review of databases with heterogenous data and grouping or read-across approaches for the reuse of data from similar compounds. Therefore, the gross estimates for <italic>in vivo</italic> data variability provided in this review may be useful when considering current regulatory practice. Next-generation risk assessments focus on establishing scientific confidence through a weight-of-evidence approach that considers reproducibility and technical characterization in addition to biological relevance, mechanistic understanding, and fitness for purpose (<xref ref-type="bibr" rid="B58">van der Zalm et al., 2022</xref>; <xref ref-type="bibr" rid="B32">ICCVAM, 2024</xref>). Such considerations support innovation using modern science and facilitate regulatory acceptance of NAMs in safety assessments that ensure the protection of human health and the environment. However, building confidence in new test systems requires demonstration of robust, consistent, and interpretable outcomes. One suggested first step, which is already frequently being performed, would be to confirm assay performance in terms of replicability and minimal variability, relative to animal TG data variability. 
For example, variability can be more tightly controlled through rigorous and well-defined assay development frameworks for NAMs (<xref ref-type="bibr" rid="B32">ICCVAM, 2024</xref>). In many cases, studies involving NAMs include much higher replicate counts and internal positive controls. In addition, reporting of variability measures is becoming a common practice. Additional databases of <italic>in vitro</italic> data including PubChem (<xref ref-type="bibr" rid="B35">Kim et al., 2025</xref>) and the EPA&#x2019;s invitrodb (<xref ref-type="bibr" rid="B22">Feshuk et al., 2023b</xref>) contain some performance metrics and individual replicate data that can be leveraged for further retrospective evaluations of NAM variability. Such analyses of both biological and technical replicability can of course be included among the metrics for consideration of a NAM as &#x201c;better than&#x201d; traditional <italic>in vivo</italic> approaches. Building additional validation approaches beyond those that rely solely on direct comparisons to legacy animal tests and qualifying expectations of human-based NAMs to recapitulate rodent findings reflects a growing recognition that fundamentally different principles should be considered when evaluating NAMs.</p>
</sec>
</sec>
<sec sec-type="conclusion" id="s5">
<label>5</label>
<title>Conclusion</title>
<p>Here we provide a summary of variability analyses for <italic>in vivo</italic> TGs applied for human safety assessments. While such variability data are necessarily uncertain, they are nevertheless useful as legacy reference data and when considering current regulatory practice for data acceptance and weight-of-evidence assessment. We argue that benchmarking of NAMs must include integration of <italic>in vivo</italic> bioassay replicability. This means that the validation of NAMs for human toxicology should rely on multiple factors, including estimates of variability of NAM data; biological and mechanistic relevance of the NAM assay for the human target or process; an assessment of how the NAM data may achieve similar or better protection of human health when compared to animal study data; and the specific regulatory purpose of the data (<xref ref-type="bibr" rid="B58">van der Zalm et al., 2022</xref>). We hope the summarized variability metrics herein will help inform the regulatory acceptance of NAMs, particularly in regard to facilitating comparisons of replicability for NAMs vs. for historically used <italic>in vivo</italic> TGs. This is an important comparison to enable as NAMs may need to demonstrate &#x201c;equivalent or better&#x201d; assay performance.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s6">
<title>Data availability statement</title>
<p>Publicly available datasets were analyzed in this study. References to these data are included in the manuscript.</p>
</sec>
<sec sec-type="author-contributions" id="s7">
<title>Author contributions</title>
<p>AgK: Writing &#x2013; review and editing, Conceptualization, Writing &#x2013; original draft, Data curation, Formal Analysis, Methodology. AnK: Data curation, Writing &#x2013; original draft, Formal Analysis, Writing &#x2013; review and editing. OO: Resources, Writing &#x2013; review and editing. KF: Data curation, Writing &#x2013; original draft, Writing &#x2013; review and editing. MP: Writing &#x2013; review and editing, Writing &#x2013; original draft, Data curation. ER: Writing &#x2013; review and editing, Supervision, Project administration. DA: Writing &#x2013; original draft, Writing &#x2013; review and editing. HH: Writing &#x2013; review and editing, Supervision. NK: Conceptualization, Writing &#x2013; review and editing.</p>
</sec>
<ack>
<title>Acknowledgements</title>
<p>The authors thank Catherine Sprankle, Inotiv, for editorial assistance in preparing the manuscript. The authors gratefully acknowledge the support and funding provided by PETA Science Consortium International to complete this manuscript.</p>
</ack>
<sec sec-type="COI-statement" id="s9">
<title>Conflict of interest</title>
<p>Authors AgK, AnK, OO, ER and DA were employed by Inotiv.</p>
<p>The remaining author(s) declared that this work was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
<p>The handling editor AN declared a past co-authorship with the author KPF.</p>
<p>The authors AgK, HH, KF, and NK declared that they were editorial board members of Frontiers at the time of submission. This had no impact on the peer review process and the final decision.</p>
</sec>
<sec sec-type="ai-statement" id="s10">
<title>Generative AI statement</title>
<p>The author(s) declared that generative AI was not used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p>
</sec>
<sec sec-type="disclaimer" id="s11">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Adriaens</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Barroso</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Eskes</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Hoffmann</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>McNamee</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Al&#xe9;p&#xe9;e</surname>
<given-names>N.</given-names>
</name>
<etal/>
</person-group> (<year>2014</year>). <article-title>Retrospective analysis of the draize test for serious eye damage/eye irritation: importance of understanding the <italic>in vivo</italic> endpoints under UN GHS/EU CLP for the development and evaluation of <italic>in vitro</italic> test methods</article-title>. <source>Arch. Toxicol.</source> <volume>88</volume> (<issue>3</issue>), <fpage>701</fpage>&#x2013;<lpage>723</lpage>. <pub-id pub-id-type="doi">10.1007/s00204-013-1156-8</pub-id>
<pub-id pub-id-type="pmid">24374802</pub-id>
</mixed-citation>
</ref>
<ref id="B2">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ashby</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2002</year>). <article-title>Scientific issues associated with the validation of <italic>in vitro</italic> and <italic>in vivo</italic> methods for assessing endocrine disrupting chemicals</article-title>. <source>Toxicology</source> <volume>181&#x2013;182</volume>, <fpage>389</fpage>&#x2013;<lpage>397</lpage>. <pub-id pub-id-type="doi">10.1016/s0300-483x(02)00473-0</pub-id>
<pub-id pub-id-type="pmid">12505341</pub-id>
</mixed-citation>
</ref>
<ref id="B3">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Aurisano</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Jolliet</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Chiu</surname>
<given-names>W. A.</given-names>
</name>
<name>
<surname>Judson</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Jang</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Unnikrishnan</surname>
<given-names>A.</given-names>
</name>
<etal/>
</person-group> (<year>2023</year>). <article-title>Probabilistic points of departure and reference doses for characterizing human noncancer and developmental/reproductive effects for 10,145 chemicals</article-title>. <source>Environ. Health Perspect.</source> <volume>131</volume> (<issue>3</issue>), <fpage>37016</fpage>. <pub-id pub-id-type="doi">10.1289/EHP11524</pub-id>
<pub-id pub-id-type="pmid">36989077</pub-id>
</mixed-citation>
</ref>
<ref id="B4">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Baker</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>1,500 scientists lift the lid on reproducibility</article-title>. <source>Nature</source> <volume>533</volume> (<issue>7604</issue>), <fpage>452</fpage>&#x2013;<lpage>454</lpage>. <pub-id pub-id-type="doi">10.1038/533452a</pub-id>
<pub-id pub-id-type="pmid">27225100</pub-id>
</mixed-citation>
</ref>
<ref id="B5">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Barroso</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Pfannenbecker</surname>
<given-names>U.</given-names>
</name>
<name>
<surname>Adriaens</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Al&#xe9;p&#xe9;e</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Cluzel</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>De Smedt</surname>
<given-names>A.</given-names>
</name>
<etal/>
</person-group> (<year>2017</year>). <article-title>Cosmetics Europe compilation of historical serious eye damage/eye irritation <italic>in vivo</italic> data analysed by drivers of classification to support the selection of chemicals for development and evaluation of alternative methods/strategies: the draize eye test reference database (DRD)</article-title>. <source>Arch. Toxicol.</source> <volume>91</volume> (<issue>2</issue>), <fpage>521</fpage>&#x2013;<lpage>547</lpage>. <pub-id pub-id-type="doi">10.1007/s00204-016-1679-x</pub-id>
<pub-id pub-id-type="pmid">26997338</pub-id>
</mixed-citation>
</ref>
<ref id="B6">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Begley</surname>
<given-names>C. G.</given-names>
</name>
<name>
<surname>Ioannidis</surname>
<given-names>J. P. A.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Reproducibility in science</article-title>. <source>Circulation Res.</source> <volume>116</volume> (<issue>1</issue>), <fpage>116</fpage>&#x2013;<lpage>126</lpage>. <pub-id pub-id-type="doi">10.1161/CIRCRESAHA.114.303819</pub-id>
<pub-id pub-id-type="pmid">25552691</pub-id>
</mixed-citation>
</ref>
<ref id="B7">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Blein</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Adolphe</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Lakhdar</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Cambar</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Gubanski</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Castelli</surname>
<given-names>D.</given-names>
</name>
<etal/>
</person-group> (<year>1991</year>). <article-title>Correlation and validation of alternative methods to the draize eye irritation test (OPAL project)</article-title>. <source>Toxicol. In Vitro</source> <volume>5</volume> (<issue>5&#x2013;6</issue>), <fpage>555</fpage>&#x2013;<lpage>557</lpage>. <pub-id pub-id-type="doi">10.1016/0887-2333(91)90092-r</pub-id>
<pub-id pub-id-type="pmid">20732076</pub-id>
</mixed-citation>
</ref>
<ref id="B8">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Browne</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Judson</surname>
<given-names>R. S.</given-names>
</name>
<name>
<surname>Casey</surname>
<given-names>W. M.</given-names>
</name>
<name>
<surname>Kleinstreuer</surname>
<given-names>N. C.</given-names>
</name>
<name>
<surname>Thomas</surname>
<given-names>R. S.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Screening chemicals for estrogen receptor bioactivity using a computational model</article-title>. <source>Environ. Sci. Technol.</source> <volume>49</volume> (<issue>14</issue>), <fpage>8804</fpage>&#x2013;<lpage>8814</lpage>. <pub-id pub-id-type="doi">10.1021/acs.est.5b02641</pub-id>
<pub-id pub-id-type="pmid">26066997</pub-id>
</mixed-citation>
</ref>
<ref id="B9">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Buelke-Sam</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Bryant</surname>
<given-names>H. U.</given-names>
</name>
<name>
<surname>Francis</surname>
<given-names>P. C.</given-names>
</name>
</person-group> (<year>1998</year>). <article-title>The selective estrogen receptor modulator, raloxifene: an overview of nonclinical pharmacology and reproductive and developmental testing</article-title>. <source>Reprod. Toxicol.</source> <volume>12</volume> (<issue>3</issue>), <fpage>217</fpage>&#x2013;<lpage>221</lpage>. <pub-id pub-id-type="doi">10.1016/s0890-6238(98)00003-3</pub-id>
<pub-id pub-id-type="pmid">9628546</pub-id>
</mixed-citation>
</ref>
<ref id="B10">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Coperchini</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Greco</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Caccavale</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Chiardi</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Croce</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Teliti</surname>
<given-names>M.</given-names>
</name>
<etal/>
</person-group> (<year>2025</year>). <article-title>Sexual dimorphism in thyroid cancer: evidence from preclinical studies</article-title>. <source>Endocr. Relat. Cancer</source> <volume>32</volume> (<issue>5</issue>), <fpage>e240348</fpage>. <pub-id pub-id-type="doi">10.1530/ERC-24-0348</pub-id>
<pub-id pub-id-type="pmid">40197424</pub-id>
</mixed-citation>
</ref>
<ref id="B11">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cormier</surname>
<given-names>E. M.</given-names>
</name>
<name>
<surname>Parker</surname>
<given-names>R. D.</given-names>
</name>
<name>
<surname>Henson</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Cruse</surname>
<given-names>L. W.</given-names>
</name>
<name>
<surname>Merritt</surname>
<given-names>A. K.</given-names>
</name>
<name>
<surname>Bruce</surname>
<given-names>R. D.</given-names>
</name>
<etal/>
</person-group> (<year>1996</year>). <article-title>Determination of the intra- and interlaboratory reproducibility of the low volume eye test and its statistical relationship to the draize eye test</article-title>. <source>Regul. Toxicol. Pharmacol.</source> <volume>23</volume> (<issue>2</issue>), <fpage>156</fpage>&#x2013;<lpage>161</lpage>. <pub-id pub-id-type="doi">10.1006/rtph.1996.0037</pub-id>
<pub-id pub-id-type="pmid">8661334</pub-id>
</mixed-citation>
</ref>
<ref id="B12">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Crofton</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2001</year>). <source>Developmental neurotoxicity testing guidelines: variability in morphometric assessments of neuropathology</source>. <publisher-loc>San Francisco, CA</publisher-loc>: <publisher-name>Society of Toxicology</publisher-name>. <comment>Available online at: <ext-link ext-link-type="uri" xlink:href="https://www.researchgate.net/publication/394530164_DEVELOPMENTAL_NEUROTOXICITY_TESTING_GUIDELINES_VARIABILITY_IN_MORPHOMETRIC_ASSESSMENTS_OF_NEUROPATHOLOGY?channel=doi&#x26;linkId=68a3451e7984e374ace9693a&#x26;showFulltext=true">https://www.researchgate.net/publication/394530164_DEVELOPMENTAL_NEUROTOXICITY_TESTING_GUIDELINES_VARIABILITY_IN_MORPHOMETRIC_ASSESSMENTS_OF_NEUROPATHOLOGY?channel&#x3d;doi&#x26;linkId&#x3d;68a3451e7984e374ace9693a&#x26;showFulltext&#x3d;true</ext-link> (Accessed February 17, 2026).</comment>
</mixed-citation>
</ref>
<ref id="B13">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Crofton</surname>
<given-names>K. M.</given-names>
</name>
<name>
<surname>Howard</surname>
<given-names>J. L.</given-names>
</name>
<name>
<surname>Moser</surname>
<given-names>V. C.</given-names>
</name>
<name>
<surname>Gill</surname>
<given-names>M. W.</given-names>
</name>
<name>
<surname>Reiter</surname>
<given-names>L. W.</given-names>
</name>
<name>
<surname>Tilson</surname>
<given-names>H. A.</given-names>
</name>
<etal/>
</person-group> (<year>1991</year>). <article-title>Interlaboratory comparison of motor activity experiments: implications for neurotoxicological assessments</article-title>. <source>Neurotoxicol Teratol.</source> <volume>13</volume> (<issue>6</issue>), <fpage>599</fpage>&#x2013;<lpage>609</lpage>. <pub-id pub-id-type="doi">10.1016/0892-0362(91)90043-v</pub-id>
<pub-id pub-id-type="pmid">1779947</pub-id>
</mixed-citation>
</ref>
<ref id="B14">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Crofton</surname>
<given-names>K. M.</given-names>
</name>
<name>
<surname>Makris</surname>
<given-names>S. L.</given-names>
</name>
<name>
<surname>Sette</surname>
<given-names>W. F.</given-names>
</name>
<name>
<surname>Mendez</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Raffaele</surname>
<given-names>K. C.</given-names>
</name>
</person-group> (<year>2004</year>). <article-title>A qualitative retrospective analysis of positive control data in developmental neurotoxicity studies</article-title>. <source>Neurotoxicol. Teratol.</source> <volume>26</volume> (<issue>3</issue>), <fpage>345</fpage>&#x2013;<lpage>352</lpage>. <pub-id pub-id-type="doi">10.1016/j.ntt.2004.02.007</pub-id>
<pub-id pub-id-type="pmid">15113596</pub-id>
</mixed-citation>
</ref>
<ref id="B15">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Darney</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Turco</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Buratti</surname>
<given-names>F. M.</given-names>
</name>
<name>
<surname>Di Consiglio</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Vichi</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Roudot</surname>
<given-names>A. C.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Human variability in influx and efflux transporters in relation to uncertainty factors for chemical risk assessment</article-title>. <source>Food Chem. Toxicol.</source> <volume>140</volume>, <fpage>111305</fpage>. <pub-id pub-id-type="doi">10.1016/j.fct.2020.111305</pub-id>
<pub-id pub-id-type="pmid">32234423</pub-id>
</mixed-citation>
</ref>
<ref id="B16">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Di Consiglio</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Darney</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Buratti</surname>
<given-names>F. M.</given-names>
</name>
<name>
<surname>Turco</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Vichi</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Testai</surname>
<given-names>E.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Human variability in carboxylesterases and carboxylesterase-related uncertainty factors for chemical risk assessment</article-title>. <source>Toxicol. Lett.</source> <volume>350</volume>, <fpage>162</fpage>&#x2013;<lpage>170</lpage>. <pub-id pub-id-type="doi">10.1016/j.toxlet.2021.07.005</pub-id>
<pub-id pub-id-type="pmid">34256091</pub-id>
</mixed-citation>
</ref>
<ref id="B17">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dorne</surname>
<given-names>J. L.</given-names>
</name>
<name>
<surname>Walton</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Renwick</surname>
<given-names>A. G.</given-names>
</name>
</person-group> (<year>2001</year>). <article-title>Human variability in glucuronidation in relation to uncertainty factors for risk assessment</article-title>. <source>Food Chem. Toxicol.</source> <volume>39</volume> (<issue>12</issue>), <fpage>1153</fpage>&#x2013;<lpage>1173</lpage>. <pub-id pub-id-type="doi">10.1016/s0278-6915(01)00087-4</pub-id>
<pub-id pub-id-type="pmid">11696390</pub-id>
</mixed-citation>
</ref>
<ref id="B18">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dumont</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Barroso</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Matys</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Worth</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Casati</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Analysis of the local lymph node assay (LLNA) variability for assessing the prediction of skin sensitisation potential and potency of chemicals with non-animal approaches</article-title>. <source>Toxicol. In Vitro</source> <volume>34</volume>, <fpage>220</fpage>&#x2013;<lpage>228</lpage>. <pub-id pub-id-type="doi">10.1016/j.tiv.2016.04.008</pub-id>
<pub-id pub-id-type="pmid">27085510</pub-id>
</mixed-citation>
</ref>
<ref id="B19">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Earl</surname>
<given-names>L. K.</given-names>
</name>
<name>
<surname>Dickens</surname>
<given-names>A. D.</given-names>
</name>
<name>
<surname>Rowson</surname>
<given-names>M. J.</given-names>
</name>
</person-group> (<year>1997</year>). <article-title>A critical analysis of the rabbit eye irritation test variability and its impact on the validation of alternative methods</article-title>. <source>Toxicol. In Vitro</source> <volume>11</volume> (<issue>3</issue>), <fpage>295</fpage>&#x2013;<lpage>304</lpage>. <pub-id pub-id-type="doi">10.1016/s0887-2333(97)00016-7</pub-id>
<pub-id pub-id-type="pmid">20654315</pub-id>
</mixed-citation>
</ref>
<ref id="B20">
<mixed-citation publication-type="book">
<collab>EPA</collab> (<year>2026</year>). <publisher-name>Code of Federal Regulations</publisher-name>. <comment>Available online at: <ext-link ext-link-type="uri" xlink:href="https://www.ecfr.gov/current/title-40/chapter-I/subchapter-R/part-792">https://www.ecfr.gov/current/title-40/chapter-I/subchapter-R/part-792</ext-link> (Accessed February 17, 2026).</comment>
</mixed-citation>
</ref>
<ref id="B21">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Feshuk</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Kolaczkowski</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Watford</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Paul Friedman</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2023a</year>). <article-title>ToxRefDB v2.1: update to curated <italic>in vivo</italic> study data in the toxicity reference database</article-title>. <source>Front. Toxicol.</source> <volume>5</volume>, <fpage>1260305</fpage>. <pub-id pub-id-type="doi">10.3389/ftox.2023.1260305</pub-id>
<pub-id pub-id-type="pmid">37753522</pub-id>
</mixed-citation>
</ref>
<ref id="B22">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Feshuk</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Kolaczkowski</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Dunham</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Davidson-Fritz</surname>
<given-names>S. E.</given-names>
</name>
<name>
<surname>Carstens</surname>
<given-names>K. E.</given-names>
</name>
<name>
<surname>Brown</surname>
<given-names>J.</given-names>
</name>
<etal/>
</person-group> (<year>2023b</year>). <article-title>The ToxCast pipeline: updates to curve-fitting approaches and database structure</article-title>. <source>Front. Toxicol.</source> <volume>5</volume>, <fpage>1275980</fpage>. <pub-id pub-id-type="doi">10.3389/ftox.2023.1275980</pub-id>
<pub-id pub-id-type="pmid">37808181</pub-id>
</mixed-citation>
</ref>
<ref id="B23">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Foley</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Hopperstad</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Gamble</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Lynn</surname>
<given-names>S. G.</given-names>
</name>
<name>
<surname>Thomas</surname>
<given-names>R. S.</given-names>
</name>
<name>
<surname>Deisenroth</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Technical evaluation and standardization of the human thyroid microtissue assay</article-title>. <source>Toxicol. Sci.</source> <volume>199</volume> (<issue>1</issue>), <fpage>89</fpage>&#x2013;<lpage>107</lpage>. <pub-id pub-id-type="doi">10.1093/toxsci/kfae014</pub-id>
<pub-id pub-id-type="pmid">38310358</pub-id>
</mixed-citation>
</ref>
<ref id="B24">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gamble</surname>
<given-names>J. T.</given-names>
</name>
<name>
<surname>Rogers</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Breaux</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Feshuk</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Thunes</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Friedman</surname>
<given-names>K. P.</given-names>
</name>
<etal/>
</person-group> (<year>2025</year>). <article-title>Screening industrial chemicals for human developmental toxicity in the DevTox germ layer reporter platform</article-title>. <source>Toxicology</source> <volume>517</volume>, <fpage>154232</fpage>. <pub-id pub-id-type="doi">10.1016/j.tox.2025.154232</pub-id>
<pub-id pub-id-type="pmid">40645555</pub-id>
</mixed-citation>
</ref>
<ref id="B25">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gottmann</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Kramer</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Pfahringer</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Helma</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2001</year>). <article-title>Data quality in predictive toxicology: reproducibility of rodent carcinogenicity experiments</article-title>. <source>Environ. Health Perspect.</source> <volume>109</volume> (<issue>5</issue>), <fpage>509</fpage>&#x2013;<lpage>514</lpage>. <pub-id pub-id-type="doi">10.1289/ehp.01109509</pub-id>
<pub-id pub-id-type="pmid">11401763</pub-id>
</mixed-citation>
</ref>
<ref id="B26">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Harrill</surname>
<given-names>A. H.</given-names>
</name>
<name>
<surname>Hagiwara</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Weitekamp</surname>
<given-names>C. A.</given-names>
</name>
<name>
<surname>Stanish</surname>
<given-names>P. C.</given-names>
</name>
<name>
<surname>Wall</surname>
<given-names>J. T.</given-names>
</name>
<name>
<surname>Sayre</surname>
<given-names>R. R.</given-names>
</name>
<etal/>
</person-group> (<year>2026</year>). <article-title>Database-calibrated toxicity values for human health assessment based on existing toxicology data for one thousand chemicals</article-title>. <source>J. Toxicol. Environ. Health B</source>, <fpage>1</fpage>&#x2013;<lpage>40</lpage>. <pub-id pub-id-type="doi">10.1080/10937404.2025.2552108</pub-id>
<pub-id pub-id-type="pmid">40963224</pub-id>
</mixed-citation>
</ref>
<ref id="B27">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Haseman</surname>
<given-names>J. K.</given-names>
</name>
<name>
<surname>Lockhart</surname>
<given-names>A. M.</given-names>
</name>
</person-group> (<year>1993</year>). <article-title>Correlations between chemically related site-specific carcinogenic effects in long-term studies in rats and mice</article-title>. <source>Environ. Health Perspect.</source> <volume>101</volume> (<issue>1</issue>), <fpage>50</fpage>&#x2013;<lpage>54</lpage>. <pub-id pub-id-type="doi">10.1289/ehp.9310150</pub-id>
<pub-id pub-id-type="pmid">8513764</pub-id>
</mixed-citation>
</ref>
<ref id="B28">
<mixed-citation publication-type="web">
<person-group person-group-type="author">
<name>
<surname>Haven</surname>
<given-names>T. L.</given-names>
</name>
<name>
<surname>Ioannidis</surname>
<given-names>J. P. A.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>Reproducibility failure in biomedical research: problems and solutions</article-title>. <comment>Available online at: <ext-link ext-link-type="uri" xlink:href="https://www.annualreviews.org/content/journals/10.1146/annurev-med-050124-050859">https://www.annualreviews.org/content/journals/10.1146/annurev-med-050124-050859</ext-link> (Accessed February 17, 2026).</comment>
</mixed-citation>
</ref>
<ref id="B29">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hoffmann</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Cole</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Hartung</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2005</year>). <article-title>Skin irritation: prevalence, variability, and regulatory classification of existing <italic>in vivo</italic> data from industrial chemicals</article-title>. <source>Regul. Toxicol. Pharmacol.</source> <volume>41</volume> (<issue>3</issue>), <fpage>159</fpage>&#x2013;<lpage>166</lpage>. <pub-id pub-id-type="doi">10.1016/j.yrtph.2004.11.003</pub-id>
<pub-id pub-id-type="pmid">15748793</pub-id>
</mixed-citation>
</ref>
<ref id="B30">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hoffmann</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Kinsner-Ovaskainen</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Prieto</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Mangelsdorf</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Bieler</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Cole</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>Acute oral toxicity: variability, reliability, relevance and interspecies comparison of rodent LD50 data from literature surveyed for the ACuteTox project</article-title>. <source>Regul. Toxicol. Pharmacol.</source> <volume>58</volume> (<issue>3</issue>), <fpage>395</fpage>&#x2013;<lpage>407</lpage>. <pub-id pub-id-type="doi">10.1016/j.yrtph.2010.08.004</pub-id>
<pub-id pub-id-type="pmid">20709128</pub-id>
</mixed-citation>
</ref>
<ref id="B31">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Huff</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Cirvello</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Haseman</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Bucher</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>1991</year>). <article-title>Chemicals associated with site-specific neoplasia in 1394 long-term carcinogenesis experiments in laboratory rodents</article-title>. <source>Environ. Health Perspect.</source> <volume>93</volume>, <fpage>247</fpage>&#x2013;<lpage>270</lpage>. <pub-id pub-id-type="doi">10.1289/ehp.9193247</pub-id>
<pub-id pub-id-type="pmid">1773796</pub-id>
</mixed-citation>
</ref>
<ref id="B32">
<mixed-citation publication-type="web">
<collab>ICCVAM</collab> (<year>2024</year>). <article-title>Validation, qualification, and regulatory acceptance of new approach methodologies</article-title>. <pub-id pub-id-type="doi">10.22427/NICEATM-2</pub-id>
</mixed-citation>
</ref>
<ref id="B33">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Karmaus</surname>
<given-names>A. L.</given-names>
</name>
<name>
<surname>Mansouri</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>To</surname>
<given-names>K. T.</given-names>
</name>
<name>
<surname>Blake</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Fitzpatrick</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Strickland</surname>
<given-names>J.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>Evaluation of variability across rat acute oral systemic toxicity studies</article-title>. <source>Toxicol. Sci.</source> <volume>188</volume> (<issue>1</issue>), <fpage>34</fpage>&#x2013;<lpage>47</lpage>. <pub-id pub-id-type="doi">10.1093/toxsci/kfac042</pub-id>
<pub-id pub-id-type="pmid">35426934</pub-id>
</mixed-citation>
</ref>
<ref id="B34">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kasteel</surname>
<given-names>E. E. J.</given-names>
</name>
<name>
<surname>Darney</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Kramer</surname>
<given-names>N. I.</given-names>
</name>
<name>
<surname>Dorne</surname>
<given-names>J. L. C. M.</given-names>
</name>
<name>
<surname>Lautz</surname>
<given-names>L. S.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Human variability in isoform-specific UDP-glucuronosyltransferases: markers of acute and chronic exposure, polymorphisms and uncertainty factors</article-title>. <source>Arch. Toxicol.</source> <volume>94</volume> (<issue>8</issue>), <fpage>2637</fpage>&#x2013;<lpage>2661</lpage>. <pub-id pub-id-type="doi">10.1007/s00204-020-02765-8</pub-id>
<pub-id pub-id-type="pmid">32415340</pub-id>
</mixed-citation>
</ref>
<ref id="B35">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kim</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Cheng</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Gindulyte</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>He</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>He</surname>
<given-names>S.</given-names>
</name>
<etal/>
</person-group> (<year>2025</year>). <article-title>PubChem 2025 update</article-title>. <source>Nucleic Acids Res.</source> <volume>53</volume> (<issue>D1</issue>), <fpage>D1516</fpage>&#x2013;<lpage>D1525</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkae1059</pub-id>
<pub-id pub-id-type="pmid">39558165</pub-id>
</mixed-citation>
</ref>
<ref id="B36">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kleinstreuer</surname>
<given-names>N. C.</given-names>
</name>
<name>
<surname>Ceger</surname>
<given-names>P. C.</given-names>
</name>
<name>
<surname>Allen</surname>
<given-names>D. G.</given-names>
</name>
<name>
<surname>Strickland</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Chang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Hamm</surname>
<given-names>J. T.</given-names>
</name>
<etal/>
</person-group> (<year>2016</year>). <article-title>A curated database of rodent uterotrophic bioactivity</article-title>. <source>Environ. Health Perspect.</source> <volume>124</volume> (<issue>5</issue>), <fpage>556</fpage>&#x2013;<lpage>562</lpage>. <pub-id pub-id-type="doi">10.1289/ehp.1510183</pub-id>
<pub-id pub-id-type="pmid">26431337</pub-id>
</mixed-citation>
</ref>
<ref id="B37">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kreutz</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Chang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Hogberg</surname>
<given-names>H. T.</given-names>
</name>
<name>
<surname>Wetmore</surname>
<given-names>B. A.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Advancing understanding of human variability through toxicokinetic modeling, <italic>in vitro</italic>-<italic>in vivo</italic> extrapolation, and new approach methodologies</article-title>. <source>Hum. Genomics</source> <volume>18</volume>, <fpage>129</fpage>. <pub-id pub-id-type="doi">10.1186/s40246-024-00691-9</pub-id>
<pub-id pub-id-type="pmid">39574200</pub-id>
</mixed-citation>
</ref>
<ref id="B38">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kukull</surname>
<given-names>W. A.</given-names>
</name>
<name>
<surname>Ganguli</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Generalizability: the trees, the forest, and the low-hanging fruit</article-title>. <source>Neurology</source> <volume>78</volume> (<issue>23</issue>), <fpage>1886</fpage>&#x2013;<lpage>1891</lpage>. <pub-id pub-id-type="doi">10.1212/WNL.0b013e318258f812</pub-id>
<pub-id pub-id-type="pmid">22665145</pub-id>
</mixed-citation>
</ref>
<ref id="B39">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lubet</surname>
<given-names>R. A.</given-names>
</name>
<name>
<surname>Steele</surname>
<given-names>V. E.</given-names>
</name>
<name>
<surname>Shoemaker</surname>
<given-names>R. H.</given-names>
</name>
<name>
<surname>Grubbs</surname>
<given-names>C. J.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Screening of chemopreventive agents in animal models: results on reproducibility, agents of a given class, and agents tested during tumor progression</article-title>. <source>Cancer Prev. Res.</source> <volume>11</volume> (<issue>10</issue>), <fpage>595</fpage>&#x2013;<lpage>606</lpage>. <pub-id pub-id-type="doi">10.1158/1940-6207.CAPR-18-0084</pub-id>
<pub-id pub-id-type="pmid">30045934</pub-id>
</mixed-citation>
</ref>
<ref id="B40">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Luechtefeld</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Maertens</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Russo</surname>
<given-names>D. P.</given-names>
</name>
<name>
<surname>Rovida</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Zhu</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Hartung</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Analysis of Draize eye irritation testing and its prediction by mining publicly available 2008&#x2013;2014 REACH data</article-title>. <source>ALTEX</source> <volume>33</volume> (<issue>2</issue>), <fpage>123</fpage>&#x2013;<lpage>134</lpage>. <pub-id pub-id-type="doi">10.14573/altex.1510053</pub-id>
<pub-id pub-id-type="pmid">26863293</pub-id>
</mixed-citation>
</ref>
<ref id="B41">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ly Pham</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Watford</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Pradeep</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Martin</surname>
<given-names>M. T.</given-names>
</name>
<name>
<surname>Thomas</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Judson</surname>
<given-names>R.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Variability in <italic>in vivo</italic> studies: defining the upper limit of performance for predictions of systemic effect levels</article-title>. <source>Comput. Toxicol.</source> <volume>15</volume>, <fpage>100126</fpage>. <pub-id pub-id-type="doi">10.1016/j.comtox.2020.100126</pub-id>
<pub-id pub-id-type="pmid">33426408</pub-id>
</mixed-citation>
</ref>
<ref id="B42">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Miedel</surname>
<given-names>M. T.</given-names>
</name>
<name>
<surname>Varmazyad</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Xia</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Brooks</surname>
<given-names>M. M.</given-names>
</name>
<name>
<surname>Gavlock</surname>
<given-names>D. C.</given-names>
</name>
<name>
<surname>Reese</surname>
<given-names>C.</given-names>
</name>
<etal/>
</person-group> (<year>2025</year>). <article-title>Validation of microphysiological systems for interpreting patient heterogeneity requires robust reproducibility analytics and experimental metadata</article-title>. <source>Cell Rep. Methods</source> <volume>5</volume> (<issue>4</issue>), <fpage>101028</fpage>. <pub-id pub-id-type="doi">10.1016/j.crmeth.2025.101028</pub-id>
<pub-id pub-id-type="pmid">40233763</pub-id>
</mixed-citation>
</ref>
<ref id="B43">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Moser</surname>
<given-names>V. C.</given-names>
</name>
<name>
<surname>Bailey</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Bowers</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Raffaele</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Crofton</surname>
<given-names>K. M.</given-names>
</name>
<name>
<surname>Gilbert</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2016</year>). &#x201c;<article-title>Developmental neurotoxicity study guidance document</article-title>,&#x201d; in <source>North American Free Trade Agreement (NAFTA) technical working group on pesticides</source>. <comment>Available online at: <ext-link ext-link-type="uri" xlink:href="https://www.epa.gov/sites/default/files/2017-02/documents/developmental_neurotoxicity_study_internal_guidance_document_final_0.pdf">https://www.epa.gov/sites/default/files/2017-02/documents/developmental_neurotoxicity_study_internal_guidance_document_final_0.pdf</ext-link> (Accessed February 17, 2026).</comment>
</mixed-citation>
</ref>
<ref id="B44">
<mixed-citation publication-type="web">
<collab>National Academies of Sciences, Engineering, and Medicine</collab> (<year>2019</year>). <article-title>Reproducibility and replicability in science</article-title>. <comment>Available online at: <ext-link ext-link-type="uri" xlink:href="https://nap.nationalacademies.org/catalog/25303/reproducibility-and-replicability-in-science">https://nap.nationalacademies.org/catalog/25303/reproducibility-and-replicability-in-science</ext-link> (Accessed February 17, 2026).</comment>
</mixed-citation>
</ref>
<ref id="B46">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Oyetade</surname>
<given-names>O. B.</given-names>
</name>
</person-group> (<year>2023</year>). &#x201c;<article-title>Variability of <italic>in vivo</italic> toxicology studies: impact on NAMs</article-title>,&#x201d; in <source>Poster presented at: 12th world congress on alternatives and animal use in the life sciences (WC12)</source>. <comment>Available online at: <ext-link ext-link-type="uri" xlink:href="https://ntp.niehs.nih.gov/sites/default/files/2023-09/Oyetade-WC12-poster-FD.pdf">https://ntp.niehs.nih.gov/sites/default/files/2023-09/Oyetade-WC12-poster-FD.pdf</ext-link>.</comment>
</mixed-citation>
</ref>
<ref id="B47">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Paul Friedman</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Foster</surname>
<given-names>M. J.</given-names>
</name>
<name>
<surname>Pham</surname>
<given-names>L. L.</given-names>
</name>
<name>
<surname>Feshuk</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Watford</surname>
<given-names>S. M.</given-names>
</name>
<name>
<surname>Wambaugh</surname>
<given-names>J. F.</given-names>
</name>
<etal/>
</person-group> (<year>2023</year>). <article-title>Reproducibility of organ-level effects in repeat dose animal studies</article-title>. <source>Comput. Toxicol.</source> <volume>28</volume>, <fpage>100287</fpage>. <pub-id pub-id-type="doi">10.1016/j.comtox.2023.100287</pub-id>
<pub-id pub-id-type="pmid">37990691</pub-id>
</mixed-citation>
</ref>
<ref id="B48">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Poland</surname>
<given-names>C. A.</given-names>
</name>
<name>
<surname>Miller</surname>
<given-names>M. R.</given-names>
</name>
<name>
<surname>Duffin</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Cassee</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>The elephant in the room: reproducibility in toxicology</article-title>. <source>Part. Fibre Toxicol.</source> <volume>11</volume> (<issue>1</issue>), <fpage>42</fpage>. <pub-id pub-id-type="doi">10.1186/s12989-014-0042-8</pub-id>
<pub-id pub-id-type="pmid">25149182</pub-id>
</mixed-citation>
</ref>
<ref id="B49">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pradeep</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Mansouri</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Patlewicz</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Judson</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>A systematic evaluation of analogs and automated read-across prediction of estrogenicity: a case study using hindered phenols</article-title>. <source>Comput. Toxicol.</source> <volume>4</volume>, <fpage>22</fpage>&#x2013;<lpage>30</lpage>. <pub-id pub-id-type="doi">10.1016/j.comtox.2017.09.001</pub-id>
<pub-id pub-id-type="pmid">30057968</pub-id>
</mixed-citation>
</ref>
<ref id="B50">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Quignot</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Wi&#x119;cek</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Lautz</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Dorne</surname>
<given-names>J. L.</given-names>
</name>
<name>
<surname>Amzal</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Inter-phenotypic differences in CYP2C9 and CYP2C19 metabolism: Bayesian meta-regression of human population variability in kinetics and application in chemical risk assessment</article-title>. <source>Toxicol. Lett.</source> <volume>337</volume>, <fpage>111</fpage>&#x2013;<lpage>120</lpage>. <pub-id pub-id-type="doi">10.1016/j.toxlet.2020.11.016</pub-id>
<pub-id pub-id-type="pmid">33232775</pub-id>
</mixed-citation>
</ref>
<ref id="B51">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Raffaele</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Gilbert</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Crofton</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Makris</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Sette</surname>
<given-names>W.</given-names>
</name>
</person-group> (<year>2004</year>). <article-title>Learning and memory tests in developmental neurotoxicity testing: a cross-laboratory comparison of control data, poster presentation No. 1342, 43rd annual meeting of the Society of Toxicology</article-title>. <source>Toxicologist.</source> <volume>78</volume>.
</mixed-citation>
</ref>
<ref id="B52">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Raitano</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Pronk</surname>
<given-names>T. E.</given-names>
</name>
<name>
<surname>Battistelli</surname>
<given-names>C. L.</given-names>
</name>
<name>
<surname>Bossa</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Hatzi</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Nikolopoulou</surname>
<given-names>D.</given-names>
</name>
<etal/>
</person-group> (<year>2026</year>). <article-title>Variability and uncertainty of data from genotoxicity test guidelines: what we know and why it matters</article-title>. <source>Arch. Toxicol.</source> <pub-id pub-id-type="doi">10.1007/s00204-025-04277-9</pub-id>
<pub-id pub-id-type="pmid">41699308</pub-id>
</mixed-citation>
</ref>
<ref id="B53">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Roberts</surname>
<given-names>D. W.</given-names>
</name>
<name>
<surname>Api</surname>
<given-names>A. M.</given-names>
</name>
<name>
<surname>Aptula</surname>
<given-names>A. O.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Chemical applicability domain of the local lymph node assay (LLNA) for skin sensitisation potency. Part 2. The biological variability of the murine local lymph node assay (LLNA) for skin sensitisation</article-title>. <source>Regul. Toxicol. Pharmacol.</source> <volume>80</volume>, <fpage>255</fpage>&#x2013;<lpage>259</lpage>. <pub-id pub-id-type="doi">10.1016/j.yrtph.2016.07.013</pub-id>
<pub-id pub-id-type="pmid">27470439</pub-id>
</mixed-citation>
</ref>
<ref id="B54">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rooney</surname>
<given-names>J. P.</given-names>
</name>
<name>
<surname>Choksi</surname>
<given-names>N. Y.</given-names>
</name>
<name>
<surname>Ceger</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Daniel</surname>
<given-names>A. B.</given-names>
</name>
<name>
<surname>Truax</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Allen</surname>
<given-names>D.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Analysis of variability in the rabbit skin irritation assay</article-title>. <source>Regul. Toxicol. Pharmacol.</source> <volume>122</volume>, <fpage>104920</fpage>. <pub-id pub-id-type="doi">10.1016/j.yrtph.2021.104920</pub-id>
<pub-id pub-id-type="pmid">33757807</pub-id>
</mixed-citation>
</ref>
<ref id="B55">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sch&#xfc;r</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Paparella</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Fa&#xdf;bender</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Stoddart</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Baity-Jesi</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Schirmer</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>Daphnids can safeguard the use of alternative bioassays to the acute fish toxicity test: a focus on neurotoxicity</article-title>. <source>Environ. Toxicol. Chem.</source> <volume>44</volume> (<issue>9</issue>), <fpage>2635</fpage>&#x2013;<lpage>2647</lpage>. <pub-id pub-id-type="doi">10.1093/etojnl/vgaf014</pub-id>
<pub-id pub-id-type="pmid">39836637</pub-id>
</mixed-citation>
</ref>
<ref id="B56">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Sette</surname>
<given-names>W. F.</given-names>
</name>
<name>
<surname>Crofton</surname>
<given-names>K. M.</given-names>
</name>
<name>
<surname>Makris</surname>
<given-names>S. L.</given-names>
</name>
<name>
<surname>Doherty</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Raffaele</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2004</year>). <article-title>Auditory startle reflex habituation in developmental neurotoxicity testing: a cross-laboratory comparison of control data</article-title>. <source>Poster presentation No. 1341, 43rd Annual Meeting of the Society of Toxicology</source>. <publisher-loc>Baltimore, MD</publisher-loc>: <publisher-name>Society of Toxicology</publisher-name>. <comment>Available online at: <ext-link ext-link-type="uri" xlink:href="https://www.researchgate.net/profile/Kevin-Crofton/publication/284695487_Auditory_startle_reflex_habituation_in_developmental_neurotoxicity_testing_A_cross-laboratory_comparison_of_control_data_poster_presentation_No_1341_43rd_annual_meeting_of_the_Society_of_Toxicology/links/5751773c08ae10d9336edf2c/Auditory-startle-reflex-habituation-in-developmental-neurotoxicity-testing-A-cross-laboratory-comparison-of-control-data-poster-presentation-No-1341-43rd-annual-meeting-of-the-Society-of-Toxicology.pdf?origin=publicationSearch&#x26;_rtd=e30%3D&#x26;_tp=eyJjb250ZXh0Ijp7ImZpcnN0UGFnZSI6ImhvbWUiLCJwYWdlIjoic2VhcmNoIiwicG9zaXRpb24iOiJwYWdlSGVhZGVyIn19">https://www.researchgate.net/profile/Kevin-Crofton/publication/284695487_Auditory_startle_reflex_habituation_in_developmental_neurotoxicity_testing_A_cross-laboratory_comparison_of_control_data_poster_presentation_No_1341_43rd_annual_meeting_of_the_Society_of_Toxicology/links/5751773c08ae10d9336edf2c/Auditory-startle-reflex-habituation-in-developmental-neurotoxicity-testing-A-cross-laboratory-comparison-of-control-data-poster-presentation-No-1341-43rd-annual-meeting-of-the-Society-of-Toxicology.pdf?origin&#x3d;publicationSearch&#x26;_rtd&#x3d;e30%3D&#x26;_tp&#x3d;eyJjb250ZXh0Ijp7ImZpcnN0UGFnZSI6ImhvbWUiLCJwYWdlIjoic2VhcmNoIiwicG9zaXRpb24iOiJwYWdlSGVhZGVyIn19</ext-link> (Accessed February 17, 2026).</comment>
</mixed-citation>
</ref>
<ref id="B57">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Strikwold</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Spenkelink</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Woutersen</surname>
<given-names>R. A.</given-names>
</name>
<name>
<surname>Rietjens</surname>
<given-names>I. M. C. M.</given-names>
</name>
<name>
<surname>Punt</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Development of a combined <italic>in vitro</italic> physiologically based kinetic (PBK) and Monte Carlo modelling approach to predict interindividual human variation in phenol-induced developmental toxicity</article-title>. <source>Toxicol. Sci.</source> <volume>157</volume> (<issue>2</issue>), <fpage>365</fpage>&#x2013;<lpage>376</lpage>. <pub-id pub-id-type="doi">10.1093/toxsci/kfx054</pub-id>
<pub-id pub-id-type="pmid">28498972</pub-id>
</mixed-citation>
</ref>
<ref id="B58">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>van der Zalm</surname>
<given-names>A. J.</given-names>
</name>
<name>
<surname>Barroso</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Browne</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Casey</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Gordon</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Henry</surname>
<given-names>T. R.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>A framework for establishing scientific confidence in new approach methodologies</article-title>. <source>Arch. Toxicol.</source> <volume>96</volume> (<issue>11</issue>), <fpage>2865</fpage>&#x2013;<lpage>2879</lpage>. <pub-id pub-id-type="doi">10.1007/s00204-022-03365-4</pub-id>
<pub-id pub-id-type="pmid">35987941</pub-id>
</mixed-citation>
</ref>
<ref id="B59">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wall</surname>
<given-names>J. T.</given-names>
</name>
<name>
<surname>Sayre</surname>
<given-names>R. R.</given-names>
</name>
<name>
<surname>Smith</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Winter</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Groover</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Hope</surname>
<given-names>J.</given-names>
</name>
<etal/>
</person-group> (<year>2025</year>). <article-title>Development of the toxicity values database, ToxValDB: a curated resource for experimental and derived human health-relevant toxicity data</article-title>. <source>Comput. Toxicol.</source> <volume>35</volume>, <fpage>100365</fpage>. <pub-id pub-id-type="doi">10.1016/j.comtox.2025.100365</pub-id>
<pub-id pub-id-type="pmid">41743589</pub-id>
</mixed-citation>
</ref>
<ref id="B60">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Weil</surname>
<given-names>C. S.</given-names>
</name>
<name>
<surname>Scala</surname>
<given-names>R. A.</given-names>
</name>
</person-group> (<year>1971</year>). <article-title>Study of intra- and interlaboratory variability in the results of rabbit eye and skin irritation tests</article-title>. <source>Toxicol. Appl. Pharmacol.</source> <volume>19</volume> (<issue>2</issue>), <fpage>276</fpage>&#x2013;<lpage>360</lpage>. <pub-id pub-id-type="doi">10.1016/0041-008x(71)90112-8</pub-id>
<pub-id pub-id-type="pmid">5570968</pub-id>
</mixed-citation>
</ref>
<ref id="B61">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Weitekamp</surname>
<given-names>C. A.</given-names>
</name>
<name>
<surname>Paul Friedman</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Harrill</surname>
<given-names>A. H.</given-names>
</name>
<name>
<surname>Auerbach</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Bandele</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Barton-Maclaren</surname>
<given-names>T. S.</given-names>
</name>
<etal/>
</person-group> (<year>2025</year>). <article-title>Quantitative and qualitative concordance between clinical and nonclinical toxicity data</article-title>. <source>Toxicol. Sci.</source> <volume>206</volume> (<issue>2</issue>), <fpage>253</fpage>&#x2013;<lpage>272</lpage>. <pub-id pub-id-type="doi">10.1093/toxsci/kfaf071</pub-id>
<pub-id pub-id-type="pmid">40378198</pub-id>
</mixed-citation>
</ref>
<ref id="B62">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wetmore</surname>
<given-names>B. A.</given-names>
</name>
<name>
<surname>Wambaugh</surname>
<given-names>J. F.</given-names>
</name>
<name>
<surname>Allen</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Ferguson</surname>
<given-names>S. S.</given-names>
</name>
<name>
<surname>Sochaski</surname>
<given-names>M. A.</given-names>
</name>
<name>
<surname>Setzer</surname>
<given-names>R. W.</given-names>
</name>
<etal/>
</person-group> (<year>2015</year>). <article-title>Incorporating high-throughput exposure predictions with dosimetry-adjusted <italic>in vitro</italic> bioactivity to inform chemical toxicity testing</article-title>. <source>Toxicol. Sci.</source> <volume>148</volume> (<issue>1</issue>), <fpage>121</fpage>&#x2013;<lpage>136</lpage>. <pub-id pub-id-type="doi">10.1093/toxsci/kfv171</pub-id>
<pub-id pub-id-type="pmid">26251325</pub-id>
</mixed-citation>
</ref>
<ref id="B63">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>You</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Casanova</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Bartolucci</surname>
<given-names>E. J.</given-names>
</name>
<name>
<surname>Fryczynski</surname>
<given-names>M. W.</given-names>
</name>
<name>
<surname>Dorman</surname>
<given-names>D. C.</given-names>
</name>
<name>
<surname>Everitt</surname>
<given-names>J. I.</given-names>
</name>
<etal/>
</person-group> (<year>2002</year>). <article-title>Combined effects of dietary phytoestrogen and synthetic endocrine-active compound on reproductive development in Sprague-Dawley rats: genistein and methoxychlor</article-title>. <source>Toxicol. Sci.</source> <volume>66</volume> (<issue>1</issue>), <fpage>91</fpage>&#x2013;<lpage>104</lpage>. <pub-id pub-id-type="doi">10.1093/toxsci/66.1.91</pub-id>
<pub-id pub-id-type="pmid">11861976</pub-id>
</mixed-citation>
</ref>
</ref-list>
<fn-group>
<fn fn-type="custom" custom-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/14640/overview">Andy Nong</ext-link>, Health Canada, Canada</p>
</fn>
<fn fn-type="custom" custom-type="reviewed-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1770118/overview">Markus Frericks</ext-link>, BASF, Germany</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3338201/overview">Gen Sato</ext-link>, Eisai Co., Ltd., Japan</p>
</fn>
</fn-group>
<fn-group>
<fn fn-type="abbr" id="abbrev1">
<label>Abbreviations:</label>
<p>NAM, new approach methodology; DNT, developmental neurotoxicity; OECD, Organisation for Economic Co-operation and Development; EPA, US Environmental Protection Agency; TG, test guideline; GLP, Good Laboratory Practice; GHS, United Nations Globally Harmonized System of Classification and Labeling of Chemicals; SD, standard deviation; CV, coefficient of variation; R<sup>2</sup>, coefficient of determination; RMSE, root mean squared error; MAD, mean absolute deviation; LEL, lowest-effect level; MAS, maximum average score; LD<sub>50</sub>, dose resulting in lethality in half of test animals; LC<sub>50</sub>, concentration resulting in lethality in half of test animals; TD<sub>50</sub>, dose resulting in tumors in half of test animals; EC3, effective concentration required to induce a three-fold upregulation of lymph node cell proliferation.</p>
</fn>
</fn-group>
</back>
</article>