<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xml:lang="EN" article-type="brief-report">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Educ.</journal-id>
<journal-title-group>
<journal-title>Frontiers in Education</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Educ.</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">2504-284X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/feduc.2025.1515281</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Brief Research Report</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>How useful are interpretation aids for communicating large-scale assessment results to teachers?</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name><surname>Ludewig</surname> <given-names>Ulrich</given-names></name>
<xref ref-type="aff" rid="aff1"/>
<xref ref-type="corresp" rid="c001"><sup>&#x002A;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/1672580/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Becher</surname> <given-names>Laura</given-names></name>
<xref ref-type="aff" rid="aff1"/>
<uri xlink:href="http://loop.frontiersin.org/people/3238846/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author">
<name><surname>McElvany</surname> <given-names>Nele</given-names></name>
<xref ref-type="aff" rid="aff1"/>
<uri xlink:href="http://loop.frontiersin.org/people/3093422/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
</contrib-group>
<aff id="aff1"><institution>Center for Research on Education and School Development (IFS), Technical University Dortmund</institution>, <city>Dortmund</city>, <country country="de">Germany</country></aff>
<author-notes>
<corresp id="c001"><label>&#x002A;</label>Correspondence: Ulrich Ludewig, <email xlink:href="mailto:ludewig.ulrich@gmail.com">ludewig.ulrich@gmail.com</email></corresp>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2025-11-12">
<day>12</day>
<month>11</month>
<year>2025</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2025</year>
</pub-date>
<volume>10</volume>
<elocation-id>1515281</elocation-id>
<history>
<date date-type="received">
<day>22</day>
<month>10</month>
<year>2024</year>
</date>
<date date-type="accepted">
<day>21</day>
<month>10</month>
<year>2025</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x00A9; 2025 Ludewig, Becher and McElvany.</copyright-statement>
<copyright-year>2025</copyright-year>
<copyright-holder>Ludewig, Becher and McElvany</copyright-holder>
<license>
<ali:license_ref start_date="2025-11-12">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<p>Large-scale assessments (LSAs) significantly influence educational policy and perceptions of inequality, extending their impact beyond researchers to educators. LSAs predominantly utilize standard deviation units, supplemented by interpretation aids to enhance result comprehension. This pre-registered research delves into the utility of interpretation aids for teachers and their potential to influence result interpretation. The study used an experimental study design with <italic>N</italic> = 75 in-service teachers evaluating the usefulness and perceived relevance of several LSA reporting vignettes. Findings indicate that while all interpretation aids surpass standard deviation units in perceived usefulness, they also slightly alter the perceived magnitude of differences. The study underscores the importance of portraying interpretation aids as supplementary tools for context and reference, rather than replacements for statistics. Effective communication strategies should integrate interpretation aids to enrich understanding without overshadowing statistical significance. This approach ensures educators can leverage LSAs for informed decision-making while maintaining fidelity to statistical rigor.</p>
</abstract>
<kwd-group>
<kwd>large-scale assessment (LSA)</kwd>
<kwd>reading literacy</kwd>
<kwd>teacher education</kwd>
<kwd>data literacy</kwd>
<kwd>science communication</kwd>
</kwd-group>
<funding-group>
<funding-statement>The author(s) declare that financial support was received for the research and/or publication of this article. Funding was provided by the Federal Ministry of Education and Research (BMBF) as well as the Standing Conference of the Ministers of Education and Cultural Affairs (KMK).</funding-statement>
</funding-group>
<counts>
<fig-count count="1"/>
<table-count count="2"/>
<equation-count count="0"/>
<ref-count count="41"/>
<page-count count="9"/>
<word-count count="7148"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Teacher Education</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec id="S1" sec-type="intro">
<title>Introduction</title>
<p>It is important to communicate the findings of educational research to teachers and practitioners (e.g., <xref ref-type="bibr" rid="B20">Klieme, 2012</xref>) because research findings provide a basis for evidence-based thinking and action (<xref ref-type="bibr" rid="B4">Bauer and Kollar, 2023</xref>). In this context, research findings deepen the understanding of educational phenomena and spark innovative ideas for teaching that are more likely to be effective (<xref ref-type="bibr" rid="B16">Hinzke et al., 2021</xref>; <xref ref-type="bibr" rid="B40">Visscher and Coe, 2003</xref>; <xref ref-type="bibr" rid="B35">Rochnia et al., 2023</xref>).</p>
<p>Large-scale assessments (LSAs) are among the most influential sources of evidence shaping our understanding of phenomena in education systems. They raise awareness about the state of education, highlight areas in need of improvement, and draw attention to critical issues such as inequalities among students of different backgrounds (<xref ref-type="bibr" rid="B27">Ludewig et al., 2025</xref>). Although large-scale assessments do not provide information for refining lesson planning for teachers or for evaluating the effectiveness of teaching methods (<xref ref-type="bibr" rid="B37">Seidel et al., 2017</xref>), they often serve as a catalyst for political action and administrative reforms within education systems (<xref ref-type="bibr" rid="B14">Gro&#x00DF;-Ophoff et al., 2023</xref>). However, teachers, who represent the largest group of stakeholders, play a central role in implementing and sustaining reforms. Administrative and political personnel within education systems are frequently drawn from the teaching profession itself. Therefore, both current administrators and practicing teachers must be informed about, adhere to, and critically evaluate evidence-based decisions based on LSA results in order to actively engage in the process of educational change.</p>
<p>Although teachers show positive attitudes toward evidence-based teaching and learning about findings from educational research, they find it challenging to translate research evidence into their teaching practice (<xref ref-type="bibr" rid="B13">Georgiou et al., 2023</xref>). Moreover, teachers frequently express doubts about the transferability of research findings to their work (<xref ref-type="bibr" rid="B19">Joram et al., 2020</xref>), and if teachers consult evidence, it is primarily when the practical applications of the research to their own practice are highly evident (<xref ref-type="bibr" rid="B7">Cain, 2016</xref>). This presents a hurdle to integrating current evidence into teacher education and teaching practices.</p>
<p>Data and research literacy play a central role in this issue. First, data literacy can help teachers develop the necessary competencies to interpret students&#x2019; learning data and make evidence-informed decisions about their teaching and individual student support at the classroom level (<xref ref-type="bibr" rid="B24">Lee et al., 2024</xref>). Second, improved data and research literacy broaden the scope of what they believe is relevant for their practice and provide them with ways to link research findings to their prior knowledge (e.g., <xref ref-type="bibr" rid="B36">Schmidt et al., 2023</xref>; <xref ref-type="bibr" rid="B10">Filderman et al., 2022</xref>).</p>
<p>On the other hand, researchers can try to present and communicate their research findings in a way that helps teachers relate the findings to their own practice and thus understand their implications. For example, press releases, clearinghouse websites (<xref ref-type="bibr" rid="B38">Slavin, 2020</xref>), and infographics share the aim of making research findings more accessible for non-researchers. However, these approaches to scholarly communication face the challenge of rephrasing and presenting statistical results in a manner that allows non-researchers to correctly interpret them while preserving as much of their meaning as possible.</p>
</sec>
<sec id="S2">
<title>Theoretical background</title>
<p>Presenting statistical results to non-researchers goes along with different challenges, such as communicating the size of statistical effects, the certainty of findings, and their importance for practice. One very important aspect of presenting statistical results in a way that makes them relatable and useful for teachers is the units of measurement. In education research, intervention studies typically report in standard deviation units (e.g., Cohen&#x2019;s <italic>d</italic>; see <xref ref-type="bibr" rid="B22">Kraft, 2020</xref>). These units of standard deviation have several methodological advantages (<xref ref-type="bibr" rid="B30">McGraw and Wong, 1992</xref>) in the research context but are not very relatable for non-researchers because they are abstract mathematical concepts (<xref ref-type="bibr" rid="B26">Lortie-Forgues et al., 2021</xref>). Therefore, there are different ways of translating such statistics into other forms that aid interpretation. Many interpretation aids have been proposed (e.g., <xref ref-type="bibr" rid="B25">Lipsey et al., 2012</xref>), but to date, there is no consensus about the metric best suited for communication with practitioners (<xref ref-type="bibr" rid="B26">Lortie-Forgues et al., 2021</xref>; <xref ref-type="bibr" rid="B23">K&#x00FC;hlwein et al., 2025</xref>).</p>
<sec id="S2.SS1">
<title>Communicating results in LSA</title>
<p>Additionally, most previous research on communicating research findings to practitioners has focused on the effectiveness of interventions (<xref ref-type="bibr" rid="B22">Kraft, 2020</xref>; <xref ref-type="bibr" rid="B25">Lipsey et al., 2012</xref>; <xref ref-type="bibr" rid="B26">Lortie-Forgues et al., 2021</xref>; <xref ref-type="bibr" rid="B36">Schmidt et al., 2023</xref>). Another very important source of evidence for building an evidence-based understanding of education systems is the results from international large-scale assessments (ILSAs). ILSAs such as the Progress in International Reading Literacy Study (PIRLS), Programme for International Student Assessment (PISA), and Trends in International Mathematics and Science Study (TIMSS) are used to describe differences in student achievement between cohorts in a country over time (trends) and between countries within a year (cross-country comparisons), as well as between groups of students (disparities) over time and between countries.</p>
<p>In general, quantitative research findings can be characterized as (a) statistically significant, (b) quantified with a certain statistical magnitude, and (c) contextualized as practically relevant (<xref ref-type="bibr" rid="B2">Bakker et al., 2019</xref>; <xref ref-type="bibr" rid="B29">McCartney and Rosenthal, 2000</xref>). (a) Statistical significance is set at a threshold level and qualifies a finding as unlikely to be the result of random variation. The technical meaning of &#x201C;statistically significant&#x201D; (not by chance) differs from the colloquial meaning of &#x201C;significant&#x201D; (important, substantial, or relevant). In LSA reporting, statistical significance is usually explicitly stated or visually signified. Statistical significance in LSA is conceptually equivalent to statistical significance in other contexts. (b) Statistical magnitude quantifies a finding using a specific unit of measurement. In most research contexts, statistical magnitudes are communicated using standardized effect sizes such as Cohen&#x2019;s <italic>d</italic> (or Hedges&#x2019; <italic>g</italic>). Most basically, effect sizes are a measure of differences in means between two subgroups divided by the standard deviation of the measure of interest (<xref ref-type="bibr" rid="B25">Lipsey et al., 2012</xref>).</p>
<p>In ILSAs, the statistical magnitudes are &#x201C;points&#x201D; that represent one-hundredth of one standard deviation on a normed scale (<xref ref-type="bibr" rid="B31">Mullis et al., 2023</xref>). A normalized scale represents test results in terms of their distance from the mean of a reference group. The mean of the reference group is set to 500 and its standard deviation to 100. For example, one point on the PIRLS scale is one hundredth of a standard deviation for the reference group of countries that participated in PIRLS 2001. Thus, it is abstract and not very relatable. Generally, the statistical magnitude has no clear relation to statistical significance. There may be relatively large point differences that are not statistically significant due to low-precision estimates, and small point differences that are statistically significant due to high-precision estimates. Teachers are not always able to disentangle the terms statistical &#x201C;significance&#x201D; and statistical magnitude (<xref ref-type="bibr" rid="B36">Schmidt et al., 2023</xref>). However, for ILSAs, the precision is often high enough that even small differences are likely to be statistically significant. (c) Quantitative findings can be interpreted as practically relevant if their statistical magnitude relates to ordinary life experience, known findings, or prior knowledge (<xref ref-type="bibr" rid="B12">Funder and Ozer, 2019</xref>).</p>
<p>Teachers pay attention to research if its practical relevance is evident. Statistical significance is less of an issue in LSA compared to other contexts because LSA has great statistical power. Thus, translating the ILSA results into more relatable units than &#x201C;points&#x201D; of standard deviation and enabling teachers to understand their relevance is a crucial issue for LSA reporting. Interpretation aids help non-researchers better connect the statistical magnitude to their prior knowledge and real-world experience by presenting more relatable units (<xref ref-type="bibr" rid="B26">Lortie-Forgues et al., 2021</xref>).</p>
</sec>
<sec id="S2.SS2">
<title>Interpretation aids</title>
<p>The driving purpose of interpretation aids is to increase the usefulness of results. However, the transformation of results into the form of interpretation aids could generate new interpretation problems by leading, for instance, to misinterpretation, inflating or deflating the <italic>perceived magnitude</italic> of an achievement difference. Additionally, interpretation aids can lead to inconsistencies in the communication of results because assumptions or translations are not obvious to the recipient. Both could cause faulty decision-making or a biased perception of differences (<xref ref-type="bibr" rid="B1">Baird and Pane, 2019</xref>). Therefore, we discuss the potential, advantages, and risks related to interpretation aids and use reading comprehension differences in elementary school as an example.</p>
<sec id="S2.SS2.SSS1">
<title>Months of learning</title>
<p>A popular interpretation aid is to convert results into years (or months, weeks, days) of learning. The average gain in students&#x2019; achievement over subsequent grades provides an important means of comparison (<xref ref-type="bibr" rid="B6">Brunner et al., 2023</xref>; <xref ref-type="bibr" rid="B15">Hill et al., 2008</xref>). Student achievement is the result of long-term, cumulative domain-specific processes of knowledge and skill acquisition (e.g., <xref ref-type="bibr" rid="B5">Baumert et al., 2009</xref>). Teachers have real-world experience seeing their students grow and learn over time; therefore, they often perceive years of learning comparisons as useful (e.g., <xref ref-type="bibr" rid="B25">Lipsey et al., 2012</xref>).</p>
<p><xref ref-type="bibr" rid="B1">Baird and Pane (2019)</xref> criticize learning-year equivalent calculations for several reasons. Among other things, learning year equivalents invite overinterpretation: The weaker student group should go to school for X additional months to catch up with the stronger student group. Group differences can have many causes, and average achievement growth describes how much students typically learn in a year, which includes maturational processes, out-of-school learning, and in-school learning. Learning-year equivalents invite attributing differences to differences in in-school learning, which is not necessarily backed by the presented evidence. <xref ref-type="bibr" rid="B1">Baird and Pane (2019)</xref> also describe the problem that years of learning effects get smaller in higher grades. Thus, very small years of learning effects could inflate the magnitude of group differences. Additionally, there are different learning-year estimations available, such that the difference between girls and boys in reading in first grade (<italic>d</italic> = 0.14) can be between four months (0.14/0.40 = 0.35 &#x2248; 4 months; <xref ref-type="bibr" rid="B15">Hill et al., 2008</xref>) and 6 months depending on the source of the year of learning calculation (0.14/0.29 = 0.54 &#x2248; 6 months; <xref ref-type="bibr" rid="B8">Ditton and Kr&#x00FC;sken, 2009</xref>). Therefore, different researchers or different publications may use different years of learning effects and, therefore, produce inconsistent results even though the underlying effects are the same.</p>
</sec>
<sec id="S2.SS2.SSS2">
<title>Proportion below a content-based threshold</title>
<p>Another very common approach is to choose an informative threshold on the norm distribution and describe the difference in proficiency by the proportion of each group that falls below (or above) this threshold (see, e.g., <xref ref-type="bibr" rid="B25">Lipsey et al., 2012</xref>). Most ILSAs describe each group using proficiency level descriptors, which represent specific requirements necessary to solve a task at a given proficiency level (<xref ref-type="bibr" rid="B9">Durda et al., 2020</xref>). In PIRLS and TIMSS, these thresholds are called international benchmarks (<xref ref-type="bibr" rid="B31">Mullis et al., 2023</xref>); in PISA, they are called proficiency levels (<xref ref-type="bibr" rid="B28">Mang et al., 2019</xref>).</p>
<p>For PIRLS 2021, the lower threshold for Proficiency Level III is 475 points (i.e., 0.25 <italic>SD</italic> below the international mean of 2001). Proficiency Level III is considered a minimum standard for reading achievement at the end of fourth grade in many countries. For instance, applied to German trend results, this means that 14% of students in 2006 and 25% of students in 2021 did not reach the minimum standard for reading achievement in the fourth grade.</p>
<p>The difference between these two percentages can be interpreted and summarized further. Two common ways of doing this are (1) percentage point differences and (2) relative risk. From the example, it can be concluded that children in 2021 are 12 percentage points more likely to fail to meet the minimum standards and that they have almost twice the relative risk of failing to meet the minimum standards.</p>
<p>Results presented as percentages are often considered useful because they represent a concrete entity (e.g., 26 of 100 or 1 of 4) and are used in many other areas of life (e.g., discounts). However, thresholds of proficiency levels invite recipients to make incorrect interpretations: Many more students &#x201C;cannot read&#x201D; today compared to prior years. Competences are continuous, quantitative constructs that are normally distributed; thus, while more students fall under the threshold today, it is not possible to divide students cleanly into those who &#x201C;can&#x201D; and &#x201C;cannot&#x201D; read. Additionally, thresholds are usually study-specific and therefore produce inconsistent results between studies, even if the underlying differences are the same. Moreover, a percentage point difference can result in different values depending on the relative position of the compared groups to the threshold. Therefore, percentage point differences should not be compared between studies (<xref ref-type="bibr" rid="B18">Hollingshead and Childs, 2011</xref>). For instance, the magnitudes of trends over time and gaps between groups depend on the selection of this cut-off score (<xref ref-type="bibr" rid="B17">Ho, 2009</xref>).</p>
<p>Relative risks are often considered useful in reporting results because they express differences as multiplicative factors. However, when based on thresholded outcomes such as proficiency levels, relative risks may invite misinterpretations similar to those of percentage point differences and introduce additional challenges. First, relative risks provide no information about the baseline rate, which is essential for understanding the practical significance of an effect. Second, relative risks can appear dramatic even when absolute differences are small, particularly when the overall prevalence of the outcome is low. While concerns about misleading interpretations of relative risks have been widely discussed in fields such as epidemiology (<xref ref-type="bibr" rid="B32">Novelli et al., 2021</xref>) and criminology&#x2014;where absolute risks are often very low&#x2014;international large-scale assessments (ILSAs) typically examine outcomes that are more prevalent. Nevertheless, it remains crucial to interpret relative risks in conjunction with absolute risk levels to avoid overstating the magnitude or relevance of observed group differences.</p>
</sec>
</sec>
</sec>
<sec id="S3">
<title>The present study</title>
<p>Communicating differences in achievement to practitioners in education is an important goal of empirical education research. For methodological reasons, LSAs use units of standard deviation as their main statistical magnitude. Interpretation aids aim to make results more accessible and easier to understand for educational practitioners. However, interpretation aids could potentially change the denotation of the result by inflating or reducing the perceived magnitude of differences. Thus, our pre-registered research questions (RQ) and hypotheses (H) are the following:</p>
<list list-type="bullet">
<list-item>
<p>RQ1: Do teachers actually perceive interpretation aids as useful?</p>
</list-item>
<list-item>
<p>H1: Teachers perceive the interpretation aids to be more useful than the points of standard deviation.</p>
</list-item>
</list>
<p>We assume that months of learning, percentage points, and relative risk factors improve the perceived usefulness of study results because they enable teachers to relate their real-world experiences with the statistical magnitudes and to associate a more concrete concept with the results.</p>
<list list-type="bullet">
<list-item>
<p>RQ2: How does the interpretation aid influence the perceived magnitude of a group difference?</p>
</list-item>
<list-item>
<p>H2: There is a difference in teachers&#x2019; perceived magnitude of differences between interpretation aids.</p>
</list-item>
</list>
<p>The numerical values of the interpretation aid, as well as the numerical distances between the reported LSA results, differ. Therefore, there is a risk that the interpretation aids may alter the perceived magnitude of the group differences.</p>
</sec>
<sec id="S4">
<title>Methods</title>
<sec id="S4.SS1">
<title>Participants</title>
<p>The sample comprised <italic>N</italic> = 75 in-service teachers with diverse demographic characteristics. The average age of the teachers in the sample was <italic>M</italic> = 40.76 years, with a standard deviation of SD = 12.64 and a range from 22 to 67 years. In terms of gender, 86.67% of the teachers identified as female. The average number of years the teachers have been in the profession is <italic>M</italic> = 12.24, with a standard deviation of SD = 9.93. The range of teaching experience extends from 0 to 40 years. Zero years means participants have been working in a school for less than a year. The sample includes teachers from different school levels. Specifically, 34.67% of the teachers worked at the primary level, 33.33% at the lower secondary level, and 32.00% at the upper secondary level (including schools with a vocational orientation). The participating teachers rated their familiarity with statistical concepts as rather low, with a mean of 2.40 on a scale of 1&#x2013;5 (SD = 0.85). Among the interpretation aids, teachers ranked grade-level comparisons first (most familiar), percentage point change second, differences on standardized scales third, and relative risk fourth (least familiar). Please find a detailed description of the teacher familiarity ratings in <xref ref-type="supplementary-material" rid="SF1">Appendix C</xref>.</p>
<p>The participants in this study were recruited mostly through social media platforms [e.g., X (formerly Twitter), Facebook] and email lists. Potential participants were informed about the study and given the opportunity to voluntarily participate. To incentivize participation, participants who completed the study could voluntarily enter into a drawing where a selected number of winners would receive 20&#x20AC;. We did not reach the target number of 200 teachers because it was much harder than expected to motivate in-service teachers to participate in the study. We decided to end the data collection after expanding the data collection timeframe (12/2022&#x2013;05/2023) and exploiting available resources. The study was approved by the (University) ethics committee (reference # GEKTUDO_2022_52).</p>
</sec>
<sec id="S4.SS2">
<title>Vignettes</title>
<p>In the study, teachers were presented with vignettes describing a difference between student groups in reading comprehension with an interpretation aid or in terms of units of standard deviation. Each vignette consisted of one of four group differences and one of four presentation formats (three interpretation aids + units of standard deviation) for a total of 4 &#x002A; 4 = 16 vignettes. Presentation formats were (1) units of standard deviation, (2) months of learning, (3) percentage points, and (4) risk ratio. The group differences were (1) gender (male versus female; <italic>d</italic> = &#x2212;0.16), (2) trend (2016 versus 2021; <italic>d</italic> = &#x2212;0.25), (3) language spoken at home (always or almost always speaking German at home versus always or almost always speaking a language other than German at home; <italic>d</italic> = &#x2212;0.35), and (4) socio-economic background as indicated by the number of books at home (less than 100 versus 100 or more; <italic>d</italic> = &#x2212;0.53). The results were artificially created based on the following premises: (1) the standard deviation was equal in all groups and (2) the effect size was plausible for the group difference it describes (e.g., <xref ref-type="bibr" rid="B11">Frey et al., 2023</xref>). Please find the underlying group differences in <xref ref-type="supplementary-material" rid="SF1">Appendix A</xref>.</p>
<p>The vignettes were phrased in a standardized way to ensure comparability between the different interpretation aids (see <xref ref-type="table" rid="T1">Table 1</xref>). The formulation of each vignette consisted of an introductory phrase (e.g., &#x201C;The average reading proficiency of students at the end of fourth grade is,&#x201D; plain text), a phrase to clarify the specific group difference (e.g., &#x201C;for boys compared to girls,&#x201D; italic) and the interpretation aid used (e.g., &#x201C;14 points lower on the reading proficiency scale,&#x201D; underlined).</p>
<table-wrap position="float" id="T1">
<label>TABLE 1</label>
<caption><p>Example vignettes for gender difference.</p></caption>
<table cellspacing="5" cellpadding="5" frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left">Presentation format</th>
<th valign="top" align="left">Example sentence (German original)</th>
<th valign="top" align="left">Example sentence (English translation)</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Units of SD</td>
<td valign="top" align="left">Die durchschnittliche Lesekompetenz von Sch&#x00FC;ler:innen am Ende der vierten Klasse ist <italic>bei Jungen im Vergleich zu M&#x00E4;dchen</italic> <underline>um 14 Punkte auf der Lesekompetenzskala geringer</underline>.</td>
<td valign="top" align="left">The average reading proficiency of students at the end of fourth grade is <underline>14 points lower on the reading proficiency scale</underline> <italic>for boys compared to girls</italic>.</td>
</tr>
<tr>
<td valign="top" align="left" colspan="3"><bold>Interpretation aids</bold></td>
</tr>
<tr>
<td valign="top" align="left">Months of learning</td>
<td valign="top" align="left">Die durchschnittliche Lesekompetenz von Sch&#x00FC;ler:innen am Ende der vierten Klasse <italic>ist bei Jungen im Vergleich zu M&#x00E4;dchen</italic> <underline>geringer und zwar um das, was Kinder in etwa 3 Monaten lernen</underline>.</td>
<td valign="top" align="left">The average reading proficiency of students at the end of the fourth grade is <underline>lower</underline> <italic>for boys compared to girls</italic> <underline>by what children learn in about 3 months</underline>.</td>
</tr>
<tr>
<td valign="top" align="left">Percentage points</td>
<td valign="top" align="left">Der Anteil von Sch&#x00FC;ler:innen, die den Mindeststandard f&#x00FC;r Lesekompetenz am Ende der vierten Klasse nicht erreichen, <italic>ist bei Jungen</italic> <underline>um 4 Prozentpunkte h&#x00F6;her</underline> <italic>als bei M&#x00E4;dchen</italic>.</td>
<td valign="top" align="left">The percentage of students who do not meet the minimum reading proficiency standard by the end of fourth grade is <underline>4 percentage points higher for</underline> <italic>boys than for girls</italic>.</td>
</tr>
<tr>
<td valign="top" align="left">Risk ratio</td>
<td valign="top" align="left">Der Anteil von Sch&#x00FC;ler:innen, die nicht den Mindeststandard f&#x00FC;r die Lesekompetenz am Ende der vierten Klasse erreichen, ist bei Jungen <underline>1.3 Mal so hoch</underline> wie bei M&#x00E4;dchen.</td>
<td valign="top" align="left">The percentage of students who do not meet the minimum reading proficiency standard by the end of fourth grade is <underline>1.3 times as high</underline> <italic>for boys as for girls</italic>.</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn><p>Plain text, introductory phrase; Italic, group comparison; Underlined, quantitative statement.</p></fn>
</table-wrap-foot>
</table-wrap>
</sec>
<sec id="S4.SS3">
<title>Procedure</title>
<p>To answer the research questions above, we used a within-person experimental design in an online survey. First, participants were briefly introduced to LSA (140 words) and why it is important to be aware of disparities in education and answered a few demographic questions. Specifically, they were informed of the fact that all the presented results are statistically significant and representative. However, participants were not informed that each vignette about a difference describes the same degree of impact, i.e., that the effect sizes for group differences are identical between interpretation aids.</p>
<p>Second, participants answered on a seven-point semantic differential to what extent they rate the vignette to be (1) informative and (2) comprehensible, and to what extent they think the difference is (3) negligibly small or seriously large. The questions and rating were adopted from <xref ref-type="bibr" rid="B26">Lortie-Forgues et al. (2021)</xref>. Each participant had to assess each of the 16 vignettes regarding these three aspects. The presentation order of the vignettes was randomized for each participant. At the end of the survey, participants rated their familiarity with different statistical concepts.</p>
</sec>
<sec id="S4.SS4">
<title>Analysis</title>
<p>Our analysis plan was preregistered prior to data collection and can be inspected, along with the survey questionnaire, analysis code, and data at <ext-link ext-link-type="uri" xlink:href="https://osf.io/yr2fv/?view_only=e85db5416bb94c0399fc242334196a25">https://osf.io/yr2fv/?view_only=e85db5416bb94c0399fc242334196a25</ext-link>. We used the same model for both research questions. Perceived usefulness was derived from the average between the ratings of the comprehensibility and informativeness of a statement. The pre-registered hypotheses were tested by comparing a baseline model with a model that additionally included a factor for the interpretation aid. The baseline model was a linear mixed-effects model with a random effect for the teacher and a fixed effect for group difference (factor). The analysis used RStudio (<xref ref-type="bibr" rid="B33">R Core Team, 2023</xref>), specifically the lmer function from the lme4 package (<xref ref-type="bibr" rid="B3">Bates et al., 2015</xref>). Additionally, we report the coefficient of the model to indicate the strength and direction of the effects. All means and standard deviations of the ratings per vignette can be found in <xref ref-type="supplementary-material" rid="SF1">Appendix B</xref>.</p>
</sec>
</sec>
<sec id="S5" sec-type="results">
<title>Results</title>
<sec id="S5.SS1">
<title>Are there differences in usefulness (RQ1) and perceived magnitude (RQ2) between interpretation aids?</title>
<p>The factor interpretation aids had a statistically significant effect on explaining the usefulness ratings of the vignettes, &#x03C7;<sup>2</sup>&#x0394;(3) = 55.82, <italic>p</italic> &#x003C; 0.001, and perceived magnitude of the group difference, &#x03C7;<sup>2</sup>&#x0394;(3) = 21.95, <italic>p</italic> &#x003C; 0.001. This supports the hypotheses (H1) that there are differences in teachers&#x2019; perceived usefulness of different types of interpretation aids and (H2) that there are differences in teachers&#x2019; estimates of the size and magnitude of the difference between interpretation aids. The model results are displayed in <xref ref-type="table" rid="T2">Table 2</xref> and expected ratings in <xref ref-type="fig" rid="F1">Figure 1</xref>.</p>
<table-wrap position="float" id="T2">
<label>TABLE 2</label>
<caption><p>Results on the effect of interpretation aid on perceived usefulness and magnitude.</p></caption>
<table cellspacing="5" cellpadding="5" frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left">Variables</th>
<th valign="top" align="center" colspan="3">Usefulness<xref ref-type="table-fn" rid="t2fn1"><sup>1</sup></xref></th>
<th valign="top" align="center" colspan="3">Perceived magnitude<xref ref-type="table-fn" rid="t2fn2"><sup>2</sup></xref></th>
</tr>
<tr>
<th valign="top" align="left"/>
<th valign="top" align="left">&#x03B2;</th>
<th valign="top" align="left">SE</th>
<th valign="top" align="left"><italic>P</italic></th>
<th valign="top" align="left">&#x03B2;</th>
<th valign="top" align="left">SE</th>
<th valign="top" align="left"><italic>P</italic></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left" colspan="7"><bold>Fixed effects</bold></td>
</tr>
<tr>
<td valign="top" align="left">&#x2003;Units of SD and gender (intercept)</td>
<td valign="top" align="left">4.02</td>
<td valign="top" align="left">0.14</td>
<td valign="top" align="left">0.001</td>
<td valign="top" align="left">4.19</td>
<td valign="top" align="left">0.13</td>
<td valign="top" align="left">0.001</td>
</tr>
<tr>
<td valign="top" align="left" colspan="7"><bold>Interpretation aids</bold></td>
</tr>
<tr>
<td valign="top" align="left">&#x2003;Months of learning</td>
<td valign="top" align="left"><bold>0.47</bold></td>
<td valign="top" align="left"><bold>0.11</bold></td>
<td valign="top" align="left"><bold>0.001</bold></td>
<td valign="top" align="left">0.13</td>
<td valign="top" align="left">0.10</td>
<td valign="top" align="left">0.161</td>
</tr>
<tr>
<td valign="top" align="left">&#x2003;Percentage points</td>
<td valign="top" align="left"><bold>0.75</bold></td>
<td valign="top" align="left"><bold>0.11</bold></td>
<td valign="top" align="left"><bold>0.001</bold></td>
<td valign="top" align="left">&#x2212;<bold>0.27</bold></td>
<td valign="top" align="left"><bold>0.10</bold></td>
<td valign="top" align="left"><bold>0.005</bold></td>
</tr>
<tr>
<td valign="top" align="left">&#x2003;Relative risk</td>
<td valign="top" align="left"><bold>0.64</bold></td>
<td valign="top" align="left"><bold>0.11</bold></td>
<td valign="top" align="left"><bold>0.001</bold></td>
<td valign="top" align="left">0.11</td>
<td valign="top" align="left">0.10</td>
<td valign="top" align="left">0.262</td>
</tr>
<tr>
<td valign="top" align="left" colspan="7"><bold>Group differences (controls)</bold></td>
</tr>
<tr>
<td valign="top" align="left">&#x2003;Trend</td>
<td valign="top" align="left">0.19</td>
<td valign="top" align="left">0.11</td>
<td valign="top" align="left">0.081</td>
<td valign="top" align="left"><bold>0.60</bold></td>
<td valign="top" align="left"><bold>0.10</bold></td>
<td valign="top" align="left"><bold>0.001</bold></td>
</tr>
<tr>
<td valign="top" align="left">&#x2003;Language at home</td>
<td valign="top" align="left">0.05</td>
<td valign="top" align="left">0.11</td>
<td valign="top" align="left">0.629</td>
<td valign="top" align="left"><bold>0.89</bold></td>
<td valign="top" align="left"><bold>0.10</bold></td>
<td valign="top" align="left"><bold>0.001</bold></td>
</tr>
<tr>
<td valign="top" align="left">&#x2003;Socio-economic background</td>
<td valign="top" align="left">&#x2212;0.02</td>
<td valign="top" align="left">0.11</td>
<td valign="top" align="left">0.864</td>
<td valign="top" align="left"><bold>1.01</bold></td>
<td valign="top" align="left"><bold>0.10</bold></td>
<td valign="top" align="left"><bold>0.001</bold></td>
</tr>
<tr>
<td valign="top" align="left" colspan="7"><bold>Random variance</bold></td>
</tr>
<tr>
<td valign="top" align="left">&#x2003;&#x03C3;<sub>Teacher</sub></td>
<td valign="top" align="center" colspan="3">0.90</td>
<td valign="top" align="center" colspan="3">0.87</td>
</tr>
<tr>
<td valign="top" align="left">&#x2003;&#x03C3;<sub>Residual</sub></td>
<td valign="top" align="center" colspan="3">1.31</td>
<td valign="top" align="center" colspan="3">1.16</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn><p><italic>N</italic><sub>Observations</sub> = 1200 (<italic>N</italic><sub>Teacher</sub> = 75 &#x00D7; <italic>N</italic><sub>Trials</sub> = 16). Bold coefficients with <italic>p</italic> &#x003C; 0.05.</p></fn>
<fn id="t2fn1"><p><sup>1</sup>Mean of ratings of informativeness and comprehensibility on a rating scale from 1 to 7.</p></fn>
<fn id="t2fn2"><p><sup>2</sup>Rating of perceived magnitude on a scale from 1 to 7.</p></fn>
</table-wrap-foot>
</table-wrap>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption><p>Ratings of usefulness and perceived magnitude. Usefulness: units of standard deviation &#x003C; months of learning, percentage points and relative risk, months of learning &#x003C; percentage points. Perceived magnitude: units of standard deviation &#x003E; percentage points, months of learning &#x003E; percentage points, percentage points &#x003C; units of standard deviation, months of learning and relative risk, relative risk &#x003E; percentage points. Error bars represent the 83.43% confidence interval. Overlap between 83.43% confidence intervals, approximate test for statistically significant difference, <italic>p</italic> &#x003C; 0.05 (<xref ref-type="bibr" rid="B21">Knol et al., 2011</xref>). The model indicates that all interpretation aids were rated statistically significantly more useful than the units of standard deviation (<xref ref-type="table" rid="T2">Table 2</xref>).</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="feduc-10-1515281-g001.tif">
<alt-text content-type="machine-generated">Bar chart depicting expected ratings for usefulness and perceived magnitude across four measures: units of standard deviation, months of learning, percentage points, and relative risk. Usefulness ratings range from 3.7 to 4.4, while perceived magnitude ratings range from 4.2 to 4.6. Error bars indicate variability.</alt-text>
</graphic>
</fig>
<p>In more detail, we found that in terms of usefulness, units of standard deviation were inferior to months of learning (&#x03B2; = 0.47, <italic>t</italic> = 4.43, <italic>p</italic> &#x003C; 0.001), percentage points (&#x03B2; = 0.75, <italic>t</italic> = 7.03, <italic>p</italic> &#x003C; 0.001), and relative risk (&#x03B2; = 0.64, <italic>t</italic> = 5.94, <italic>p</italic> &#x003C; 0.001). Additionally, percentage points were superior to months of learning (&#x03B2; = 0.28, <italic>t</italic> = &#x2212;2.60, <italic>p</italic> = 0.011). In terms of perceived magnitude, the percentage points were perceived to be smaller in magnitude than units of standard deviation (&#x03B2; = &#x2212;0.27, <italic>t</italic> = 2.81, <italic>p</italic> = 0.007), months of learning (&#x03B2; = &#x2212;0.40, <italic>t</italic> = 4.21, <italic>p</italic> &#x003C; 0.001), and relative risk (&#x03B2; = &#x2212;0.37, <italic>t</italic> = 3.93, <italic>p</italic> &#x003C; 0.001).</p>
</sec>
</sec>
<sec id="S6" sec-type="discussion">
<title>Discussion</title>
<p>The study examined the perceived usefulness and magnitude of LSA reports. LSAs use units of standard deviation to communicate results. These units of standard deviation have methodological advantages but are abstract and difficult to interpret for non-researchers. Interpretation aids such as months of progress equivalents, change in percentage meeting a threshold, and risk ratios for falling below a threshold aim at improving the usefulness of reports but should not change the perceived magnitude of differences and their denotation. Therefore, we analyzed data from <italic>N</italic> = 75 in-service teachers who rated several vignettes in a within-person experimental study. The vignettes reported group differences with varying magnitudes (trends, gender, language at home, and books at home) common in LSA reports.</p>
<p>Our results showed that all interpretation aids were perceived to be more useful than the units of standard deviations. Thus, the results support the notion that teachers do value interpretation aids and find them more relatable than units of standard deviation. Thus, interpretation aids would play a significant role in increasing the perceived utility value of educational research for teachers (<xref ref-type="bibr" rid="B41">Zeeb and Voss, 2025</xref>). This finding is consistent with findings about interpretation aids for intervention studies (<xref ref-type="bibr" rid="B26">Lortie-Forgues et al., 2021</xref>). Notably, we found that teachers preferred percentage points over months of learning. Given the many limitations of months of learning equivalence (<xref ref-type="bibr" rid="B1">Baird and Pane, 2019</xref>) and the relatively few limitations of percentage change (<xref ref-type="bibr" rid="B18">Hollingshead and Childs, 2011</xref>), conveying the results of LSA via percentage changes may be the better interpretation aid.</p>
<p>However, the perceived magnitude of the results was also impacted in a statistically significant way by the interpretation aids. Specifically, the percentage change was rated as weaker in magnitude than the units of standard deviation, whereas months of learning and risk ratios tended to have a higher perceived magnitude. The finding that interpretation aids using the difference in percentage points meeting a threshold decreased the perceived magnitude is contradictory to results about interpretation aids when reporting on the effectiveness of interventions (<xref ref-type="bibr" rid="B26">Lortie-Forgues et al., 2021</xref>). Notably, <xref ref-type="bibr" rid="B26">Lortie-Forgues et al. (2021)</xref> described the percentage falling below a threshold in the two groups, whereas in this study, we described the differences in percentage points meeting or exceeding the threshold. Thus, the percentage meeting a threshold within a particular group might be an important reference for teachers. Additionally, the tendency for months of learning to inflate the perceived magnitude of a result is consistent with interpretation aids reporting on the effectiveness of interventions (<xref ref-type="bibr" rid="B26">Lortie-Forgues et al., 2021</xref>). However, the inflationary effect is heavily dependent on the annual growth estimate used to determine the months of learning (<xref ref-type="bibr" rid="B1">Baird and Pane, 2019</xref>). The vignettes were about elementary students with relatively large annual growth estimates; thus, the months of learning equivalent have only a modest potential to inflate the perceived magnitude.</p>
<sec id="S6.SS1">
<title>Implications</title>
<p>Results from LSAs play an important role in informing educational policy. Teachers, as the primary agents responsible for implementing these policies in practice, play a central role in translating them into classroom action. Overcoming practical, scientific, and political barriers in education to move toward a stronger evidence orientation is an important developmental goal of the education sector (<xref ref-type="bibr" rid="B4">Bauer and Kollar, 2023</xref>). It is therefore essential that teachers&#x2019; beliefs and understandings of educational inequality and the broader education system are grounded in empirical evidence. Communicators&#x2019; decisions about how to report their research findings are likely to influence the extent to which teachers will use results to refine their beliefs and ideas about inequality and the education system.</p>
<p>Our study suggests that communicators who want to maximize teacher engagement with research should consider emphasizing the relevance of outcomes through interpretation aids, such as months of progress equivalents, differences in percentage points meeting a threshold, and risk ratios for falling below a threshold; reporting only units of standard deviation may result in lower engagement. Research findings have the characteristics of (a) being statistically significant, (b) being quantified with a certain statistical magnitude, and (c) being contextualized as practically relevant (<xref ref-type="bibr" rid="B2">Bakker et al., 2019</xref>). Interpretation aids are superior in emphasizing the third aspect, contextualizing the practical relevance; however, they have stronger limitations with regard to the second aspect, quantifying the statistical magnitude, than units of standard deviation. Findings presented in terms of percentage change seem to slightly weaken the perceived magnitude of the effect. Additionally, we have already discussed the limitations of interpretation aids when viewed as statistics. For instance, (i) relative risk can dramatically inflate the reported magnitude of the effect if the denominator is very small (<xref ref-type="bibr" rid="B39">Trevena et al., 2013</xref>), (ii) the percentage change can be misleading depending on the position of the cut-off value relative to the pivot point of the normal distribution (<xref ref-type="bibr" rid="B18">Hollingshead and Childs, 2011</xref>), and (iii) months of progress can generate implausible results if the average annual progress estimate is small (<xref ref-type="bibr" rid="B1">Baird and Pane, 2019</xref>). 
The use of interpretation aids should be accompanied by a note emphasizing their character as an interpretation aid and not as a statistic upon which statistical significance is determined or a quantity with optimal measurement properties, but rather as an indication or contextualization of a result.</p>
</sec>
<sec id="S6.SS2">
<title>Limitations and outlook</title>
<p>The study investigates perception using results from large-scale assessments; however, this scope reflects only a small segment of the broader evidence-based orientation that teachers need. For a more comprehensive understanding, it is likely far more relevant for teachers to be informed about educational theories that reflect the current state of evidence in educational science (<xref ref-type="bibr" rid="B34">Renkl, 2022</xref>). Nevertheless, increasing the accessibility of theoretical knowledge in the teacher population requires significantly more effort than optimizing interpretation aids. Enhancing interpretation aids typically involves only minor adjustments to LSA reporting formats and outputs. Therefore, interpretation aids remain highly relevant for increasing the value of large-scale assessment results for teachers.</p>
<p>We only found that percentage points are more useful than months of learning regarding the differences between interpretation aids in usefulness. Therefore, we can make only a few specific recommendations in favor of a particular interpretation aid. A larger sample size would make it possible to identify smaller effects as statistically significant. However, the differences found are already relatively small, at around 0.5 on a scale of 1&#x2013;7, meaning that much smaller effects may no longer be practically relevant. Additionally, the teachers were employed at different types of schools that do not serve students in the age range to which the years-of-learning interpretation aids referred. This possibly limited the perceived usefulness of these aids. A larger, more targeted sample from specific school types would have been preferable.</p>
<p>Lastly, the vignettes provide relatively little context about the results. Most reports would provide a theoretical background or motivation for the specific comparison and verbal contextualization. The vignettes mimic sentences in press releases or an executive summary. Additionally, the nature of the interpretations requires somewhat different phrasing of the sentences. Future work should examine the communication of results with more context and scenarios that are more authentic and with a variety of differently phrased sentences.</p>
</sec>
</sec>
</body>
<back>
<sec id="S7" sec-type="data-availability">
<title>Data availability statement</title>
<p>The datasets presented in this study can be found in online repositories. The names of the repository/repositories and accession number(s) can be found in the article/<xref ref-type="supplementary-material" rid="SF1">Supplementary material</xref>.</p>
</sec>
<sec id="S8" sec-type="ethics-statement">
<title>Ethics statement</title>
<p>The studies involving humans were approved by Ethikkommission der TU Dortmund Fakult&#x00E4;t Erziehungswissenschaft, Psychologie und Bildungsforschung (12). The studies were conducted in accordance with the local legislation and institutional requirements. The participants provided their written informed consent to participate in this study.</p>
</sec>
<sec id="S9" sec-type="author-contributions">
<title>Author contributions</title>
<p>UL: Conceptualization, Formal analysis, Methodology, Project administration, Supervision, Writing &#x2013; original draft, Writing &#x2013; review &#x0026; editing. LB: Conceptualization, Data curation, Project administration, Validation, Writing &#x2013; review &#x0026; editing. NM: Funding acquisition, Resources, Supervision, Writing &#x2013; review &#x0026; editing.</p>
</sec>
<sec id="S11" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="S12" sec-type="ai-statement">
<title>Generative AI statement</title>
<p>The authors declare that no Generative AI was used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p>
</sec>
<sec id="S13" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec id="S14" sec-type="supplementary-material">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/feduc.2025.1515281/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/feduc.2025.1515281/full#supplementary-material</ext-link></p>
<supplementary-material xlink:href="Supplementary_file_1.pdf" id="SF1" mimetype="application/pdf"/>
</sec>
<ref-list>
<title>References</title>
<ref id="B1"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Baird</surname> <given-names>M. D.</given-names></name> <name><surname>Pane</surname> <given-names>J. F.</given-names></name></person-group> (<year>2019</year>). <article-title>Translating standardized effects of education programs into more interpretable metrics.</article-title> <source><italic>Educ. Res.</italic></source> <volume>48</volume> <fpage>217</fpage>&#x2013;<lpage>228</lpage>. <pub-id pub-id-type="doi">10.3102/0013189x19848729</pub-id> <pub-id pub-id-type="pmid">38293548</pub-id></mixed-citation></ref>
<ref id="B2"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Bakker</surname> <given-names>A.</given-names></name> <name><surname>Cai</surname> <given-names>J.</given-names></name> <name><surname>English</surname> <given-names>L.</given-names></name> <name><surname>Kaiser</surname> <given-names>G.</given-names></name> <name><surname>Mesa</surname> <given-names>V.</given-names></name> <name><surname>Van Dooren</surname> <given-names>W.</given-names></name></person-group> (<year>2019</year>). <article-title>Beyond small, medium, or large: Points of consideration when interpreting effect sizes.</article-title> <source><italic>Educ. Stud. Mathemat.</italic></source> <volume>102</volume> <fpage>1</fpage>&#x2013;<lpage>8</lpage>. <pub-id pub-id-type="doi">10.1007/s10649-019-09908-4</pub-id></mixed-citation></ref>
<ref id="B3"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Bates</surname> <given-names>D. M.</given-names></name> <name><surname>M&#x00E4;chler</surname> <given-names>M.</given-names></name> <name><surname>Bolker</surname> <given-names>B. M.</given-names></name> <name><surname>Walker</surname> <given-names>S. C.</given-names></name></person-group> (<year>2015</year>). <article-title>Fitting linear mixed-effects models using LME4.</article-title> <source><italic>J. Statist. Softw.</italic></source> <volume>67</volume> <fpage>1</fpage>&#x2013;<lpage>48</lpage>. <pub-id pub-id-type="doi">10.18637/jss.v067.i01</pub-id></mixed-citation></ref>
<ref id="B4"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Bauer</surname> <given-names>J.</given-names></name> <name><surname>Kollar</surname> <given-names>I.</given-names></name></person-group> (<year>2023</year>). <article-title>(Wie) kann die Nutzung bildungswissenschaftlicher Evidenz Lehren und Lernen verbessern? Thesen und Fragen zur Diskussion um evidenzorientiertes Denken und Handeln von Lehrkr&#x00E4;ften. [(How) can the use of educational evidence improve teaching and learning? Theses and questions for the discussion on evidence-based thinking and action among teachers].</article-title> <source><italic>Unterrichtswissenschaft</italic></source> <volume>51</volume> <fpage>123</fpage>&#x2013;<lpage>147</lpage>. <pub-id pub-id-type="doi">10.1007/s42010-023-00166-1</pub-id> <comment>German</comment></mixed-citation></ref>
<ref id="B5"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Baumert</surname> <given-names>J.</given-names></name> <name><surname>L&#x00FC;dtke</surname> <given-names>O.</given-names></name> <name><surname>Trautwein</surname> <given-names>U.</given-names></name> <name><surname>Brunner</surname> <given-names>M.</given-names></name></person-group> (<year>2009</year>). <article-title>Large-scale student assessment studies measure the results of processes of knowledge acquisition: Evidence in support of the distinction between intelligence and student achievement.</article-title> <source><italic>Educ. Res. Rev.</italic></source> <volume>4</volume> <fpage>165</fpage>&#x2013;<lpage>176</lpage>. <pub-id pub-id-type="doi">10.1016/j.edurev.2009.04.002</pub-id></mixed-citation></ref>
<ref id="B6"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Brunner</surname> <given-names>M.</given-names></name> <name><surname>Stallasch</surname> <given-names>S. E.</given-names></name> <name><surname>L&#x00FC;dtke</surname> <given-names>O.</given-names></name></person-group> (<year>2023</year>). <article-title>Empirical benchmarks to interpret intervention effects on student achievement in elementary and secondary school: Meta-analytic results from Germany.</article-title> <source><italic>J. Res. Educ. Effect.</italic></source> <volume>17</volume>, <fpage>119</fpage>&#x2013;<lpage>157</lpage>. <pub-id pub-id-type="doi">10.1080/19345747.2023.2175753</pub-id></mixed-citation></ref>
<ref id="B7"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Cain</surname> <given-names>T.</given-names></name></person-group> (<year>2016</year>). <article-title>Research utilisation and the struggle for the teacher&#x2019;s soul: A narrative review.</article-title> <source><italic>Eur. J. Teach. Educ.</italic></source> <volume>39</volume> <fpage>616</fpage>&#x2013;<lpage>629</lpage>. <pub-id pub-id-type="doi">10.1080/02619768.2016.1252912</pub-id></mixed-citation></ref>
<ref id="B8"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ditton</surname> <given-names>H.</given-names></name> <name><surname>Kr&#x00FC;sken</surname> <given-names>J.</given-names></name></person-group> (<year>2009</year>). <article-title>Denn wer hat, dem wird gegeben werden? Eine L&#x00E4;ngsschnittstudie zur Entwicklung schulischer Leistungen und den Effekten der sozialen Herkunft in der Grundschulzeit [For he who has, to him shall be given? A longitudinal study on the development of academic performance and the effects of social background in primary school].</article-title> <source><italic>J. Educ. Res. Online</italic></source> <volume>1</volume> <fpage>33</fpage>&#x2013;<lpage>61</lpage>. <pub-id pub-id-type="doi">10.25656/014555</pub-id> <pub-id pub-id-type="pmid">35213755</pub-id> <comment>German</comment></mixed-citation></ref>
<ref id="B9"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Durda</surname> <given-names>T.</given-names></name> <name><surname>Artelt</surname> <given-names>C.</given-names></name> <name><surname>Lechner</surname> <given-names>C. M.</given-names></name> <name><surname>Rammstedt</surname> <given-names>B.</given-names></name> <name><surname>Wicht</surname> <given-names>A.</given-names></name></person-group> (<year>2020</year>). <article-title>Proficiency level descriptors for low reading proficiency: An integrative process model.</article-title> <source><italic>Int. Rev. Educ.</italic></source> <volume>66</volume> <fpage>211</fpage>&#x2013;<lpage>233</lpage>. <pub-id pub-id-type="doi">10.1007/s11159-020-09834-1</pub-id></mixed-citation></ref>
<ref id="B10"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Filderman</surname> <given-names>M. J.</given-names></name> <name><surname>Toste</surname> <given-names>J. R.</given-names></name> <name><surname>Didion</surname> <given-names>L.</given-names></name> <name><surname>Peng</surname> <given-names>P.</given-names></name></person-group> (<year>2022</year>). <article-title>Data literacy training for K&#x2013;12 teachers: A meta-analysis of the effects on teacher outcomes.</article-title> <source><italic>Remed. Special Educ.</italic></source> <volume>43</volume> <fpage>328</fpage>&#x2013;<lpage>343</lpage>. <pub-id pub-id-type="doi">10.1177/07419325211054208</pub-id></mixed-citation></ref>
<ref id="B11"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Frey</surname> <given-names>A.</given-names></name> <name><surname>Ludewig</surname> <given-names>U.</given-names></name> <name><surname>K&#x00F6;nig</surname> <given-names>C.</given-names></name> <name><surname>Krampen</surname> <given-names>D.</given-names></name> <name><surname>Lorenz</surname> <given-names>R.</given-names></name> <name><surname>Bos</surname> <given-names>W.</given-names></name></person-group> (<year>2023</year>). &#x201C;<article-title>IGLU 2021: Lesekompetenzen von Viertkl&#x00E4;sslerinnen und Viertkl&#x00E4;sslern im internationalen Vergleich: 20-Jahre-Trend [IGLU 2021: Reading achievement of fourth graders in international comparison: 20-year trend]</article-title>,&#x201D; in <source><italic>IGLU 2021 &#x2013; Lesekompetenz von Grundschulkindern im internationalen Vergleich und im Trend &#x00FC;ber 20 Jahre</italic></source>, <role>eds</role> <person-group person-group-type="editor"><name><surname>McElvany</surname> <given-names>N.</given-names></name> <name><surname>Lorenz</surname> <given-names>R.</given-names></name> <name><surname>Frey</surname> <given-names>A.</given-names></name> <name><surname>Goldhammer</surname> <given-names>F.</given-names></name> <name><surname>Schilcher</surname> <given-names>A.</given-names></name> <name><surname>Stubbe</surname> <given-names>T.</given-names></name></person-group> (<publisher-loc>M&#x00FC;nster</publisher-loc>: <publisher-name>Waxmann</publisher-name>). German</mixed-citation></ref>
<ref id="B12"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Funder</surname> <given-names>D. C.</given-names></name> <name><surname>Ozer</surname> <given-names>D. J.</given-names></name></person-group> (<year>2019</year>). <article-title>Evaluating effect size in psychological research: Sense and nonsense.</article-title> <source><italic>Adv. Methods Pract. Psychol. Sci.</italic></source> <volume>2</volume> <fpage>156</fpage>&#x2013;<lpage>168</lpage>. <pub-id pub-id-type="doi">10.1177/2515245919847202</pub-id></mixed-citation></ref>
<ref id="B13"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Georgiou</surname> <given-names>D.</given-names></name> <name><surname>Diery</surname> <given-names>A.</given-names></name> <name><surname>Mok</surname> <given-names>S. Y.</given-names></name> <name><surname>Fischer</surname> <given-names>F.</given-names></name> <name><surname>Seidel</surname> <given-names>T.</given-names></name></person-group> (<year>2023</year>). <article-title>Turning research evidence into teaching action: Teacher educators&#x2019; attitudes toward evidence-based teaching.</article-title> <source><italic>Int. J. Educ. Res. Open</italic></source> <volume>4</volume>:<fpage>100240</fpage>. <pub-id pub-id-type="doi">10.1016/j.ijedro.2023.100240</pub-id></mixed-citation></ref>
<ref id="B14"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Gro&#x00DF;-Ophoff</surname> <given-names>J.</given-names></name> <name><surname>Brown</surname> <given-names>C.</given-names></name> <name><surname>Helm</surname> <given-names>C.</given-names></name></person-group> (<year>2023</year>). <article-title>Do pupils at research-informed schools actually perform better? Findings from a study at English schools.</article-title> <source><italic>Front. Educ.</italic></source> <volume>7</volume>:<fpage>1011241</fpage>. <pub-id pub-id-type="doi">10.3389/feduc.2022.1011241</pub-id></mixed-citation></ref>
<ref id="B15"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hill</surname> <given-names>C. J.</given-names></name> <name><surname>Bloom</surname> <given-names>H. S.</given-names></name> <name><surname>Black</surname> <given-names>A. R.</given-names></name> <name><surname>Lipsey</surname> <given-names>M. W.</given-names></name></person-group> (<year>2008</year>). <article-title>Empirical benchmarks for interpreting effect sizes in research.</article-title> <source><italic>Child Dev. Perspect.</italic></source> <volume>2</volume> <fpage>172</fpage>&#x2013;<lpage>177</lpage>. <pub-id pub-id-type="doi">10.1111/j.1750-8606.2008.00061.x</pub-id></mixed-citation></ref>
<ref id="B16"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hinzke</surname> <given-names>J. H.</given-names></name> <name><surname>Gesang</surname> <given-names>J.</given-names></name> <name><surname>Besa</surname> <given-names>K. S.</given-names></name></person-group> (<year>2021</year>). <article-title>Ungewissheit im unterrichtlichen Handeln von Lehrpersonen. Zur Erfahrung von Ungewissheit zwischen Norm, Theorie und Habitus [Uncertainty in teachers&#x2019; teaching practices: On the experience of uncertainty between norm, theory, and habitus].</article-title> <source><italic>ZISU&#x2013;Zeitschrift Interpretat. Schul- und Unterrichtsforschung</italic></source> <volume>10</volume> <fpage>56</fpage>&#x2013;<lpage>69</lpage>. <pub-id pub-id-type="doi">10.3224/zisu.v10i1.04</pub-id> German</mixed-citation></ref>
<ref id="B17"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ho</surname> <given-names>A. D.</given-names></name></person-group> (<year>2009</year>). <article-title>A nonparametric framework for comparing trends and gaps across tests.</article-title> <source><italic>J. Educ. Behav. Statis.</italic></source> <volume>34</volume> <fpage>201</fpage>&#x2013;<lpage>228</lpage>. <pub-id pub-id-type="doi">10.3102/1076998609332755</pub-id></mixed-citation></ref>
<ref id="B18"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hollingshead</surname> <given-names>L.</given-names></name> <name><surname>Childs</surname> <given-names>R. A.</given-names></name></person-group> (<year>2011</year>). <article-title>Reporting the percentage of students above a cut score: The effect of group size.</article-title> <source><italic>Educ. Measurem. Issues Pract.</italic></source> <volume>30</volume> <fpage>36</fpage>&#x2013;<lpage>43</lpage>. <pub-id pub-id-type="doi">10.1111/j.1745-3992.2010.00198.x</pub-id></mixed-citation></ref>
<ref id="B19"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Joram</surname> <given-names>E.</given-names></name> <name><surname>Gabriele</surname> <given-names>A. J.</given-names></name> <name><surname>Walton</surname> <given-names>K.</given-names></name></person-group> (<year>2020</year>). <article-title>What influences teachers&#x2019; &#x201C;buy-in&#x201D; of research? Teachers&#x2019; beliefs about the applicability of educational research to their practice.</article-title> <source><italic>Teach. Teach. Educ.</italic></source> <volume>88</volume>:<fpage>102980</fpage>. <pub-id pub-id-type="doi">10.1016/j.tate.2019.102980</pub-id></mixed-citation></ref>
<ref id="B20"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Klieme</surname> <given-names>E.</given-names></name></person-group> (<year>2012</year>). &#x201C;<article-title>The role of large-scale assessments in research on educational effectiveness and school development</article-title>,&#x201D; in <source><italic>The role of international large-scale assessments: Perspectives from technology, economy, and educational research</italic></source>, <role>eds</role> <person-group person-group-type="editor"><name><surname>von Davier</surname> <given-names>M.</given-names></name> <name><surname>Gonzalez</surname> <given-names>E.</given-names></name> <name><surname>Kirsch</surname> <given-names>I.</given-names></name> <name><surname>Yamamoto</surname> <given-names>K.</given-names></name></person-group> (<publisher-loc>Berlin</publisher-loc>: <publisher-name>Springer</publisher-name>), <fpage>115</fpage>&#x2013;<lpage>147</lpage>. <pub-id pub-id-type="doi">10.1007/978-94-007-4629-9_7</pub-id></mixed-citation></ref>
<ref id="B21"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Knol</surname> <given-names>M. J.</given-names></name> <name><surname>Pestman</surname> <given-names>W. R.</given-names></name> <name><surname>Grobbee</surname> <given-names>D. E.</given-names></name></person-group> (<year>2011</year>). <article-title>The (mis)use of overlap of confidence intervals to assess effect modification.</article-title> <source><italic>Eur. J. Epidemiol.</italic></source> <volume>26</volume> <fpage>253</fpage>&#x2013;<lpage>254</lpage>. <pub-id pub-id-type="doi">10.1007/s10654-011-9563-8</pub-id> <pub-id pub-id-type="pmid">21424218</pub-id></mixed-citation></ref>
<ref id="B22"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kraft</surname> <given-names>M. A.</given-names></name></person-group> (<year>2020</year>). <article-title>Interpreting effect sizes of education interventions.</article-title> <source><italic>Educ. Res.</italic></source> <volume>49</volume> <fpage>241</fpage>&#x2013;<lpage>253</lpage>. <pub-id pub-id-type="doi">10.3102/0013189x20912798</pub-id></mixed-citation></ref>
<ref id="B23"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>K&#x00FC;hlwein</surname> <given-names>F.</given-names></name> <name><surname>Merk</surname> <given-names>S.</given-names></name> <name><surname>Schneider</surname> <given-names>J.</given-names></name> <name><surname>Schmidt</surname> <given-names>K.</given-names></name></person-group> (<year>2025</year>). <source><italic>Effektst&#x00E4;rken verst&#x00E4;ndlich an Lehrpersonen kommunizieren [Symposium: F&#x00F6;rderung evidenzinformierter Entscheidungen von Lehrkr&#x00E4;ften]. Communicating effect sizes clearly to teachers [Symposium: Promoting evidence-informed decisions by teachers].</italic></source> <publisher-loc>Mannheim</publisher-loc>: <publisher-name>Universit&#x00E4;t Mannheim</publisher-name>. German</mixed-citation></ref>
<ref id="B24"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Lee</surname> <given-names>J.</given-names></name> <name><surname>Alonzo</surname> <given-names>D.</given-names></name> <name><surname>Beswick</surname> <given-names>K.</given-names></name> <name><surname>Abril</surname> <given-names>J. M. V.</given-names></name> <name><surname>Chew</surname> <given-names>A. W.</given-names></name> <name><surname>Oo</surname> <given-names>C. Z.</given-names></name><etal/></person-group> (<year>2024</year>). <article-title>Dimensions of teachers&#x2019; data literacy: A systematic review of literature from 1990 to 2021.</article-title> <source><italic>Educ. Asse. Eval. Acc.</italic></source> <volume>36</volume> <fpage>145</fpage>&#x2013;<lpage>200</lpage>. <pub-id pub-id-type="doi">10.1007/s11092-024-09435-8</pub-id></mixed-citation></ref>
<ref id="B25"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Lipsey</surname> <given-names>M. W.</given-names></name> <name><surname>Puzio</surname> <given-names>K.</given-names></name> <name><surname>Yun</surname> <given-names>C.</given-names></name> <name><surname>Hebert</surname> <given-names>M. D.</given-names></name> <name><surname>Steinka-Fry</surname> <given-names>K.</given-names></name> <name><surname>Cole</surname> <given-names>M. W.</given-names></name><etal/></person-group> (<year>2012</year>). <source><italic>Translating the statistical representation of the effects of education interventions into more readily interpretable forms.</italic></source> <publisher-loc>Washington, DC</publisher-loc>: <publisher-name>National Center for Special Education Research</publisher-name>.</mixed-citation></ref>
<ref id="B26"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Lortie-Forgues</surname> <given-names>H.</given-names></name> <name><surname>Sio</surname> <given-names>U. N.</given-names></name> <name><surname>Inglis</surname> <given-names>M.</given-names></name></person-group> (<year>2021</year>). <article-title>How should educational effects be communicated to teachers?</article-title> <source><italic>Educ. Res.</italic></source> <volume>50</volume> <fpage>345</fpage>&#x2013;<lpage>354</lpage>. <pub-id pub-id-type="doi">10.3102/0013189X20987856</pub-id></mixed-citation></ref>
<ref id="B27"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ludewig</surname> <given-names>U.</given-names></name> <name><surname>Strietholt</surname> <given-names>R.</given-names></name> <name><surname>McElvany</surname> <given-names>N.</given-names></name></person-group> (<year>2025</year>). <article-title>Reading literacy decline in Europe: Disentangling school closures and out-of-school learning conditions during the COVID-19 pandemic.</article-title> <source><italic>Learn. Instruct.</italic></source> <volume>98</volume>:<fpage>102150</fpage>. <pub-id pub-id-type="doi">10.1016/j.learninstruc.2025.102150</pub-id></mixed-citation></ref>
<ref id="B28"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Mang</surname> <given-names>J.</given-names></name> <name><surname>Wagner</surname> <given-names>S.</given-names></name> <name><surname>Gomolka</surname> <given-names>J.</given-names></name> <name><surname>Sch&#x00E4;fer</surname> <given-names>A.</given-names></name> <name><surname>Meinck</surname> <given-names>S.</given-names></name> <name><surname>Reiss</surname> <given-names>K.</given-names></name></person-group> (<year>2019</year>). <source><italic>Technische Hintergrundinformationen PISA 2018. [Technical background information PISA 2018].</italic></source> <publisher-loc>M&#x00FC;nster</publisher-loc>. German</mixed-citation></ref>
<ref id="B29"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>McCartney</surname> <given-names>K.</given-names></name> <name><surname>Rosenthal</surname> <given-names>R.</given-names></name></person-group> (<year>2000</year>). <article-title>Effect size, practical importance, and social policy for children.</article-title> <source><italic>Child Dev.</italic></source> <volume>71</volume> <fpage>173</fpage>&#x2013;<lpage>180</lpage>. <pub-id pub-id-type="doi">10.1111/1467-8624.00131</pub-id> <pub-id pub-id-type="pmid">10836571</pub-id></mixed-citation></ref>
<ref id="B30"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>McGraw</surname> <given-names>K. O.</given-names></name> <name><surname>Wong</surname> <given-names>S. P.</given-names></name></person-group> (<year>1992</year>). <article-title>A common language effect size statistic.</article-title> <source><italic>Psychol. Bull.</italic></source> <volume>111</volume> <fpage>361</fpage>&#x2013;<lpage>365</lpage>. <pub-id pub-id-type="doi">10.1037/0033-2909.111.2.361</pub-id></mixed-citation></ref>
<ref id="B31"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Mullis</surname> <given-names>I. V. S.</given-names></name> <name><surname>von Davier</surname> <given-names>M.</given-names></name> <name><surname>Foy</surname> <given-names>P.</given-names></name> <name><surname>Fishbein</surname> <given-names>B.</given-names></name> <name><surname>Reynolds</surname> <given-names>K. A.</given-names></name> <name><surname>Wry</surname> <given-names>E.</given-names></name></person-group> (<year>2023</year>). <source><italic>PIRLS 2021 International Results in Reading.</italic></source> <publisher-loc>Chestnut Hill, MA</publisher-loc>: <publisher-name>Boston College, TIMSS &#x0026; PIRLS International Study Center</publisher-name>. <pub-id pub-id-type="doi">10.6017/lse.tpisc.tr2103.kb5342</pub-id></mixed-citation></ref>
<ref id="B32"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Novelli</surname> <given-names>M.</given-names></name> <name><surname>Antognini</surname> <given-names>A. B.</given-names></name> <name><surname>Boffetta</surname> <given-names>P.</given-names></name> <name><surname>Ioannidis</surname> <given-names>J. P.</given-names></name> <name><surname>Spatari</surname> <given-names>G.</given-names></name> <name><surname>Violante</surname> <given-names>F. S.</given-names></name></person-group> (<year>2021</year>). <article-title>Reporting only relative effect measures was potentially misleading: Some good practices for improving the soundness of epidemiological results.</article-title> <source><italic>J. Clin. Epidemiol.</italic></source> <volume>137</volume> <fpage>195</fpage>&#x2013;<lpage>199</lpage>. <pub-id pub-id-type="doi">10.1016/j.jclinepi.2021.04.006</pub-id> <pub-id pub-id-type="pmid">33894329</pub-id></mixed-citation></ref>
<ref id="B33"><mixed-citation publication-type="software"><collab>R Core Team</collab> (<year>2023</year>). <source><italic>R: A language and environment for statistical computing.</italic></source> <publisher-loc>Vienna</publisher-loc>: <publisher-name>R Foundation for Statistical Computing</publisher-name>.</mixed-citation></ref>
<ref id="B34"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Renkl</surname> <given-names>A.</given-names></name></person-group> (<year>2022</year>). <article-title>Meta-analyses as a privileged information source for informing teachers&#x2019; practice?</article-title> <source><italic>Zeitschrift P&#x00E4;dagogische Psychol.</italic></source> <volume>36</volume> <fpage>217</fpage>&#x2013;<lpage>231</lpage>. <pub-id pub-id-type="doi">10.1024/1010-0652/a000345</pub-id></mixed-citation></ref>
<ref id="B35"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Rochnia</surname> <given-names>M.</given-names></name> <name><surname>Radisch</surname> <given-names>F.</given-names></name> <name><surname>Kastens</surname> <given-names>C.</given-names></name></person-group> (<year>2023</year>). <article-title>Theory application in school and meaning-oriented learning opportunities at university&#x2014;resources for teaching quality.</article-title> <source><italic>Educ. Sci.</italic></source> <volume>13</volume>:<fpage>381</fpage>. <pub-id pub-id-type="doi">10.3390/educsci13040381</pub-id></mixed-citation></ref>
<ref id="B36"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Schmidt</surname> <given-names>K.</given-names></name> <name><surname>Edelsbrunner</surname> <given-names>P. A.</given-names></name> <name><surname>Rosman</surname> <given-names>T.</given-names></name> <name><surname>Cramer</surname> <given-names>C.</given-names></name> <name><surname>Merk</surname> <given-names>S.</given-names></name></person-group> (<year>2023</year>). <article-title>When perceived informativity is not enough. How teachers perceive and interpret statistical results of educational research.</article-title> <source><italic>Teach. Teach. Educ.</italic></source> <volume>130</volume>:<fpage>104134</fpage>. <pub-id pub-id-type="doi">10.1016/j.tate.2023.104134</pub-id></mixed-citation></ref>
<ref id="B37"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Seidel</surname> <given-names>T.</given-names></name> <name><surname>Mok</surname> <given-names>S. Y.</given-names></name> <name><surname>Hetmanek</surname> <given-names>A.</given-names></name> <name><surname>Knogler</surname> <given-names>M.</given-names></name></person-group> (<year>2017</year>). <article-title>Meta-analyses on teaching effectiveness and their contribution to the realization of a Clearing House Unterricht for teacher education.</article-title> <source><italic>Zeitschrift Bildungsforschung</italic></source> <volume>7</volume> <fpage>311</fpage>&#x2013;<lpage>325</lpage>. <pub-id pub-id-type="doi">10.1007/s35834-017-0191-6</pub-id></mixed-citation></ref>
<ref id="B38"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Slavin</surname> <given-names>R. E.</given-names></name></person-group> (<year>2020</year>). <article-title>How evidence-based reform will transform research and practice in education.</article-title> <source><italic>Educ. Psychol.</italic></source> <volume>55</volume> <fpage>21</fpage>&#x2013;<lpage>31</lpage>. <pub-id pub-id-type="doi">10.1080/00461520.2019.1611432</pub-id></mixed-citation></ref>
<ref id="B39"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Trevena</surname> <given-names>L. J.</given-names></name> <name><surname>Zikmund-Fisher</surname> <given-names>B. J.</given-names></name> <name><surname>Edwards</surname> <given-names>A.</given-names></name> <name><surname>Gaissmaier</surname> <given-names>W.</given-names></name> <name><surname>Galesic</surname> <given-names>M.</given-names></name> <name><surname>Han</surname> <given-names>P. K.</given-names></name><etal/></person-group> (<year>2013</year>). <article-title>Presenting quantitative information about decision outcomes: A risk communication primer for patient decision aid developers.</article-title> <source><italic>BMC Med. Inform. Decis. Making</italic></source> <volume>13</volume>:<fpage>S7</fpage>. <pub-id pub-id-type="doi">10.1186/1472-6947-13-S2-S7</pub-id> <pub-id pub-id-type="pmid">24625237</pub-id></mixed-citation></ref>
<ref id="B40"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Visscher</surname> <given-names>A. J.</given-names></name> <name><surname>Coe</surname> <given-names>R.</given-names></name></person-group> (<year>2003</year>). <article-title>School performance feedback systems: Conceptualisation, analysis, and reflection.</article-title> <source><italic>Sch. Effect. Sch. Improvem.</italic></source> <volume>14</volume> <fpage>321</fpage>&#x2013;<lpage>349</lpage>. <pub-id pub-id-type="doi">10.1076/sesi.14.3.321.15842</pub-id> <pub-id pub-id-type="pmid">37995447</pub-id></mixed-citation></ref>
<ref id="B41"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zeeb</surname> <given-names>H.</given-names></name> <name><surname>Voss</surname> <given-names>T.</given-names></name></person-group> (<year>2025</year>). <article-title>Fostering preservice teachers&#x2019; research-related beliefs and motivation with growth mindset and utility value interventions.</article-title> <source><italic>Motivat. Sci.</italic></source> <volume>11</volume> <fpage>84</fpage>&#x2013;<lpage>100</lpage>. <pub-id pub-id-type="doi">10.1037/mot0000352</pub-id></mixed-citation></ref>
</ref-list>
<fn-group>
<fn id="n1" fn-type="custom" custom-type="edited-by"><p>Edited by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/631052/overview">Michael Grosche</ext-link>, University of Wuppertal, Germany</p></fn>
<fn id="n2" fn-type="custom" custom-type="reviewed-by"><p>Reviewed by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1244194/overview">Michael Rochnia</ext-link>, University of Wuppertal, Germany</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2972751/overview">Nina Jude</ext-link>, Heidelberg University, Germany</p></fn>
</fn-group>
</back>
</article>