<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" article-type="research-article" dtd-version="1.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Educ.</journal-id>
<journal-title-group>
<journal-title>Frontiers in Education</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Educ.</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">2504-284X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/feduc.2025.1738655</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Research</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>The effect of the frequency of use of an intelligent tutoring system on learning gains in mathematics secondary education</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Schaaf</surname>
<given-names>Julius</given-names>
</name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x002A;</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/3155389"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Rolfes</surname>
<given-names>Tobias</given-names>
</name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/762321"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Nagy</surname>
<given-names>Gabriel</given-names>
</name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/628136"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Heinze</surname>
<given-names>Aiso</given-names>
</name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
</contrib>
</contrib-group>
<aff id="aff1"><label>1</label><institution>Institute of Mathematics and Computer Science Education, Faculty of Computer Science and Mathematics, Goethe University</institution>, <city>Frankfurt am Main</city>, <country country="de">Germany</country></aff>
<aff id="aff2"><label>2</label><institution>Educational Measurement and Data Science, Leibniz Institute for Science and Mathematics Education</institution>, <city>Kiel</city>, <country country="de">Germany</country></aff>
<aff id="aff3"><label>3</label><institution>Mathematics Education, Leibniz Institute for Science and Mathematics Education</institution>, <city>Kiel</city>, <country country="de">Germany</country></aff>
<author-notes>
<corresp id="c001"><label>&#x002A;</label>Correspondence: Julius Schaaf, <email xlink:href="mailto:schaaf@math.uni-frankfurt.de">schaaf@math.uni-frankfurt.de</email></corresp>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-01-21">
<day>21</day>
<month>01</month>
<year>2026</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2025</year>
</pub-date>
<volume>10</volume>
<elocation-id>1738655</elocation-id>
<history>
<date date-type="received">
<day>03</day>
<month>11</month>
<year>2025</year>
</date>
<date date-type="rev-recd">
<day>23</day>
<month>12</month>
<year>2025</year>
</date>
<date date-type="accepted">
<day>29</day>
<month>12</month>
<year>2025</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x00A9; 2026 Schaaf, Rolfes, Nagy and Heinze.</copyright-statement>
<copyright-year>2026</copyright-year>
<copyright-holder>Schaaf, Rolfes, Nagy and Heinze</copyright-holder>
<license>
<ali:license_ref start_date="2026-01-21">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<p>Intelligent tutoring systems (ITS) are characterized by their direct and adaptive feedback as well as their capability of assessing the knowledge of students and administering exercises based on those assessments. The state of research regarding the effectiveness of ITS in mathematics is inconclusive. Hence, in this study, we examined the impact of utilizing an ITS on the learning gains in mathematics for students in grades 7 and 8. This longitudinal investigation was conducted with students from 55 classes (940 students) in northern Germany. Mathematics performance as well as relevant covariates were measured at the beginning and the end of the school year, and interactions with the ITS were recorded throughout the school year, providing a comprehensive dataset for analysis. Teachers were free in choosing the extent, subjects and methods of ITS usage. In addition, students could also use the ITS on their own. A multilevel analysis revealed that the frequency of ITS usage had no significant effect, neither at the class level nor at the individual level. Our results show that using ITS does not automatically lead to better learning gains. Therefore, future studies need to identify the conditions and practices that contribute to effective ITS use.</p>
</abstract>
<kwd-group>
<kwd>empirical study</kwd>
<kwd>Germany</kwd>
<kwd>ITS</kwd>
<kwd>longitudinal study</kwd>
<kwd>mathematics education</kwd>
</kwd-group>
<funding-group>
<funding-statement>The author(s) declared that financial support was not received for this work and/or its publication.</funding-statement>
</funding-group>
<counts>
<fig-count count="4"/>
<table-count count="2"/>
<equation-count count="1"/>
<ref-count count="49"/>
<page-count count="15"/>
<word-count count="12967"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Digital Education</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="sec1">
<label>1</label>
<title>Introduction</title>
<p>Education is becoming increasingly digital. Although digital media have been used in schools for several decades, school closings during the COVID-19 pandemic have inevitably accelerated this trend considerably. At the same time, education is expected to become more individualized. Digital media could be seen as a tool to implement individualized opportunities to learn, and to improve students&#x2019; learning processes. This is especially true for <italic>intelligent tutoring systems</italic> (ITS). They are digital systems designed to enable effective learning through feedback and individualized task delivery based on students&#x2019; prior performance (<xref ref-type="bibr" rid="ref43">VanLehn, 2006</xref>).</p>
<p>Evidence from several meta-analyses indicates that these systems improve student outcomes across a range of contexts (e.g., <xref ref-type="bibr" rid="ref18">Kulik and Fletcher, 2016</xref>; <xref ref-type="bibr" rid="ref34">Steenbergen-Hu and Cooper, 2014</xref>; <xref ref-type="bibr" rid="ref20">Ma et al., 2014</xref>; <xref ref-type="bibr" rid="ref13">Higgins et al., 2012</xref>). One of the key benefits of ITS lies in their ability to provide adaptive feedback and facilitate self-paced learning, both of which are grounded in well-established and empirically supported educational theories (<xref ref-type="bibr" rid="ref14">Hillmayr et al., 2020</xref>).</p>
<p>While ITS generally lead to positive learning gains, the effectiveness of these systems, particularly in mathematics, is more varied and less definitive. Research by <xref ref-type="bibr" rid="ref18">Kulik and Fletcher (2016)</xref> shows that the impact of ITS in mathematics is smaller than in other subjects, and a study by <xref ref-type="bibr" rid="ref33">Steenbergen-Hu and Cooper (2013)</xref> indicates a minimal effect when using ITS for mathematics. <xref ref-type="bibr" rid="ref20">Ma et al. (2014)</xref>, in contrast, find no significant differences in the effectiveness of ITS between subjects. Despite this mixed evidence, commercial ITS platforms are becoming increasingly widespread in educational settings. In Germany, commercial ITS such as MatheGym and Bettermarks report hundreds of thousands of users (<xref ref-type="bibr" rid="ref22">MatheGym, 2025</xref>; <xref ref-type="bibr" rid="ref5">Bettermarks, 2025</xref>). Considering these trends, we aimed to evaluate whether the use of ITS correlates with improved learning outcomes in mathematics. To assess this, we conducted a longitudinal study with both pre- and posttests, measuring students&#x2019; mathematics performance with a curriculum-based standardized test alongside key factors such as their attitudes toward mathematics and levels of mathematics anxiety. Additionally, aggregated log data from the ITS was collected. We analyzed the data using a Rasch model and applied multilevel statistical modeling to gain deeper insights into the relationship between ITS usage and learning gains.</p>
</sec>
<sec id="sec2">
<label>2</label>
<title>Theoretical framework</title>
<p>In this section, we provide a theoretical rationale for the potential benefits of digital tools in promoting more effective learning. Starting with key educational principles and a definition of ITS, we review the current state of research on the effectiveness of ITS in education. Finally, we summarize existing research on the use of ITS specifically for mathematics learning.</p>
<sec id="sec3">
<label>2.1</label>
<title>Digital tools</title>
<p>According to <xref ref-type="bibr" rid="ref10">Hattie&#x2019;s (2023)</xref> meta-analysis, the use of digital tools has, on average, a moderate effect on learning gains (<italic>d</italic> =&#x202F;0.34). Beyond cognitive outcomes, digital learning environments have also been shown to influence affective dimensions of learning. For instance, prior research suggests that such environments may help reduce students&#x2019; test anxiety (<xref ref-type="bibr" rid="ref1">Akram and Abdelrady, 2023</xref>). In addition, interactive digital tools can enhance students&#x2019; self-efficacy, perceived instructional clarity, and learning expectations (<xref ref-type="bibr" rid="ref2">Akram and Abdelrady, 2025</xref>), indicating that their benefits extend to motivational and experiential aspects of learning. Taken together, these findings suggest that digital media can support students&#x2019; learning in multiple ways. However, the effectiveness of digital tools depends strongly on how they are implemented. Simply introducing technology into the classroom does not automatically lead to improved performance (<xref ref-type="bibr" rid="ref10">Hattie, 2023</xref>). To ensure that digital media contribute meaningfully to learning, their use should be aligned with four well-established educational principles: multimedia learning, self-paced learning, guided activity, and feedback (<xref ref-type="bibr" rid="ref14">Hillmayr et al., 2020</xref>). Multimedia learning is conceptualized in the <italic>cognitive theory of multimedia learning</italic> (CTML; <xref ref-type="bibr" rid="ref23">Mayer, 2014</xref>). This theory asserts three underlying assumptions. First, humans process information via two separate channels: the visual pictorial and the auditory verbal channel. Second, both channels only have limited, but separate bandwidths, meaning the speed at which information can be processed is limited. Third, currently presented content must be engaged with and processed actively. 
Active engagement with content is defined via active learning &#x201C;which entails carrying out a coordinated set of cognitive processes during learning (i.e., active processing assumption)&#x201D; (<xref ref-type="bibr" rid="ref23">Mayer, 2014</xref>, p. 43). The first two assumptions explain the benefit of using multimedia in education, since more information can be processed if both channels are used without causing cognitive overload. The third assumption suggests that many digital tools can facilitate learning because they encourage students to actively engage with the content through interactive learning environments.</p>
<p>Furthermore, self-paced learning is considered to be an important part of effective learning because it enables students to progress through topics at their own pace (<xref ref-type="bibr" rid="ref14">Hillmayr et al., 2020</xref>; <xref ref-type="bibr" rid="ref26">Moreno, 2007</xref>). Students perceive tasks as less difficult if they can control the speed at which they work on them (<xref ref-type="bibr" rid="ref26">Moreno, 2007</xref>). It has been shown that students perform better when self-pacing than when following prespecified pacing, even when controlling for the total study time (<xref ref-type="bibr" rid="ref40">Tullis and Benjamin, 2011</xref>).</p>
<p>Additionally, guided activity holds significant value in interactive digital educational settings. Students find tasks enriched with guided activities less demanding than those without (<xref ref-type="bibr" rid="ref27">Moreno and Mayer, 2007</xref>). <xref ref-type="bibr" rid="ref4">Belland et al. (2017)</xref> concluded in their meta-analysis that scaffolding, a form of guided activity, has a significant positive effect (<italic>g</italic><xref ref-type="fn" rid="fn0001"><sup>1</sup></xref> = 0.46) on learning outcomes in STEM education. Encouraging students to actively engage in selecting, organizing, and integrating new information fosters essential and generative processing, thereby enhancing learning outcomes (<xref ref-type="bibr" rid="ref27">Moreno and Mayer, 2007</xref>).</p>
<p>Finally, feedback is an important part of effective learning processes and has been proven to enhance learning gains (<xref ref-type="bibr" rid="ref11">Hattie and Timperley, 2007</xref>). Feedback can be categorized in different ways. Two common categorizations include differentiation by timing (<italic>immediate</italic> vs. <italic>delayed</italic> feedback), and comprehensiveness (<italic>correct response</italic> vs. <italic>elaborated</italic> feedback). Immediate feedback has been shown to be more beneficial than delayed feedback in applied settings such as classrooms (<italic>d</italic>&#x202F;=&#x202F;0.28; <xref ref-type="bibr" rid="ref19">Kulik and Kulik, 1988</xref>). <xref ref-type="bibr" rid="ref42">Van Der Kleij et al. (2015)</xref> showed that elaborated feedback results in larger effect sizes (<italic>g</italic>&#x202F;=&#x202F;0.49) than feedback only consisting of correct responses (<italic>g</italic>&#x202F;=&#x202F;0.32) or feedback only consisting of information whether the answer given was correct (<italic>g</italic>&#x202F;=&#x202F;0.05).</p>
<p>As interactive learning environments enable students to engage in multimedia, individualized, self-paced learning processes with direct feedback, they are expected to facilitate effective learning and enhance learning outcomes.</p>
</sec>
<sec id="sec4">
<label>2.2</label>
<title>Intelligent tutoring systems</title>
<p>An example of a digital tool designed to incorporate all these features&#x2014;multimedia, self-pacing, guided activity, and elaborated feedback&#x2014;is the ITS. The following section provides an overview of ITS and the current state of research regarding their effectiveness.</p>
<p>Computer tutoring systems in schools have been around for almost 60&#x202F;years (<xref ref-type="bibr" rid="ref3">Atkinson, 1968</xref>). They are generally divided into two generations. The first generation is known as <italic>computer assisted instruction</italic> (CAI) (<xref ref-type="bibr" rid="ref18">Kulik and Fletcher, 2016</xref>; <xref ref-type="bibr" rid="ref44">VanLehn, 2011</xref>), while systems from the second generation are usually called ITS (<xref ref-type="bibr" rid="ref44">VanLehn, 2011</xref>). The main difference between CAI and ITS lies in the level of adaptivity and the types of scaffolding provided. While there are various definitions of ITS (e.g., <xref ref-type="bibr" rid="ref20">Ma et al., 2014</xref>; <xref ref-type="bibr" rid="ref31">Shute and Zapata-Rivera, 2007</xref>), most emphasize that, unlike CAI, ITS not only offer corrective feedback and hints but also allow students to choose their approach. Additionally, ITS provide adaptive feedback and feedback for intermediate steps, known as sub-step feedback (<xref ref-type="bibr" rid="ref43">VanLehn, 2006</xref>, <xref ref-type="bibr" rid="ref44">2011</xref>). VanLehn calls this behavior the <italic>inner loop</italic>. The <italic>outer loop</italic>, in contrast, governs the type and difficulty of tasks a student is presented with within the system. In practice, this means ITS can, for example, assign more challenging tasks to well-performing students while directing underperforming students to review fundamental topics.</p>
<p><xref ref-type="bibr" rid="ref20">Ma et al. (2014)</xref> have summarized the three features an ITS needs in order to conduct these loops properly:</p>
<p>&#x201C;An ITS is a computer system that for each student:</p>
<list list-type="order">
<list-item>
<p>Performs tutoring functions by (a) presenting information to be learned, (b) asking questions or assigning learning tasks, (c) providing feedback or hints, (d) answering questions posed by students, or (e) offering prompts to provoke cognitive, motivational, or metacognitive change.</p>
</list-item>
<list-item>
<p>By computing inferences from student responses constructs either a persistent multidimensional model of the student&#x2019;s psychological states (such as subject matter knowledge, learning strategies, motivations, or emotions) or locates the student&#x2019;s current psychological state in a multidimensional domain model.</p>
</list-item>
<list-item>
<p>Uses the student modeling functions identified in point 2 to adapt one or more of the tutoring functions identified in point 1.&#x201D; (p. 902).</p>
</list-item>
</list>
<sec id="sec5">
<label>2.2.1</label>
<title>Effectiveness of ITS</title>
<p>The effect size reported for ITS in education is substantially influenced by both study design and subject matter. Specifically in mathematics, the body of research remains inconclusive.</p>
<p>A meta-analysis conducted by <xref ref-type="bibr" rid="ref44">VanLehn (2011)</xref> found an effect size of <italic>d</italic>&#x202F;=&#x202F;0.76, which is substantially larger than the effect sizes for CAI (<italic>Glass&#x2019; ES</italic><xref ref-type="fn" rid="fn0002"><sup>2</sup></xref> = 0.35; <xref ref-type="bibr" rid="ref38">Tamim et al., 2011</xref>) or the general usage of technology (<italic>d</italic>&#x202F;=&#x202F;0.34; <xref ref-type="bibr" rid="ref10">Hattie, 2023</xref>, p. 293). Other meta-analyses find similar values. <xref ref-type="bibr" rid="ref18">Kulik and Fletcher (2016)</xref> report a median Glass&#x2019; <italic>ES</italic> of 0.66, <xref ref-type="bibr" rid="ref34">Steenbergen-Hu and Cooper (2014)</xref> report values of Hedges&#x2019; <italic>g</italic> between <italic>g</italic>&#x202F;=&#x202F;0.35 and 0.37. <xref ref-type="bibr" rid="ref20">Ma et al. (2014)</xref> report a mean effect size of <italic>g</italic>&#x202F;=&#x202F;0.41.</p>
<p>The effectiveness of ITS varies across studies, influenced by multiple factors such as teacher proficiency, control group composition, instructional methods, assessment types, study duration, and subject domain. These factors will be discussed in more detail in the following.</p>
<p>The teacher&#x2019;s proficiency with the ITS plays a significant role. <xref ref-type="bibr" rid="ref16">Koedinger and Anderson (1993)</xref> reported substantial differences in the effectiveness of the ITS called &#x201C;Angle&#x201D; based on how much experience the teachers had in using this specific ITS. They reported an effect size of Glass&#x2019; <italic>ES</italic>&#x202F;=&#x202F;0.96 for experts and &#x2212;0.23 for novices. This indicates that the unfavorable use of digital tools by teachers can even impede students&#x2019; understanding of the topic. One possible explanation is how the ITS is used. The expert employed Angle as a supplementary resource, whereas the novice relied on it as the primary instructional method. Leaving students working with the ITS for extended periods of time during lessons could result in reduced student-teacher interactions, leading students to feel isolated and consequently less motivated. Furthermore, even if ITS are used as a supplement (e.g., for homework) they could have adverse effects if not properly implemented. <xref ref-type="bibr" rid="ref10">Hattie (2023)</xref> highlights the importance of integrating homework into the school&#x2019;s curriculum for it to be effective. If teachers stop grading or discussing homework because the ITS provides feedback, students may perceive their work as undervalued.</p>
<p>In addition to adequate implementation, the measured effect size depends on the nature of the control group (<xref ref-type="bibr" rid="ref20">Ma et al., 2014</xref>). Specifically, when comparing the use of ITS to non-ITS computer-based instruction, the average effect size was <italic>g</italic>&#x202F;=&#x202F;0.57. In contrast, when ITS use was compared to small group instruction without digital tools, the average effect size was <italic>g</italic>&#x202F;=&#x202F;&#x2212;0.11. So, while ITS seem to be more effective than CAI, they seem to be less effective than human tutoring in small groups. In addition, ITS was more effective as a supplementary tool than as the primary method of instruction (see <xref ref-type="bibr" rid="ref20">Ma et al., 2014</xref>, p. 909).</p>
<p>Additionally, <xref ref-type="bibr" rid="ref18">Kulik and Fletcher (2016)</xref> report a difference based on the type of test used to measure student achievement: Evaluations of the cognitive tutor, a prominent American ITS, consistently showed a positive significant effect (<italic>Glass&#x2019; ES</italic>&#x202F;=&#x202F;0.73) when evaluated with tests that were developed to measure the type of tasks focused on in the program. However, when using standardized tests, no significant effect was found (<italic>ES</italic>&#x202F;=&#x202F;0.13<xref ref-type="fn" rid="fn0003"><sup>3</sup></xref>). Evaluations using a combination of both types of tests found an average effect of <italic>ES</italic>&#x202F;=&#x202F;0.45.</p>
<p>Furthermore, shorter interventions also seem to increase the effect size (<xref ref-type="bibr" rid="ref18">Kulik and Fletcher, 2016</xref>). Therefore, studies with long measurement periods have small expected effect sizes. Nevertheless, studies measuring the effect over an entire school year provide a realistic estimate of the learning gains expected from long-term use.</p>
<p>The subject domain also plays a crucial role. Although the positive effect of ITS is empirically well-supported in several domains, the situation is less clear for ITS in mathematics. <xref ref-type="bibr" rid="ref18">Kulik and Fletcher (2016)</xref> demonstrated that the use of ITS in mathematics has a significantly smaller effect size than in other subjects. However, in this meta-analysis, studies focused on mathematics generally also had larger sample sizes and employed standardized tests, which are typically associated with smaller effect sizes. <xref ref-type="bibr" rid="ref33">Steenbergen-Hu and Cooper (2013)</xref> conducted a meta-analysis of ITS used exclusively in mathematics and found no significant learning gains for the use of ITS in mathematics (<italic>g</italic>&#x202F;=&#x202F;0.01 to <italic>g</italic>&#x202F;=&#x202F;0.09). <xref ref-type="bibr" rid="ref20">Ma et al. (2014)</xref> observed that ITS used in humanities and social sciences had significantly higher effect sizes than those used in mathematics and science, while <xref ref-type="bibr" rid="ref13">Higgins et al. (2012)</xref>, on average, reported a greater effect for ITS in mathematics and science than other fields such as literacy. In summary, the body of research on the effectiveness of ITS for mathematics learning is inconclusive.</p>
</sec>
</sec>
</sec>
<sec id="sec6">
<label>3</label>
<title>Current study</title>
<p>Despite the uncertainties regarding empirical effectiveness mentioned in the previous section, the use of ITS in mathematics instruction is widespread. In the US, cognitive tutor, a popular ITS, and its successor, MATHia, are used by hundreds of thousands of students (<xref ref-type="bibr" rid="ref41">U.S. Department of Education, Institute of Education Sciences, What Works Clearinghouse, 2009</xref>), even though learning with the ITS does not seem to increase performance in standardized tests, as mentioned earlier. In Germany, MatheGym and Bettermarks are two very prominent ITS. MatheGym has approximately 150,000 users (as of April 2024) (<xref ref-type="bibr" rid="ref22">MatheGym, 2025</xref>). Bettermarks is used by 500,000 students according to self-reported numbers by the company (<xref ref-type="bibr" rid="ref5">Bettermarks, 2025</xref>). In addition, at least 7 out of 16 federal states in Germany have state-wide licenses for Bettermarks. Therefore, it is a relevant question how effective these systems are in practice. Due to its broad use in Germany, we selected the Bettermarks system to investigate the effectiveness of ITS in supporting mathematics learning.</p>
<sec id="sec7">
<label>3.1</label>
<title>Research question</title>
<p>We opted to analyze the effect in a real, ecologically valid setting, where individual teachers had the freedom to determine the extent of ITS usage in their respective classes. This design was chosen because it allowed us to investigate the effect sizes of ITS usage in real-world conditions. As mentioned above, the effectiveness of ITS depends on many different factors&#x2014;some of which may differ substantially between experimental and field settings&#x2014;including factors beyond our control. Therefore, we chose to observe the implementation of an ITS in an authentic educational context, and, more specifically, to analyze how the frequency of ITS use is related to learning outcomes.</p>
<p>If the ITS is effective, we would expect students who use it more frequently to show greater learning gains in mathematics performance, even when controlling for covariates relevant to learning. Since the system is designed to support students in mastering curricular content, these learning gains will be assessed based on students&#x2019; performance on standard curricular tasks. Hence, our research question is:</p>
<disp-quote>
<p><italic>RQ</italic>: What is the relationship between the frequency of ITS use and student learning gains in curricular mathematics tasks?</p>
</disp-quote>
<p>As mentioned above, on the one hand, there are reasons to expect a positive effect of the ITS due to the effectiveness of multimedia learning, adaptive and immediate feedback, adaptive tasks, and self-paced learning. On the other hand, reduced direct teacher-student interactions and the resulting reduced social support lead to an expectation of a negative effect, as does the constraint to solve tasks in a specific way dictated by the ITS through its scaffolding. Therefore, it is not clear whether the use of ITS in learning mathematics leads to a higher or lower performance gain.</p>
</sec>
</sec>
<sec sec-type="methods" id="sec8">
<label>4</label>
<title>Methods</title>
<sec id="sec9">
<label>4.1</label>
<title>Design</title>
<p>The study was conducted as a one-year longitudinal study with pre- and posttest. In order to investigate authentic ITS use, the teachers were not given any regulations regarding ITS use. They were free to use the system in their classes as much or as little as they deemed appropriate. Consequently, substantial variance in ITS usage was anticipated. In our design, this natural variability in ITS usage was used to estimate the expected effect of ITS usage on learning gains under common conditions.</p>
<p>At the beginning and end of the school year, a computer-based questionnaire and test were administered. The pretest was conducted between September and November 2021, while the posttest took place in June or July 2022. The exact timing of test administration was determined by the respective teachers, resulting in varying time intervals between pre- and posttests across classes. The average time interval between pre- and posttest was <italic>M</italic>&#x202F;=&#x202F;250&#x202F;days (<italic>SD</italic>&#x202F;=&#x202F;21). Mathematics performance was measured in both tests. Given the correlational nature of the study design, a comprehensive set of student-related variables was recorded in order to control for individual differences and thus enable the estimation of the effect of ITS usage (see Section 4.4, Instruments).</p>
<p>Furthermore, throughout the school year, the activity of individual students in the ITS was automatically recorded in the form of aggregated data. Students completed so-called <italic>worksheets</italic> within the ITS. Each worksheet comprised a set of similar tasks on a common topic. Each time a worksheet was completed by a student, one datapoint was collected, containing three pieces of information: The number of tasks on the worksheet, the number of tasks solved correctly, and the start time of the work on the worksheet. For technical reasons, more detailed data could not be collected.</p>
</sec>
<sec id="sec10">
<label>4.2</label>
<title>Sample</title>
<p>The study was conducted accompanying the rollout of a test license for the used ITS in three counties in the German federal state Schleswig-Holstein. All schools&#x2014;and by extension, all students&#x2014;in those counties had free access to the ITS, regardless of their participation in the study. The study was conducted with 7th and 8th grade students from 82 classes across 13 schools. The pretest was completed by 1,673 students, while the posttest was completed by 1,309 students. In total, 1,062 students were successfully matched with both a pretest and a posttest. Of these students, 120 could not be linked to an account in the ITS and were therefore excluded from the longitudinal analysis. Additionally, two more students had to be removed because they were the only participants in their respective classes with parental consent, which would have caused complications during the analysis. Thus, data from 940 students across 57 classes were available for longitudinal analysis. However, data from students with only one available measurement point were still used when feasible (e.g., scaling the Rasch model).</p>
<p>The average age of the sample used for longitudinal analysis at the pretest was 12.8&#x202F;years, with a standard deviation of 0.7&#x202F;years. Of these, 489 were female, 445 were male, and 8 did not specify their gender.</p>
</sec>
<sec id="sec11">
<label>4.3</label>
<title>ITS Bettermarks</title>
<p>The ITS Bettermarks is a digital learning platform designed to align with the state-level mathematics curricula of German secondary education and can be used both in the classroom and as a supplementary tool (<xref ref-type="bibr" rid="ref5">Bettermarks, 2025</xref>). The ITS is intended to support the assessment and development of students&#x2019; competencies within the curricular frameworks governing secondary mathematics education in Germany (<xref ref-type="bibr" rid="ref5">Bettermarks, 2025</xref>). Therefore, it offers a comprehensive range of tasks covering the prescribed content of secondary education in Germany. The material provided by the ITS for Grades 7 and 8 shows substantial correspondence with the TIMSS framework (<xref ref-type="bibr" rid="ref28">Mullis et al., 2021</xref>), aligning largely with the content domains Number, Algebra, and Geometry and Measurement, as well as with the cognitive domains Knowing and Reasoning.</p>
<p>Access to the platform is available to both students and teachers via a browser or an app. The content within the ITS is organized in a folder structure (<xref ref-type="fig" rid="fig1">Figure 1</xref>). Each topic has its own <italic>book</italic>, comprising an introduction, several <italic>chapters</italic>, a test to assess understanding of the covered content, and a review section for students to revisit the topic and assess their proficiency. Each chapter consists of various <italic>worksheets</italic>, each containing multiple <italic>tasks</italic>.</p>
<fig position="float" id="fig1">
<label>Figure 1</label>
<caption>
<p>Exemplary content structure of bettermarks.</p>
</caption>
<graphic xlink:href="feduc-10-1738655-g001.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Hierarchical diagram depicting layers of educational materials. Top layer labeled "book" includes "Addition and Subtraction of Decimal Numbers." Nested below, "chapter" with "Addition of Decimal Numbers." Further nested, "worksheet" with "Addition of Decimal Numbers - without carry." At bottom, "task" contains "Add the following: 9 + 9.694."</alt-text>
</graphic>
</fig>
<p>Teachers can assign digital worksheets, known as <italic>to do&#x2019;s</italic>, to their students, allowing them to give digital homework, for example. Teachers can view the completion rates of individual students as well as their inputs. Students can also independently select tasks from all books and work on them autonomously.</p>
<p>Most tasks in the worksheets focus on promoting procedural knowledge, but there are also tasks that promote conceptual knowledge (e.g., <xref ref-type="bibr" rid="ref12">Hiebert and Lefevre, 1986</xref>). Tasks often involve performing common standardized procedures, such as adding fractions, determining a function equation, or drawing and calculating the area of a triangle. The ITS contains few tasks that encourage constructive reasoning. As a result, input formats often consist of numbers or fractions. Sometimes more interactive inputs are required, such as digitally drawing a function graph using a toolbox or iconically representing a fraction by dividing and coloring an area.</p>
<p>More complex tasks are broken down into substeps. For example, the breakdown of adding two unlike fractions is as follows:</p>
<list list-type="alpha-lower">
<list-item>
<p>Bring both fractions to the lowest common denominator.</p>
</list-item>
<list-item>
<p>Add the two fractions.</p>
</list-item>
<list-item>
<p>Simplify and convert the result into a mixed number.</p>
</list-item>
</list>
<p>These substeps are worked on sequentially. The ITS provides direct feedback for each input. Some of the feedback is corrective (&#x201C;Sorry, that&#x2019;s not right.&#x201D;), while other feedback is adaptive (&#x201C;You have found a common denominator, but the lowest common denominator is smaller.&#x201D;). If an incorrect input is made twice, this substep is marked as incorrect, and the solution to this substep is shown as a worked example. These features correspond to the inner loop according to <xref ref-type="bibr" rid="ref43">VanLehn (2006</xref>, <xref ref-type="bibr" rid="ref44">2011)</xref>. Additionally, the ITS recognizes when students consistently make specific types of errors. For instance, if a student demonstrates proficiency in adding like fractions but has difficulty with unlike fractions, the system detects this specific knowledge gap. It then assigns practice tasks that directly target the identified area of difficulty. These tasks are presented individually within the domain labeled <italic>knowledge gaps</italic>, enabling focused and tailored practice. This mechanism corresponds to the outer loop according to VanLehn. The structure of this system also qualifies Bettermarks as an ITS, in line with the definition of <xref ref-type="bibr" rid="ref20">Ma et al. (2014)</xref> mentioned in section 2.2.</p>
<p>There were no guidelines for the teachers on how the ITS should be used. Teachers and students could decide freely on the extent of ITS usage and when it occurred (whether during or outside school hours). There were also no guidelines regarding the topics or usage format (group or individual work) although the design of the ITS lends itself more to individual use.</p>
</sec>
<sec id="sec12">
<label>4.4</label>
<title>Instruments</title>
<sec id="sec13">
<label>4.4.1</label>
<title>Performance test</title>
<p>The performance test was constructed based on the content areas of the curriculum for grades 7 and 8 in the federal state Schleswig-Holstein (<xref ref-type="bibr" rid="ref24">Ministerium f&#x00FC;r Schule und Berufsbildung des Landes Schleswig-Holstein, 2014</xref>). The aim was to develop a curriculum-valid measure of students&#x2019; mathematical performance. Since the exact sequence in which the curriculum&#x2019;s topics are taught is determined by individual schools, it was not feasible to anchor specific content areas to a particular grade or to develop separate tests for grades 7 and 8. Instead, using the curriculum as a basis, a comprehensive test for grades 7 and 8 was developed. Content areas that require more advanced background knowledge, such as logarithms and trigonometric functions, were excluded, as they were unlikely to be taught before grade 9. The resulting test covered 12 content areas. The first four content areas&#x2014;decimal numbers, common fractions, percentages/interest calculations, and variables/expressions&#x2014;were each assessed using six items. The subsequent six areas&#x2014;negative numbers, area calculations for polygons, linear equations, representations of functions, proportional relationships/rule of three, and linear functions&#x2014;were each represented by five items. The final two areas&#x2014;linear equation systems and inverse proportional relationships&#x2014;were each covered by three items. To construct an appropriate item pool, curriculum-valid items from the Trends in International Mathematics and Science Study (TIMSS), the National Assessment of Educational Progress (NAEP) for 8th grade and the Program for International Student Assessment (PISA) study were selected. These items have been used in several studies over decades and have been optimized based on theoretical concepts and empirical evaluations (<xref ref-type="bibr" rid="ref21">Martin and Kelly, 1996</xref>). 
Therefore, they provide a recognized and internally valid means of measuring mathematical performance and possess good test-theoretical properties. Among these curriculum-valid items with good test-theoretical properties, items that primarily assessed procedural knowledge (<xref ref-type="bibr" rid="ref12">Hiebert and Lefevre, 1986</xref>) were selected. This decision was motivated by the intention to align the focus of the performance test with the types of tasks provided by the ITS, which primarily support the practice and consolidation of procedural knowledge.</p>
<p>To cover all content areas appropriately, four additional items had to be designed. Thus, the item pool consisted of 60 items in total (48 TIMSS items, seven NAEP items, one PISA item, and four self-constructed items). Of these, 55 items were single-choice items, and five were constructed-response items.</p>
<p>The items were administered in a multi-matrix design (Youden design), with the 60 items divided into six clusters of 10 items each. Each cluster contained items from 10 of the 12 content areas and each performance test consisted of two clusters, i.e., every student answered 20 items.</p>
</sec>
<sec id="sec14">
<label>4.4.2</label>
<title>Noncognitive measurements</title>
<p>Besides the mathematics performance as a cognitive measure, several noncognitive measures were administered. The influence of constructs such as <italic>subject-specific self-concept</italic>, <italic>mathematics anxiety</italic>, <italic>cost-utility</italic>, and <italic>work ethics</italic> on student mathematics performance is empirically well-established (<xref ref-type="bibr" rid="ref10">Hattie, 2023</xref>). Therefore, these constructs were captured in the questionnaire to be used as covariates in the analysis. We also decided to capture the constructs of <italic>subject-specific self-concept</italic> and <italic>subject interest</italic> in the subjects of German and English to use this information in the background model (see Section 4.6.4). Additionally, to capture the students&#x2019; experience using the ITS, they were asked to assess the ITS in terms of <italic>usefulness</italic>, <italic>demand</italic>, <italic>affective and cognitive engagement</italic>. The constructs were assessed using Likert scales, with each Likert scale comprising three to nine items on a four-point scale (<italic>strongly disagree</italic>, <italic>disagree</italic>, <italic>agree</italic>, <italic>strongly agree</italic>). The internal consistency of the administered noncognitive scales (Cronbach&#x2019;s <italic>&#x03B1;</italic>) ranged from 0.79 to 0.94.</p>
<p>In addition, the questionnaire asked for some personal data, including the cultural capital of the household (books-at-home-question), as this also has an empirically proven influence on student academic performance (<xref ref-type="bibr" rid="ref29">OECD, 2023</xref>), as well as age, gender, school type, and semester grades. Finally, the students were asked to evaluate the ITS by grading its perceived usefulness on the standard German grading scale, ranging from 1 (<italic>very good</italic>) to 6 (<italic>insufficient</italic>).</p>
</sec>
</sec>
<sec id="sec15">
<label>4.5</label>
<title>Operationalization of ITS usage</title>
<p>To investigate the impact of the frequency of ITS usage on learning progress, we first needed to operationalize what &#x201C;usage&#x201D; means in a way that fits the structure of the available data. The log data contained one entry for each time a student opened a worksheet, including the start date and time, but they did not contain any information on when students stopped working. Because of this limitation, we were not able to calculate actual time-on-task and instead focused on how often students initiated work on worksheets. To capture different facets of students&#x2019; engagement with the ITS, we developed five plausible operationalizations of usage frequency.</p>
<p>The five operationalizations are defined as follows:</p>
<list list-type="simple">
<list-item>
<p>1 Number of worksheets opened.</p>
</list-item>
</list>
<p>This operationalization counts every instance of a student opening a worksheet during the measurement period, including repeated interactions with the same worksheet. It is conceptually close to common indicators of engagement, such as time spent using the system, as it reflects the overall volume of interactions. However, it may overrepresent short, dense bursts of activity (e.g., repeated re-openings), which can inflate usage counts without necessarily reflecting meaningful learning activity.</p>
<list list-type="simple">
<list-item>
<p>2 Number of unique worksheets opened.</p>
</list-item>
</list>
<p>Here, each worksheet is counted only once, regardless of how often it is revisited. This measure emphasizes the breadth of content students engaged with and reduces inflation due to repeated openings of the same worksheet. At the same time, it ignores repetition and revisiting, which may be pedagogically relevant, and is less closely related to time-based measures of system use.</p>
<list list-type="simple">
<list-item>
<p>3 Number of unique worksheets opened per hour.</p>
</list-item>
</list>
<p>In this approach, each worksheet is counted at most once per hour. This reduces the influence of very dense activity within short time spans while still allowing repeated engagement with the same worksheet to be reflected over time. A limitation is that the choice of an hourly window is somewhat arbitrary, and the measure may overestimate the usefulness of engagement when the same worksheets are worked on repeatedly.</p>
<list list-type="simple">
<list-item>
<p>4 Number of unique worksheets opened per day.</p>
</list-item>
</list>
<p>This operationalization counts each worksheet at most once per day. It further reduces the impact of short-term bursts of activity and aligns well with daily study routines and typical homework structures. However, it masks variation in within-day intensity and duration of work and treats brief and extensive engagement on the same day equivalently, resulting in a weaker correspondence with time-based engagement measures.</p>
<list list-type="simple">
<list-item>
<p>5 Days on which the ITS was used.</p>
</list-item>
</list>
<p>This measure captures the number of days on which at least one worksheet was opened. It emphasizes the regularity of engagement over time and is closely related to concepts of spaced practice. At the same time, it provides no information about the amount or depth of work completed on a given day and does not distinguish between minimal and extensive daily usage.</p>
<p>Because the period between pre- and posttest differed across classes, all operationalizations were adjusted by dividing the absolute count <inline-formula>
<mml:math id="M1">
<mml:mi>n</mml:mi>
</mml:math>
</inline-formula> by the number of weeks <inline-formula>
<mml:math id="M2">
<mml:mi>t</mml:mi>
</mml:math>
</inline-formula> between the two measurements. This yields the standardized usage score.</p>
<disp-formula id="E1">
<mml:math id="M3">
<mml:mtext>ITSU</mml:mtext>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mi>n</mml:mi>
<mml:mi>t</mml:mi>
</mml:mfrac>
</mml:math>
</disp-formula>
<p>We consider all five operationalizations valid, as each captures a different facet of how students used the ITS. Since none is theoretically superior, we selected the operationalization with the strongest raw effect size for the main analyses. Raw effect size refers to the association between usage and learning gains without adjusting for covariates. Section 5.1 reports the comparison of these raw effects and identifies which operationalization was chosen for further analysis.</p>
</sec>
<sec id="sec16">
<label>4.6</label>
<title>Data analysis</title>
<sec id="sec17">
<label>4.6.1</label>
<title>Handling of missing data</title>
<p>Some of the questionnaires and tests contained missing data. Depending on the type of data, we took different approaches. A missing answer in the performance test was scored with zero points, since this meant that a student was either unsure of the answer and skipped the question or that the student was too slow to reach this question. Missing data in the covariates, however, was imputed so as not to lose students with only a few missing entries. Since for any given variable, only a small part of the data were missing (&#x003C;5% in most cases), the missing data of the covariates was imputed via the R Package <italic>missForest</italic> (v1.5) (<xref ref-type="bibr" rid="ref35">Stekhoven, 2022</xref>).</p>
</sec>
<sec id="sec18">
<label>4.6.2</label>
<title>Descriptive statistics</title>
<p>The items were dichotomously rated as correct or incorrect. The solution rate in the pretest was 46%, which improved to 54% in the posttest. The data from pre- and posttest were scaled by a Rasch model that allowed us to score the students&#x2019; performance on the logit metric. The average learning gain was approximately 0.39 logit over 250&#x202F;days, which extrapolates to a learning gain of 0.58 logit per schoolyear. Studies on mathematics performance in Germany with similar students found average increases from 0.5 SD (PALMA) (<xref ref-type="bibr" rid="ref45">vom Hofe et al., 2009</xref>) to 0.6&#x2013;0.7 SD per year (<xref ref-type="bibr" rid="ref17">K&#x00F6;ller et al., 2000</xref>), which fit these results very well, assuming a sample standard deviation of one logit.</p>
</sec>
<sec id="sec19">
<label>4.6.3</label>
<title>Quality assessment</title>
<sec id="sec20">
<label>4.6.3.1</label>
<title>DIF analysis</title>
<p>To assess the reliability of the performance tests, a graphical DIF analysis was conducted between the pretest and posttest groups (<xref ref-type="fig" rid="fig2">Figure 2</xref>).</p>
<fig position="float" id="fig2">
<label>Figure 2</label>
<caption>
<p>Graphical DIF analysis comparing pretest and posttest item difficulties.</p>
</caption>
<graphic xlink:href="feduc-10-1738655-g002.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Scatter plot showing the relationship between item difficulties in pretest and posttest in logit. Data points cluster around a reference line with areas of negligible and small to moderate differential item functioning (DIF) highlighted. A legend explains symbols and shading.</alt-text>
</graphic>
</fig>
<p>The item difficulties were estimated separately for both tests and the resulting difference in difficulty for each item was compared. Ideally, the increase in mathematics performance should result in a uniform decrease in item difficulty for all items (better performing students solve items more frequently and therefore, by definition, the item difficulty decreases).</p>
<p>If the difficulty change of an item significantly deviates from this trend, it is referred to as <italic>differential item functioning</italic> (DIF) (<xref ref-type="bibr" rid="ref47">Zumbo, 1999</xref>), as the item&#x2019;s function (i.e., what is measured by that item) is different in both groups.</p>
<p><xref ref-type="bibr" rid="ref39">Trist&#x00E1;n (2006)</xref> considers an absolute DIF of less than 0.43 logits as negligible, between 0.43 and 0.64 logits as small to moderate, and greater than 0.64 logits as moderate to large. According to these rules of thumb, four items show small to moderate DIF, and one item shows moderate to large DIF. These items were not excluded directly, but further quality assessment was conducted in the form of infit and item-total correlations.</p>
</sec>
<sec id="sec21">
<label>4.6.3.2</label>
<title>Item quality</title>
<p>To assess the quality of the Rasch model, item-total correlations and infits were computed. The item-total correlation is a classical measure to assess the discrimination of an item. The item-total correlation for the utilized items ranged between 0.24 and 0.59, indicating an acceptable strength of discrimination.</p>
<p>The weighted mean square (infit) is a standard measure for assessing the fit of a Rasch model. In an item response theory (IRT) model, a specific level of variance is expected for each item. An infit value of 1.0 indicates the presence of the expected level of variance. A value above 1.0 suggests more variance (thus less discrimination) than expected, while a value below 1.0 suggests less variance (thus more discrimination) than expected. Acceptable values range from 0.8 to 1.2 (<xref ref-type="bibr" rid="ref8">Harks et al., 2014</xref>). The infit values ranged between 0.87 and 1.15, indicating a good fit to the model. As all items demonstrated satisfactory infit statistics, they were retained for further analysis&#x2014;even the one item that exhibited moderate to large differential item functioning (DIF).</p>
</sec>
</sec>
<sec id="sec22">
<label>4.6.4</label>
<title>Model structure and plausible values</title>
<p>To use all available information, item difficulties and person scores were estimated in a two-step-process using virtual persons (<xref ref-type="bibr" rid="ref37">Tabachnick and Fidell, 2019</xref>). In the first step, all 2,982 tests were treated as if submitted during a single measurement point instead of being submitted from pre- and posttest. The item difficulties were then estimated with a unidimensional Rasch model according to <xref ref-type="bibr" rid="ref9">Hartig and K&#x00FC;hnbach (2006)</xref>, based on the result of all of these (partly virtual) students. In the second step, these item difficulties were fixed and used to estimate the person abilities of the 940 students in the longitudinal sample. The estimation of the item difficulties was conducted with an EAP reliability of 0.83 and a WLE reliability of 0.76, indicating good precision (<xref ref-type="bibr" rid="ref15">Kline, 2000</xref>).</p>
<p>To generate plausible values (PVs), an extensive background model was applied, which accounted for the hierarchical structure of the dataset. When analyzing samples as a whole with Rasch models, PVs are often preferred over WLEs due to their superior performance (<xref ref-type="bibr" rid="ref25">Mislevy et al., 1992</xref>). Because of the uncertainty inherent to IRT, each student is assigned both an estimated ability score as well as the variance of that score as a measure of uncertainty. The estimated score and the corresponding variance together with the variables in the background model are used to create a probability distribution of possible person ability scores for each student (posterior distribution). PVs are random draws of the ability score based on this posterior distribution.</p>
<p>The PVs for this analysis were generated according to <xref ref-type="bibr" rid="ref9">Hartig and K&#x00FC;hnbach (2006)</xref>, using a two-dimensional Rasch model with pre- and posttest forming the two dimensions. Fifty PVs were drawn for each pre- and posttest of the 940 students in our longitudinal study.</p>
<p>To model the structure of the data (students within classes within schools) adequately, we employed hierarchical linear modelling (HLM). HLM represents a method to analyze effects both on an individual level and on a class level while reflecting and preserving the inherent structure of the dataset (<xref ref-type="bibr" rid="ref7">Field et al., 2012</xref>). In this case, level 1 represents individual students, while level 2 represents the different classes. Since the classes were from 13 different schools, adding a third level to reflect this structure in the model would have been possible. We decided against a third level partly because there was very little variance at the school level (ca. 5%) and partly because there were not enough schools for reliable parameter estimation.</p>
</sec>
</sec>
</sec>
<sec sec-type="results" id="sec23">
<label>5</label>
<title>Results</title>
<p>We begin by presenting the descriptive analysis of ITS usage, followed by the results from students&#x2019; perspective on the ITS. The model development and the resulting multilevel models will be reported last.</p>
<sec id="sec24">
<label>5.1</label>
<title>ITS usage</title>
<p>Overall, the students completed 61,051 worksheets, indicating that each student on average completed about 2 worksheets per school week (<italic>SD</italic>&#x202F;=&#x202F;2.4). The average solution rate was 34%, meaning roughly one in three given answers was correct. Knowledge gaps (see Section 4.3) comprised 1.5% of the opened worksheets. If students worked on worksheets multiple times, each instance was counted separately. Approximately one-third of ITS usage occurred during school hours (Monday to Friday, 8&#x202F;a.m. to 2&#x202F;p.m.), while the remaining two-thirds occurred at home.</p>
<p>The operationalizations for ITS usage discussed in Section 4.5 were compared in terms of their raw effect size on both the class level (class mean) and the individual level (centered at the respective class means). The standardized effect sizes are shown in <xref ref-type="table" rid="tab1">Table 1</xref>.</p>
<table-wrap position="float" id="tab1">
<label>Table 1</label>
<caption>
<p>Raw effect sizes for different operationalizations of ITS usage.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top" rowspan="2">Operationalization</th>
<th align="center" valign="top" colspan="2">Raw size (without pretest)</th>
<th align="center" valign="top" colspan="2">Raw size (with pretest)</th>
</tr>
<tr>
<th align="center" valign="top">Class level</th>
<th align="center" valign="top">Individual</th>
<th align="center" valign="top">Class level</th>
<th align="center" valign="top">Individual</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="top">1 (All worksheets)</td>
<td align="center" valign="top">0.03</td>
<td align="char" valign="top" char=".">0.06</td>
<td align="char" valign="top" char=".">0.00</td>
<td align="char" valign="top" char=".">0.03</td>
</tr>
<tr>
<td align="left" valign="top">2 (Unique worksheets)</td>
<td align="center" valign="top">&#x2212;0.01</td>
<td align="char" valign="top" char=".">0.22&#x002A;</td>
<td align="char" valign="top" char=".">&#x2212;0.02</td>
<td align="char" valign="top" char=".">0.09&#x002A;</td>
</tr>
<tr>
<td align="left" valign="top">3 (Unique worksheets per hour)</td>
<td align="center" valign="top">&#x2212;0.02-</td>
<td align="char" valign="top" char=".">0.19&#x002A;</td>
<td align="char" valign="top" char=".">&#x2212;0.02</td>
<td align="char" valign="top" char=".">0.08&#x002A;</td>
</tr>
<tr>
<td align="left" valign="top">4 (Unique worksheets per day)</td>
<td align="center" valign="top">&#x2212;0.02</td>
<td align="char" valign="top" char=".">0.15&#x002A;</td>
<td align="char" valign="top" char=".">&#x2212;0.02</td>
<td align="char" valign="top" char=".">0.07</td>
</tr>
<tr>
<td align="left" valign="top">5 (Days used)</td>
<td align="center" valign="top">&#x2212;0.01</td>
<td align="char" valign="top" char=".">0.06</td>
<td align="char" valign="top" char=".">0.00</td>
<td align="char" valign="top" char=".">0.05</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<p><sup>&#x002A;</sup><italic>p</italic>&#x202F;&#x003C;&#x202F;0.05.</p>
</table-wrap-foot>
</table-wrap>
<p>As can be seen in <xref ref-type="table" rid="tab1">Table 1</xref>, operationalization 2 (counting each worksheet only once per student, no matter how often it was completed during the school year) has the strongest significant raw effect size on the posttest of all operationalizations. Therefore, this operationalization was used for subsequent analysis. Under this operationalization, the ITS usage is visualized in the histogram in <xref ref-type="fig" rid="fig3">Figure 3</xref>.</p>
<fig position="float" id="fig3">
<label>Figure 3</label>
<caption>
<p>Distribution of weekly unique worksheet usage in the ITS.</p>
</caption>
<graphic xlink:href="feduc-10-1738655-g003.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Bar chart showing the distribution of the number of worksheets completed by students per week. The horizontal axis represents the number of worksheets, ranging from zero to nine. The vertical axis shows the number of students, with a peak around one worksheet per week and exponentially decreasing as the number increases.</alt-text>
</graphic>
</fig>
<p>The average ITS usage is about 1.3 <italic>unique</italic> worksheets per school week. A total of 469 students completed fewer than one worksheet per school week, 301 completed between one and two, and 170 completed two or more. It can be seen that the ITS usage varied between students and the ITS usage itself is substantial. Therefore, a potential effect of ITS usage on learning gains should show up in the final model.</p>
</sec>
<sec id="sec25">
<label>5.2</label>
<title>Students&#x2019; perspective on the ITS</title>
<p>The items assessing students&#x2019; perspective on working with the ITS&#x2014;specifically in terms of demand, affective and cognitive engagement, and usefulness&#x2014;were averaged into their respective Likert scales.</p>
<p>As shown in <xref ref-type="fig" rid="fig4">Figure 4</xref>, most students found working with the ITS to be relatively low in demand (<italic>M</italic>&#x202F;=&#x202F;1.9, <italic>SD</italic>&#x202F;=&#x202F;0.6). They reported moderate levels of engagement (<italic>M</italic>&#x202F;=&#x202F;2.5, <italic>SD</italic>&#x202F;=&#x202F;0.8), and half of the students somewhat agreed that the ITS was helpful in learning mathematics (<italic>M</italic>&#x202F;=&#x202F;2.8, <italic>SD</italic>&#x202F;=&#x202F;0.7).</p>
<fig position="float" id="fig4">
<label>Figure 4</label>
<caption>
<p>Distribution of student ratings for ITS demand, engagement, and usefulness.</p>
</caption>
<graphic xlink:href="feduc-10-1738655-g004.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Box plot showing students&#x2019; ratings of ITS Demand, Engagement, and Usefulness. The y-axis has levels from Strongly Disagree to Strongly Agree. ITS Demand centers around Somewhat Disagree, Engagement slightly below Somewhat Agree, and Usefulness around Somewhat Agree. The height of the center boxes is around 1 for all boxes.</alt-text>
</graphic>
</fig>
<p>On the German grading scale (ranging from 1, <italic>very good</italic>, to 6, <italic>insufficient</italic>), students rated the perceived usefulness of the ITS with an average score of 2.7 (<italic>SD</italic>&#x202F;=&#x202F;1.0). Overall, most students expressed neutral or mildly positive feelings about using the ITS on both the subjective ratings and the grade.</p>
</sec>
<sec id="sec26">
<label>5.3</label>
<title>Model development and results</title>
<p>As described in the Methods section, a series of multilevel models (<xref ref-type="table" rid="tab2">Table 2</xref>) were used to analyze the data. To build a model with a good fit to the dataset, the initial model was progressively made more complex by successively adding predictors. Predictors were added generally following the approach outlined by <xref ref-type="bibr" rid="ref7">Field et al. (2012)</xref>, if (a) there was empirical evidence which links the predictor to student performance and (b) adding the predictor increased the predictive power of the model. Models were compared based on their Akaike information criterion (AIC) and Bayesian information criterion (BIC) values. Both information criteria are measures of goodness of fit. While the values cannot be interpreted in an absolute way, a lower value indicates a better fit. In model M0, only the hierarchical structure of the data with no predictors was considered. The intraclass correlation (ICC) was 0.53, indicating there was about as much variance on the individual level as on the class level. In models M1 and M2, assessments of prior knowledge (<italic>pretest score</italic> and <italic>marks in mathematics</italic>) were added. In model M3, the noncognitive measures regarding mathematics and students&#x2019; perspective were added. The five different noncognitive measurements regarding mathematics were, as expected, moderately correlated (on average 0.43), so it made sense to only add one of them as a covariate. Out of five different scales, attitude towards mathematics had the strongest effect on the posttest score and was therefore included in model M3. In model M4, structural variables at the student and school level&#x2014;grade and school type&#x2014;were added. School type showed a substantial association with posttest performance, even after controlling for prior achievement and other covariates. 
Grade level, in contrast, was included primarily as a theoretically relevant control variable to account for differences in curricular progression, but did not exhibit an additional effect once prior performance was taken into account. As shown in <xref ref-type="table" rid="tab2">Table 2</xref>, the stepwise addition of predictors from models M1 to M4 was associated with successive improvements in model fit, as indicated by decreasing AIC and BIC values, and with increases in explained variance at both the individual and class level. Including any other predictors, such as gender or self-concept in other subjects, resulted in an increase in both information criteria, AIC and BIC.</p>
<table-wrap position="float" id="tab2">
<label>Table 2</label>
<caption>
<p>Effect sizes and information criteria of different multilevel models.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">Variable</th>
<th align="center" valign="top">Model 1</th>
<th align="center" valign="top">Model 2</th>
<th align="center" valign="top">Model 3</th>
<th align="center" valign="top">Model 4</th>
<th align="center" valign="top">Model 5</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="middle" colspan="6">Level 1 (students)</td>
</tr>
<tr>
<td align="left" valign="middle">Pretest</td>
<td align="center" valign="middle">0.83&#x002A;(0.03)</td>
<td align="center" valign="middle">0.77&#x002A;(0.04)</td>
<td align="center" valign="middle">0.77&#x002A;(0.04)</td>
<td align="center" valign="middle">0.75&#x002A;(0.04)</td>
<td align="center" valign="middle">0.75&#x002A;(0.04)</td>
</tr>
<tr>
<td align="left" valign="middle">Marks<sup>a</sup></td>
<td/>
<td align="center" valign="middle">0.12&#x002A;(0.03)</td>
<td align="center" valign="middle">0.11&#x002A;(0.03)</td>
<td align="center" valign="middle">0.11&#x002A;(0.03)</td>
<td align="center" valign="middle">0.11&#x002A;(0.03)</td>
</tr>
<tr>
<td align="left" valign="middle">Math attitude<sup>b</sup></td>
<td/>
<td/>
<td align="center" valign="middle">0.06&#x002A;(0.03)</td>
<td align="center" valign="middle">0.06&#x002A;(0.03)</td>
<td align="center" valign="middle">0.06&#x002A;(0.03)</td>
</tr>
<tr>
<td align="left" valign="middle">ITS usage<sup>b,c</sup></td>
<td/>
<td/>
<td/>
<td/>
<td align="center" valign="middle">0.06 (0.03)</td>
</tr>
<tr>
<td align="left" valign="middle" colspan="6">Level 2 (classes)</td>
</tr>
<tr>
<td align="left" valign="middle">School type</td>
<td/>
<td/>
<td/>
<td align="center" valign="middle">0.57&#x002A;(0.10)</td>
<td align="center" valign="middle">0.58&#x002A;(0.10)</td>
</tr>
<tr>
<td align="left" valign="middle">Grade</td>
<td/>
<td/>
<td/>
<td align="center" valign="middle">0.17 (0.09)</td>
<td align="center" valign="middle">0.17 (0.09)</td>
</tr>
<tr>
<td align="left" valign="middle">ITS-usage (class mean)<sup>b</sup></td>
<td/>
<td/>
<td/>
<td/>
<td align="center" valign="middle">&#x2212;0.01 (0.05)</td>
</tr>
<tr>
<td align="left" valign="middle" colspan="6">Explained variance</td>
</tr>
<tr>
<td align="left" valign="middle">Individual level</td>
<td align="center" valign="middle">55.9%</td>
<td align="center" valign="middle">57.8%</td>
<td align="center" valign="middle">58.3%</td>
<td align="center" valign="middle">58.4%</td>
<td align="center" valign="middle">58.5%</td>
</tr>
<tr>
<td align="left" valign="middle">Class level</td>
<td align="center" valign="middle">83.5%</td>
<td align="center" valign="middle">81.0%</td>
<td align="center" valign="middle">80.6%</td>
<td align="center" valign="middle">91.0%</td>
<td align="center" valign="middle">90.9%</td>
</tr>
<tr>
<td align="left" valign="middle" colspan="6">Model fit</td>
</tr>
<tr>
<td align="left" valign="middle">AIC</td>
<td align="center" valign="middle">1616.41</td>
<td align="center" valign="middle">1538.27</td>
<td align="center" valign="middle">1536.14</td>
<td align="center" valign="middle">1509.08</td>
<td align="center" valign="middle">1517.54</td>
</tr>
<tr>
<td align="left" valign="middle">BIC</td>
<td align="center" valign="middle">1635.80</td>
<td align="center" valign="middle">1562.32</td>
<td align="center" valign="middle">1565.01</td>
<td align="center" valign="middle">1547.57</td>
<td align="center" valign="middle">1565.66</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<p>Unless otherwise noted, predictors were not standardized. The standard error is given in brackets. ITS, intelligent tutoring system. <sup>a</sup>In Germany, lower marks correspond to better performance; therefore, the scale was inverted. <sup>b</sup>Predictor was z-standardized. <sup>c</sup>Predictor was group-mean centered. &#x002A;<italic>p</italic>&#x202F;&#x003C;&#x202F;0.05.</p>
</table-wrap-foot>
</table-wrap>
<p>To examine the influence of ITS usage frequency, this factor was incorporated into model M5 at two levels. At the class level, the group-centered mean of ITS usage frequency was introduced for each class. At the individual level, the usage frequency of individual students, centered on the class mean, was included as part of the model. At the class level, we investigated whether classes that used the ITS more frequently (and thus probably traditional materials less frequently) showed greater learning gains compared to classes with lower usage frequency (between-group effect). At the individual level, we examined whether students who completed more tasks than their classmates exhibited higher learning gains (within-group effect). A between-group effect could therefore be interpreted as an improvement in learning gains compared to traditional learning materials, while a within-group effect could suggest that increased learning time enhances learning gains.</p>
<p><xref ref-type="table" rid="tab2">Table 2</xref> presents the hierarchical inclusion of predictors across models M1 to M5. In M1, performance in the pretest is a strong predictor of the performance in the posttest, accounting for approximately 56% of the variance at the individual level and approximately 83% at the class level. The inclusion of marks as an additional predictor in M2 increases the proportion of explained variance and improves model fit. Model M3 shows that a more positive attitude towards mathematics is significantly associated with higher posttest scores. In M4, the influence of school type becomes evident. Controlling for prior performance, mathematics attitude, marks in mathematics and grade, the difference in posttest performance between students attending academic track schools (<italic>Gymnasium</italic>) and those from comprehensive schools (<italic>Gemeinschaftsschule</italic>) is 0.57 logits, indicating substantially greater learning gains among students in the academic track. The inclusion of ITS usage as a predictor in M5 does not improve model fit, as indicated by higher AIC and BIC values compared to Model 4, and the explained variance remains largely unchanged. Furthermore, ITS usage does not exhibit a significant effect on student performance at either the individual or class level.</p>
</sec>
</sec>
<sec sec-type="discussion" id="sec27">
<label>6</label>
<title>Discussion</title>
<sec id="sec28">
<label>6.1</label>
<title>Conclusion</title>
<p>Based on a longitudinal pre&#x2013;posttest design, our study examined whether the frequency of ITS use influenced students&#x2019; learning gains. The multilevel analysis revealed a significant positive effect of ITS usage on mathematics performance (<italic>&#x03B2;</italic>&#x202F;=&#x202F;0.22) on the individual level when not controlling for other variables (left part of <xref ref-type="table" rid="tab1">Table 1</xref>). However, when controlling for pretest scores, the observed effect of ITS usage on mathematics performance was notably smaller (<italic>&#x03B2;</italic>&#x202F;=&#x202F;0.09, right part of <xref ref-type="table" rid="tab1">Table 1</xref>), a pattern consistent with findings from a meta-analysis by <xref ref-type="bibr" rid="ref20">Ma et al. (2014)</xref>, which showed that studies accounting for baseline differences tend to report smaller effect sizes. This smaller, yet still significant effect aligns with results reported by <xref ref-type="bibr" rid="ref32">Spitzer (2022)</xref> for the same ITS; however, that study did not account for additional covariates beyond prior performance. In contrast, in our study, when further covariates&#x2014;such as students&#x2019; attitudes toward mathematics, grade level, and school type&#x2014;were included in the model, the effect of ITS use was no longer statistically significant at either the individual or the class level (Model 5 in <xref ref-type="table" rid="tab2">Table 2</xref>). Therefore, the answer to the research question is that no significant relationship was found between the frequency of ITS use and student learning gains in curricular mathematics tasks, once prior performance and other relevant covariates were controlled for. This non-significant effect shows the importance of controlling for relevant covariates in a correlational design, even though doing so substantially increases the analytical workload.</p>
<p>The analysis was repeated using alternative operationalizations of ITS usage (as defined in section 4.5), including counting every opened worksheet, limiting each worksheet to one count per hour or per day, and counting only days in which the ITS was used at least once, yielding no significant deviations from the aforementioned results. The effect of ITS use on posttest performance was smaller once pretest performance was controlled for. This indicates that high-achieving students use the ITS more and learn more over the course of a school year. However, our findings do not support the assumption that increased ITS usage directly leads to greater learning gains. Simply using the ITS more frequently&#x2014;potentially at the expense of other instructional approaches&#x2014;does not appear to enhance student learning outcomes. As <xref ref-type="bibr" rid="ref13">Higgins et al. (2012)</xref> aptly put it, &#x201C;&#x2026;it is not whether technology is used (or not) which makes the difference, but how well the technology is used to support teaching and learning&#x201D; (p. 3). In addition, students perceived the ITS as having a moderately positive impact on their mathematics learning. This aligns with the findings from the multilevel model, which suggest that the ITS supports mathematics learning to a similar extent as traditional media.</p>
</sec>
<sec id="sec29">
<label>6.2</label>
<title>Limitations</title>
<p>As the study was conducted in the context of the introduction of the ITS in a federal state in Germany, it was not possible to conduct an experiment with random assignment to a control group not using the ITS. Therefore, the lack of a control group restricts our ability to firmly establish causal effects of the ITS&#x2019;s impact on learning outcomes. Nevertheless, we found substantial variance in the &#x201C;natural&#x201D; usage of ITS among different classes and we could show a large discrepancy between raw and controlled effect sizes of ITS usage, both of which will be important for further investigation.</p>
<p>Due to the limited available log data, it was only possible to analyze the usage frequency of the students. This reduces the explanatory power of our analysis, as other factors&#x2014;such as the social form employed during ITS usage as well as the way teachers implemented the ITS during and beyond school hours&#x2014;could also influence the ITS&#x2019;s effectiveness.</p>
<p>As an additional consequence resulting from the limited log data, ITS usage was operationalized as the frequency with which students opened unique worksheets. As a result, it was not possible to distinguish between different forms of engagement, such as careful, effortful work and rapid task completion, nor to identify non-learning behaviors. This limitation may have weakened the observed relationship between ITS usage and learning gains. Nevertheless, the moderate raw association between ITS usage and performance suggests that this measure still captured a meaningful, though coarse, indicator of students&#x2019; interaction with the ITS.</p>
</sec>
<sec id="sec30">
<label>6.3</label>
<title>Reasons for the lack of an ITS effect</title>
<p>The results of our study raise the question of why the use of the ITS was not effective. In the following, several plausible reasons are discussed. Because the extent to which students actually used the system determines the potential impact of any instructional technology, the discussion first considers the intensity of ITS use observed in our data. After outlining the extent of actual system use, the discussion turns to specific features of the ITS itself, including the types of tasks available, the nature of the feedback, and the extent to which the system&#x2019;s adaptive functionality was utilized. It then moves on to challenges related to its use in the classroom, including self-pacing, homework integration, collaborative learning, and the role of teachers in shaping instructional use. Furthermore, a broader perspective is taken by examining the ITS&#x2019;s alignment with principles from CTML and guided activity, before reflecting on aspects of the study design&#x2014;such as the long measurement period&#x2014;that may also have played a role. The section concludes with a discussion of the alignment between the ITS and the performance test.</p>
<p>Although students completed a total of 61,051 worksheets, the average individual usage corresponded to only about 1.3 unique worksheets per school week, with nearly half of the students completing fewer than one unique worksheet per week. Taken together with the null effects in our multilevel models, this suggests that many students may not have reached an intensity of ITS use that is sufficient to produce detectable gains on a broad curriculum-based test.</p>
<p>One contributing factor may be the limited types of tasks that students could engage with in the ITS. Although the system offers a large number of items, many of them focus on highly procedural exercise formats and comparatively few engage students in forms of mathematical activity linked to deeper learning, such as constructive reasoning, modelling real-world situations, communicating mathematical ideas, sketching representations, or working through open or non-routine problems. These forms of activity are widely recognized as essential components of mathematical proficiency because they require students to make sense of mathematical structures, connect representations, and articulate their reasoning (e.g., <xref ref-type="bibr" rid="ref30">Santos-Trigo, 2024</xref>). As outlined in the introduction, the ITS primarily offers tasks with clearly structured scaffolding and step-by-step guidance, but includes relatively few opportunities for tasks that actively promote such constructive or representational reasoning. In this regard, the system may be well suited for supporting procedural fluency but less effective for fostering flexible and conceptual mathematical understanding.</p>
<p>The type of feedback provided by the ITS could also account for the results. Although the system offers immediate feedback at each substep, the depth and quality of this feedback vary considerably: some items provide elaborated explanations, whereas many others offer only corrective &#x201C;right/wrong&#x201D; responses. Research has shown that elaborative feedback is generally more beneficial for learning than purely corrective feedback because it supports the development of conceptual understanding and helps students diagnose their misconceptions (<xref ref-type="bibr" rid="ref42">Van Der Kleij et al., 2015</xref>). In our study, this pattern aligns with students&#x2019; own perceptions: they rated the ITS as only moderately engaging and only somewhat helpful for learning, suggesting that the predominantly corrective feedback may have facilitated procedural success without consistently fostering deeper understanding. For more complex or conceptually demanding tasks, students may therefore have required additional forms of scaffolding beyond immediate correctness indicators.</p>
<p>Another factor that may help explain the absence of significant effects concerns the limited use of the ITS&#x2019;s adaptive knowledge gap functionality. Although the system is designed to identify and address individual misconceptions through targeted follow-up tasks, knowledge gaps accounted for only 1.5% of all opened worksheets, despite an average solution rate of approximately one third (section 5.1). This indicates that, while students frequently made errors during regular worksheet work, these errors translated into only minimal engagement with targeted follow-up tasks addressing identified difficulties. One possible explanation is that knowledge gaps were assigned relatively conservatively by the system; another is that students often did not engage with recommended follow-up tasks once they were available. The present data do not allow a clear distinction between these explanations. Previous research suggests that learning effects of digital tools are particularly pronounced when systems are adaptive (<xref ref-type="bibr" rid="ref14">Hillmayr et al., 2020</xref>). Against this background, the largely underutilized adaptive functionality observed in the present study provides a plausible explanation for why the ITS did not yield stronger effects.</p>
<p>In this context, how teachers handled the use of the ITS also likely influenced the effectiveness. For the portion of ITS usage that occurred during school hours, it is not entirely clear to what extent students were able to work with the ITS at their own pace, as this likely depended on how much autonomy teachers allowed during ITS use. As found by <xref ref-type="bibr" rid="ref26">Moreno (2007)</xref>, allowing students to self-pace lowers perceived difficulty.</p>
<p>In addition to usage during school hours, a substantial part (approximately two thirds) of the ITS usage was conducted outside school hours. This indicates that a significant fraction of homework was completed and handed in through the ITS. As found by <xref ref-type="bibr" rid="ref10">Hattie (2023)</xref>, the evaluation and discussion of homework is important for students. If the digital homework was not properly discussed in class because the ITS provided worked examples, this could hinder students from asking questions and getting clarification about certain tasks and therefore negatively impact their learning gains.</p>
<p>Another important factor is collaborative learning, which has been shown to enhance learning outcomes. As outlined in the theoretical framework, students who engage in collaborative learning tend to learn more effectively, especially when technology facilitates this process (<xref ref-type="bibr" rid="ref6">Chen et al., 2018</xref>; <xref ref-type="bibr" rid="ref36">Sung et al., 2017</xref>). However, the ITS analyzed here is not designed to support collaborative learning in pairs or small groups. Given that the majority of ITS use in our sample occurred outside school hours, much of the work with the system likely took place individually rather than in pairs or groups. This usage pattern, together with the lack of collaborative features in the system, may help explain why potential benefits of collaborative learning with technology did not materialize here. Consequently, greater use of the ITS may limit opportunities for students to collaborate and engage in meaningful discussions about mathematical concepts. This lack of interaction could result in smaller learning gains overall.</p>
<p>Teachers&#x2019; decisions regarding the adoption of the ITS constitute another important factor in interpreting the findings. Although the system was available to all participating schools, its use was entirely voluntary, and teachers retained full discretion over whether they used the ITS at all and how intensively it was employed in their classes. Even though the ITS was adopted in many classrooms, overall usage intensity remained low and varied substantially across classes, indicating that mere availability did not translate into sustained or systematic use. This pattern is consistent with research showing that teachers&#x2019; adoption of digital tools depends strongly on their perceived usefulness, attitudes toward technology, and institutional conditions (<xref ref-type="bibr" rid="ref9002">Teo, 2011</xref>). Moreover, when digital tools are not embedded in shared instructional routines at the school level, uptake tends to remain uneven, which is typically associated with reduced or absent effects on student learning (<xref ref-type="bibr" rid="ref9001">Ertmer and Ottenbreit-Leftwich, 2010</xref>).</p>
<p>Beyond adoption, the instructional use of the ITS is therefore likely to have influenced its effectiveness. Although teachers who chose to use the system were free to embed it into their teaching as they saw fit, no mandatory training or instructional guidance accompanied its implementation. Meta-analytic evidence indicates that digital interventions yield substantially larger learning effects when teacher training is provided than when it is not (<xref ref-type="bibr" rid="ref14">Hillmayr et al., 2020</xref>). Teacher training typically focuses not only on technical operation but also on how digital tools can be integrated into instruction, how students&#x2019; work can be monitored, and how digital tasks can be connected to classroom discussion and follow-up activities. As a result, the pedagogical use of the ITS likely varied widely, and in some cases may not have been closely aligned with instructional goals or curricular demands. Earlier experimental work further suggests that insufficient preparation can, in some cases, even be associated with negative learning effects (<xref ref-type="bibr" rid="ref16">Koedinger and Anderson, 1993</xref>). Taken together, these findings highlight the teacher&#x2019;s role in shaping the effectiveness of digital tools and provide a plausible explanation for the null effects observed under conditions of voluntary and unsupported ITS use.</p>
<p>In addition to the lack of collaborative learning, the results can in part be explained with CTML (<xref ref-type="bibr" rid="ref23">Mayer, 2014</xref>). Since most tasks found in the ITS operate at a symbolic or algebraic level, according to CTML, they do not make full use of the pictorial channel, leading to less effective learning.</p>
<p>In contrast to the previously discussed theoretical concepts, the ITS adheres to the principles of guided activity very well. The ITS incorporates well-structured substeps with clear instructions, aligning with the principles of guided activity. This design facilitates task completion by providing explicit guidance to students and, hence, presumably fosters effective learning.</p>
<p>The study design itself also plays a role in interpreting the results. As <xref ref-type="bibr" rid="ref18">Kulik and Fletcher (2016)</xref> found, longer measurement periods correlate with smaller effect sizes. One possible explanation for this is that the novelty factor of the ITS diminishes over time.</p>
<p>The ITS and the performance test were closely aligned with respect to both curricular content and the types of competencies addressed. As described in sections 4.3 and 4.4, both were developed with reference to the same state-level curricula, resulting in substantial overlap in the mathematical content covered. In addition, the performance test primarily assessed procedural knowledge, which corresponds to the predominant focus of the tasks provided by the ITS. The observed pre&#x2013;post learning gains were of a magnitude that is typical for students at this grade level over a school year, suggesting that the performance test was sensitive to the mathematical learning that occurred during the measurement period. Against this background, it is unlikely that the absence of a significant effect of ITS use can be attributed to a mismatch between the learning environment and the performance test.</p>
<p>In summary, while the ITS aligns well with the principle of guided activity, other aspects may help explain its limited effectiveness. The types of tasks and the feedback provided may not have been sufficient to support deeper learning. Opportunities for self-paced work in class were likely restricted, and digital homework may have lacked in-class follow-up. Key principles from CTML also appear only partially addressed. Lastly, the long measurement period may reduce the measured effect sizes in digital learning due to diminishing novelty effects.</p>
<p>Considering the limitations of the ITS and likely challenges in its integration into classroom settings, the absence of significant effects is reasonable. It aligns with the findings of <xref ref-type="bibr" rid="ref10">Hattie (2023)</xref> that the mere presence of technology in the classroom does not guarantee improved learning outcomes. Instead, it is probably the way technology is integrated into the teaching process that truly matters.</p>
<p>Given the ecologically valid nature of the study, the results can be interpreted as a realistic evaluation of the implementation of an ITS. In Germany, if schools are provided with technology, such as an ITS, the frequency and manner in which this technology is used is almost entirely at the discretion of the individual teacher. The hope associated with the provision of an ITS is that this type of technology use will optimize learning processes and lead to sustainably higher performances in international (PISA) and national standardized evaluations in Germany.</p>
</sec>
<sec id="sec31">
<label>6.4</label>
<title>Implications</title>
<p>A more precise delineation of the capabilities and limitations of digital media in education is necessary. In particular, it is important to address how effective digital tools are in supporting students when revisiting familiar concepts as opposed to engaging with new ones. Also, the importance of students&#x2019; knowledge about the ITS and of their metacognitive awareness of learning with ITS merits further exploration.</p>
<sec id="sec32">
<label>6.4.1</label>
<title>Comprehensive analysis approach</title>
<p>Current research often focuses either on measuring the effectiveness of ITS with limited process data or on analyzing process data without adequately considering external factors such as performance in standardized tests and relevant noncognitive measurements. To gain a deeper understanding of why ITS can facilitate effective learning, a holistic approach is needed. By combining outcome-based evaluation with contextual and usage-related information, the present study represents a substantial step in this direction. Access to even more comprehensive data&#x2014;effectiveness measurement and process analysis&#x2014;would allow for an even more nuanced analysis of how the ITS was used and how it impacts learning. Therefore, more comprehensive data on feedback received, utilization patterns, and engagement with features like worked examples would be useful for understanding how the ITS was used. In addition, data on student use of the ITS and teacher support inside and outside classes is also important for identifying efficient ITS usage. Gathering and analyzing all this information (learning gains and relevant noncognitive measurements, detailed log data, and process data provided by students and teachers) would possibly allow finding effective implementations of the ITS in teaching and learning.</p>
<p>In summary, future research should strive to adopt a broad perspective and comprehensive analysis approach, aiming to uncover general underlying trends rather than focusing solely on specific features of individual systems. Addressing these research desiderata will enhance our understanding of how digital tools can be optimally leveraged to support teaching and learning processes.</p>
</sec>
<sec id="sec33">
<label>6.4.2</label>
<title>Potential of digital media in learning mathematics</title>
<p>Our findings suggest several additional directions for future research on the use of digital media in education. One area that deserves attention is collaborative learning. Collaboration&#x2014;especially when supported by digital tools&#x2014;can improve learning outcomes (<xref ref-type="bibr" rid="ref6">Chen et al., 2018</xref>). However, many digital learning environments are primarily designed for individual use. This may be because it is difficult to combine personalized instruction with group-based learning. Still, finding ways to enable student interaction and exchange within digital learning settings could help increase their overall effectiveness.</p>
<p>Another important point concerns the type of feedback students receive. In many systems, feedback is limited to whether an answer is correct or not. But research shows that elaborated feedback&#x2014;explaining why an answer is right or wrong&#x2014;leads to better learning outcomes. New developments in artificial intelligence could make it easier to provide such feedback in a flexible and adaptive way. For example, AI-based systems could react to specific student errors without the need to pre-program all possible explanations, as a recent meta-analysis shows (<xref ref-type="bibr" rid="ref46">Yi et al., 2025</xref>). Exploring this potential could be a valuable area for future research.</p>
<p>A third aspect relates to the use of multiple and dynamic representations. One of the key advantages of digital media is the ability to visualize abstract content in new ways&#x2014;for instance, through interactive graphics or simulations. In mathematics education, examples include dynamic representations of fractions or functions. These tools offer possibilities that traditional media cannot, but more research is needed on how to integrate them effectively into teaching.</p>
<p>Although these points are based on the analysis of one ITS, they apply to digital learning tools more broadly. Similar challenges and opportunities can be found in other technologies such as <italic>dynamic geometry environments</italic> (DGEs) or <italic>spreadsheets</italic>. The findings support the idea that it is not the technology itself that determines its effectiveness, but how it is used in teaching. Simply introducing digital tools into the classroom is not enough&#x2014;what matters is how they are embedded into instruction, how students interact with them, and how teachers guide their use. Future research should therefore focus not only on the tools themselves, but also on the pedagogical strategies and learning environments in which they are applied.</p>
</sec>
</sec>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="sec34">
<title>Data availability statement</title>
<p>The raw data supporting the conclusions of this article will be made available by the authors, without undue reservation.</p>
</sec>
<sec sec-type="ethics-statement" id="sec35">
<title>Ethics statement</title>
<p>The studies involving humans were approved by the Ethics Committee of the IPN &#x2013; Leibniz Institute for Science and Mathematics Education. The studies were conducted in accordance with the local legislation and institutional requirements. Written informed consent for participation in this study was provided by the participants&#x2019; legal guardians/next of kin.</p>
</sec>
<sec sec-type="author-contributions" id="sec36">
<title>Author contributions</title>
<p>JS: Writing &#x2013; review &#x0026; editing, Writing &#x2013; original draft, Formal analysis, Software, Data curation, Validation, Methodology, Visualization. TR: Software, Formal analysis, Methodology, Project administration, Conceptualization, Investigation, Writing &#x2013; review &#x0026; editing, Validation, Supervision. GN: Conceptualization, Supervision, Methodology, Writing &#x2013; review &#x0026; editing. AH: Supervision, Writing &#x2013; review &#x0026; editing, Project administration, Methodology, Conceptualization.</p>
</sec>
<sec sec-type="COI-statement" id="sec37">
<title>Conflict of interest</title>
<p>The author(s) declared that this work was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="ai-statement" id="sec38">
<title>Generative AI statement</title>
<p>The author(s) declared that Generative AI was used in the creation of this manuscript. Generative AI was used to improve grammar and style of the manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p>
</sec>
<sec sec-type="disclaimer" id="sec39">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="ref1"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Akram</surname><given-names>H.</given-names></name> <name><surname>Abdelrady</surname><given-names>A. H.</given-names></name></person-group> (<year>2023</year>). <article-title>Application of classpoint tool in reducing EFL learners&#x2019; test anxiety: an empirical evidence from Saudi Arabia</article-title>. <source>J. Comput. Educ.</source> <volume>10</volume>, <fpage>529</fpage>&#x2013;<lpage>547</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s40692-023-00265-z</pub-id></mixed-citation></ref>
<ref id="ref2"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Akram</surname><given-names>H.</given-names></name> <name><surname>Abdelrady</surname><given-names>A. H.</given-names></name></person-group> (<year>2025</year>). <article-title>Examining the role of ClassPoint tool in shaping EFL students&#x2019; perceived E-learning experiences: a social cognitive theory perspective</article-title>. <source>Acta Psychol.</source> <volume>254</volume>:<fpage>104775</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.actpsy.2025.104775</pub-id>, <pub-id pub-id-type="pmid">39923551</pub-id></mixed-citation></ref>
<ref id="ref3"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Atkinson</surname><given-names>R. C.</given-names></name></person-group> (<year>1968</year>). <article-title>Computerized instruction and the learning process</article-title>. <source>Am. Psychol.</source> <volume>23</volume>, <fpage>225</fpage>&#x2013;<lpage>239</lpage>. doi: <pub-id pub-id-type="doi">10.1037/h0020791</pub-id>, <pub-id pub-id-type="pmid">5647875</pub-id></mixed-citation></ref>
<ref id="ref4"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Belland</surname><given-names>B. R.</given-names></name> <name><surname>Walker</surname><given-names>A. E.</given-names></name> <name><surname>Kim</surname><given-names>N. J.</given-names></name> <name><surname>Lefler</surname><given-names>M.</given-names></name></person-group> (<year>2017</year>). <article-title>Synthesizing results from empirical research on computer-based scaffolding in STEM education: a meta-analysis</article-title>. <source>Rev. Educ. Res.</source> <volume>87</volume>, <fpage>309</fpage>&#x2013;<lpage>344</lpage>. doi: <pub-id pub-id-type="doi">10.3102/0034654316670999</pub-id>, <pub-id pub-id-type="pmid">28344365</pub-id></mixed-citation></ref>
<ref id="ref5"><mixed-citation publication-type="other"><person-group person-group-type="author"><collab id="coll1">Bettermarks</collab></person-group>. (<year>2025</year>). <source>Das adaptive Lernsystem f&#x00FC;r Mathematik [The adaptive learning system for mathematics]</source>. Available online at: <ext-link xlink:href="https://de.bettermarks.com" ext-link-type="uri">https://de.bettermarks.com</ext-link> (Accessed April 3, 2024).</mixed-citation></ref>
<ref id="ref6"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Chen</surname><given-names>J.</given-names></name> <name><surname>Wang</surname><given-names>M.</given-names></name> <name><surname>Kirschner</surname><given-names>P. A.</given-names></name> <name><surname>Tsai</surname><given-names>C.-C.</given-names></name></person-group> (<year>2018</year>). <article-title>The role of collaboration, computer use, learning environments, and supporting strategies in CSCL: a meta-analysis</article-title>. <source>Rev. Educ. Res.</source> <volume>88</volume>, <fpage>799</fpage>&#x2013;<lpage>843</lpage>. doi: <pub-id pub-id-type="doi">10.3102/0034654318791584</pub-id></mixed-citation></ref>
<ref id="ref9001"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ertmer</surname><given-names>P. A.</given-names></name> <name><surname>Ottenbreit-Leftwich</surname><given-names>A. T.</given-names></name></person-group> (<year>2010</year>). <article-title>Teacher technology change: how knowledge, confidence, beliefs, and culture intersect</article-title>. <source>J. Res. Technol. Educ.</source> <volume>42</volume>, <fpage>255</fpage>&#x2013;<lpage>284</lpage>. doi: <pub-id pub-id-type="doi">10.1080/15391523.2010.10782551</pub-id></mixed-citation></ref>
<ref id="ref7"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Field</surname><given-names>A.</given-names></name> <name><surname>Miles</surname><given-names>J.</given-names></name> <name><surname>Field</surname><given-names>Z.</given-names></name></person-group> (<year>2012</year>). <source>Discovering statistics using R</source>. Reprint. <publisher-name>Sage</publisher-name>.</mixed-citation></ref>
<ref id="ref8"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Harks</surname><given-names>B.</given-names></name> <name><surname>Klieme</surname><given-names>E.</given-names></name> <name><surname>Hartig</surname><given-names>J.</given-names></name> <name><surname>Leiss</surname><given-names>D.</given-names></name></person-group> (<year>2014</year>). <article-title>Separating cognitive and content domains in mathematical competence</article-title>. <source>Educ. Assess.</source> <volume>19</volume>, <fpage>243</fpage>&#x2013;<lpage>266</lpage>. doi: <pub-id pub-id-type="doi">10.25656/01:17987</pub-id></mixed-citation></ref>
<ref id="ref9"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Hartig</surname><given-names>J.</given-names></name> <name><surname>K&#x00FC;hnbach</surname><given-names>O.</given-names></name></person-group> (<year>2006</year>). &#x201C;<article-title>Sch&#x00E4;tzung von Ver&#x00E4;nderung mit &#x2018;Plausible Values&#x2019; in multidimensionalen Raschmodellen [Estimating Change with &#x2018;Plausible Values&#x2019; in Multidimensional Rasch Models]</article-title>&#x201D; in <source>Ver&#x00E4;nderungsmessung und L&#x00E4;ngsschnittstudien in der empirischen Erziehungswissenschaft</source>. eds. <person-group person-group-type="editor"><name><surname>Ittel</surname><given-names>A.</given-names></name> <name><surname>Merkens</surname><given-names>H.</given-names></name></person-group> (<publisher-loc>Wiesbaden</publisher-loc>: <publisher-name>VS Verlag f&#x00FC;r Sozialwissenschaften</publisher-name>), <fpage>27</fpage>&#x2013;<lpage>44</lpage>.</mixed-citation></ref>
<ref id="ref10"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Hattie</surname><given-names>J.</given-names></name></person-group> (<year>2023</year>). <source>Visible learning: the sequel: a synthesis of over 2,100 meta-analyses relating to achievement</source>. <edition>1st</edition> Edn. <publisher-loc>London and New York</publisher-loc>: <publisher-name>Routledge</publisher-name>.</mixed-citation></ref>
<ref id="ref11"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hattie</surname><given-names>J.</given-names></name> <name><surname>Timperley</surname><given-names>H.</given-names></name></person-group> (<year>2007</year>). <article-title>The power of feedback</article-title>. <source>Rev. Educ. Res.</source> <volume>77</volume>, <fpage>81</fpage>&#x2013;<lpage>112</lpage>. doi: <pub-id pub-id-type="doi">10.3102/003465430298487</pub-id></mixed-citation></ref>
<ref id="ref12"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Hiebert</surname><given-names>J.</given-names></name> <name><surname>Lefevre</surname><given-names>P.</given-names></name></person-group> (<year>1986</year>). &#x201C;<article-title>Conceptual and procedural knowledge in mathematics: an introductory analysis</article-title>&#x201D; in <source>Conceptual and procedural knowledge: the case of mathematics</source>. ed. <person-group person-group-type="editor"><name><surname>Hiebert</surname><given-names>J.</given-names></name></person-group> (<publisher-loc>Hillsdale, NJ</publisher-loc>: <publisher-name>Lawrence Erlbaum Associates</publisher-name>), <fpage>1</fpage>&#x2013;<lpage>27</lpage>.</mixed-citation></ref>
<ref id="ref13"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Higgins</surname><given-names>S.</given-names></name> <name><surname>Xiao</surname><given-names>Z. M.</given-names></name> <name><surname>Katsipataki</surname><given-names>M.</given-names></name></person-group> (<year>2012</year>). <source>The impact of digital technology on learning: a summary for the education endowment foundation. Full report</source>. <publisher-loc>Durham</publisher-loc>: <publisher-name>School of Education, Durham University</publisher-name>.</mixed-citation></ref>
<ref id="ref14"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hillmayr</surname><given-names>D.</given-names></name> <name><surname>Ziernwald</surname><given-names>L.</given-names></name> <name><surname>Reinhold</surname><given-names>F.</given-names></name> <name><surname>Hofer</surname><given-names>S. I.</given-names></name> <name><surname>Reiss</surname><given-names>K. M.</given-names></name></person-group> (<year>2020</year>). <article-title>The potential of digital tools to enhance mathematics and science learning in secondary schools: a context-specific meta-analysis</article-title>. <source>Comput. Educ.</source> <volume>153</volume>:<fpage>103897</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.compedu.2020.103897</pub-id></mixed-citation></ref>
<ref id="ref15"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Kline</surname><given-names>P.</given-names></name></person-group> (<year>2000</year>). <source>Handbook of psychological testing</source>. <edition>2nd</edition> Edn. <publisher-loc>London and New York</publisher-loc>: <publisher-name>Routledge</publisher-name>.</mixed-citation></ref>
<ref id="ref16"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Koedinger</surname><given-names>K. R.</given-names></name> <name><surname>Anderson</surname><given-names>J. R.</given-names></name></person-group> (<year>1993</year>). &#x201C;<article-title>Effective use of intelligent software in high school math classrooms</article-title>&#x201D; in <source>Proceedings of AI-ED 93: World Conference on Artificial Intelligence in Education</source>. eds. <person-group person-group-type="editor"><name><surname>Brna</surname><given-names>P.</given-names></name> <name><surname>Ohlsson</surname><given-names>S.</given-names></name> <name><surname>Pain</surname><given-names>H.</given-names></name></person-group> (<publisher-loc>Charlottesville, VA</publisher-loc>: <publisher-name>Association for the Advancement of Computing in Education (AACE)</publisher-name>), <fpage>241</fpage>&#x2013;<lpage>248</lpage>.</mixed-citation></ref>
<ref id="ref17"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>K&#x00F6;ller</surname><given-names>O.</given-names></name> <name><surname>Baumert</surname><given-names>J.</given-names></name> <name><surname>Schnabel</surname><given-names>K.</given-names></name></person-group> (<year>2000</year>). &#x201C;<article-title>Zum Zusammenspiel von schulischem Interesse und Lernen im Fach Mathematik: L&#x00E4;ngsschnittanalysen in den Sekundarstufen I und II</article-title>&#x201D; in <source>Interesse und Lernmotivation: Untersuchungen zu Entwicklung, F&#x00F6;rderung und Wirkung</source>. eds. <person-group person-group-type="editor"><name><surname>Schiefele</surname><given-names>U.</given-names></name> <name><surname>Wild</surname><given-names>K.-P.</given-names></name></person-group> (<publisher-loc>M&#x00FC;nster</publisher-loc>: <publisher-name>Waxmann</publisher-name>), <fpage>163</fpage>&#x2013;<lpage>181</lpage>.</mixed-citation></ref>
<ref id="ref18"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kulik</surname><given-names>J. A.</given-names></name> <name><surname>Fletcher</surname><given-names>J. D.</given-names></name></person-group> (<year>2016</year>). <article-title>Effectiveness of intelligent tutoring systems: a meta-analytic review</article-title>. <source>Rev. Educ. Res.</source> <volume>86</volume>, <fpage>42</fpage>&#x2013;<lpage>78</lpage>. doi: <pub-id pub-id-type="doi">10.3102/0034654315581420</pub-id></mixed-citation></ref>
<ref id="ref19"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kulik</surname><given-names>J. A.</given-names></name> <name><surname>Kulik</surname><given-names>C.-L. C.</given-names></name></person-group> (<year>1988</year>). <article-title>Timing of feedback and verbal learning</article-title>. <source>Rev. Educ. Res.</source> <volume>58</volume>, <fpage>79</fpage>&#x2013;<lpage>97</lpage>. doi: <pub-id pub-id-type="doi">10.3102/00346543058001079</pub-id></mixed-citation></ref>
<ref id="ref21"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Martin</surname><given-names>M. O.</given-names></name> <name><surname>Kelly</surname><given-names>D. L.</given-names></name></person-group> (<year>1996</year>). <source>Technical report</source>. <publisher-loc>Chestnut Hill, MA</publisher-loc>: <publisher-name>Center for the Study of Testing, Evaluation, and Educational Policy, Boston College</publisher-name>.</mixed-citation></ref>
<ref id="ref22"><mixed-citation publication-type="other"><person-group person-group-type="author"><collab id="coll2">MatheGym</collab></person-group>. (<year>2025</year>). <source>Die Lernplattform f&#x00FC;r Gymnasium und Realschule [The learning platform for academic track schools and comprehensive schools]</source>. Available online at: <ext-link xlink:href="https://www.mathegym.de" ext-link-type="uri">https://www.mathegym.de</ext-link> (Accessed April 3, 2024).</mixed-citation></ref>
<ref id="ref20"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ma</surname><given-names>W.</given-names></name> <name><surname>Adesope</surname><given-names>O. O.</given-names></name> <name><surname>Nesbit</surname><given-names>J. C.</given-names></name> <name><surname>Liu</surname><given-names>Q.</given-names></name></person-group> (<year>2014</year>). <article-title>Intelligent tutoring systems and learning outcomes: a meta-analysis</article-title>. <source>J. Educ. Psychol.</source> <volume>106</volume>, <fpage>901</fpage>&#x2013;<lpage>918</lpage>. doi: <pub-id pub-id-type="doi">10.1037/a0037123</pub-id></mixed-citation></ref>
<ref id="ref23"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Mayer</surname><given-names>R. E.</given-names></name></person-group> (<year>2014</year>). &#x201C;<article-title>Cognitive theory of multimedia learning</article-title>&#x201D; in <source>The Cambridge handbook of multimedia learning</source>. <edition>2nd</edition> Edn. (<publisher-loc>Cambridge</publisher-loc>: <publisher-name>Cambridge University Press</publisher-name>).</mixed-citation></ref>
<ref id="ref24"><mixed-citation publication-type="other"><person-group person-group-type="author"><collab id="coll3">Ministerium f&#x00FC;r Schule und Berufsbildung des Landes Schleswig-Holstein</collab></person-group> (<year>2014</year>). <source>Fachanforderungen: Mathematik. Allgemein bildende Schulen &#x2013; Sekundarstufe I &#x2013; Sekundarstufe II [Subject requirements: Mathematics. General education schools &#x2013; Lower secondary level &#x2013; Upper secondary level]</source>. <publisher-loc>Kiel</publisher-loc>: <publisher-name>Stamp Media &#x0026; Schmidt &#x0026; Klaunig</publisher-name>. Available online at: <ext-link xlink:href="https://fachportal.lernnetz.de/sh/fachanforderungen/mathematik.html" ext-link-type="uri">https://fachportal.lernnetz.de/sh/fachanforderungen/mathematik.html</ext-link> (Accessed April 1, 2021).</mixed-citation></ref>
<ref id="ref25"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Mislevy</surname><given-names>R. J.</given-names></name> <name><surname>Beaton</surname><given-names>A. E.</given-names></name> <name><surname>Kaplan</surname><given-names>B.</given-names></name> <name><surname>Sheehan</surname><given-names>K. M.</given-names></name></person-group> (<year>1992</year>). <article-title>Estimating population characteristics from sparse matrix samples of item responses</article-title>. <source>J. Educ. Meas.</source> <volume>29</volume>, <fpage>133</fpage>&#x2013;<lpage>161</lpage>. doi: <pub-id pub-id-type="doi">10.1111/j.1745-3984.1992.tb00371.x</pub-id></mixed-citation></ref>
<ref id="ref26"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Moreno</surname><given-names>R.</given-names></name></person-group> (<year>2007</year>). <article-title>Optimising learning from animations by minimising cognitive load: cognitive and affective consequences of signalling and segmentation methods</article-title>. <source>Appl. Cogn. Psychol.</source> <volume>21</volume>, <fpage>765</fpage>&#x2013;<lpage>781</lpage>. doi: <pub-id pub-id-type="doi">10.1002/acp.1348</pub-id></mixed-citation></ref>
<ref id="ref27"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Moreno</surname><given-names>R.</given-names></name> <name><surname>Mayer</surname><given-names>R.</given-names></name></person-group> (<year>2007</year>). <article-title>Interactive multimodal learning environments: special issue on interactive learning environments: contemporary issues and trends</article-title>. <source>Educ. Psychol. Rev.</source> <volume>19</volume>, <fpage>309</fpage>&#x2013;<lpage>326</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s10648-007-9047-2</pub-id></mixed-citation></ref>
<ref id="ref28"><mixed-citation publication-type="other"><person-group person-group-type="author"><name><surname>Mullis</surname><given-names>I. V. S.</given-names></name> <name><surname>Martin</surname><given-names>M. O.</given-names></name> <name><surname>von Davier</surname><given-names>M.</given-names></name></person-group> (<year>2021</year>). <source>TIMSS 2023 assessment frameworks</source>. <publisher-loc>Chestnut Hill, MA</publisher-loc>: <publisher-name>International Association for the Evaluation of Educational Achievement</publisher-name>. Available online at: <ext-link xlink:href="http://www.iea.nl" ext-link-type="uri">http://www.iea.nl</ext-link> (Accessed December 12, 2025).</mixed-citation></ref>
<ref id="ref29"><mixed-citation publication-type="book"><person-group person-group-type="author"><collab id="coll4">OECD</collab></person-group> (<year>2023</year>). <source>PISA 2022 results (volume I): the state of learning and equity in education</source>. <publisher-loc>Paris</publisher-loc>: <publisher-name>OECD Publishing</publisher-name>.</mixed-citation></ref>
<ref id="ref30"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Santos-Trigo</surname><given-names>M.</given-names></name></person-group> (<year>2024</year>). <article-title>Problem solving in mathematics education: tracing its foundations and current research-practice trends</article-title>. <source>ZDM Math. Educ.</source> <volume>56</volume>, <fpage>211</fpage>&#x2013;<lpage>222</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s11858-024-01578-8</pub-id></mixed-citation></ref>
<ref id="ref31"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Shute</surname><given-names>V. J.</given-names></name> <name><surname>Zapata-Rivera</surname><given-names>D.</given-names></name></person-group> (<year>2007</year>). <article-title>Adaptive technologies</article-title>. <source>ETS Res. Rep. Ser.</source> <volume>2007</volume>, <fpage>i</fpage>&#x2013;<lpage>34</lpage>. doi: <pub-id pub-id-type="doi">10.1002/j.2333-8504.2007.tb02047.x</pub-id></mixed-citation></ref>
<ref id="ref32"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Spitzer</surname><given-names>M. W. H.</given-names></name></person-group> (<year>2022</year>). <article-title>Just do it! Study time increases mathematical achievement scores for grade 4-10 students in a large longitudinal cross-country study</article-title>. <source>Eur. J. Psychol. Educ.</source> <volume>37</volume>, <fpage>39</fpage>&#x2013;<lpage>53</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s10212-021-00546-0</pub-id>, <pub-id pub-id-type="pmid">40477366</pub-id></mixed-citation></ref>
<ref id="ref33"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Steenbergen-Hu</surname><given-names>S.</given-names></name> <name><surname>Cooper</surname><given-names>H.</given-names></name></person-group> (<year>2013</year>). <article-title>A meta-analysis of the effectiveness of intelligent tutoring systems on K&#x2013;12 students&#x2019; mathematical learning</article-title>. <source>J. Educ. Psychol.</source> <volume>105</volume>, <fpage>970</fpage>&#x2013;<lpage>987</lpage>. doi: <pub-id pub-id-type="doi">10.1037/a0032447</pub-id></mixed-citation></ref>
<ref id="ref34"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Steenbergen-Hu</surname><given-names>S.</given-names></name> <name><surname>Cooper</surname><given-names>H.</given-names></name></person-group> (<year>2014</year>). <article-title>A meta-analysis of the effectiveness of intelligent tutoring systems on college students&#x2019; academic learning</article-title>. <source>J. Educ. Psychol.</source> <volume>106</volume>, <fpage>331</fpage>&#x2013;<lpage>347</lpage>. doi: <pub-id pub-id-type="doi">10.1037/a0034752</pub-id></mixed-citation></ref>
<ref id="ref35"><mixed-citation publication-type="other"><person-group person-group-type="author"><name><surname>Stekhoven</surname><given-names>D. J.</given-names></name></person-group> (<year>2022</year>). <source>missForest: nonparametric missing value imputation using random forest</source>. <comment>Version 1.5. R package. Comprehensive R archive network (CRAN). Available online at:</comment> <ext-link xlink:href="https://CRAN.R-project.org/package=missForest" ext-link-type="uri">https://CRAN.R-project.org/package=missForest</ext-link> (Accessed May 3, 2023).</mixed-citation></ref>
<ref id="ref36"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Sung</surname><given-names>Y.-T.</given-names></name> <name><surname>Yang</surname><given-names>J.-M.</given-names></name> <name><surname>Lee</surname><given-names>H.-Y.</given-names></name></person-group> (<year>2017</year>). <article-title>The effects of mobile-computer-supported collaborative learning: meta-analysis and critical synthesis</article-title>. <source>Rev. Educ. Res.</source> <volume>87</volume>, <fpage>768</fpage>&#x2013;<lpage>805</lpage>. doi: <pub-id pub-id-type="doi">10.3102/0034654317704307</pub-id>, <pub-id pub-id-type="pmid">28989193</pub-id></mixed-citation></ref>
<ref id="ref37"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Tabachnick</surname><given-names>B. G.</given-names></name> <name><surname>Fidell</surname><given-names>L. S.</given-names></name></person-group> (<year>2019</year>). <source>Using multivariate statistics</source>. <edition>7th</edition> Edn. <publisher-loc>Boston, MA</publisher-loc>: <publisher-name>Pearson</publisher-name>.</mixed-citation></ref>
<ref id="ref38"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Tamim</surname><given-names>R. M.</given-names></name> <name><surname>Bernard</surname><given-names>R. M.</given-names></name> <name><surname>Borokhovski</surname><given-names>E.</given-names></name> <name><surname>Abrami</surname><given-names>P. C.</given-names></name> <name><surname>Schmid</surname><given-names>R. F.</given-names></name></person-group> (<year>2011</year>). <article-title>What forty years of research says about the impact of technology on learning: a second-order meta-analysis and validation study</article-title>. <source>Rev. Educ. Res.</source> <volume>81</volume>, <fpage>4</fpage>&#x2013;<lpage>28</lpage>. doi: <pub-id pub-id-type="doi">10.3102/0034654310393361</pub-id></mixed-citation></ref>
<ref id="ref9002"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Teo</surname><given-names>T.</given-names></name></person-group> (<year>2011</year>). <article-title>Factors influencing teachers&#x2019; intention to use technology: model development and test</article-title>. <source>Comput. Educ.</source> <volume>57</volume>, <fpage>2432</fpage>&#x2013;<lpage>2440</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.compedu.2011.06.008</pub-id></mixed-citation></ref>
<ref id="ref39"><mixed-citation publication-type="other"><person-group person-group-type="author"><name><surname>Trist&#x00E1;n</surname><given-names>A.</given-names></name></person-group> (<year>2006</year>). &#x201C;<article-title>An adjustment for sample size in DIF analysis</article-title>&#x201D; in <source>Rasch measurement: Transactions of the Rasch measurement SIG, American Educational Research Association</source>. eds. <person-group person-group-type="editor"><name><surname>Bernoulli</surname><given-names>J.</given-names></name> <name><surname>Fischer</surname><given-names>W. P.</given-names></name> <name><surname>Shannon</surname><given-names>C.</given-names></name> <name><surname>Rasch</surname><given-names>G.</given-names></name></person-group>, <volume>20</volume>, <fpage>1070</fpage>&#x2013;<lpage>1071</lpage>. Available online at: <ext-link xlink:href="http://www.rasch.org/rmt/rmt203e.htm" ext-link-type="uri">http://www.rasch.org/rmt/rmt203e.htm</ext-link> (Accessed February 12, 2023).</mixed-citation></ref>
<ref id="ref40"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Tullis</surname><given-names>J. G.</given-names></name> <name><surname>Benjamin</surname><given-names>A. S.</given-names></name></person-group> (<year>2011</year>). <article-title>On the effectiveness of self-paced learning</article-title>. <source>J. Mem. Lang.</source> <volume>64</volume>, <fpage>109</fpage>&#x2013;<lpage>118</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.jml.2010.11.002</pub-id>, <pub-id pub-id-type="pmid">21516194</pub-id></mixed-citation></ref>
<ref id="ref41"><mixed-citation publication-type="book"><person-group person-group-type="author"><collab id="coll5">U.S. Department of Education, Institute of Education Sciences, What Works Clearinghouse</collab></person-group> (<year>2009</year>). <source>Cognitive tutor&#x00AE; algebra I: Intervention report (middle school math)</source>. <publisher-loc>Washington, DC</publisher-loc>: <publisher-name>U.S. Department of Education</publisher-name>.</mixed-citation></ref>
<ref id="ref42"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Van Der Kleij</surname><given-names>F. M.</given-names></name> <name><surname>Feskens</surname><given-names>R. C. W.</given-names></name> <name><surname>Eggen</surname><given-names>T. J. H. M.</given-names></name></person-group> (<year>2015</year>). <article-title>Effects of feedback in a computer-based learning environment on students&#x2019; learning outcomes: a meta-analysis</article-title>. <source>Rev. Educ. Res.</source> <volume>85</volume>, <fpage>475</fpage>&#x2013;<lpage>511</lpage>. doi: <pub-id pub-id-type="doi">10.3102/0034654314564881</pub-id></mixed-citation></ref>
<ref id="ref43"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>VanLehn</surname><given-names>K.</given-names></name></person-group> (<year>2006</year>). <article-title>The behavior of tutoring systems</article-title>. <source>Int. J. Artif. Intell. Educ.</source> <volume>16</volume>, <fpage>227</fpage>&#x2013;<lpage>265</lpage>. doi: <pub-id pub-id-type="doi">10.3233/IRG-2006-16(3)02</pub-id></mixed-citation></ref>
<ref id="ref44"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>VanLehn</surname><given-names>K.</given-names></name></person-group> (<year>2011</year>). <article-title>The relative effectiveness of human tutoring, intelligent tutoring systems, and other tutoring systems</article-title>. <source>Educ. Psychol.</source> <volume>46</volume>, <fpage>197</fpage>&#x2013;<lpage>221</lpage>. doi: <pub-id pub-id-type="doi">10.1080/00461520.2011.611369</pub-id></mixed-citation></ref>
<ref id="ref45"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>vom Hofe</surname><given-names>R.</given-names></name> <name><surname>Hafner</surname><given-names>T.</given-names></name> <name><surname>Blum</surname><given-names>W.</given-names></name> <name><surname>Pekrun</surname><given-names>R.</given-names></name></person-group> (<year>2009</year>). &#x201C;<article-title>Die Entwicklung mathematischer Kompetenzen in der Sekundarstufe &#x2013; Ergebnisse der L&#x00E4;ngsschnittstudie PALMA [The development of mathematical competencies in secondary education &#x2013; results of the longitudinal study PALMA]</article-title>&#x201D; in <source>Mathematiklernen vom Kindergarten bis zum Studium: Kontinuit&#x00E4;t und Koh&#x00E4;renz als Herausforderung f&#x00FC;r den Mathematikunterricht</source>. eds. <person-group person-group-type="editor"><name><surname>Heinze</surname><given-names>A.</given-names></name> <name><surname>Gr&#x00FC;&#x00DF;ing</surname><given-names>M.</given-names></name></person-group> (<publisher-loc>M&#x00FC;nster</publisher-loc>: <publisher-name>Waxmann</publisher-name>), <fpage>125</fpage>&#x2013;<lpage>146</lpage>.</mixed-citation></ref>
<ref id="ref46"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Yi</surname><given-names>L.</given-names></name> <name><surname>Liu</surname><given-names>D.</given-names></name> <name><surname>Jiang</surname><given-names>T.</given-names></name> <name><surname>Xian</surname><given-names>Y.</given-names></name></person-group> (<year>2025</year>). <article-title>The effectiveness of AI on K-12 students&#x2019; mathematics learning: a systematic review and meta-analysis</article-title>. <source>Int. J. Sci. Math. Educ.</source> <volume>23</volume>, <fpage>1105</fpage>&#x2013;<lpage>1126</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s10763-024-10499-7</pub-id></mixed-citation></ref>
<ref id="ref47"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Zumbo</surname><given-names>B. D.</given-names></name></person-group> (<year>1999</year>). <source>A handbook on the theory and methods of differential item functioning (DIF): Logistic regression modeling as a unitary framework for binary and Likert-type (ordinal) item scores</source>. <publisher-loc>Ottawa, ON</publisher-loc>: <publisher-name>Directorate of Human Resources Research and Evaluation, Department of National Defence</publisher-name>.</mixed-citation></ref>
</ref-list>
<fn-group>
<fn fn-type="custom" custom-type="edited-by" id="fn0004">
<p>Edited by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1395191/overview">Huma Akram</ext-link>, North China University of Water Resources and Electric Power, China</p>
</fn>
<fn fn-type="custom" custom-type="reviewed-by" id="fn0005">
<p>Reviewed by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1799630/overview">Abbas Hussein Abdelrady</ext-link>, Qassim University, Saudi Arabia</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2026549/overview">Iden Rainal Ihsan</ext-link>, Universitas Samudra, Indonesia</p>
</fn>
</fn-group>
<fn-group>
<fn id="fn0001">
<label>1</label>
<p>Hedges&#x2019; <italic>g</italic> is a standardized effect size used for comparing groups of unequal sizes and can be interpreted similarly to Cohen&#x2019;s <italic>d</italic>.</p>
</fn>
<fn id="fn0002">
<label>2</label>
<p><italic>Glass&#x2019; ES</italic> is a standardized effect size used to compare groups with unequal variances and can be interpreted similarly to Cohen&#x2019;s <italic>d</italic>.</p>
</fn>
<fn id="fn0003">
<label>3</label>
<p>Although the effect was not statistically significant, we report this value for comparison with other findings.</p>
</fn>
</fn-group>
</back>
</article>