<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Psychol.</journal-id>
<journal-title>Frontiers in Psychology</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Psychol.</abbrev-journal-title>
<issn pub-type="epub">1664-1078</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fpsyg.2022.1082659</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Psychology</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Comparing the psychometric properties of two primary school Computational Thinking (CT) assessments for grades 3 and 4: The Beginners&#x00027; CT test (BCTt) and the competent CT test (cCTt)</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name><surname>El-Hamamsy</surname> <given-names>Laila</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x0002A;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/781667/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Zapata-C&#x000E1;ceres</surname> <given-names>Mar&#x000ED;a</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/2073859/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Marcelino</surname> <given-names>Pedro</given-names></name>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/2098541/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Bruno</surname> <given-names>Barbara</given-names></name>
<xref ref-type="aff" rid="aff5"><sup>5</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/893934/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Dehler Zufferey</surname> <given-names>Jessica</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/2110326/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Mart&#x000ED;n-Barroso</surname> <given-names>Estefan&#x000ED;a</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/2086979/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Rom&#x000E1;n-Gonz&#x000E1;lez</surname> <given-names>Marcos</given-names></name>
<xref ref-type="aff" rid="aff6"><sup>6</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/760761/overview"/>
</contrib>
</contrib-group>
<aff id="aff1"><sup>1</sup><institution>MOBOTS Group, Ecole Polytechnique F&#x000E9;d&#x000E9;rale de Lausanne</institution>, <addr-line>Lausanne</addr-line>, <country>Switzerland</country></aff>
<aff id="aff2"><sup>2</sup><institution>LEARN - Center for Learning Sciences, Ecole Polytechnique F&#x000E9;d&#x000E9;rale de Lausanne</institution>, <addr-line>Lausanne</addr-line>, <country>Switzerland</country></aff>
<aff id="aff3"><sup>3</sup><institution>Laboratory of Information Technologies in Education, Rey Juan Carlos University</institution>, <addr-line>Madrid</addr-line>, <country>Spain</country></aff>
<aff id="aff4"><sup>4</sup><institution>Computational Thinking Department, TreeTree2 (T2)</institution>, <addr-line>Lisbon</addr-line>, <country>Portugal</country></aff>
<aff id="aff5"><sup>5</sup><institution>CHILI Laboratory, Ecole Polytechnique F&#x000E9;d&#x000E9;rale de Lausanne</institution>, <addr-line>Lausanne</addr-line>, <country>Switzerland</country></aff>
<aff id="aff6"><sup>6</sup><institution>Faculty of Education, Universidad Nacional de Educaci&#x000F3;n a Distancia (UNED)</institution>, <addr-line>Madrid</addr-line>, <country>Spain</country></aff>
<author-notes>
<fn fn-type="edited-by"><p>Edited by: Stamatios Papadakis, University of Crete, Greece</p></fn>
<fn fn-type="edited-by"><p>Reviewed by: &#x000D6;zgen Korkmaz, Amasya University, Turkey; Janika Leoste, Tallinn University, Estonia</p></fn>

<corresp id="c001">&#x0002A;Correspondence: Laila El-Hamamsy <email>laila.elhamamsy&#x00040;epfl.ch</email></corresp>
<fn fn-type="other" id="fn001"><p>This article was submitted to Educational Psychology, a section of the journal Frontiers in Psychology</p></fn></author-notes>
<pub-date pub-type="epub">
<day>12</day>
<month>12</month>
<year>2022</year>
</pub-date>
<pub-date pub-type="collection">
<year>2022</year>
</pub-date>
<volume>13</volume>
<elocation-id>1082659</elocation-id>
<history>
<date date-type="received">
<day>28</day>
<month>10</month>
<year>2022</year>
</date>
<date date-type="accepted">
<day>15</day>
<month>11</month>
<year>2022</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x000A9; 2022 El-Hamamsy, Zapata-C&#x000E1;ceres, Marcelino, Bruno, Dehler Zufferey, Mart&#x000ED;n-Barroso and Rom&#x000E1;n-Gonz&#x000E1;lez.</copyright-statement>
<copyright-year>2022</copyright-year>
<copyright-holder>El-Hamamsy, Zapata-C&#x000E1;ceres, Marcelino, Bruno, Dehler Zufferey, Mart&#x000ED;n-Barroso and Rom&#x000E1;n-Gonz&#x000E1;lez</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p></license> </permissions>
<abstract>
<sec>
<title>Introduction</title>
<p>With the increasing amount of research around Computational Thinking (CT) and endeavors introducing CT into curricula worldwide, assessing CT at all levels of formal education is of utmost importance to ensure that CT-related learning objectives are met. This has contributed to a progressive increase in the number of validated and reliable CT assessments for K-12, including primary school. Researchers and practitioners are thus required to choose among multiple instruments, often overlapping in their age validity.</p>
</sec>
<sec>
<title>Methods</title>
<p>In this study, we compare the psychometric properties of two of these instruments: the Beginners&#x00027; CT test (BCTt), developed for grades 1&#x02013;6, and the competent CT test (cCTt), validated for grades 3&#x02013;4. Classical Test Theory and Item Response Theory (IRT) were employed on data acquired from 575 students in grades 3&#x02013;4 to compare the properties of the two instruments and refine the limits of their validity.</p>
</sec>
<sec>
<title>Results</title>
<p>The findings (i) establish the detailed psychometric properties of the BCTt in grades 3&#x02013;4 for the first time, and (ii) through a comparison with students from the same country, indicate that the cCTt should be preferred for grades 3&#x02013;4 as the cCTt is able to discriminate between students of low and medium ability. Conversely, while the BCTt, which is easier, shows a ceiling effect, it is better suited to discriminate between students in the low ability range. For these grades, the BCTt can thus be employed as a screening mechanism to identify low ability students.</p>
</sec>
<sec>
<title>Discussion</title>
<p>In addition to providing recommendations for use of these instruments, the findings highlight the importance of comparing the psychometric properties of existing assessments, so that researchers and practitioners, including teachers and policy makers involved in digital education curricular reforms, may take informed decisions when selecting assessments.</p>
</sec></abstract>
<kwd-group>
<kwd>Computational Thinking</kwd>
<kwd>assessment</kwd>
<kwd>primary school</kwd>
<kwd>validation</kwd>
<kwd>developmental appropriateness</kwd>
<kwd>psychometrics</kwd>
</kwd-group>
<contract-sponsor id="cn001">National Centre of Competence in Research Robotics<named-content content-type="fundref-id">10.13039/501100011021</named-content></contract-sponsor>
<counts>
<fig-count count="7"/>
<table-count count="7"/>
<equation-count count="0"/>
<ref-count count="93"/>
<page-count count="20"/>
<word-count count="13017"/>
</counts>
</article-meta>
</front>
<body>
<sec id="s1">
<title>1. Introduction and related work</title>
<p>Computational Thinking (CT) is more and more often considered to be an essential twenty-first century skill (Li et al., <xref ref-type="bibr" rid="B47">2020</xref>), that is as important as reading, writing, and arithmetic (Wing, <xref ref-type="bibr" rid="B86">2006</xref>) and must be taught at a young age. Despite the lack of consensus regarding the definition of CT, CT is traditionally defined by Wing (<xref ref-type="bibr" rid="B86">2006</xref>) as &#x0201C;an approach to solving problems, designing systems, and understanding human behavior that draws on concepts fundamental to computing&#x0201D; which was later reformulated by Aho (<xref ref-type="bibr" rid="B1">2012</xref>) as &#x0201C;the thought processes involved in formulating problems so their solutions can be represented as computational steps and algorithms.&#x0201D; As such, CT has often been associated with Computer Science (CS), although many researchers consider CT to be transversal (Mannila et al., <xref ref-type="bibr" rid="B50">2014</xref>; Weintrop, <xref ref-type="bibr" rid="B80">2016</xref>; Denning and Tedre, <xref ref-type="bibr" rid="B25">2021</xref>; Weintrop et al., <xref ref-type="bibr" rid="B82">2021b</xref>), and not exclusively related to CS or mathematics (Li et al., <xref ref-type="bibr" rid="B47">2020</xref>). This has led to a &#x0201C;tremendous growth in curricula, learning environments, and innovations around CT education&#x0201D; (Weintrop et al., <xref ref-type="bibr" rid="B82">2021b</xref>). To be successful, these initiatives rely on the constructive alignment between the learning objectives, teaching and learning activities, and <italic>assessments</italic> (Biggs, <xref ref-type="bibr" rid="B8">1996</xref>). 
Developing and implementing effective CT interventions thus requires expanding the portfolio of developmentally appropriate instruments to assess CT at all levels of formal education, for use by researchers and educators alike (Weintrop et al., <xref ref-type="bibr" rid="B81">2021a</xref>).</p>
<p>Developing CT assessments requires having better insight into what composes this competence, with a competence referring to &#x0201C;the proven ability to use knowledge, skills, and personal, social, and/or methodological abilities, in work or study situations and in professional and personal development&#x0201D; (European Union, <xref ref-type="bibr" rid="B31">2006</xref>). As such, Brennan and Resnick (<xref ref-type="bibr" rid="B10">2012</xref>) proposed an operational definition of CT by decomposing CT into three dimensions. The first is CT-concepts, i.e., &#x0201C;the concepts designers engage with as they program, such as iteration, parallelism,&#x0201D; (Brennan and Resnick, <xref ref-type="bibr" rid="B10">2012</xref>), which thus includes sequences, loops, if-else statements and so forth at the primary school level. These elements can be adequately assessed through diagnostic and summative tools (Rom&#x000E1;n-Gonz&#x000E1;lez et al., <xref ref-type="bibr" rid="B64">2019</xref>). The second is CT-practices, i.e., &#x0201C;the practices designers develop as they engage with the concepts, such as debugging projects or remixing others&#x00027; work&#x0201D; (Brennan and Resnick, <xref ref-type="bibr" rid="B10">2012</xref>), which thus requires understanding the thought processes involved in resolving CT problems. These may include elements of abstraction, decomposition, evaluation, and so forth and can be adequately assessed through formative-iterative tools and data-mining tools (Rom&#x000E1;n-Gonz&#x000E1;lez et al., <xref ref-type="bibr" rid="B64">2019</xref>). 
The third is CT-perspectives, i.e., &#x0201C;the perspectives designers form about the world around them and about themselves&#x0201D; (Brennan and Resnick, <xref ref-type="bibr" rid="B10">2012</xref>), and therefore their perception of CT which can be adequately evaluated through perception and attitude scales and vocabulary assessments (Rom&#x000E1;n-Gonz&#x000E1;lez et al., <xref ref-type="bibr" rid="B64">2019</xref>).</p>
<p>Despite the increase in research around CT in the past two decades, and the various means of assessing CT identified by Tang et al. (<xref ref-type="bibr" rid="B74">2020</xref>) [i.e., &#x0201C;traditional test(s) composed of selected- or constructed response questions, portfolio assessment(s), interviews, and surveys&#x0201D;], few validated and reliable instruments exist for CT, and even fewer at the primary school level (Rom&#x000E1;n-Gonz&#x000E1;lez et al., <xref ref-type="bibr" rid="B64">2019</xref>; Basu et al., <xref ref-type="bibr" rid="B6">2020</xref>; Zapata-C&#x000E1;ceres et al., <xref ref-type="bibr" rid="B93">2020</xref>; Clarke-Midura et al., <xref ref-type="bibr" rid="B18">2021</xref>). This limitation was highlighted by Tang et al. (<xref ref-type="bibr" rid="B74">2020</xref>) in their recent meta review on CT assessments: out of 96 studies, only 45% provided reliability evidence and just 18% provided validity evidence. This mirrors the findings of Bakala et al. (<xref ref-type="bibr" rid="B5">2021</xref>) who, in their literature review on the effects of robots on preschool children&#x00027;s CT, found that most studies employed <italic>ad-hoc</italic> evaluations, typically neither standardized nor validated. Bakala et al. (<xref ref-type="bibr" rid="B5">2021</xref>) attributed this to the fact that only two recent valid and reliable tests for that age group existed at the time of their review [the TechCheck by Relkin et al., <xref ref-type="bibr" rid="B62">2020</xref>; Relkin and Bers, <xref ref-type="bibr" rid="B61">2021</xref> and the Beginners&#x00027; CT test (BCTt) by Zapata-C&#x000E1;ceres et al., <xref ref-type="bibr" rid="B93">2020</xref>] and recommended that researchers aim to employ them in future studies. 
To further limit the available choices, many existing assessments are strongly tied to specific CS frameworks (Rowe et al., <xref ref-type="bibr" rid="B69">2021</xref>) [e.g., Dr. Scratch (Moreno-Le&#x000F3;n and Robles, <xref ref-type="bibr" rid="B53">2015</xref>) or the Fairy assessments (Werner et al., <xref ref-type="bibr" rid="B83">2012</xref>)]. As stated by Relkin and Bers (<xref ref-type="bibr" rid="B61">2021</xref>) and Rowe et al. (<xref ref-type="bibr" rid="B69">2021</xref>), being strongly tied to specific frameworks means that the instrument risks conflating CT with programming abilities. This contributes to a lack of generalizability and thus limits the range of applications of such instruments (Tikva and Tambouris, <xref ref-type="bibr" rid="B75">2021</xref>), which for example should be avoided in the context of pre-post test experimental designs. It is essential to provide researchers and practitioners (e.g., teachers and policy makers involved in digital education curricular reforms) the means to assess CT:</p>
<list list-type="order">
<list-item><p>at all levels of education</p></list-item>
<list-item><p>independently from specific studies or programming environments</p></list-item>
<list-item><p>in a valid and reliable way to ensure that there is sufficient &#x0201C;evidence and theory [to] support the interpretations of test scores entailed by proposed uses of tests&#x0201D; (Clarke-Midura et al., <xref ref-type="bibr" rid="B18">2021</xref>)</p></list-item>
<list-item><p>with an instrument which can easily be administered.</p></list-item>
</list>
<p>Without these, it is not possible to ensure that CT-related learning objectives are met, whether in individual interventions or in the context of large scale CS and/or CT curricular reform initiatives (El-Hamamsy et al., <xref ref-type="bibr" rid="B27">2021a</xref>,<xref ref-type="bibr" rid="B28">b</xref>).</p>
<p>Unfortunately, while an increasing number of instruments have been recently developed, several do not meet these criteria (Hubwieser and M&#x000FC;hling, <xref ref-type="bibr" rid="B38">2014</xref>; Bellettini et al., <xref ref-type="bibr" rid="B7">2015</xref>; Gane et al., <xref ref-type="bibr" rid="B32">2021</xref>; Parker et al., <xref ref-type="bibr" rid="B56">2021</xref>). For example, the Bebras challenge is sometimes used to assess CT skills, but has undergone limited psychometric validation (Hubwieser and M&#x000FC;hling, <xref ref-type="bibr" rid="B38">2014</xref>; Bellettini et al., <xref ref-type="bibr" rid="B7">2015</xref>). Gane et al. (<xref ref-type="bibr" rid="B32">2021</xref>)&#x00027;s assessment requires manual grading and multiple annotators, thus limiting the test&#x00027;s scalability and its usability by other researchers and practitioners. Parker et al. (<xref ref-type="bibr" rid="B56">2021</xref>)&#x00027;s assessment, which is based on a combination of block-based and Bebras-style questions, has been piloted with just 57 fourth graders. Finally, Chen et al. (<xref ref-type="bibr" rid="B14">2017</xref>)&#x00027;s assessment for 5th graders appears highly dependent on the robotics programming context, includes open questions and was administered to just 37 students, thus including the limitations of all the aforementioned assessments, in addition to limiting its use in other CT-related contexts.</p>
<p>Instruments meeting the aforementioned criteria, and having undergone a psychometric validation and reliability assessment process at the level of primary school (see section 2.2), include the TechCheck for lower primary school (grades 1&#x02013;2, ages 6&#x02013;8, Relkin et al., <xref ref-type="bibr" rid="B62">2020</xref>), the TechCheck-K, which is an adaptation of the former for kindergarten (ages 4&#x02013;6, Relkin and Bers, <xref ref-type="bibr" rid="B61">2021</xref>), the BCTt for grades 1&#x02013;6 (ages 5&#x02013;10, Zapata-C&#x000E1;ceres et al., <xref ref-type="bibr" rid="B93">2020</xref>), the competent CT test (cCTt) for grades 3&#x02013;4 (ages 7&#x02013;9, El-Hamamsy et al., <xref ref-type="bibr" rid="B29">2022a</xref>), the Computational Thinking Assessment for Chinese Elementary Students (CTA-CES) for grades 3&#x02013;6 (ages 9&#x02013;12, Li et al., <xref ref-type="bibr" rid="B48">2021</xref>), and Kong and Lai (<xref ref-type="bibr" rid="B43">2022</xref>)&#x00027;s CT-concepts test for grades 3&#x02013;5. A synthesis of these instruments is provided in <xref ref-type="table" rid="T1">Table 1</xref> and shows that these instruments often differ in the underlying definition of CT employed to define the test items, which makes it complex to compare them psychometrically. Furthermore, these instruments are all relatively new and adopt an unplugged approach, using multiple choice questions to assess primary school students&#x00027; CT abilities. In addition, there is an overlap in their target age ranges. It is thus important for researchers and practitioners to not only identify instruments that best assess the learning objectives of their interventions, but also to understand the limits of validity of these instruments to make informed decisions for their own studies. Such instruments are unfortunately not often compared against one another to determine which may be more appropriate for a given age range. 
To the best of our knowledge, only the TechCheck and TechCheck-K were compared to establish whether the TechCheck-K would be an adequate instrument for kindergarten students (Relkin and Bers, <xref ref-type="bibr" rid="B61">2021</xref>), with the TechCheck being more appropriate for first and second graders.</p>
<table-wrap position="float" id="T1">
<label>Table 1</label>
<caption><p>Synthesis of validated and scalable primary school unplugged CT assessments and corresponding validation processes adapted from El-Hamamsy et al. (<xref ref-type="bibr" rid="B29">2022a</xref>).</p></caption>
<table frame="hsides" rules="groups">
<thead><tr>
<th valign="top" align="left"><bold>Test</bold></th>
<th valign="top" align="left"><bold>Format</bold></th>
<th valign="top" align="left"><bold>Target age group</bold></th>
<th valign="top" align="left"><bold>CT definition</bold></th>
<th valign="top" align="left"><bold>Validation process</bold></th>
<th valign="top" align="left"><bold>Sample</bold></th>
<th valign="top" align="left"><bold>Validity established for</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">TechCheck (Relkin et al., <xref ref-type="bibr" rid="B62">2020</xref>) and TechCheck-K (Relkin and Bers, <xref ref-type="bibr" rid="B61">2021</xref>)</td>
<td valign="top" align="left">15 item MCQ</td>
<td valign="top" align="left">1st and 2nd graders (6&#x02013;9 year old students) and kindergarten (5&#x02013;6 year old students)</td>
<td valign="top" align="left">Algorithms, Modularity, Design Process, Debugging, Control Structures, Hardware/Software</td>
<td valign="top" align="left">Expert validation, psychometric analysis (Classical Test Theory and Item Response Theory), convergent validation with the TACTIC-KIBO</td>
<td valign="top" align="left">768 5&#x02013;9 year old students participating in a robotics coding curriculum and 89 kindergarten students without coding experience</td>
<td valign="top" align="left">Full sample</td>
</tr>
<tr>
<td valign="top" align="left">Beginners&#x00027; CT test (Zapata-C&#x000E1;ceres et al., <xref ref-type="bibr" rid="B93">2020</xref>; Zapata-C&#x000E1;ceres and Fanchamps, <xref ref-type="bibr" rid="B91">2021</xref>)</td>
<td valign="top" align="left">25 item MCQ</td>
<td valign="top" align="left">Primary school (5&#x02013;12 year old students) and Kindergarten (4&#x02013;5 year old students)</td>
<td valign="top" align="left">Computational concepts, practices, perspectives (Brennan and Resnick, <xref ref-type="bibr" rid="B10">2012</xref>)</td>
<td valign="top" align="left">Expert validation, and psychometric analysis (Classical Test Theory)</td>
<td valign="top" align="left">299 primary school students from grades 1 to 6 and 5 kindergarten students</td>
<td valign="top" align="left">4&#x02013;7 year old students</td>
</tr>
<tr>
<td valign="top" align="left">The competent CT test (cCTt) (El-Hamamsy et al., <xref ref-type="bibr" rid="B29">2022a</xref>)</td>
<td valign="top" align="left">25 item MCQ</td>
<td valign="top" align="left">Primary school (7&#x02013;9 year old students)</td>
<td valign="top" align="left">Computational concepts, practices, perspectives (Brennan and Resnick, <xref ref-type="bibr" rid="B10">2012</xref>)</td>
<td valign="top" align="left">Expert validation and psychometric analysis (Classical Test Theory, Item Response Theory), Confirmatory Factor Analysis</td>
<td valign="top" align="left">1,519 primary school students from grades 3 to 4</td>
<td valign="top" align="left">Full sample</td>
</tr>
<tr>
<td valign="top" align="left">CT Assessment for Chinese Elementary Students (CTA-CES, Li et al., <xref ref-type="bibr" rid="B48">2021</xref>)</td>
<td valign="top" align="left">25 item MCQ</td>
<td valign="top" align="left">Grades 3&#x02013;6 (ages 9&#x02013;12)</td>
<td valign="top" align="left">Abstraction, algorithmic thinking, decomposition, evaluation, pattern recognition, generalization (Selby and Woollard, <xref ref-type="bibr" rid="B72">2013</xref>)</td>
<td valign="top" align="left">Expert validation, Classical Test Theory, Item Response Theory, Construct validity by comparing two groups of students, criterion validity through correlations with reasoning, spatial ability, and verbal ability</td>
<td valign="top" align="left">280 grade 3&#x02013;6 students</td>
<td valign="top" align="left">Full sample</td>
</tr>
<tr>
<td valign="top" align="left">Kong and Lai (<xref ref-type="bibr" rid="B43">2022</xref>)&#x00027;s CT-concepts test</td>
<td valign="top" align="left">14 item MCQ</td>
<td valign="top" align="left">Grades 3&#x02013;5 (ages 8&#x02013;10)</td>
<td valign="top" align="left">Sequences, conditionals, repetition (Brennan and Resnick, <xref ref-type="bibr" rid="B10">2012</xref>)</td>
<td valign="top" align="left">Item Response Theory</td>
<td valign="top" align="left">13,670 grade 3 to 5 students</td>
<td valign="top" align="left">Full sample</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>In this paper, we are interested in the overlap between the BCTt and the cCTt for students in grades 3 and 4 as these two instruments overlap in their targets, and are from the same &#x0201C;family&#x0201D; of CT tests, and thus cover the same concepts. Therefore, the BCTt and cCTt cannot be considered complementary within a system of assessments, and thus require choosing between them. It is therefore essential to establish their limits of validity for the considered age group to provide recommendations to help researchers make an informed decision when selecting CT-assessments in accordance with their study requirements. Indeed, while the BCTt was initially developed as an instrument looking to cover all of primary school, the validation procedure appeared to indicate that the BCTt was too easy for students in upper primary school (Zapata-C&#x000E1;ceres et al., <xref ref-type="bibr" rid="B93">2020</xref>). As the cCTt was derived from the BCTt to adapt the instrument in terms of format and content to improve its validity for students in grades 3 and 4 (7&#x02013;9 year old students), the present study therefore investigates how the BCTt and cCTt complement each other in assessing CT in grades 3 and 4, to propose recommendations for their use for these grades. More specifically, we look to answer the following research questions:</p>
<list list-type="order">
<list-item><p><italic>How do the psychometric properties of the BCTt and the cCTt compare for students in grades 3&#x02013;4 (7&#x02013;9 years old)?</italic></p></list-item>
<list-item><p><italic>How does the psychometric comparison inform us about how the instruments should be used in grades 3&#x02013;4 (7&#x02013;9 years old)?</italic></p></list-item>
</list>
</sec>
<sec sec-type="methods" id="s2">
<title>2. Methodology</title>
<sec>
<title>2.1. The BCTt, cCTt, and their validation</title>
<p>The BCTt and the cCTt are two 25-item multiple choice CT assessments<xref ref-type="fn" rid="fn0001"><sup>1</sup></xref> of progressive difficulty, targeting the CT-concepts posed by Brennan and Resnick (<xref ref-type="bibr" rid="B10">2012</xref>) in their decomposition of CT into concepts, practices, and perspectives. More specifically, the two tests evaluate notions of sequences, simple loops (only one instruction is repeated), complex loops (two or more instructions are repeated), conditionals and while statements (see the distribution of items in <xref ref-type="table" rid="T2">Table 2</xref>), with the factor structure pertaining to these concepts having been validated through Confirmatory Factor Analysis by El-Hamamsy et al. (<xref ref-type="bibr" rid="B29">2022a</xref>). The BCTt was derived from the CTt (Rom&#x000E1;n-Gonz&#x000E1;lez et al., <xref ref-type="bibr" rid="B65">2017</xref>, <xref ref-type="bibr" rid="B66">2018</xref>, <xref ref-type="bibr" rid="B64">2019</xref>), with changes in terms of format and content to adapt it to primary school. In a similar spirit, the cCTt made alterations to the format and content of the BCTt to more specifically target students in grades 3 and 4 (El-Hamamsy et al., <xref ref-type="bibr" rid="B29">2022a</xref>). Both instruments, like their predecessor the CTt, employ grid-type and canvas-type questions (see <xref ref-type="fig" rid="F1">Figure 1</xref>) and employ the same type of tasks. The individual questions differ (see <xref ref-type="table" rid="T2">Table 2</xref>) as the cCTt (i) favors questions on 4 &#x000D7; 4 grids, (ii) replaces BCTt questions of low difficulty with questions related to complex concepts (e.g., while statements), (iii) alters the disposition of objects on the grids, and responses, with respect to the BCTt equivalents.</p>
<table-wrap position="float" id="T2">
<label>Table 2</label>
<caption><p>Comparison between the BCTt and the cCTt in terms of question concepts and question types (Table taken from El-Hamamsy et al., <xref ref-type="bibr" rid="B29">2022a</xref>).</p></caption>
<table frame="hsides" rules="groups">
<thead><tr>
<th/>
<th/>
<th valign="top" align="center" colspan="2" style="border-bottom: thin solid #000000;"><bold>BCTt</bold></th>
<th/>
<th/>
<th valign="top" align="center" colspan="2" style="border-bottom: thin solid #000000;"><bold>cCTt</bold></th>
<th/>
</tr>
</thead>
<tbody>
<tr style="border-bottom: thin solid #000000;">
<td valign="top" align="left"><bold>Blocks</bold></td>
<td valign="top" align="center"><bold>Grid</bold> (3 &#x000D7; 3)</td>
<td valign="top" align="center"><bold>Grid</bold> (4 &#x000D7; 4)</td>
<td valign="top" align="center"><bold>Canvas</bold></td>
<td valign="top" align="center"><bold>Total</bold></td>
<td valign="top" align="center"><bold>Grid</bold> (3 &#x000D7; 3)</td>
<td valign="top" align="center"><bold>Grid</bold> (4 &#x000D7; 4)</td>
<td valign="top" align="center"><bold>Canvas</bold></td>
<td valign="top" align="center"><bold>Total</bold></td>
</tr> <tr>
<td valign="top" align="left">Sequences</td>
<td valign="top" align="center">3</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">2</td>
<td valign="top" align="center">6</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">2</td>
<td valign="top" align="center">4</td>
</tr>
<tr>
<td valign="top" align="left">Simple loops</td>
<td valign="top" align="center">3</td>
<td valign="top" align="center">2</td>
<td valign="top" align="center">0</td>
<td valign="top" align="center">5</td>
<td valign="top" align="center">0</td>
<td valign="top" align="center">4</td>
<td valign="top" align="center">0</td>
<td valign="top" align="center">4</td>
</tr>
<tr>
<td valign="top" align="left">Complex loops</td>
<td valign="top" align="center">0</td>
<td valign="top" align="center">5</td>
<td valign="top" align="center">2</td>
<td valign="top" align="center">7</td>
<td valign="top" align="center">0</td>
<td valign="top" align="center">5</td>
<td valign="top" align="center">2</td>
<td valign="top" align="center">7</td>
</tr>
<tr>
<td valign="top" align="left">Conditional statements</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">3</td>
<td valign="top" align="center">0</td>
<td valign="top" align="center">4</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">3</td>
<td valign="top" align="center">0</td>
<td valign="top" align="center">4</td>
</tr>
<tr>
<td valign="top" align="left">While statements</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">2</td>
<td valign="top" align="center">0</td>
<td valign="top" align="center">3</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">3</td>
<td valign="top" align="center">0</td>
<td valign="top" align="center">4</td>
</tr>
<tr>
<td valign="top" align="left">Combinations</td>
<td valign="top" align="center">0</td>
<td valign="top" align="center">0</td>
<td valign="top" align="center">0</td>
<td valign="top" align="center">0</td>
<td valign="top" align="center">0</td>
<td valign="top" align="center">2</td>
<td valign="top" align="center">0</td>
<td valign="top" align="center">2</td>
</tr>
<tr>
<td valign="top" align="left">Total</td>
<td valign="top" align="center">8</td>
<td valign="top" align="center">13</td>
<td valign="top" align="center">4</td>
<td valign="top" align="center">25</td>
<td valign="top" align="center">3</td>
<td valign="top" align="center">18</td>
<td valign="top" align="center">4</td>
<td valign="top" align="center">25</td>
</tr>
</tbody>
</table>
</table-wrap>
<fig id="F1" position="float">
<label>Figure 1</label>
<caption><p>The two main question formats of the BCTt and cCTt: grid <bold>(Left)</bold> and canvas <bold>(Right)</bold> (Figure taken from El-Hamamsy et al., <xref ref-type="bibr" rid="B29">2022a</xref>).</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpsyg-13-1082659-g0001.tif"/>
</fig>
<p>Both the BCTt and cCTt instruments were validated by starting with an evaluation by experts and making adjustments based on their suggestions, prior to administration to students in the target age groups. The BCTt, which was designed for grades 1&#x02013;6, was administered to 200 students in that age group (Zapata-C&#x000E1;ceres et al., <xref ref-type="bibr" rid="B93">2020</xref>). The authors found that the test had good reliability with Cronbach&#x00027;s &#x003B1; &#x0003D; 0.824 . The results indicated that the students improved as they got older, and started to exhibit a ceiling effect in grades 3&#x02013;4<xref ref-type="fn" rid="fn0002"><sup>2</sup></xref>. The results indicated that the differences were significant between all grades, except for those in grades 4&#x02013;6, who already exhibit a ceiling effect (Zapata-C&#x000E1;ceres et al., <xref ref-type="bibr" rid="B93">2020</xref>). These results indicate that students begin to exhibit a ceiling effect either in grade 3 or grade 4. The cCTt, which was designed for grades 3 and 4, was administered to 1,519 students in that age group and analyzed through Classical Test Theory and Item Response Theory (El-Hamamsy et al., <xref ref-type="bibr" rid="B29">2022a</xref>). The results indicated that the grade 4 students scored significantly better than the grade 3 students (out of 25 pts, the one-way ANOVA indicates that <italic>p</italic> &#x0003C; 0.001 , &#x00394;<sub><italic>grades</italic></sub> &#x0003D; &#x0002B;2.9 pts, Cohen&#x00027;s <italic>d</italic> &#x0003D; 0.57 , &#x003BC;<sub>3</sub> &#x0003D; 12.62 &#x000B1; 5.18 , <italic>n</italic> = 711; &#x003BC;<sub>4</sub> &#x0003D; 15.49 &#x000B1; 4.96 , <italic>n</italic> = 749). The Classical Test Theory results indicated that the test had good reliability with Cronbach&#x00027;s &#x003B1; &#x0003D; 0.85 , levels of discrimination, and a wide range of question difficulties. 
Item Response Theory was employed to support these findings and indicated that the test was better suited to evaluating and discriminating between students with low and medium abilities.</p>
</sec>
<sec>
<title>2.2. Psychometric analysis</title>
<p>The objective of this study is to compare the psychometric properties of the BCTt and cCTt for students in grades 3 and 4. Classical Test Theory and Item Response Theory are two complementary (De Champlain, <xref ref-type="bibr" rid="B24">2010</xref>; Awopeju and Afolabi, <xref ref-type="bibr" rid="B4">2016</xref>) approaches typically employed to analyse the validity and reliability of scales and assessments. The Classical Test Theory and Item Response Theory (IRT) analyses are conducted in R (version 4.2.1, R Core Team, <xref ref-type="bibr" rid="B59">2019</xref>) using the following packages: lavaan (version 0.6-11, Rosseel, <xref ref-type="bibr" rid="B67">2012</xref>), CTT (version 2.3.3, Willse, <xref ref-type="bibr" rid="B85">2018</xref>), psych (version 2.1.3, Revelle, <xref ref-type="bibr" rid="B63">2021</xref>), mirt (version 1.36.1, Chalmers, <xref ref-type="bibr" rid="B12">2012</xref>), and subscore (version 3.3, Dai et al., <xref ref-type="bibr" rid="B22">2022</xref>).</p>
<sec>
<title>2.2.1. Classical test theory</title>
<p>Classical Test Theory &#x0201C;comprises a set of principles that allow us to determine how successful our proxy indicators are at estimating the unobservable variables of interest&#x0201D; (DeVellis, <xref ref-type="bibr" rid="B26">2006</xref>). Classical test theory focuses on test scores (Hambleton and Jones, <xref ref-type="bibr" rid="B35">1993</xref>) and computes:</p>
<list list-type="bullet">
<list-item><p>Reliability of the scale using Cronbach&#x00027;s &#x003B1; measurement of internal consistency of scales (Bland and Altman, <xref ref-type="bibr" rid="B9">1997</xref>). In the context of assessments, 0.7 &#x0003C; &#x003B1; &#x0003C; 0.9 is considered high and 0.5 &#x0003C; &#x003B1; &#x0003C; 0.7 is considered moderate (Hinton et al., <xref ref-type="bibr" rid="B36">2014</xref>; Taherdoost, <xref ref-type="bibr" rid="B73">2016</xref>). The drop alpha is computed per question as it indicates the reliability of the test without said question, and thus whether the internal consistency of the test improves without it.</p></list-item>
<list-item><p>Item difficulty index, i.e., the proportion of correct responses. <italic>Please note that this means that a question with a high difficulty index is an easy question</italic>. Determining whether questions are too easy or too difficult is often based on arbitrary thresholds which vary around what are considered to be ideal item difficulties. Indeed, some researchers have posited that item difficulties should vary between 0.4 and 0.6 as these are claimed to have maximum discrimination indices (Vincent and Shanmugam, <xref ref-type="bibr" rid="B78">2020</xref>). As such, thresholds employed in the literature have varied around these values, with items being classified as difficult for a range of thresholds between 0.1 and 0.3, and items being classified as easy for a range of thresholds varying between 0.7 and 0.9.</p>
<p>In this study, to remain coherent with the first cCTt validation in grades 3&#x02013;4, we consider that questions with a difficulty index above 0.85 are too easy, while those with a difficulty index below 0.25 are too hard and could be revised.</p>
</list-item>
<list-item><p>Point biserial correlation, or item discrimination. This is a measure of discrimination between the high ability examinees and low ability examinees. A point-biserial correlation above 0.15 is recommended, with good items generally having point biserial correlations above 0.25 (Varma, <xref ref-type="bibr" rid="B77">2006</xref>). In this article, we consider a threshold of 0.2 , which is commonly employed in the field (Chae et al., <xref ref-type="bibr" rid="B11">2019</xref>).</p></list-item>
</list>
<p>Unfortunately, Classical Test Theory suffers from several limitations, including that the analysis is sample-dependent (Hambleton and Jones, <xref ref-type="bibr" rid="B35">1993</xref>). As such, analyzing an instrument from the lens of Classical Test Theory on two different populations may not yield consistent results. The literature thus recommends employing Item Response Theory to complement the results of Classical Test Theory.</p>
</sec>
<sec>
<title>2.2.2. Item Response Theory (IRT)</title>
<p>According to Hambleton and Jones (<xref ref-type="bibr" rid="B35">1993</xref>), (i) IRT <italic>is sample independent</italic> so scores describing examinee proficiency are not dependent on the test difficulty, (ii) test items can be matched to ability levels, and (iii) the test models do not require strict parallel tests to assess reliability. This is because IRT models the link between a student&#x00027;s latent ability and their probability of correctly answering a question. Indeed, by evaluating the tests&#x00027; questions with respect to latent ability:</p>
<list list-type="bullet">
<list-item><p>The results are more likely to be sample independent, and therefore more likely to generalize beyond a specific sample of learners (Xie et al., <xref ref-type="bibr" rid="B88">2019</xref>), thus providing consistency between two different populations.</p></list-item>
<list-item><p>Item Response Theory is better suited to comparing multiple assessments through the latent ability scale (Jabrayilov et al., <xref ref-type="bibr" rid="B39">2016</xref>; Dai et al., <xref ref-type="bibr" rid="B21">2020</xref>), including in cases where different populations have taken the tests. Comparing two assessments can indeed be done in cases where the instruments measure the same latent traits (Xie et al., <xref ref-type="bibr" rid="B88">2019</xref>), which we believe is possible in the present case because both instruments measure the same CT-concepts, using the same symbols. This can be verified through Confirmatory Factor Analysis, as done by Kong and Lai (<xref ref-type="bibr" rid="B43">2022</xref>).</p></list-item>
</list>
<p>Item Response Theory models estimate the probability of a person of a given ability (measured in standard deviations from the mean) answering each question correctly. This is visualized through a logistic Item Characteristic Curve (ICC) for each question. As <xref ref-type="fig" rid="F2">Figure 2A</xref> shows, an item&#x00027;s difficulty (<italic>b</italic><sub><italic>i</italic></sub>) is the <italic>x</italic>-value (&#x003B8;) where the ICC reaches a <italic>y</italic> &#x0003D; 0.5 probability of answering correctly, and represents the number of standard deviations from the mean the question difficulty is. Items to the left of the graph are considered easier while items on the right are considered harder. According to De Ayala and Little (<xref ref-type="bibr" rid="B23">2022</xref>), &#x0201C;typical item and person locations fall within -3 to &#x0002B;3&#x0201D;, with easy items having scores below -2, average items having scores between -2 and &#x0002B;2 and hard items having scores above &#x0002B;2.</p>
<fig id="F2" position="float">
<label>Figure 2</label>
<caption><p>IRT plots. <bold>(A)</bold> Item Characteristic Curves for four items of equal discrimination (slope) and varying difficulty (using a 1-PL model on the cCTt test data). <bold>(B)</bold> Item Characteristic Curves (ICC) for four items (blue, red, green, purple) of varying difficulty and discrimination (using a 2-PL model on cCTt test data). <bold>(C)</bold> Item Information Curves (IICs) for the items in <bold>(B)</bold>. <bold>(D)</bold> Test Information Function (TIF, in blue) for the four items from Panels <bold>(B)</bold> and <bold>(C)</bold> (IIC, in black), and the standard error of measurement (SEM, in red).</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpsyg-13-1082659-g0002.tif"/>
</fig>
<p>Several IRT models exist for binary response data, however given the low sample size (Sahin and Anil, <xref ref-type="bibr" rid="B70">2017</xref>), we focus on one parameter logistic (1-PL) and 2-PL models. While 1-PL models consider that only difficulty varies across items, 2-PL models also take into account that some questions can discriminate more or less well between students of different ability, and thus exhibit varying ICC slopes. In the example in <xref ref-type="fig" rid="F2">Figure 2B</xref>, blue and red items are of equal difficulty <italic>b</italic><sub><italic>i</italic></sub> (<italic>y</italic> &#x0003D; 0.5 crossing) and relatively similar discrimination <italic>a</italic><sub><italic>i</italic></sub> , while items green and purple are of equal difficulty and varying discrimination. As the blue item is steeper, it has a higher discrimination than the red and green items. According to De Ayala and Little (<xref ref-type="bibr" rid="B23">2022</xref>), reasonably good discrimination values range from approximately 0.8&#x02013;2.5. Indeed, questions with steeper ICC slopes are better suited at discriminating between students at a given ability, while questions with lower discrimination power have more gentle slopes.</p>
<p>Items that discriminate better (steeper ICC slopes) thus provide more information about the ability level at which students are likely to start answering correctly, which results in higher bell shaped Item Information Curves, or IICs. The bell shaped curves in <xref ref-type="fig" rid="F2">Figure 2C</xref> represent the amount of information <italic>I</italic><sub><italic>i</italic></sub> provided for each of the test&#x00027;s items according to the student&#x00027;s ability &#x003B8; . These IICs vary in both maximum value (dependent on the item&#x00027;s discriminability, i.e., the ICC slope), and the <italic>x</italic>-value at which they reach it (the item&#x00027;s difficulty). Here, the blue and red curves, as well as the green and purple curves, have the same difficulty (they both reach their maximum around <italic>x</italic> = -2 and <italic>x</italic> = 0, respectively), but are of different discriminability: the blue item discriminates more than the red, the red more than the green and the green more than the purple (steeper ICC slope, and higher maximum IIC value).</p>
<p>Taking into account the different test items and the amount of information provided by each question, one can obtain the resulting Test Information Function (TIF) and Standard Error of Measurements (SEM). In <xref ref-type="fig" rid="F2">Figure 2D</xref>, the TIF (blue) is the sum of the instrument&#x00027;s IICs from <xref ref-type="fig" rid="F2">Figures 2B</xref>,<xref ref-type="fig" rid="F2">C</xref>, while the SEM is the square root of the variance. The TIF shows that the instrument displays maximum information around -2 and provides more information in the low-medium ability range than in the high ability range. The SEM (red) is at its lowest where the test provides the most information (maximum of the TIF) and at its highest where the test provides the least information (minimum of the TIF).</p>
<p>Please note that prior to applying IRT, it is recommended to verify whether the data meets the unidimensionality criterion. If the unidimensionality criterion is not met, the higher the misspecification, then the higher the impact on the estimated parameters, and in particular on the discrimination parameter (with little impact on the difficulty parameter, Kahraman, <xref ref-type="bibr" rid="B40">2013</xref>; Rajlic, <xref ref-type="bibr" rid="B60">2019</xref>). The unidimensionality criterion can be verified through Confirmatory Factor Analysis (CFA) as done by Kong and Lai (<xref ref-type="bibr" rid="B43">2022</xref>) for instance. As the input data is binary (with a score of 0 or 1 per question), the CFA analysis is conducted using an estimator which is adapted to non-normal data and employs diagonally weighted least squares and robust estimators to estimate the model parameters (Schweizer et al., <xref ref-type="bibr" rid="B71">2015</xref>; Rosseel, <xref ref-type="bibr" rid="B68">2020</xref>).</p>
<p>When analyzing the results of IRT, as in the case of Confirmatory Factor Analysis, and other similar statistical approaches, multiple fit indices should be considered to establish the goodness of fit of the model. Model fit indices include the following metrics:</p>
<list list-type="bullet">
<list-item><p>The chi-square &#x003C7;<sup>2</sup> statistic which should have <inline-formula><mml:math id="M1"><mml:msub><mml:mrow><mml:mi>p</mml:mi></mml:mrow><mml:mrow><mml:msup><mml:mrow><mml:mi>&#x003C7;</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:mrow></mml:msub><mml:mo>&#x0003E;</mml:mo><mml:mn>0</mml:mn><mml:mo>.</mml:mo><mml:mn>05</mml:mn></mml:math></inline-formula> . However, the larger the sample, the larger the &#x003C7;<sup>2</sup> statistic, and the lower the <italic>p</italic>-value (Prudon, <xref ref-type="bibr" rid="B58">2015</xref>; Alavi et al., <xref ref-type="bibr" rid="B2">2020</xref>). The literature therefore suggests employing the ratio between the &#x003C7;<sup>2</sup> statistic and the degrees of freedom with a cutoff at &#x003C7;<sup>2</sup>/<italic>df</italic> &#x02264; 3 (Kyriazos, <xref ref-type="bibr" rid="B45">2018</xref>). At the individual item level for IRT models, Orlando and Thissen&#x00027;s signed &#x003C7;<sup>2</sup> statistic (<italic>S</italic>&#x02212;&#x003C7;<sup>2</sup> ) is recommended, with a ratio of &#x003C7;<sup>2</sup>/<italic>df</italic> &#x02264; 5 being acceptable (Wheaton et al., <xref ref-type="bibr" rid="B84">1977</xref>; Kong and Lai, <xref ref-type="bibr" rid="B43">2022</xref>) and a ratio below 3 being considered good.</p></list-item>
<list-item><p>The root mean square error of approximation or RMSEA which should be &#x0003C; 0.06 for good fit and &#x0003C; 0.08 for acceptable fit (Hu and Bentler, <xref ref-type="bibr" rid="B37">1999</xref>; Chen et al., <xref ref-type="bibr" rid="B13">2008</xref>; Xia and Yang, <xref ref-type="bibr" rid="B87">2019</xref>).</p></list-item>
<list-item><p>The standardized root mean square residual or SRMR (Hu and Bentler, <xref ref-type="bibr" rid="B37">1999</xref>; Xia and Yang, <xref ref-type="bibr" rid="B87">2019</xref>) which should be &#x0003C; 0.08 .</p></list-item>
<list-item><p>The comparative fit index (CFI) and Tucker Lewis index (TLI) with values &#x0003E;0.95 indicating a good fit, and acceptable values being &#x0003E;0.90 (Kong and Lai, <xref ref-type="bibr" rid="B43">2022</xref>).</p></list-item>
</list>
<p>Finally, the following metrics are more specific to IRT:</p>
<list list-type="bullet">
<list-item><p>Yen (<xref ref-type="bibr" rid="B90">1984</xref>)&#x00027;s Q3 statistic to measure local independence which requires that none of the pairs of item residuals have a high correlation to ensure that local independence is not violated for the given model type. Critical values for the Q3 statistic are often arbitrary (Christensen et al., <xref ref-type="bibr" rid="B17">2017</xref>) (e.g., 0.2 Christensen et al., <xref ref-type="bibr" rid="B17">2017</xref>; Kong and Lai, <xref ref-type="bibr" rid="B43">2022</xref> or 0.3 Marais, <xref ref-type="bibr" rid="B51">2012</xref>). As in our case the sample size is small (around 200 for the cCTt and 300 for the BCTt), and the number of items is high, the threshold of 0.3 is chosen as a critical value as the Q3 statistic is expected to be higher here than in cases with large samples and low number of items (Christensen et al., <xref ref-type="bibr" rid="B17">2017</xref>). Similarly, as the number of items is high, the critical values are also expected to be higher (Christensen et al., <xref ref-type="bibr" rid="B17">2017</xref>). As such, we consider the 0.3 threshold for the present study.</p>
<p>The Q3 statistic is computed once the model with the best fit has been selected.</p>
</list-item>
<list-item><p>The <italic>M</italic><sub>2</sub> statistics by Maydeu-Olivares and Joe &#x0201C;which have been found to be effective in evaluating the goodness of fit of IRT models&#x0201D; (Kong and Lai, <xref ref-type="bibr" rid="B43">2022</xref>).</p></list-item>
<list-item><p>The IRT reliability for each ability &#x003B8; which is &#x0201C;closely related to test information and standard error, as it concerns the measurement precision and can be calculated with the equation <italic>r</italic> &#x0003D; 1&#x02212;<italic>SEM</italic>(&#x003B8;)<sup>2</sup>&#x0201D; (Kong and Lai, <xref ref-type="bibr" rid="B43">2022</xref>) where SEM represents the SEM for each ability.</p></list-item>
<list-item><p>Wainer and Thissen (<xref ref-type="bibr" rid="B79">2001</xref>)&#x00027;s marginal reliability metric (<italic>r</italic><sub><italic>xx</italic></sub>) which &#x0201C;denotes the ratio of the true score variance to the total variance, expressed with respect to the estimated latent abilities&#x0201D; (Andersson and Xin, <xref ref-type="bibr" rid="B3">2018</xref>).</p></list-item>
</list>
</sec>
</sec>
<sec>
<title>2.3. Participants and data collection</title>
<p>To compare the instruments, we used data collected by researchers and practitioners using the BCTt and cCTt in a study looking to evaluate the impact of a CT intervention conducted in public schools in Portugal. The recruitment for the intervention was done in three stages. First a call was sent out to schools and teachers to ask whether they were interested in participating in the CT intervention which included a pre-post test assessment using either the BCTt (in spring 2020) or the cCTt (in spring 2021). Secondly, teachers who were interested were briefed about the intervention and the assessments before agreeing or not to participate with their classrooms. Thirdly, consent forms were sent out to the parents of the concerned students.</p>
<p>The administration of both instruments was done in the classrooms following the protocol established for the BCTt, and its adaptation for the cCTt. In order to compare the instruments and avoid biases from the interventions themselves (whose goals and outcomes are outside the scope of this article), we only consider the results of the pre-tests administered to 575 students prior to the interventions (El-Hamamsy et al., <xref ref-type="bibr" rid="B30">2022b</xref>).<xref ref-type="fn" rid="fn0003"><sup>3</sup></xref> More specifically, we analyse the results of the BCTt pre-test administered in March 2020 to 374 students in grades 3&#x02013;4, and the results of the cCTt pre-test administered in April 2021 to 201 other students in grades 3&#x02013;4 (see <xref ref-type="table" rid="T3">Table 3</xref>). All participants were enrolled in the same school districts in Portugal and did not have any prior experience with the CT-concepts measured with the instruments, as this is not part of the national curriculum. Please note that while the populations are not identical, they are considered to be comparable, and a comparison of both instruments is possible through the lens of IRT which is sample agnostic (see section 2.2.2) and complements the results of Classical Test Theory which may be subject to sample dependency. Comparing the properties of the instruments on two distinct samples also helps avoid the testing-effect, i.e., having students&#x00027; performance improve on the second instrument because the questions employ the same modalities as the first instrument, and are therefore familiar and easier due to practice, rather than being due to a difference between the instruments (Knapp, <xref ref-type="bibr" rid="B42">2016</xref>).</p>
<table-wrap position="float" id="T3">
<label>Table 3</label>
<caption><p>Participants.</p></caption>
<table frame="hsides" rules="groups">
<thead><tr>
<th valign="top" align="center" colspan="6"><bold>Number of participants per grade</bold></th>
</tr>
<tr>
<th valign="top" align="left"><bold>Test</bold></th>
<th valign="top" align="left"><bold>Gender</bold></th>
<th valign="top" align="center"><bold>Grade 3</bold></th>
<th valign="top" align="center"><bold>Grade 4</bold></th>
<th valign="top" align="center"><bold>Undisclosed</bold></th>
<th valign="top" align="center"><bold>Total</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">BCTt</td>
<td valign="top" align="left">Female</td>
<td valign="top" align="center">80</td>
<td valign="top" align="center">82</td>
<td valign="top" align="center">5</td>
<td valign="top" align="center">167</td>
</tr>
<tr>
<td/>
<td valign="top" align="left">Male</td>
<td valign="top" align="center">78</td>
<td valign="top" align="center">61</td>
<td valign="top" align="center">6</td>
<td valign="top" align="center">145</td>
</tr>
<tr>
<td/>
<td valign="top" align="left">Undisclosed</td>
<td/>
<td/>
<td valign="top" align="center">62</td>
<td valign="top" align="center">62</td>
</tr>
<tr>
<td/>
<td valign="top" align="left">Total</td>
<td valign="top" align="center">158</td>
<td valign="top" align="center">143</td>
<td valign="top" align="center">73</td>
<td valign="top" align="center">374</td>
</tr>
<tr>
<td valign="top" align="left">cCTt</td>
<td valign="top" align="left">Female</td>
<td valign="top" align="center">36</td>
<td valign="top" align="center">68</td>
<td/>
<td valign="top" align="center">104</td>
</tr>
<tr>
<td/>
<td valign="top" align="left">Male</td>
<td valign="top" align="center">38</td>
<td valign="top" align="center">59</td>
<td/>
<td valign="top" align="center">97</td>
</tr>
<tr>
<td/>
<td valign="top" align="left">Total</td>
<td valign="top" align="center">74</td>
<td valign="top" align="center">127</td>
<td/>
<td valign="top" align="center">201</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
</sec>
<sec sec-type="results" id="s3">
<title>3. Results</title>
<sec>
<title>3.1. Score distribution</title>
<p>The distribution of scores obtained in the two tests (both out of a maximum of 25 points) is shown in <xref ref-type="fig" rid="F3">Figure 3</xref>. The Shapiro-Wilk test of normality indicates that the distribution of the cCTt is normal (<italic>p</italic>&#x0003E;0.05 , fails to reject <italic>H</italic><sub>0</sub> ) and that the distribution of BCTt is not (<italic>p</italic> &#x0003C; 0.0001 , rejects <italic>H</italic><sub>0</sub> ). This is due to a ceiling effect, which is apparent for the BCTt (skew &#x0003D; &#x02212;1.23 , kurtosis &#x0003D; 1.98 ), but is not present in the case of the cCTt (skew &#x0003D; &#x02212;0.07 , kurtosis &#x0003D; &#x02212;0.13 ).<xref ref-type="fn" rid="fn0004"><sup>4</sup></xref> Neither instrument shows significant differences in scores between genders [one-way ANOVA <italic>F</italic><sub><italic>BCTt</italic></sub>(1) &#x0003D; 0.19 , <italic>p</italic><sub><italic>BCTt</italic></sub> &#x0003D; 0.67 ; one-way ANOVA <italic>F</italic><sub><italic>cCTt</italic></sub>(1) &#x0003D; 0.03 , <italic>p</italic><sub><italic>cCTt</italic></sub> &#x0003D; 0.86 ].</p>
<fig id="F3" position="float">
<label>Figure 3</label>
<caption><p>Score distribution for the BCTt and cCTt. The histogram and boxplots show the ceiling effect of the BCTt while the cCTt exhibits a normal distribution centered around 15/25 (i.e., 60%).</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpsyg-13-1082659-g0003.tif"/>
</fig>
<p>Where the BCTt is concerned, students in grade 4 (&#x003BC;<sub>4</sub> &#x0003D; 20.62 &#x000B1; 3.66 ) perform significantly better than students in grade 3 (&#x003BC;<sub>3</sub> &#x0003D; 19.18 &#x000B1; 4.16 ). Indeed, the one-way ANOVA indicates that the difference between grades is significant [<italic>F</italic><sub>(1)</sub> &#x0003D; 10.18 , <italic>p</italic> &#x0003D; 0.0016 , &#x00394;&#x003BC; &#x0003D; 1.44 out of 25 ] with a medium-small effect size (Cohen&#x00027;s <italic>d</italic> &#x0003D; 0.37 <xref ref-type="fn" rid="fn0005"><sup>5</sup></xref> Lakens, <xref ref-type="bibr" rid="B46">2013</xref>). This would appear to confirm the progression between grades on the BCTt observed in the original BCTt validation.</p>
<p>Where the cCTt is concerned, no significant differences exist between grades [one-way ANOVA <italic>F</italic><sub>(1)</sub> &#x0003D; 1.63 , <italic>p</italic> &#x0003D; 0.2 ]. The lack of distinction between grades in this sample is related to the fact that the grade 3 students are performing well on the test (&#x003BC; &#x0003D; 14.64 &#x000B1; 3.75 out of 25 ), and specifically as well as the grade 4 students (&#x003BC; &#x0003D; 15.45 &#x000B1; 4.68 ). Indeed, in the first study validating the cCTt, the grade 3 students scored an average of &#x003BC; &#x0003D; 12.62 &#x000B1; 5.18 (<italic>n</italic> = 711) and the grade 4 students &#x003BC; &#x0003D; 15.49 &#x000B1; 4.96 (<italic>n</italic> = 749) out of 25 .</p>
</sec>
<sec>
<title>3.2. Classical Test Theory</title>
<p>Cronbach&#x00027;s &#x003B1; (Bland and Altman, <xref ref-type="bibr" rid="B9">1997</xref>) measurement of internal consistency of scales was used as an indicator of the instruments&#x00027; reliability. According to the thresholds of Hinton et al. (<xref ref-type="bibr" rid="B36">2014</xref>) and Taherdoost (<xref ref-type="bibr" rid="B73">2016</xref>), both instruments exhibit high reliability (&#x003B1;<sub><italic>BCTt</italic></sub> &#x0003D; 0.82&#x0003E;0.7 , &#x003B1;<sub><italic>cCTt</italic></sub> &#x0003D; 0.78&#x0003E;0.7 ). Nonetheless, the individual item difficulties (i.e., the proportion of correct answers) and point biserial correlations (i.e., the difference between the high scorers and the low scorers of the sample population) provide useful insights into the developmental appropriateness of the instruments, by indicating which items could be revised to improve the validity of the instruments for the target populations.</p>
<p><xref ref-type="fig" rid="F4">Figure 4</xref> shows that both instruments present questions of decreasing difficulty index (i.e., questions that become progressively harder). The BCTt counts 13 questions which are above the maximum difficulty index threshold (i.e., are too easy) for the target age group, as opposed to 5 for the cCTt (including the 3 that were too easy in the original cCTt validation). The cCTt also exhibits two questions which are too hard (the same ones as in the original cCTt validation), which is not the case of the BCTt. Indeed, as <xref ref-type="fig" rid="F4">Figure 4</xref> shows, the BCTt covers a smaller range of item difficulties (BCTt difficulty indices min = 0.49 , max = 0.97 , range = 0.48 ; cCTt difficulty indices min = 0.18 , max = 0.96 , range = 0.79 ), lacking items in the lower half of the difficulty index range.</p>
<fig id="F4" position="float">
<label>Figure 4</label>
<caption><p>Classical Test Theory&#x02014;Item Difficulty indices (i.e., the proportion of correct responses) on the left, and Point-Biserial Correlation on the right. Items with difficulty indices above the 0.85 threshold are considered too easy while items with difficulty indices below the 0.25 threshold are considered too difficult. Items with a point-biserial correlation above the 0.2 threshold are considered acceptable while those above 0.25 are considered good.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpsyg-13-1082659-g0004.tif"/>
</fig>
<p>In terms of point-biserial correlation (see <xref ref-type="fig" rid="F4">Figure 4</xref>), questions that could be revised for students in grades 3&#x02013;4 are those below the 0.2 threshold. The metric indicates that only one item could be revised for the BCTt (question 24), while four items of the cCTt could be revised (questions 2, 17, 22, and 24). Interestingly, most of these questions were among the most difficult ones for the students.</p>
<p><xref ref-type="table" rid="T4">Table 4</xref> reports the Classical Test Theory analysis results for all questions in the two tests. Accounting for both difficulty indices and point biserial correlation, the number of questions that could be revised for students in grades 3 and 4 is higher for the BCTt (<italic>n</italic> &#x0003D; 14 ) than the cCTt (<italic>n</italic> &#x0003D; 8 ), as can be seen in <xref ref-type="table" rid="T4">Table 4</xref>.</p>
<table-wrap position="float" id="T4">
<label>Table 4</label>
<caption><p>Full BCTt (Cronbach&#x00027;s &#x003B1;<sub><italic>BCTt</italic></sub> &#x0003D; 0.82 ) and cCTt (Cronbach&#x00027;s &#x003B1;<sub><italic>cCTt</italic></sub> &#x0003D; 0.78 ) Classical Test Theory Analysis.</p></caption>
<table frame="hsides" rules="groups">
<thead><tr>
<th/>
<th/>
<th/>
<th valign="top" align="center"><bold>BCTt</bold></th>
<th/>
<th/>
<th/>
<th/>
<th valign="top" align="center"><bold>cCTt</bold></th>
<th/>
<th/>
<th/>
</tr>
<tr>
<th valign="top" align="left"><bold>Q</bold></th>
<th valign="top" align="center"><bold>Difficulty index</bold></th>
<th valign="top" align="center"><bold>std</bold></th>
<th valign="top" align="center"><bold>PBC</bold></th>
<th valign="top" align="center"><bold>Drop alpha</bold></th>
<th valign="top" align="center"><bold>Revision</bold></th>
<th valign="top" align="center" style="border-left: thin solid #000000;"><bold>Q</bold></th>
<th valign="top" align="center"><bold>Difficulty index</bold></th>
<th valign="top" align="center"><bold>std</bold></th>
<th valign="top" align="center"><bold>PBC</bold></th>
<th valign="top" align="center"><bold>Drop alpha</bold></th>
<th valign="top" align="center"><bold>Revision</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">1</td>
<td valign="top" align="center"><bold>0.95</bold></td>
<td valign="top" align="center">0.22</td>
<td valign="top" align="center">0.27</td>
<td valign="top" align="center">0.82</td>
<td valign="top" align="center">x</td>
<td valign="top" align="center" style="border-left: thin solid #000000;">1</td>
<td valign="top" align="center"><bold>0.96</bold></td>
<td valign="top" align="center">0.19</td>
<td valign="top" align="center">0.25</td>
<td valign="top" align="center">0.78</td>
<td valign="top" align="center">x</td>
</tr>
<tr>
<td valign="top" align="left">2</td>
<td valign="top" align="center"><bold>0.97</bold></td>
<td valign="top" align="center"><bold>0.16</bold></td>
<td valign="top" align="center">0.39</td>
<td valign="top" align="center">0.82</td>
<td valign="top" align="center">x</td>
<td valign="top" align="center" style="border-left: thin solid #000000;">2</td>
<td valign="top" align="center"><bold>0.96</bold></td>
<td valign="top" align="center">0.19</td>
<td valign="top" align="center"><bold>0.11</bold></td>
<td valign="top" align="center">0.78</td>
<td valign="top" align="center">x</td>
</tr>
<tr>
<td valign="top" align="left">3</td>
<td valign="top" align="center"><bold>0.96</bold></td>
<td valign="top" align="center">0.2</td>
<td valign="top" align="center">0.3</td>
<td valign="top" align="center">0.82</td>
<td valign="top" align="center">x</td>
<td valign="top" align="center" style="border-left: thin solid #000000;">3</td>
<td valign="top" align="center">0.73</td>
<td valign="top" align="center">0.44</td>
<td valign="top" align="center">0.32</td>
<td valign="top" align="center">0.77</td>
<td/>
</tr>
<tr>
<td valign="top" align="left">4</td>
<td valign="top" align="center"><bold>0.91</bold></td>
<td valign="top" align="center">0.29</td>
<td valign="top" align="center">0.41</td>
<td valign="top" align="center">0.82</td>
<td valign="top" align="center">x</td>
<td valign="top" align="center" style="border-left: thin solid #000000;">4</td>
<td valign="top" align="center"><bold>0.86</bold></td>
<td valign="top" align="center">0.35</td>
<td valign="top" align="center">0.23</td>
<td valign="top" align="center">0.78</td>
<td valign="top" align="center">x</td>
</tr>
<tr>
<td valign="top" align="left">5</td>
<td valign="top" align="center"><bold>0.9</bold></td>
<td valign="top" align="center">0.3</td>
<td valign="top" align="center">0.46</td>
<td valign="top" align="center">0.81</td>
<td valign="top" align="center">x</td>
<td valign="top" align="center" style="border-left: thin solid #000000;">5</td>
<td valign="top" align="center">0.69</td>
<td valign="top" align="center">0.46</td>
<td valign="top" align="center">0.32</td>
<td valign="top" align="center">0.77</td>
<td/>
</tr>
<tr>
<td valign="top" align="left">6</td>
<td valign="top" align="center"><bold>0.92</bold></td>
<td valign="top" align="center">0.27</td>
<td valign="top" align="center">0.39</td>
<td valign="top" align="center">0.82</td>
<td valign="top" align="center">x</td>
<td valign="top" align="center" style="border-left: thin solid #000000;">6</td>
<td valign="top" align="center"><bold>0.88</bold></td>
<td valign="top" align="center">0.32</td>
<td valign="top" align="center">0.38</td>
<td valign="top" align="center">0.77</td>
<td valign="top" align="center">x</td>
</tr>
<tr>
<td valign="top" align="left">7</td>
<td valign="top" align="center"><bold>0.85</bold></td>
<td valign="top" align="center">0.35</td>
<td valign="top" align="center">0.37</td>
<td valign="top" align="center">0.82</td>
<td valign="top" align="center">x</td>
<td valign="top" align="center" style="border-left: thin solid #000000;">7</td>
<td valign="top" align="center">0.77</td>
<td valign="top" align="center">0.42</td>
<td valign="top" align="center">0.3</td>
<td valign="top" align="center">0.77</td>
<td/>
</tr>
<tr>
<td valign="top" align="left">8</td>
<td valign="top" align="center"><bold>0.91</bold></td>
<td valign="top" align="center">0.29</td>
<td valign="top" align="center">0.49</td>
<td valign="top" align="center">0.81</td>
<td valign="top" align="center">x</td>
<td valign="top" align="center" style="border-left: thin solid #000000;">8</td>
<td valign="top" align="center">0.83</td>
<td valign="top" align="center">0.38</td>
<td valign="top" align="center">0.37</td>
<td valign="top" align="center">0.77</td>
<td/>
</tr>
<tr>
<td valign="top" align="left">9</td>
<td valign="top" align="center"><bold>0.92</bold></td>
<td valign="top" align="center">0.27</td>
<td valign="top" align="center">0.41</td>
<td valign="top" align="center">0.82</td>
<td valign="top" align="center">x</td>
<td valign="top" align="center" style="border-left: thin solid #000000;">9</td>
<td valign="top" align="center"><bold>0.86</bold></td>
<td valign="top" align="center">0.35</td>
<td valign="top" align="center">0.29</td>
<td valign="top" align="center">0.78</td>
<td valign="top" align="center">x</td>
</tr>
<tr>
<td valign="top" align="left">10</td>
<td valign="top" align="center"><bold>0.92</bold></td>
<td valign="top" align="center">0.27</td>
<td valign="top" align="center">0.32</td>
<td valign="top" align="center">0.82</td>
<td valign="top" align="center">x</td>
<td valign="top" align="center" style="border-left: thin solid #000000;">10</td>
<td valign="top" align="center">0.58</td>
<td valign="top" align="center">0.49</td>
<td valign="top" align="center">0.43</td>
<td valign="top" align="center">0.77</td>
<td/>
</tr>
<tr>
<td valign="top" align="left">11</td>
<td valign="top" align="center">0.84</td>
<td valign="top" align="center">0.37</td>
<td valign="top" align="center">0.39</td>
<td valign="top" align="center">0.82</td>
<td/>
<td valign="top" align="center" style="border-left: thin solid #000000;">11</td>
<td valign="top" align="center">0.61</td>
<td valign="top" align="center">0.49</td>
<td valign="top" align="center">0.46</td>
<td valign="top" align="center">0.76</td>
<td/>
</tr>
<tr>
<td valign="top" align="left">12</td>
<td valign="top" align="center"><bold>0.93</bold></td>
<td valign="top" align="center">0.25</td>
<td valign="top" align="center">0.42</td>
<td valign="top" align="center">0.82</td>
<td valign="top" align="center">x</td>
<td valign="top" align="center" style="border-left: thin solid #000000;">12</td>
<td valign="top" align="center">0.73</td>
<td valign="top" align="center">0.45</td>
<td valign="top" align="center">0.46</td>
<td valign="top" align="center">0.77</td>
<td/>
</tr>
<tr>
<td valign="top" align="left">13</td>
<td valign="top" align="center"><bold>0.9</bold></td>
<td valign="top" align="center">0.3</td>
<td valign="top" align="center">0.43</td>
<td valign="top" align="center">0.81</td>
<td valign="top" align="center">x</td>
<td valign="top" align="center" style="border-left: thin solid #000000;">13</td>
<td valign="top" align="center">0.67</td>
<td valign="top" align="center">0.47</td>
<td valign="top" align="center">0.53</td>
<td valign="top" align="center">0.76</td>
<td/>
</tr>
<tr>
<td valign="top" align="left">14</td>
<td valign="top" align="center">0.59</td>
<td valign="top" align="center">0.49</td>
<td valign="top" align="center">0.43</td>
<td valign="top" align="center">0.81</td>
<td/>
<td valign="top" align="center" style="border-left: thin solid #000000;">14</td>
<td valign="top" align="center">0.6</td>
<td valign="top" align="center">0.49</td>
<td valign="top" align="center">0.32</td>
<td valign="top" align="center">0.77</td>
<td/>
</tr>
<tr>
<td valign="top" align="left">15</td>
<td valign="top" align="center">0.57</td>
<td valign="top" align="center">0.5</td>
<td valign="top" align="center">0.43</td>
<td valign="top" align="center">0.81</td>
<td/>
<td valign="top" align="center" style="border-left: thin solid #000000;">15</td>
<td valign="top" align="center">0.59</td>
<td valign="top" align="center">0.49</td>
<td valign="top" align="center">0.49</td>
<td valign="top" align="center">0.76</td>
<td/>
</tr>
<tr>
<td valign="top" align="left">16</td>
<td valign="top" align="center">0.79</td>
<td valign="top" align="center">0.41</td>
<td valign="top" align="center">0.37</td>
<td valign="top" align="center">0.82</td>
<td/>
<td valign="top" align="center" style="border-left: thin solid #000000;">16</td>
<td valign="top" align="center">0.56</td>
<td valign="top" align="center">0.5</td>
<td valign="top" align="center">0.39</td>
<td valign="top" align="center">0.77</td>
<td/>
</tr>
<tr>
<td valign="top" align="left">17</td>
<td valign="top" align="center"><bold>0.92</bold></td>
<td valign="top" align="center">0.27</td>
<td valign="top" align="center">0.32</td>
<td valign="top" align="center">0.82</td>
<td valign="top" align="center">x</td>
<td valign="top" align="center" style="border-left: thin solid #000000;">17</td>
<td valign="top" align="center"><bold>0.2</bold></td>
<td valign="top" align="center">0.4</td>
<td valign="top" align="center"><bold>0.19</bold></td>
<td valign="top" align="center">0.78</td>
<td valign="top" align="center">x</td>
</tr>
<tr>
<td valign="top" align="left">18</td>
<td valign="top" align="center">0.57</td>
<td valign="top" align="center">0.5</td>
<td valign="top" align="center">0.52</td>
<td valign="top" align="center">0.81</td>
<td/>
<td valign="top" align="center" style="border-left: thin solid #000000;">18</td>
<td valign="top" align="center">0.59</td>
<td valign="top" align="center">0.49</td>
<td valign="top" align="center">0.23</td>
<td valign="top" align="center">0.78</td>
<td/>
</tr>
<tr>
<td valign="top" align="left">19</td>
<td valign="top" align="center">0.78</td>
<td valign="top" align="center">0.42</td>
<td valign="top" align="center">0.39</td>
<td valign="top" align="center">0.82</td>
<td/>
<td valign="top" align="center" style="border-left: thin solid #000000;">19</td>
<td valign="top" align="center">0.53</td>
<td valign="top" align="center">0.5</td>
<td valign="top" align="center">0.28</td>
<td valign="top" align="center">0.78</td>
<td/>
</tr>
<tr>
<td valign="top" align="left">20</td>
<td valign="top" align="center">0.61</td>
<td valign="top" align="center">0.49</td>
<td valign="top" align="center">0.39</td>
<td valign="top" align="center">0.82</td>
<td/>
<td valign="top" align="center" style="border-left: thin solid #000000;">20</td>
<td valign="top" align="center">0.27</td>
<td valign="top" align="center">0.45</td>
<td valign="top" align="center">0.3</td>
<td valign="top" align="center">0.77</td>
<td/>
</tr>
<tr>
<td valign="top" align="left">21</td>
<td valign="top" align="center">0.59</td>
<td valign="top" align="center">0.49</td>
<td valign="top" align="center">0.35</td>
<td valign="top" align="center">0.82</td>
<td/>
<td valign="top" align="center" style="border-left: thin solid #000000;">21</td>
<td valign="top" align="center">0.44</td>
<td valign="top" align="center">0.5</td>
<td valign="top" align="center">0.28</td>
<td valign="top" align="center">0.78</td>
<td/>
</tr>
<tr>
<td valign="top" align="left">22</td>
<td valign="top" align="center">0.54</td>
<td valign="top" align="center">0.5</td>
<td valign="top" align="center">0.37</td>
<td valign="top" align="center">0.82</td>
<td/>
<td valign="top" align="center" style="border-left: thin solid #000000;">22</td>
<td valign="top" align="center">0.38</td>
<td valign="top" align="center">0.49</td>
<td valign="top" align="center"><bold>0.15</bold></td>
<td valign="top" align="center">0.78</td>
<td valign="top" align="center">x</td>
</tr>
<tr>
<td valign="top" align="left">23</td>
<td valign="top" align="center">0.79</td>
<td valign="top" align="center">0.41</td>
<td valign="top" align="center">0.29</td>
<td valign="top" align="center">0.82</td>
<td/>
<td valign="top" align="center" style="border-left: thin solid #000000;">23</td>
<td valign="top" align="center">0.44</td>
<td valign="top" align="center">0.5</td>
<td valign="top" align="center">0.3</td>
<td valign="top" align="center">0.77</td>
<td/>
</tr>
<tr>
<td valign="top" align="left">24</td>
<td valign="top" align="center">0.49</td>
<td valign="top" align="center">0.5</td>
<td valign="top" align="center"><bold>0.15</bold></td>
<td valign="top" align="center">0.83</td>
<td valign="top" align="center">x</td>
<td valign="top" align="center" style="border-left: thin solid #000000;">24</td>
<td valign="top" align="center"><bold>0.18</bold></td>
<td valign="top" align="center">0.38</td>
<td valign="top" align="center"><bold>0.1</bold></td>
<td valign="top" align="center">0.78</td>
<td valign="top" align="center">x</td>
</tr>
<tr>
<td valign="top" align="left">25</td>
<td valign="top" align="center">0.78</td>
<td valign="top" align="center">0.41</td>
<td valign="top" align="center">0.33</td>
<td valign="top" align="center">0.82</td>
<td/>
<td valign="top" align="center" style="border-left: thin solid #000000;">25</td>
<td valign="top" align="center">0.31</td>
<td valign="top" align="center">0.46</td>
<td valign="top" align="center">0.26</td>
<td valign="top" align="center">0.78</td>
<td/>
</tr>
</tbody>
</table>
<table-wrap-foot>
<p>Q, question; Difficulty index, proportion of correct responses; std, standard deviation; PBC, Point-Biserial Correlation. Items that are too easy (i.e., &#x003BC; &#x0003E; 0.85), too difficult (i.e., &#x003BC; &#x0003C; 0.25), or with a low point-biserial correlation (&#x0003C; 0.2) are marked in bold as elements which could be revised.</p>
</table-wrap-foot>
</table-wrap>
</sec>
<sec>
<title>3.3. Item Response Theory (IRT)</title>
<sec>
<title>3.3.1. Verifying the unidimensionality to compare instruments through Confirmatory Factor Analysis</title>
<p>One criterion required to compare instruments through IRT is that the data measure the same latent trait. We thus employed Confirmatory Factor Analysis (CFA) as done by Kong and Lai (<xref ref-type="bibr" rid="B43">2022</xref>), with a Diagonally Weighted Least Squares estimator to account for the binary inputs (see <xref ref-type="table" rid="T5">Table 5</xref> for the fit indices). The Kaiser-Meyer-Olkin (KMO) measure of sampling adequacy indicates that the data is appropriate for factor analysis in both cases. Bartlett&#x00027;s test of sphericity also suggests that there is sufficient significant correlation in the data for factor analysis. For the full instruments (with 25 items) the model fit indices are also adequate in terms of the &#x003C7;<sup>2</sup> criteria statistic, the CFI and TLI indices for both instruments. The RMSEA is below 0.06 in both cases. Finally, the SRMR is considered acceptable for the cCTt and just shy of the limit for the BCTt (<italic>SRMR</italic><sub><italic>BCTt</italic></sub> &#x0003D; 0.084). The modification indices for the BCTt-CFA indicate high correlations between 3 items from the BCTt (Q14, Q15, and Q18) which address the notion of complex loops. Removing item 15 from the factor analysis improves the model fit and meets the threshold requirements for the different fit indices (see <xref ref-type="table" rid="T5">Table 5</xref>). Furthermore, we exclude items with low CFA factor loadings (&#x0003C; 0.2) from the IRT analysis. Please note that all remaining items have significant factor loadings and that the excluded items correspond to questions which have low point-biserial correlations (namely Q24 in the BCTt, and Q2, Q17, Q22, and Q24 in the cCTt). The corresponding fit indices for the final 1-factor CFA are provided in <xref ref-type="table" rid="T5">Table 5</xref>. 
With these adjustments, a 1-factor structure appears suitable for both instruments (when excluding Q15 and Q24 from the BCTt, and Q2, Q17, Q22, and Q24 from the cCTt).</p>
<table-wrap position="float" id="T5">
<label>Table 5</label>
<caption><p>Confirmatory factor analysis fit indices for unidimensionality.</p></caption>
<table frame="hsides" rules="groups">
<thead><tr>
<th/>
<th valign="top" align="left"><bold>Modification</bold></th>
<th valign="top" align="left"><bold>KMO</bold></th>
<th valign="top" align="left"><bold>Bartlett&#x00027;s test of sphericity</bold></th>
<th valign="top" align="left"><bold>&#x003C7;<sup>2</sup></bold></th>
<th valign="top" align="left"><bold>&#x003C7;<sup>2</sup>/df</bold></th>
<th valign="top" align="left"><bold>CFI</bold></th>
<th valign="top" align="left"><bold>TLI</bold></th>
<th valign="top" align="left"><bold>RMSEA</bold></th>
<th valign="top" align="left"><bold>SRMR</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">BCTt (25 items)</td>
<td/>
<td valign="top" align="left">0.84</td>
<td valign="top" align="left">&#x003C7;<sup>2</sup>(300) &#x0003D; 2026, <italic>p</italic> &#x0003C; 0.001</td>
<td valign="top" align="left">&#x003C7;<sup>2</sup>(275) &#x0003D; 464, <italic>p</italic> &#x0003C; 0.001</td>
<td valign="top" align="left">True</td>
<td valign="top" align="left">0.92</td>
<td valign="top" align="left">0.913</td>
<td valign="top" align="left">0.043, 90<italic>%ci</italic> &#x0003D; [0.036 &#x02212; 0.05]</td>
<td valign="top" align="left">0.084</td>
</tr>
<tr>
<td valign="top" align="left">BCTt (24 items)</td>
<td valign="top" align="left">Removing Q15 (high correlations with Q14 &amp; Q18)</td>
<td valign="top" align="left">0.84</td>
<td valign="top" align="left">&#x003C7;<sup>2</sup>(276) &#x0003D; 1825, <italic>p</italic> &#x0003C; 0.001</td>
<td valign="top" align="left">&#x003C7;<sup>2</sup>(252) &#x0003D; 357, <italic>p</italic> &#x0003C; 0.001</td>
<td valign="top" align="left">True</td>
<td valign="top" align="left">0.945</td>
<td valign="top" align="left">0.940</td>
<td valign="top" align="left">0.033, 90<italic>%ci</italic> &#x0003D; [0.025 &#x02212; 0.041]</td>
<td valign="top" align="left">0.076</td>
</tr>
<tr>
<td valign="top" align="left">BCTt (23 items)</td>
<td valign="top" align="left">Removing Q24 (low factor loading)</td>
<td valign="top" align="left">0.84</td>
<td valign="top" align="left">&#x003C7;<sup>2</sup>(253) &#x0003D; 1779, <italic>p</italic> &#x0003C; 0.001</td>
<td valign="top" align="left">&#x003C7;<sup>2</sup>(230) &#x0003D; 322, <italic>p</italic> &#x0003C; 0.001</td>
<td valign="top" align="left">True</td>
<td valign="top" align="left">0.951</td>
<td valign="top" align="left">0.946</td>
<td valign="top" align="left">0.033, 90<italic>%ci</italic> &#x0003D; [0.024 &#x02212; 0.041]</td>
<td valign="top" align="left">0.074</td>
</tr>
<tr>
<td valign="top" align="left">cCTt (25 items)</td>
<td/>
<td valign="top" align="left">0.75</td>
<td valign="top" align="left">&#x003C7;<sup>2</sup>(300) &#x0003D; 877, <italic>p</italic> &#x0003C; 0.001</td>
<td valign="top" align="left">&#x003C7;<sup>2</sup>(275) &#x0003D; 350, <italic>p</italic> &#x0003D; 0.001</td>
<td valign="top" align="left">True</td>
<td valign="top" align="left">0.935</td>
<td valign="top" align="left">0.929</td>
<td valign="top" align="left">0.037, 90<italic>%ci</italic> &#x0003D; [0.024 &#x02212; 0.049]</td>
<td valign="top" align="left">0.077</td>
</tr>
<tr>
<td valign="top" align="left">cCTt (21 items)</td>
<td valign="top" align="left">Removing Q2, Q17, Q22, Q24 (low factor loading)</td>
<td valign="top" align="left">0.77</td>
<td valign="top" align="left">&#x003C7;<sup>2</sup>(210) &#x0003D; 761, <italic>p</italic> &#x0003C; 0.001</td>
<td valign="top" align="left">&#x003C7;<sup>2</sup>(189) &#x0003D; 216, <italic>p</italic> &#x0003D; 0.089</td>
<td valign="top" align="left">True</td>
<td valign="top" align="left">0.975</td>
<td valign="top" align="left">0.972</td>
<td valign="top" align="left">0.027, 90<italic>%ci</italic> &#x0003D; [0.000 &#x02212; 0.043]</td>
<td valign="top" align="left">0.071</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec>
<title>3.3.2. Comparing the instruments</title>
<p>As indicated previously, we only consider the 1-PL and 2-PL models in our study due to the low sample sizes which prevent us from finding stable solutions in the case of the 3-PL model and prevent us from converging in the case of the 4-PL model (see global model fit indices for the 1-PL and 2-PL models in <xref ref-type="table" rid="T6">Table 6</xref>). For both the BCTt and the cCTt, the 2-PL model was selected as an ANOVA indicated that the 2-PL model improved the fit significantly compared to the 1-PL model in both cases [<inline-formula><mml:math id="M2"><mml:msubsup><mml:mrow><mml:mi>&#x003C7;</mml:mi></mml:mrow><mml:mrow><mml:mi>B</mml:mi><mml:mi>C</mml:mi><mml:mi>T</mml:mi><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msubsup><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>22</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mn>62</mml:mn><mml:mo>.</mml:mo><mml:mn>92</mml:mn></mml:math></inline-formula>, <italic>p</italic><sub><italic>BCTt</italic></sub> &#x0003C; 0.0001, <inline-formula><mml:math id="M3"><mml:msubsup><mml:mrow><mml:mi>&#x003C7;</mml:mi></mml:mrow><mml:mrow><mml:mi>c</mml:mi><mml:mi>C</mml:mi><mml:mi>T</mml:mi><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msubsup><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>20</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mn>79</mml:mn><mml:mo>.</mml:mo><mml:mn>84</mml:mn></mml:math></inline-formula>, <italic>p</italic><sub><italic>cCTt</italic></sub> &#x0003C; 0.0001]. Individual item discrimination, difficulties, and fit indices are provided for the 2-PL models in <xref ref-type="table" rid="T7">Table 7</xref>. 
The results indicate that the &#x003C7;<sup>2</sup>/<italic>df</italic> &#x0003C; 3 criterion is achieved for all items, and that all but three items have an RMSEA below the 0.06 threshold, the remaining three being just shy of it (considering that their rounded values would be equal to 0.06, these can be considered acceptable; Ockey and Choi, <xref ref-type="bibr" rid="B55">2015</xref>). We then verify local independence using Yen&#x00027;s (<xref ref-type="bibr" rid="B90">1984</xref>) Q3 statistic and find that it is below the 0.3 threshold for all pairs of items in the BCTt and in the cCTt.</p>
<table-wrap position="float" id="T6">
<label>Table 6</label>
<caption><p>IRT model parameter fit indices for 1-PL and 2-PL models with the BCTt and cCTt.</p></caption>
<table frame="hsides" rules="groups">
<thead><tr>
<th/>
<th/>
<th valign="top" align="center"><bold><italic>M</italic><sub>2</sub></bold></th>
<th valign="top" align="center"><bold><italic>df</italic></bold></th>
<th valign="top" align="center"><bold><italic>p</italic></bold></th>
<th valign="top" align="center"><bold>RMSEA</bold></th>
<th valign="top" align="center"><bold>ci RMSEA 5%</bold></th>
<th valign="top" align="center"><bold>ci RMSEA 95%</bold></th>
<th valign="top" align="center"><bold>SRMR</bold></th>
<th valign="top" align="center"><bold>TLI</bold></th>
<th valign="top" align="center"><bold>CFI</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">BCTt (23 items)</td>
<td valign="top" align="center">1-PL</td>
<td valign="top" align="center">514</td>
<td valign="top" align="center">253</td>
<td valign="top" align="center">0.000</td>
<td valign="top" align="center">0.053</td>
<td valign="top" align="center">0.046</td>
<td valign="top" align="center">0.059</td>
<td valign="top" align="center">0.098</td>
<td valign="top" align="center">0.929</td>
<td valign="top" align="center">0.929</td>
</tr>
<tr>
<td/>
<td valign="top" align="center">2-PL</td>
<td valign="top" align="center">415</td>
<td valign="top" align="center">230</td>
<td valign="top" align="center">0.000</td>
<td valign="top" align="center">0.046</td>
<td valign="top" align="center">0.039</td>
<td valign="top" align="center">0.053</td>
<td valign="top" align="center">0.068</td>
<td valign="top" align="center">0.945</td>
<td valign="top" align="center">0.950</td>
</tr>
<tr>
<td valign="top" align="left">cCTt (21 items)</td>
<td valign="top" align="center">1-PL</td>
<td valign="top" align="center">392</td>
<td valign="top" align="center">210</td>
<td valign="top" align="center">0.000</td>
<td valign="top" align="center">0.067</td>
<td valign="top" align="center">0.056</td>
<td valign="top" align="center">0.077</td>
<td valign="top" align="center">0.102</td>
<td valign="top" align="center">0.849</td>
<td valign="top" align="center">0.849</td>
</tr>
<tr>
<td/>
<td valign="top" align="center">2-PL</td>
<td valign="top" align="center">294</td>
<td valign="top" align="center">189</td>
<td valign="top" align="center">0.000</td>
<td valign="top" align="center">0.053</td>
<td valign="top" align="center">0.041</td>
<td valign="top" align="center">0.065</td>
<td valign="top" align="center">0.075</td>
<td valign="top" align="center">0.903</td>
<td valign="top" align="center">0.913</td>
</tr>
</tbody>
</table>
</table-wrap>
<table-wrap position="float" id="T7">
<label>Table 7</label>
<caption><p>BCTt and cCTt item parameters and fit indices.</p></caption>
<table frame="hsides" rules="groups">
<thead><tr>
<th/>
<th/>
<th/>
<th/>
<th valign="top" align="center"><bold>BCTt</bold></th>
<th/>
<th/>
<th/>
<th/>
<th/>
<th/>
<th/>
<th valign="top" align="center"><bold>cCTt</bold></th>
<th/>
<th/>
<th/>
</tr>
<tr>
<th valign="top" align="left"><bold>Item</bold></th>
<th valign="top" align="center"><bold>Dscr</bold></th>
<th valign="top" align="center"><bold>Dffc</bold></th>
<th valign="top" align="center"><bold>S-&#x003C7;<sup>2</sup></bold></th>
<th valign="top" align="center"><bold>df S-&#x003C7;<sup>2</sup></bold></th>
<th valign="top" align="center"><bold>RMSEA</bold></th>
<th valign="top" align="center"><bold>p S-&#x003C7;<sup>2</sup></bold></th>
<th valign="top" align="center"><bold>S-&#x003C7;<sup>2</sup>/df</bold></th>
<th valign="top" align="center" style="border-left: thin solid #000000;"><bold>Item</bold></th>
<th valign="top" align="center"><bold>Dscr</bold></th>
<th valign="top" align="center"><bold>Dffc</bold></th>
<th valign="top" align="center"><bold>S-&#x003C7;<sup>2</sup></bold></th>
<th valign="top" align="center"><bold>df S-&#x003C7;<sup>2</sup></bold></th>
<th valign="top" align="center"><bold>RMSEA</bold></th>
<th valign="top" align="center"><bold>p S-&#x003C7;<sup>2</sup></bold></th>
<th valign="top" align="center"><bold>S-&#x003C7;<sup>2</sup>/df</bold></th>
</tr>
<tr>
<th/>
<th/>
<th/>
<th/>
<th/>
<th valign="top" align="center"><bold>S-&#x003C7;<sup>2</sup></bold></th>
<th/>
<th/>
<th style="border-left: thin solid #000000;"/>
<th/>
<th/>
<th/>
<th/>
<th valign="top" align="center"><bold>S-&#x003C7;<sup>2</sup></bold></th>
<th/>
<th/>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Q1</td>
<td valign="top" align="center">1.29</td>
<td valign="top" align="center">-2.8</td>
<td valign="top" align="center">11.5</td>
<td valign="top" align="center">8</td>
<td valign="top" align="center">0.034</td>
<td valign="top" align="center">0.175</td>
<td valign="top" align="center">1.44</td>
<td valign="top" align="center" style="border-left: thin solid #000000;">Q1</td>
<td valign="top" align="center">2</td>
<td valign="top" align="center">-2.38</td>
<td valign="top" align="center">3.3</td>
<td valign="top" align="center">2</td>
<td valign="top" align="center">0.058</td>
<td valign="top" align="center">0.191</td>
<td valign="top" align="center">1.66</td>
</tr>
<tr>
<td valign="top" align="left">Q2</td>
<td valign="top" align="center">2.79</td>
<td valign="top" align="center">-2.28</td>
<td valign="top" align="center">2.3</td>
<td valign="top" align="center">2</td>
<td valign="top" align="center">0.019</td>
<td valign="top" align="center">0.323</td>
<td valign="top" align="center">1.13</td>
<td valign="top" align="center" style="border-left: thin solid #000000;">Q3</td>
<td valign="top" align="center">0.95</td>
<td valign="top" align="center">-1.27</td>
<td valign="top" align="center">13.7</td>
<td valign="top" align="center">11</td>
<td valign="top" align="center">0.035</td>
<td valign="top" align="center">0.251</td>
<td valign="top" align="center">1.24</td>
</tr>
<tr>
<td valign="top" align="left">Q3</td>
<td valign="top" align="center">1.73</td>
<td valign="top" align="center">-2.47</td>
<td valign="top" align="center">3.2</td>
<td valign="top" align="center">6</td>
<td valign="top" align="center">0.0</td>
<td valign="top" align="center">0.786</td>
<td valign="top" align="center">0.53</td>
<td valign="top" align="center" style="border-left: thin solid #000000;">Q4</td>
<td valign="top" align="center">0.76</td>
<td valign="top" align="center">-2.6</td>
<td valign="top" align="center">15.2</td>
<td valign="top" align="center">10</td>
<td valign="top" align="center">0.052</td>
<td valign="top" align="center">0.124</td>
<td valign="top" align="center">1.52</td>
</tr>
<tr>
<td valign="top" align="left">Q4</td>
<td valign="top" align="center">1.82</td>
<td valign="top" align="center">-1.85</td>
<td valign="top" align="center">13.8</td>
<td valign="top" align="center">11</td>
<td valign="top" align="center">0.026</td>
<td valign="top" align="center">0.245</td>
<td valign="top" align="center">1.25</td>
<td valign="top" align="center" style="border-left: thin solid #000000;">Q5</td>
<td valign="top" align="center">0.95</td>
<td valign="top" align="center">-1</td>
<td valign="top" align="center">11.6</td>
<td valign="top" align="center">11</td>
<td valign="top" align="center">0.017</td>
<td valign="top" align="center">0.392</td>
<td valign="top" align="center">1.06</td>
</tr>
<tr>
<td valign="top" align="left">Q5</td>
<td valign="top" align="center">2.41</td>
<td valign="top" align="center">-1.6</td>
<td valign="top" align="center">20.3</td>
<td valign="top" align="center">9</td>
<td valign="top" align="center">0.058</td>
<td valign="top" align="center">0.016</td>
<td valign="top" align="center">2.26</td>
<td valign="top" align="center" style="border-left: thin solid #000000;">Q6</td>
<td valign="top" align="center">2.01</td>
<td valign="top" align="center">-1.56</td>
<td valign="top" align="center">9</td>
<td valign="top" align="center">7</td>
<td valign="top" align="center">0.038</td>
<td valign="top" align="center">0.254</td>
<td valign="top" align="center">1.28</td>
</tr>
<tr>
<td valign="top" align="left">Q6</td>
<td valign="top" align="center">1.66</td>
<td valign="top" align="center">-2.03</td>
<td valign="top" align="center">13.1</td>
<td valign="top" align="center">11</td>
<td valign="top" align="center">0.022</td>
<td valign="top" align="center">0.29</td>
<td valign="top" align="center">1.19</td>
<td valign="top" align="center" style="border-left: thin solid #000000;">Q7</td>
<td valign="top" align="center">1.02</td>
<td valign="top" align="center">-1.4</td>
<td valign="top" align="center">11</td>
<td valign="top" align="center">11</td>
<td valign="top" align="center">0.0</td>
<td valign="top" align="center">0.447</td>
<td valign="top" align="center">1</td>
</tr>
<tr>
<td valign="top" align="left">Q7</td>
<td valign="top" align="center">1.38</td>
<td valign="top" align="center">-1.67</td>
<td valign="top" align="center">15.8</td>
<td valign="top" align="center">12</td>
<td valign="top" align="center">0.029</td>
<td valign="top" align="center">0.2</td>
<td valign="top" align="center">1.32</td>
<td valign="top" align="center" style="border-left: thin solid #000000;">Q8</td>
<td valign="top" align="center">1.46</td>
<td valign="top" align="center">-1.45</td>
<td valign="top" align="center">9.5</td>
<td valign="top" align="center">9</td>
<td valign="top" align="center">0.016</td>
<td valign="top" align="center">0.396</td>
<td valign="top" align="center">1.05</td>
</tr>
<tr>
<td valign="top" align="left">Q8</td>
<td valign="top" align="center">2.56</td>
<td valign="top" align="center">-1.6</td>
<td valign="top" align="center">11.8</td>
<td valign="top" align="center">9</td>
<td valign="top" align="center">0.029</td>
<td valign="top" align="center">0.224</td>
<td valign="top" align="center">1.31</td>
<td valign="top" align="center" style="border-left: thin solid #000000;">Q9</td>
<td valign="top" align="center">1.47</td>
<td valign="top" align="center">-1.67</td>
<td valign="top" align="center">16.2</td>
<td valign="top" align="center">9</td>
<td valign="top" align="center">0.064</td>
<td valign="top" align="center">0.062</td>
<td valign="top" align="center">1.8</td>
</tr>
<tr>
<td valign="top" align="left">Q9</td>
<td valign="top" align="center">2.16</td>
<td valign="top" align="center">-1.82</td>
<td valign="top" align="center">8.6</td>
<td valign="top" align="center">10</td>
<td valign="top" align="center">0.0</td>
<td valign="top" align="center">0.574</td>
<td valign="top" align="center">0.86</td>
<td valign="top" align="center" style="border-left: thin solid #000000;">Q10</td>
<td valign="top" align="center">1.52</td>
<td valign="top" align="center">-0.33</td>
<td valign="top" align="center">14.1</td>
<td valign="top" align="center">10</td>
<td valign="top" align="center">0.046</td>
<td valign="top" align="center">0.167</td>
<td valign="top" align="center">1.41</td>
</tr>
<tr>
<td valign="top" align="left">Q10</td>
<td valign="top" align="center">1.41</td>
<td valign="top" align="center">-2.26</td>
<td valign="top" align="center">18.2</td>
<td valign="top" align="center">11</td>
<td valign="top" align="center">0.042</td>
<td valign="top" align="center">0.077</td>
<td valign="top" align="center">1.66</td>
<td valign="top" align="center" style="border-left: thin solid #000000;">Q11</td>
<td valign="top" align="center">1.76</td>
<td valign="top" align="center">-0.4</td>
<td valign="top" align="center">5.3</td>
<td valign="top" align="center">10</td>
<td valign="top" align="center">0.0</td>
<td valign="top" align="center">0.871</td>
<td valign="top" align="center">0.53</td>
</tr>
<tr>
<td valign="top" align="left">Q11</td>
<td valign="top" align="center">1.42</td>
<td valign="top" align="center">-1.55</td>
<td valign="top" align="center">14.6</td>
<td valign="top" align="center">13</td>
<td valign="top" align="center">0.018</td>
<td valign="top" align="center">0.334</td>
<td valign="top" align="center">1.12</td>
<td valign="top" align="center" style="border-left: thin solid #000000;">Q12</td>
<td valign="top" align="center">1.98</td>
<td valign="top" align="center">-0.82</td>
<td valign="top" align="center">14.3</td>
<td valign="top" align="center">8</td>
<td valign="top" align="center">0.063</td>
<td valign="top" align="center">0.075</td>
<td valign="top" align="center">1.79</td>
</tr>
<tr>
<td valign="top" align="left">Q12</td>
<td valign="top" align="center">2.02</td>
<td valign="top" align="center">-1.95</td>
<td valign="top" align="center">9.9</td>
<td valign="top" align="center">10</td>
<td valign="top" align="center">0.0</td>
<td valign="top" align="center">0.451</td>
<td valign="top" align="center">0.99</td>
<td valign="top" align="center" style="border-left: thin solid #000000;">Q13</td>
<td valign="top" align="center">2.75</td>
<td valign="top" align="center">-0.53</td>
<td valign="top" align="center">7.9</td>
<td valign="top" align="center">7</td>
<td valign="top" align="center">0.026</td>
<td valign="top" align="center">0.338</td>
<td valign="top" align="center">1.13</td>
</tr>
<tr>
<td valign="top" align="left">Q13</td>
<td valign="top" align="center">1.93</td>
<td valign="top" align="center">-1.73</td>
<td valign="top" align="center">13.3</td>
<td valign="top" align="center">11</td>
<td valign="top" align="center">0.024</td>
<td valign="top" align="center">0.275</td>
<td valign="top" align="center">1.21</td>
<td valign="top" align="center" style="border-left: thin solid #000000;">Q14</td>
<td valign="top" align="center">0.98</td>
<td valign="top" align="center">-0.49</td>
<td valign="top" align="center">17.1</td>
<td valign="top" align="center">10</td>
<td valign="top" align="center">0.061</td>
<td valign="top" align="center">0.071</td>
<td valign="top" align="center">1.71</td>
</tr>
<tr>
<td valign="top" align="left">Q14</td>
<td valign="top" align="center">1.31</td>
<td valign="top" align="center">-0.36</td>
<td valign="top" align="center">14.7</td>
<td valign="top" align="center">10</td>
<td valign="top" align="center">0.035</td>
<td valign="top" align="center">0.145</td>
<td valign="top" align="center">1.47</td>
<td valign="top" align="center" style="border-left: thin solid #000000;">Q15</td>
<td valign="top" align="center">2.35</td>
<td valign="top" align="center">-0.3</td>
<td valign="top" align="center">13</td>
<td valign="top" align="center">8</td>
<td valign="top" align="center">0.057</td>
<td valign="top" align="center">0.112</td>
<td valign="top" align="center">1.62</td>
</tr>
<tr>
<td valign="top" align="left">Q16</td>
<td valign="top" align="center">1.25</td>
<td valign="top" align="center">-1.37</td>
<td valign="top" align="center">19.3</td>
<td valign="top" align="center">13</td>
<td valign="top" align="center">0.036</td>
<td valign="top" align="center">0.113</td>
<td valign="top" align="center">1.49</td>
<td valign="top" align="center" style="border-left: thin solid #000000;">Q16</td>
<td valign="top" align="center">1.09</td>
<td valign="top" align="center">-0.27</td>
<td valign="top" align="center">5.7</td>
<td valign="top" align="center">10</td>
<td valign="top" align="center">0.0</td>
<td valign="top" align="center">0.837</td>
<td valign="top" align="center">0.57</td>
</tr>
<tr>
<td valign="top" align="left">Q17</td>
<td valign="top" align="center">1.43</td>
<td valign="top" align="center">-2.22</td>
<td valign="top" align="center">11.8</td>
<td valign="top" align="center">11</td>
<td valign="top" align="center">0.014</td>
<td valign="top" align="center">0.383</td>
<td valign="top" align="center">1.07</td>
<td valign="top" align="center" style="border-left: thin solid #000000;">Q18</td>
<td valign="top" align="center">0.53</td>
<td valign="top" align="center">-0.75</td>
<td valign="top" align="center">16.3</td>
<td valign="top" align="center">12</td>
<td valign="top" align="center">0.043</td>
<td valign="top" align="center">0.178</td>
<td valign="top" align="center">1.36</td>
</tr>
<tr>
<td valign="top" align="left">Q18</td>
<td valign="top" align="center">1.77</td>
<td valign="top" align="center">-0.26</td>
<td valign="top" align="center">8.6</td>
<td valign="top" align="center">8</td>
<td valign="top" align="center">0.014</td>
<td valign="top" align="center">0.379</td>
<td valign="top" align="center">1.07</td>
<td valign="top" align="center" style="border-left: thin solid #000000;">Q19</td>
<td valign="top" align="center">0.62</td>
<td valign="top" align="center">-0.22</td>
<td valign="top" align="center">9.2</td>
<td valign="top" align="center">12</td>
<td valign="top" align="center">0.0</td>
<td valign="top" align="center">0.687</td>
<td valign="top" align="center">0.77</td>
</tr>
<tr>
<td valign="top" align="left">Q19</td>
<td valign="top" align="center">1.21</td>
<td valign="top" align="center">-1.3</td>
<td valign="top" align="center">11.4</td>
<td valign="top" align="center">11</td>
<td valign="top" align="center">0.01</td>
<td valign="top" align="center">0.408</td>
<td valign="top" align="center">1.04</td>
<td valign="top" align="center" style="border-left: thin solid #000000;">Q20</td>
<td valign="top" align="center">0.65</td>
<td valign="top" align="center">1.66</td>
<td valign="top" align="center">12</td>
<td valign="top" align="center">10</td>
<td valign="top" align="center">0.032</td>
<td valign="top" align="center">0.284</td>
<td valign="top" align="center">1.2</td>
</tr>
<tr>
<td valign="top" align="left">Q20</td>
<td valign="top" align="center">1.13</td>
<td valign="top" align="center">-0.5</td>
<td valign="top" align="center">9.2</td>
<td valign="top" align="center">11</td>
<td valign="top" align="center">0.0</td>
<td valign="top" align="center">0.607</td>
<td valign="top" align="center">0.83</td>
<td valign="top" align="center" style="border-left: thin solid #000000;">Q21</td>
<td valign="top" align="center">0.49</td>
<td valign="top" align="center">0.53</td>
<td valign="top" align="center">13.4</td>
<td valign="top" align="center">12</td>
<td valign="top" align="center">0.024</td>
<td valign="top" align="center">0.343</td>
<td valign="top" align="center">1.11</td>
</tr>
<tr>
<td valign="top" align="left">Q21</td>
<td valign="top" align="center">0.85</td>
<td valign="top" align="center">-0.52</td>
<td valign="top" align="center">7.8</td>
<td valign="top" align="center">11</td>
<td valign="top" align="center">0.0</td>
<td valign="top" align="center">0.733</td>
<td valign="top" align="center">0.71</td>
<td valign="top" align="center" style="border-left: thin solid #000000;">Q23</td>
<td valign="top" align="center">0.63</td>
<td valign="top" align="center">0.38</td>
<td valign="top" align="center">9.5</td>
<td valign="top" align="center">11</td>
<td valign="top" align="center">0.0</td>
<td valign="top" align="center">0.577</td>
<td valign="top" align="center">0.86</td>
</tr>
<tr>
<td valign="top" align="left">Q22</td>
<td valign="top" align="center">1.08</td>
<td valign="top" align="center">-0.18</td>
<td valign="top" align="center">17</td>
<td valign="top" align="center">10</td>
<td valign="top" align="center">0.043</td>
<td valign="top" align="center">0.073</td>
<td valign="top" align="center">1.7</td>
<td valign="top" align="center" style="border-left: thin solid #000000;">Q25</td>
<td valign="top" align="center">0.5</td>
<td valign="top" align="center">1.66</td>
<td valign="top" align="center">11.3</td>
<td valign="top" align="center">11</td>
<td valign="top" align="center">0.011</td>
<td valign="top" align="center">0.421</td>
<td valign="top" align="center">1.02</td>
</tr>
<tr>
<td valign="top" align="left">Q23</td>
<td valign="top" align="center">0.86</td>
<td valign="top" align="center">-1.74</td>
<td valign="top" align="center">18</td>
<td valign="top" align="center">13</td>
<td valign="top" align="center">0.032</td>
<td valign="top" align="center">0.157</td>
<td valign="top" align="center">1.39</td>
<td style="border-left: thin solid #000000;"/>
<td/>
<td/>
<td/>
<td/>
<td/>
<td/>
<td/>
</tr>
<tr>
<td valign="top" align="left">Q25</td>
<td valign="top" align="center">0.92</td>
<td valign="top" align="center">-1.63</td>
<td valign="top" align="center">14.2</td>
<td valign="top" align="center">13</td>
<td valign="top" align="center">0.016</td>
<td valign="top" align="center">0.357</td>
<td valign="top" align="center">1.1</td>
<td style="border-left: thin solid #000000;"/>
<td/>
<td/>
<td/>
<td/>
<td/>
<td/>
<td/>
</tr>
</tbody>
</table>
<table-wrap-foot>
<p>Dscr, Discrimination; Dffc, Difficulty.</p>
</table-wrap-foot>
</table-wrap>
<p>The results of the IRT analyses are shown in <xref ref-type="fig" rid="F5">Figures 5A</xref>&#x02013;<xref ref-type="fig" rid="F5">D</xref>. While the Item Characteristic Curves (<xref ref-type="fig" rid="F5">Figure 5A</xref>) appear to indicate that the BCTt questions have higher &#x0201C;discrimination power&#x0201D; than the cCTt questions, this difference is not significant [one-way ANOVA <italic>F</italic><sub>(1)</sub> &#x0003D; 3.11, <italic>p</italic> &#x0003D; 0.085, see <xref ref-type="fig" rid="F6">Figure 6</xref>]. This means that both tests are equally good at discriminating between students; however, where they discriminate best differs<xref ref-type="fn" rid="fn0006"><sup>6</sup></xref>. The Item Information Curves (<xref ref-type="fig" rid="F5">Figure 5B</xref>) show that the BCTt questions provide most information in the low ability range, while the Item Information is more distributed along the low-medium range for the cCTt. The resulting TIFs (<xref ref-type="fig" rid="F5">Figure 5C</xref>) therefore confirm that the BCTt is better at discriminating between students with low ability, while the cCTt is better at discriminating between low-medium abilities. As such, the IRT findings support that the cCTt overall fits grade 3&#x02013;4 individuals and that it works decently all along the ability range.</p>
<fig id="F5" position="float">
<label>Figure 5</label>
<caption><p>Item Response Theory curves for the BCTt and the cCTt. <bold>(A)</bold> Item Characteristic Curves (ICC). The figure shows that the items have varying difficulties and discrimination (slopes), with BCTt items showing higher discriminability in the low ability range and cCTt items showing higher discriminability in the low and medium ability ranges. <bold>(B)</bold> Item Response Theory Item Information Curves (IIC). Items in both instruments provide varying amounts of information at different ability levels. Similarly to the ICC curves in Panel <bold>(A)</bold>, the information of the BCTt is mainly in the low ability range, while the information of the cCTt is in the low and medium ability ranges. Item Response Theory curves for the BCTt and the cCTt. <bold>(C)</bold> Test information function (TIF). The TIF being the sum of each instrument&#x00027;s Item Information Curves [see Panel <bold>(B)</bold>], the results confirm prior observations: the BCTt provides most of its information in the low ability range while the cCTt provides most information in the low and medium ability ranges. <bold>(D)</bold> Reliability at different ability levels. The figures show that both instruments have low reliability in the high ability range. The BCTt reliability peak is shifted toward the lower ability range while the cCTt reliability peak is toward the medium ability range. Please note that the marginal reliability <italic>r</italic><sub><italic>xx</italic></sub> for the BCTt is <italic>r</italic><sub><italic>xx</italic></sub>(<italic>BCTt</italic>) &#x0003D; 0.75, and for the cCTt <italic>r</italic><sub><italic>xx</italic></sub>(<italic>cCTt</italic>) &#x0003D; 0.80.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpsyg-13-1082659-g0005.tif"/>
</fig>
<fig id="F6" position="float">
<label>Figure 6</label>
<caption><p>Item Response Theory BCTt&#x02013;cCTt item discrimination comparison [one-way ANOVA <italic>F</italic><sub>(1)</sub> &#x0003D; 3.11, <italic>p</italic> &#x0003D; 0.085].</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpsyg-13-1082659-g0006.tif"/>
</fig>
</sec>
</sec>
<sec>
<title>3.4. Limitations</title>
<p>As in all studies, the study presents certain limitations. Aside from the inherent limitations pertaining to the specific use of Classical Test Theory and Item Response Theory which are well documented in the literature, the following elements are specific to the current study.</p>
<p>The instruments were tested on two populations from different schools, one year apart, and may thus differ in their CT abilities. While the students in the same grades should be expected to have the same level of CT-skills, this may not be the case. However, certain elements help mitigate this risk and counter the limitation: the schools are in the same country and district and thus follow the same mandatory curriculum (which does not include CS or CT), the measurements took place at the same time of the academic year, and we employed IRT as it tends to be sample agnostic.</p>
<p>The relatively small sample sizes prevented us from testing more complex models, such as 3-PL and 4-PL models. Indeed, larger sample sizes, in particular for the cCTt (<italic>n</italic> = 200), would have likely improved the model fit and reliability of the item difficulty and discrimination indices. These indices should only be considered as indicative of where the test provides more information, also since the IRT analysis was conducted on a subset of the items to meet the unidimensionality criteria. However, please note that the IRT analysis was also conducted with the full set of items (although not presented in the article) and led to the same conclusions. Such an analysis is possible as the violation of the unidimensionality criteria leads to &#x0201C;[(i)] an overestimation of the discrimination parameter, (ii) with little impact on the difficulty estimation&#x0201D; (Kahraman, <xref ref-type="bibr" rid="B40">2013</xref>; Rajlic, <xref ref-type="bibr" rid="B60">2019</xref>), with &#x0201C;the impact on the estimated parameters [being] smaller the closer we are to the unidimensionality criteria&#x0201D; (Kahraman, <xref ref-type="bibr" rid="B40">2013</xref>; Rajlic, <xref ref-type="bibr" rid="B60">2019</xref>). Given the small samples and the fact that the IRT parameters were estimated on a subset of the items, it would be best to avoid using the IRT parameter estimates of the present study, in particular for the cCTt, to estimate the students&#x00027; abilities on the latent ability scale.</p>
</sec>
</sec>
<sec id="s4">
<title>4. Recommendations for the use of the BCTt and the cCTt</title>
<p>Considering (i) the present BCTt-cCTt comparison, (ii) the results of the BCTt validation conducted by Zapata-C&#x000E1;ceres et al. (<xref ref-type="bibr" rid="B93">2020</xref>) over grades 1&#x02013;6, and (iii) the cCTt validation conducted by El-Hamamsy et al. (<xref ref-type="bibr" rid="B29">2022a</xref>) over grades 3&#x02013;4, we propose the following recommendations with respect to these two instruments for grades 3&#x02013;4:</p>
<list list-type="bullet">
<list-item><p>The cCTt should be preferred for grades 3&#x02013;4 as it differentiates better between students in this age group and ability level, in addition to discriminating moderately well along the entire ability range. The cCTt is thus better suited to evaluate the efficacy of the intervention itself, in a pre- post-test design.</p></list-item>
<list-item><p>The BCTt could be employed for low-ability students in grades 3&#x02013;4, depending on the assessors&#x00027; prior knowledge of the context and the students being assessed given the good discriminability the BCTt offers in grades 3&#x02013;4 for low ability students.</p></list-item>
<list-item><p>The BCTt could be employed as a screening mechanism to identify low-ability students which could prove useful for practitioners prior to an intervention, e.g., to ensure that the intervention is well-tailored to the abilities of the students and ensure that nobody is &#x0201C;left behind.&#x0201D;</p></list-item>
</list>
</sec>
<sec id="s5">
<title>5. Discussion and conclusion</title>
<p>The BCTt and the cCTt are two instruments that expand the portfolio of validated CT assessments, in particular, at the level of primary education. These instruments overlap in their target age ranges, notably in grades 3&#x02013;4, and had not yet been compared psychometrically for those age groups. This study thus looked to establish the limits of validity of the two instruments by providing a detailed comparison of their psychometric properties on data acquired from 575 students (374 doing the BCTt and 201 doing the cCTt). Indeed, as:</p>
<list list-type="order">
<list-item><p>The BCTt and the cCTt were validated in different countries, and thus potentially different contexts</p></list-item>
<list-item><p>There were only <italic>n</italic> = 52 grade 4 students in the BCTt validation, and <italic>n</italic> = 0 grade 3 students, with limited psychometric analyses conducted for the BCTt in those grades specifically.</p></list-item>
</list>
<p>The present study looked to conduct a detailed psychometric analysis of the BCTt in grades 3&#x02013;4 (which was not yet conducted) and compare the validity of the two instruments on a large and comparable pool of grade 3&#x02013;4 students from a third, and single, country.</p>
<p>The findings from the psychometric analyses of the two instruments help re-establish their validity in grades 3 and 4 with both a new population and with students from a new country (here <italic>n</italic> = 575 in Portugal, while the cCTt was validated with <italic>n</italic> = 1,519 grade 3&#x02013;4 students in Switzerland, El-Hamamsy et al., <xref ref-type="bibr" rid="B29">2022a</xref>, and the BCTt with <italic>n</italic> = 299 grade 1&#x02013;6 students in Spain, Zapata-C&#x000E1;ceres et al., <xref ref-type="bibr" rid="B93">2020</xref>). Where the cCTt is concerned, while there were no differences between students in grades 3&#x02013;4 in the present sample, the general conclusions drawn from the Classical Test Theory analysis and overall IRT are coherent with those obtained by El-Hamamsy et al. (<xref ref-type="bibr" rid="B29">2022a</xref>). Where the BCTt is concerned, the results confirm the ceiling effect observed in grade 4 in the original study (Zapata-C&#x000E1;ceres et al., <xref ref-type="bibr" rid="B93">2020</xref>) and extend it to students in grade 3 who were not part of the initial pool of students who were administered the BCTt. The psychometric comparison indicates that <italic>the cCTt should be preferred for students in grades 3 and 4</italic>, as students already have a good assimilation of basic CT concepts pertaining to sequences and loops. Therefore, students in grades 3&#x02013;4 perform too well on the easier BCTt (which employs smaller 3 &#x000D7; 3 grids), giving rise to a ceiling effect. The <italic>BCTt should instead be preferred if the objective is to discriminate between students with low abilities in grades 3 and 4</italic>.</p>
<p>The findings are consistent with other studies that found that simple loops are already mastered in early primary school (Montuori et al., <xref ref-type="bibr" rid="B52">2022</xref>), with very young students (starting 3 years old) already being able to solve algorithmic problems and their results improving with age (Piatti et al., <xref ref-type="bibr" rid="B57">2022</xref>). As CT skills relate to students&#x00027; numerical, verbal, and non-verbal reasoning abilities (Tsarava et al., <xref ref-type="bibr" rid="B76">2022</xref>), it is likely that the findings align with students&#x00027; maturation, increase in working memory (which is required to achieve tasks, Cowan, <xref ref-type="bibr" rid="B20">2016</xref>), and executive functions over time. Therefore, as students get older, they should be able to deal with more complex computational concepts (e.g., conditionals and while loops), including those with more complex perceptual configurations (e.g., the 4 &#x000D7; 4 grids), corroborating the differences observed between both instruments. Future work should therefore consider continuing to refine the limits of validity of the instruments. Indeed, refinement studies are common in educational psychology, with similar work having already been undertaken for (i) the original CTt (aimed at 10&#x02013;16 year old students) to improve its validity for 16 year old students and above (Guggemos et al., <xref ref-type="bibr" rid="B34">2022</xref>), and (ii) the TechCheck and its variants to improve the validity for kindergarten students (Relkin et al., <xref ref-type="bibr" rid="B62">2020</xref>; Relkin and Bers, <xref ref-type="bibr" rid="B61">2021</xref>).</p>
<p>Two key takeaways emerge from the present study:</p>
<list list-type="order">
<list-item><p>The importance of building and validating CT assessments for each specific age: children in the early stages of education undergo rapid cognitive development, so an instrument designed for a specific age range is likely to be too difficult for those immediately younger and too easy for those immediately older.</p></list-item>
<list-item><p>The importance of psychometrically comparing existing, overlapping CT instruments to establish their limits of validity. By providing detailed comparisons, researchers and practitioners may be able to choose the assessment in an informed way, and in accordance with their requirements and objectives.</p></list-item>
</list>
<p>As numerous researchers have put forward, instruments such as the BCTt and the cCTt should be combined with other forms of assessments in a system of assessments (Grover et al., <xref ref-type="bibr" rid="B33">2015</xref>; Rom&#x000E1;n-Gonz&#x000E1;lez et al., <xref ref-type="bibr" rid="B64">2019</xref>; Weintrop et al., <xref ref-type="bibr" rid="B81">2021a</xref>) to accurately measure the full range of competencies at play when considering CT (Brennan and Resnick, <xref ref-type="bibr" rid="B10">2012</xref>; Piatti et al., <xref ref-type="bibr" rid="B57">2022</xref>). The system of assessments could therefore include other instruments which assess CT practices such as the test by Li et al. (<xref ref-type="bibr" rid="B48">2021</xref>), employ direct observations of students&#x00027; thought processes and strategies (Lye and Koh, <xref ref-type="bibr" rid="B49">2014</xref>; Chevalier et al., <xref ref-type="bibr" rid="B16">2020</xref>), or learning analytics and educational data mining techniques (Cock et al., <xref ref-type="bibr" rid="B19">2021</xref>; Nasir et al., <xref ref-type="bibr" rid="B54">2021</xref>; Zapata-C&#x000E1;ceres and Mart&#x000ED;n-Barroso, <xref ref-type="bibr" rid="B92">2021</xref>). Complementary assessments would not only help gain a more accurate and in-depth picture of student learning but also feed into the learning activity design and intervention process (Chevalier et al., <xref ref-type="bibr" rid="B15">2022</xref>). For completeness, the system of assessments should also include instruments that measure CT perspectives (e.g., such as those developed for high school, Yagci, <xref ref-type="bibr" rid="B89">2019</xref> and undergraduates, Korkmaz et al., <xref ref-type="bibr" rid="B44">2017</xref>).</p>
<p>Provided that validation is a multi-step process that requires &#x0201C;collect[ing] multiple sources of evidence to support the proposed interpretation and use of assessment result[s] [and] multiple methodologies, sources of data, and types of analysis&#x0201D; (Gane et al., <xref ref-type="bibr" rid="B32">2021</xref>), it is important to note that the BCTt and cCTt may still undergo further validation by including evidence of criterion validity. This can be achieved through several means. The first is comparing with other existing validated assessments. For instance, Relkin et al. (<xref ref-type="bibr" rid="B62">2020</xref>) compared the TechCheck with the TACTIC-KIBO, while Li et al. (<xref ref-type="bibr" rid="B48">2021</xref>) went one step further and correlated the CTA-CES with reasoning, spatial abilities, and verbal abilities. The second is establishing the test&#x00027;s predictive validity, for example by establishing whether the instrument can predict academic performance and coding achievement as done by Rom&#x000E1;n-Gonz&#x000E1;lez et al. (<xref ref-type="bibr" rid="B66">2018</xref>). The third is determining the instruments&#x00027; concurrent validity, that is to say seeing whether the instrument is able to distinguish between two groups that differ, for instance novices and experts, or according to students&#x00027; expressed digital proficiency as done by Li et al. (<xref ref-type="bibr" rid="B48">2021</xref>).</p>
</sec>
<sec sec-type="data-availability" id="s6">
<title>Data availability statement</title>
<p>The data presented in this study can be found on Zenodo (El-Hamamsy et al., <xref ref-type="bibr" rid="B30">2022b</xref>).</p>
</sec>
<sec sec-type="ethics-statement" id="s7">
<title>Ethics statement</title>
<p>The studies involving human participants were reviewed and approved by Comit&#x000E9; de &#x000C9;tica de la Investigaci&#x000F3;n de la Universidad Rey Juan Carlos. Written informed consent to participate in this study was provided by the participants&#x00027; legal guardian/next of kin.</p>
</sec>
<sec sec-type="author-contributions" id="s8">
<title>Author contributions</title>
<p>LEH, MZC, PM, BB, and EMB: conceptualization. LEH, MZC, and PM: methodology. MRG: validation. LEH: formal analysis, writing&#x02014;original draft and preparation, and visualization. PM: investigation. PM and LEH: data curation. LEH, MZC, PM, JD, BB, EMB, and MRG: writing&#x02014;review and editing. BB and EMB: supervision. All authors contributed to the article and approved the submitted version.</p>
</sec>
</body>
<back>
<sec sec-type="funding-information" id="s9">
<title>Funding</title>
<p>This work was supported by (i) the NCCR Robotics, a National Centre of Competence in Research, funded by the Swiss National Science Foundation (grant number 51NF40_185543), (ii) the Madrid Regional Government through the project e-Madrid-CM (P2018/TCS-4307) which is co-financed by the Structural Funds (FSE and FEDER), and (iii) the Funda&#x000E7;&#x000E3;o Calouste Gulbenkian (Gulbenkian Programme for Knowledge).</p>
</sec>
<ack><p>We would like to thank Professor Jean-Philippe Antonietti for answering our questions pertaining to Confirmatory Factor Analysis and Item Response Theory.</p>
</ack>
<sec sec-type="COI-statement" id="conf1">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s10">
<title>Publisher&#x00027;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<fn-group>
<fn id="fn0001"><p><sup>1</sup>Please note that the full BCTt is available upon request to the co-authors of Zapata-C&#x000E1;ceres et al. (<xref ref-type="bibr" rid="B93">2020</xref>), and the cCTt items are presented in El-Hamamsy et al. (<xref ref-type="bibr" rid="B29">2022a</xref>) with an editable version available upon request to the co-authors of the article.</p></fn>
<fn id="fn0002"><p><sup>2</sup>In the original validation of the BCTt by Zapata-C&#x000E1;ceres et al. (<xref ref-type="bibr" rid="B93">2020</xref>) they obtained average scores of &#x003BC;<sub><italic>grade</italic>1</sub> &#x0003D; 16.52 &#x000B1; 3.31, &#x003BC;<sub><italic>grade</italic>2</sub> &#x0003D; 16.78 &#x000B1; 2.49, &#x003BC;<sub><italic>grade</italic>4</sub> &#x0003D; 21.57 &#x000B1; 3.04, &#x003BC;<sub><italic>grade</italic>5</sub> &#x0003D; 21.84 &#x000B1; 2.61, &#x003BC;<sub><italic>grade</italic>6</sub> &#x0003D; 21.72 &#x000B1; 2.62 out of 25. Please note that their sample did not include grade 3 students.</p></fn>
<fn id="fn0003"><p><sup>3</sup>The data is available on Zenodo (El-Hamamsy et al., <xref ref-type="bibr" rid="B30">2022b</xref>).</p></fn>
<fn id="fn0004"><p><sup>4</sup>Skew (i.e., the asymmetry of a distribution) and kurtosis (i.e., the location of the peak of a distribution) of a normal distribution are close to 0 (Kim, <xref ref-type="bibr" rid="B41">2013</xref>).</p></fn>
<fn id="fn0005"><p><sup>5</sup>Cohen&#x00027;s <italic>d</italic> effect size is a quantitative measure of the magnitude of the observed difference. It is a standardized measure of the difference between the two means which is calculated by dividing the difference of the means by the standard deviation. Cohen suggested that 0.2 is a small effect size, 0.5 a medium effect size, and 0.8 a large effect size (Lakens, <xref ref-type="bibr" rid="B46">2013</xref>).</p></fn>
<fn id="fn0006"><p><sup>6</sup>The &#x0201C;discrimination power&#x0201D; of the instrument relates to how high the discrimination is over all the questions of the assessment, and is provided by the slope of the ICCs, or the maximum values of the IICs. This is related to where the assessment, and thus the individual questions, discriminate best (which is provided by the <italic>y</italic> &#x0003D; 0.5 crossing of the ICCs, or the peak of the IICs).</p></fn>
</fn-group>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Aho</surname> <given-names>A. V.</given-names></name></person-group> (<year>2012</year>). <article-title>Computation and computational thinking</article-title>. <source>Comput. J.</source> <volume>55</volume>, <fpage>832</fpage>&#x02013;<lpage>835</lpage>. <pub-id pub-id-type="doi">10.1093/comjnl/bxs074</pub-id></citation>
</ref>
<ref id="B2">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Alavi</surname> <given-names>M.</given-names></name> <name><surname>Visentin</surname> <given-names>D. C.</given-names></name> <name><surname>Thapa</surname> <given-names>D. K.</given-names></name> <name><surname>Hunt</surname> <given-names>G. E.</given-names></name> <name><surname>Watson</surname> <given-names>R.</given-names></name> <name><surname>Cleary</surname> <given-names>M.</given-names></name></person-group> (<year>2020</year>). <article-title>Chi-square for model fit in confirmatory factor analysis</article-title>. <source>J. Adv. Nurs.</source> <volume>76</volume>, <fpage>2209</fpage>&#x02013;<lpage>2211</lpage>. <pub-id pub-id-type="doi">10.1111/jan.14399</pub-id><pub-id pub-id-type="pmid">32323338</pub-id></citation></ref>
<ref id="B3">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Andersson</surname> <given-names>B.</given-names></name> <name><surname>Xin</surname> <given-names>T.</given-names></name></person-group> (<year>2018</year>). <article-title>Large sample confidence intervals for item response theory reliability coefficients</article-title>. <source>Educ. Psychol. Meas.</source> <volume>78</volume>, <fpage>32</fpage>&#x02013;<lpage>45</lpage>. <pub-id pub-id-type="doi">10.1177/0013164417713570</pub-id><pub-id pub-id-type="pmid">29795945</pub-id></citation></ref>
<ref id="B4">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Awopeju</surname> <given-names>O. A.</given-names></name> <name><surname>Afolabi</surname> <given-names>E. R. I.</given-names></name></person-group> (<year>2016</year>). <article-title>Comparative analysis of classical test theory and item response theory based item parameter estimates of senior school certificate mathematics examination</article-title>. <source>Eur. Sci. J.</source> <volume>12</volume>:<fpage>263</fpage>. <pub-id pub-id-type="doi">10.19044/esj.2016.v12n28p263</pub-id></citation>
</ref>
<ref id="B5">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bakala</surname> <given-names>E.</given-names></name> <name><surname>Gerosa</surname> <given-names>A.</given-names></name> <name><surname>Hourcade</surname> <given-names>J. P.</given-names></name> <name><surname>Tejera</surname> <given-names>G.</given-names></name></person-group> (<year>2021</year>). <article-title>Preschool children, robots, and computational thinking: a systematic review</article-title>. <source>Int. J. Child Comput. Interact.</source> <volume>29</volume>:<fpage>100337</fpage>. <pub-id pub-id-type="doi">10.1016/j.ijcci.2021.100337</pub-id></citation>
</ref>
<ref id="B6">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Basu</surname> <given-names>S.</given-names></name> <name><surname>Rutstein</surname> <given-names>D.</given-names></name> <name><surname>Xu</surname> <given-names>Y.</given-names></name> <name><surname>Shear</surname> <given-names>L.</given-names></name></person-group> (<year>2020</year>). <article-title>&#x0201C;A principled approach to designing a computational thinking practices assessment for early grades,&#x0201D;</article-title> in <source>SIGCSE &#x00027;20: Proceedings of the 51st ACM Technical Symposium on Computer Science Education</source> (<publisher-loc>Portland OR</publisher-loc>: <publisher-name>ACM</publisher-name>), <fpage>912</fpage>&#x02013;<lpage>918</lpage>. <pub-id pub-id-type="doi">10.1145/3328778.3366849</pub-id></citation>
</ref>
<ref id="B7">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bellettini</surname> <given-names>C.</given-names></name> <name><surname>Lonati</surname> <given-names>V.</given-names></name> <name><surname>Malchiodi</surname> <given-names>D.</given-names></name> <name><surname>Monga</surname> <given-names>M.</given-names></name> <name><surname>Morpurgo</surname> <given-names>A.</given-names></name> <name><surname>Torelli</surname> <given-names>M.</given-names></name></person-group> (<year>2015</year>). <article-title>&#x0201C;How challenging are bebras tasks? An IRT analysis based on the performance of Italian students,&#x0201D;</article-title> in <source>Proceedings of the 2015 ACM Conference on Innovation and Technology in Computer Science Education, ITiCSE &#x00027;15</source> (<publisher-loc>New York, NY</publisher-loc>: <publisher-name>Association for Computing Machinery</publisher-name>), <fpage>27</fpage>&#x02013;<lpage>32</lpage>. <pub-id pub-id-type="doi">10.1145/2729094.2742603</pub-id></citation>
</ref>
<ref id="B8">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Biggs</surname> <given-names>J.</given-names></name></person-group> (<year>1996</year>). <article-title>Enhancing teaching through constructive alignment</article-title>. <source>High. Educ.</source> <volume>32</volume>, <fpage>347</fpage>&#x02013;<lpage>364</lpage>. <pub-id pub-id-type="doi">10.1007/BF00138871</pub-id></citation>
</ref>
<ref id="B9">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bland</surname> <given-names>J. M.</given-names></name> <name><surname>Altman</surname> <given-names>D. G.</given-names></name></person-group> (<year>1997</year>). <article-title>Statistics notes: Cronbach&#x00027;s alpha</article-title>. <source>BMJ</source> <volume>314</volume>:<fpage>572</fpage>. <pub-id pub-id-type="doi">10.1136/bmj.314.7080.572</pub-id><pub-id pub-id-type="pmid">9055718</pub-id></citation></ref>
<ref id="B10">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Brennan</surname> <given-names>K.</given-names></name> <name><surname>Resnick</surname> <given-names>M.</given-names></name></person-group> (<year>2012</year>). <article-title>&#x0201C;New frameworks for studying and assessing the development of computational thinking,&#x0201D;</article-title> in <source>Proceedings of the 2012 Annual Meeting of the American Educational Research Association</source>, Vol. 1 (Vancouver) p. 25.</citation>
</ref>
<ref id="B11">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chae</surname> <given-names>Y.-M.</given-names></name> <name><surname>Park</surname> <given-names>S. G.</given-names></name> <name><surname>Park</surname> <given-names>I.</given-names></name></person-group> (<year>2019</year>). <article-title>The relationship between classical item characteristics and item response time on computer-based testing</article-title>. <source>Korean J. Med. Educ.</source> <volume>31</volume>, <fpage>1</fpage>&#x02013;<lpage>9</lpage>. <pub-id pub-id-type="doi">10.3946/kjme.2019.113</pub-id><pub-id pub-id-type="pmid">30852856</pub-id></citation></ref>
<ref id="B12">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chalmers</surname> <given-names>R. P.</given-names></name></person-group> (<year>2012</year>). <article-title>mirt: A multidimensional item response theory package for the R environment</article-title>. <source>J. Stat. Softw.</source> <volume>48</volume>, <fpage>1</fpage>&#x02013;<lpage>29</lpage>. <pub-id pub-id-type="doi">10.18637/jss.v048.i06</pub-id></citation>
</ref>
<ref id="B13">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chen</surname> <given-names>F.</given-names></name> <name><surname>Curran</surname> <given-names>P. J.</given-names></name> <name><surname>Bollen</surname> <given-names>K. A.</given-names></name> <name><surname>Kirby</surname> <given-names>J.</given-names></name> <name><surname>Paxton</surname> <given-names>P.</given-names></name></person-group> (<year>2008</year>). <article-title>An empirical evaluation of the use of fixed cutoff points in RMSEA test statistic in structural equation models</article-title>. <source>Sociol. Methods Res.</source> <volume>36</volume>, <fpage>462</fpage>&#x02013;<lpage>494</lpage>. <pub-id pub-id-type="doi">10.1177/0049124108314720</pub-id><pub-id pub-id-type="pmid">19756246</pub-id></citation></ref>
<ref id="B14">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chen</surname> <given-names>G.</given-names></name> <name><surname>Shen</surname> <given-names>J.</given-names></name> <name><surname>Barth-Cohen</surname> <given-names>L.</given-names></name> <name><surname>Jiang</surname> <given-names>S.</given-names></name> <name><surname>Huang</surname> <given-names>X.</given-names></name> <name><surname>Eltoukhy</surname> <given-names>M.</given-names></name></person-group> (<year>2017</year>). <article-title>Assessing elementary students&#x00027; computational thinking in everyday reasoning and robotics programming</article-title>. <source>Comput. Educ.</source> <volume>109</volume>, <fpage>162</fpage>&#x02013;<lpage>175</lpage>. <pub-id pub-id-type="doi">10.1016/j.compedu.2017.03.001</pub-id></citation>
</ref>
<ref id="B15">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chevalier</surname> <given-names>M.</given-names></name> <name><surname>Giang</surname> <given-names>C.</given-names></name> <name><surname>El-Hamamsy</surname> <given-names>L.</given-names></name> <name><surname>Bonnet</surname> <given-names>E.</given-names></name> <name><surname>Papaspyros</surname> <given-names>V.</given-names></name> <name><surname>Pellet</surname> <given-names>J.-P.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>The role of feedback and guidance as intervention methods to foster computational thinking in educational robotics learning activities for primary school</article-title>. <source>Comput. Educ.</source> <volume>180</volume>:<fpage>104431</fpage>. <pub-id pub-id-type="doi">10.1016/j.compedu.2022.104431</pub-id></citation>
</ref>
<ref id="B16">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chevalier</surname> <given-names>M.</given-names></name> <name><surname>Giang</surname> <given-names>C.</given-names></name> <name><surname>Piatti</surname> <given-names>A.</given-names></name> <name><surname>Mondada</surname> <given-names>F.</given-names></name></person-group> (<year>2020</year>). <article-title>Fostering computational thinking through educational robotics: a model for creative computational problem solving</article-title>. <source>Int. J. STEM Educ.</source> <volume>7</volume>:39, <fpage>1</fpage>&#x02013;<lpage>18</lpage>. <pub-id pub-id-type="doi">10.1186/s40594-020-00238-z</pub-id></citation>
</ref>
<ref id="B17">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Christensen</surname> <given-names>K. B.</given-names></name> <name><surname>Makransky</surname> <given-names>G.</given-names></name> <name><surname>Horton</surname> <given-names>M.</given-names></name></person-group> (<year>2017</year>). <article-title>Critical values for Yen&#x00027;s <italic>Q</italic> <sub>3</sub> : identification of local dependence in the Rasch model using residual correlations</article-title>. <source>Appl. Psychol. Meas.</source> <volume>41</volume>, <fpage>178</fpage>&#x02013;<lpage>194</lpage>. <pub-id pub-id-type="doi">10.1177/0146621616677520</pub-id><pub-id pub-id-type="pmid">29881087</pub-id></citation></ref>
<ref id="B18">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Clarke-Midura</surname> <given-names>J.</given-names></name> <name><surname>Silvis</surname> <given-names>D.</given-names></name> <name><surname>Shumway</surname> <given-names>J. F.</given-names></name> <name><surname>Lee</surname> <given-names>V. R.</given-names></name> <name><surname>Kozlowski</surname> <given-names>J. S.</given-names></name></person-group> (<year>2021</year>). <article-title>Developing a kindergarten computational thinking assessment using evidence-centered design: the case of algorithmic thinking</article-title>. <source>Comput. Sci. Educ.</source> <volume>31</volume>, <fpage>117</fpage>&#x02013;<lpage>140</lpage>. <pub-id pub-id-type="doi">10.1080/08993408.2021.1877988</pub-id></citation>
</ref>
<ref id="B19">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Cock</surname> <given-names>J.</given-names></name> <name><surname>Marras</surname> <given-names>M.</given-names></name> <name><surname>Giang</surname> <given-names>C.</given-names></name> <name><surname>K&#x000E4;ser</surname> <given-names>T.</given-names></name></person-group> (<year>2021</year>). <article-title>&#x0201C;Early prediction of conceptual understanding in interactive simulations,&#x0201D;</article-title> in <source>Proceedings of The 14th International Conference on Educational Data Mining (EDM21)</source> (<publisher-loc>Paris</publisher-loc>), <fpage>161</fpage>&#x02013;<lpage>171</lpage>.</citation>
</ref>
<ref id="B20">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Cowan</surname> <given-names>N.</given-names></name></person-group> (<year>2016</year>). <article-title>Working memory maturation: can we get at the essence of cognitive growth?</article-title> <source>Perspect. Psychol. Sci.</source> <volume>11</volume>, <fpage>239</fpage>&#x02013;<lpage>264</lpage>. <pub-id pub-id-type="doi">10.1177/1745691615621279</pub-id><pub-id pub-id-type="pmid">26993277</pub-id></citation></ref>
<ref id="B21">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Dai</surname> <given-names>B.</given-names></name> <name><surname>Zhang</surname> <given-names>W.</given-names></name> <name><surname>Wang</surname> <given-names>Y.</given-names></name> <name><surname>Jian</surname> <given-names>X.</given-names></name></person-group> (<year>2020</year>). <article-title>Comparison of trust assessment scales based on item response theory</article-title>. <source>Front. Psychol.</source> <volume>11</volume>:<fpage>10</fpage>. <pub-id pub-id-type="doi">10.3389/fpsyg.2020.00010</pub-id><pub-id pub-id-type="pmid">32038438</pub-id></citation></ref>
<ref id="B22">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Dai</surname> <given-names>S.</given-names></name> <name><surname>Wang</surname> <given-names>X.</given-names></name> <name><surname>Svetina</surname> <given-names>D.</given-names></name></person-group> (<year>2022</year>). <source>subscore: Computing Subscores in Classical Test Theory and Item Response Theory</source>. R Package Version 3.3.</citation>
</ref>
<ref id="B23">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>De Ayala</surname> <given-names>R. J.</given-names></name> <name><surname>Little</surname> <given-names>T. D.</given-names></name></person-group> (<year>2022</year>). <source>The Theory and Practice of Item Response Theory, 2nd Edn.</source> Methodology in the Social Sciences. <publisher-loc>New York, NY</publisher-loc>: <publisher-name>Guilford Press</publisher-name>.</citation>
</ref>
<ref id="B24">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>De Champlain</surname> <given-names>A. F.</given-names></name></person-group> (<year>2010</year>). <article-title>A primer on classical test theory and item response theory for assessments in medical education</article-title>. <source>Med. Educ.</source> <volume>44</volume>, <fpage>109</fpage>&#x02013;<lpage>117</lpage>. <pub-id pub-id-type="doi">10.1111/j.1365-2923.2009.03425.x</pub-id><pub-id pub-id-type="pmid">20078762</pub-id></citation></ref>
<ref id="B25">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Denning</surname> <given-names>P. J.</given-names></name> <name><surname>Tedre</surname> <given-names>M.</given-names></name></person-group> (<year>2021</year>). <article-title>Computational thinking: a disciplinary perspective</article-title>. <source>Inform. Educ.</source> <volume>20</volume>, <fpage>361</fpage>&#x02013;<lpage>390</lpage>. <pub-id pub-id-type="doi">10.15388/infedu.2021.21</pub-id></citation>
</ref>
<ref id="B26">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>DeVellis</surname> <given-names>R. F.</given-names></name></person-group> (<year>2006</year>). <article-title>Classical test theory</article-title>. <source>Med. Care</source> <volume>44</volume>, <fpage>S50</fpage>&#x02013;<lpage>S59</lpage>. <pub-id pub-id-type="doi">10.1097/01.mlr.0000245426.10853.30</pub-id><pub-id pub-id-type="pmid">17060836</pub-id></citation></ref>
<ref id="B27">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>El-Hamamsy</surname> <given-names>L.</given-names></name> <name><surname>Bruno</surname> <given-names>B.</given-names></name> <name><surname>Chessel-Lazzarotto</surname> <given-names>F.</given-names></name> <name><surname>Chevalier</surname> <given-names>M.</given-names></name> <name><surname>Roy</surname> <given-names>D.</given-names></name> <name><surname>Zufferey</surname> <given-names>J. D.</given-names></name> <etal/></person-group>. (<year>2021a</year>). <article-title>The symbiotic relationship between educational robotics and computer science in formal education</article-title>. <source>Educ. Inform. Technol.</source> <volume>26</volume>, <fpage>5077</fpage>&#x02013;<lpage>5107</lpage>. <pub-id pub-id-type="doi">10.1007/s10639-021-10494-3</pub-id><pub-id pub-id-type="pmid">33841027</pub-id></citation></ref>
<ref id="B28">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>El-Hamamsy</surname> <given-names>L.</given-names></name> <name><surname>Chessel-Lazzarotto</surname> <given-names>F.</given-names></name> <name><surname>Bruno</surname> <given-names>B.</given-names></name> <name><surname>Roy</surname> <given-names>D.</given-names></name> <name><surname>Cahlikova</surname> <given-names>T.</given-names></name> <name><surname>Chevalier</surname> <given-names>M.</given-names></name> <etal/></person-group>. (<year>2021b</year>). <article-title>A computer science and robotics integration model for primary school: evaluation of a large-scale in-service K-4 teacher-training program</article-title>. <source>Educ. Inform. Technol.</source> <volume>26</volume>, <fpage>2445</fpage>&#x02013;<lpage>2475</lpage>. <pub-id pub-id-type="doi">10.1007/s10639-020-10355-5</pub-id><pub-id pub-id-type="pmid">33162777</pub-id></citation></ref>
<ref id="B29">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>El-Hamamsy</surname> <given-names>L.</given-names></name> <name><surname>Zapata-C&#x000E1;ceres</surname> <given-names>M.</given-names></name> <name><surname>Barroso</surname> <given-names>E. M.</given-names></name> <name><surname>Mondada</surname> <given-names>F.</given-names></name> <name><surname>Zufferey</surname> <given-names>J. D.</given-names></name> <etal/></person-group>. (<year>2022a</year>). <article-title>The competent computational thinking test: Development and validation of an unplugged computational thinking test for upper primary school</article-title>. <source>J. Educ. Comput. Res.</source> <volume>60</volume>:<fpage>07356331221081753</fpage>. <pub-id pub-id-type="doi">10.1177/07356331221081753</pub-id></citation>
</ref>
<ref id="B30">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>El-Hamamsy</surname> <given-names>L.</given-names></name> <name><surname>Zapata-C&#x000E1;ceres</surname> <given-names>M.</given-names></name> <name><surname>Marcelino</surname> <given-names>P.</given-names></name> <name><surname>Zufferey</surname> <given-names>J. D.</given-names></name> <name><surname>Bruno</surname> <given-names>B.</given-names></name> <name><surname>Barroso</surname> <given-names>E. M.</given-names></name> <etal/></person-group>. (<year>2022b</year>). <article-title>Dataset for the comparison of two Computational Thinking (CT) test for upper primary school (grades 3-4) : the Beginners&#x00027; CT test (BCTt) and the competent CT test (cCTt)</article-title>. <source>Zenodo</source>. <pub-id pub-id-type="doi">10.5281/zenodo.5885033</pub-id></citation>
</ref>
<ref id="B31">
<citation citation-type="journal"><person-group person-group-type="author"><collab>European Union</collab></person-group> (<year>2006</year>). <source>Recommendation of the European Parliament and of the Council of 18 December 2006 on Key Competences for Lifelong Learning.</source> Technical report.</citation>
</ref>
<ref id="B32">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gane</surname> <given-names>B. D.</given-names></name> <name><surname>Israel</surname> <given-names>M.</given-names></name> <name><surname>Elagha</surname> <given-names>N.</given-names></name> <name><surname>Yan</surname> <given-names>W.</given-names></name> <name><surname>Luo</surname> <given-names>F.</given-names></name> <name><surname>Pellegrino</surname> <given-names>J. W.</given-names></name></person-group> (<year>2021</year>). <article-title>Design and validation of learning trajectory-based assessments for computational thinking in upper elementary grades</article-title>. <source>Comput. Sci. Educ.</source> <volume>31</volume>, <fpage>141</fpage>&#x02013;<lpage>168</lpage>. <pub-id pub-id-type="doi">10.1080/08993408.2021.1874221</pub-id></citation>
</ref>
<ref id="B33">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Grover</surname> <given-names>S.</given-names></name> <name><surname>Pea</surname> <given-names>R.</given-names></name> <name><surname>Cooper</surname> <given-names>S.</given-names></name></person-group> (<year>2015</year>). <article-title>&#x0201C;&#x02018;Systems of assessments&#x02019; for deeper learning of computational thinking in k-12,&#x0201D;</article-title> in <source>Proceedings of the 2015 Annual Meeting of the American Educational Research Association</source> (<publisher-loc>Chicago, IL</publisher-loc>), <fpage>15</fpage>&#x02013;<lpage>20</lpage>.</citation>
</ref>
<ref id="B34">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Guggemos</surname> <given-names>J.</given-names></name> <name><surname>Seufert</surname> <given-names>S.</given-names></name> <name><surname>Rom&#x000E1;n-Gonz&#x000E1;lez</surname> <given-names>M.</given-names></name></person-group> (<year>2022</year>). <article-title>Computational thinking assessment &#x02013; towards more vivid interpretations</article-title>. <source>Technol. Knowl. Learn.</source> <pub-id pub-id-type="doi">10.1007/s10758-021-09587-2</pub-id></citation>
</ref>
<ref id="B35">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hambleton</surname> <given-names>R. K.</given-names></name> <name><surname>Jones</surname> <given-names>R. W.</given-names></name></person-group> (<year>1993</year>). <article-title>Comparison of classical test theory and item response theory and their applications to test development</article-title>. <source>Educ. Meas. Issues Pract.</source> <volume>12</volume>, <fpage>38</fpage>&#x02013;<lpage>47</lpage>. <pub-id pub-id-type="doi">10.1111/j.1745-3992.1993.tb00543.x</pub-id></citation>
</ref>
<ref id="B36">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hinton</surname> <given-names>P.</given-names></name> <name><surname>McMurray</surname> <given-names>I.</given-names></name> <name><surname>Brownlow</surname> <given-names>C.</given-names></name></person-group> (<year>2014</year>). <source>SPSS Explained</source>. <publisher-loc>London</publisher-loc>: <publisher-name>Routledge</publisher-name>. <pub-id pub-id-type="doi">10.4324/9781315797298</pub-id></citation>
</ref>
<ref id="B37">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hu</surname> <given-names>L.-T.</given-names></name> <name><surname>Bentler</surname> <given-names>P. M.</given-names></name></person-group> (<year>1999</year>). <article-title>Cutoff criteria for fit indexes in covariance structure analysis: conventional criteria versus new alternatives</article-title>. <source>Struct. Equat. Model. Multidiscipl. J.</source> <volume>6</volume>, <fpage>1</fpage>&#x02013;<lpage>55</lpage>. <pub-id pub-id-type="doi">10.1080/10705519909540118</pub-id></citation>
</ref>
<ref id="B38">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hubwieser</surname> <given-names>P.</given-names></name> <name><surname>M&#x000FC;hling</surname> <given-names>A.</given-names></name></person-group> (<year>2014</year>). <article-title>&#x0201C;Playing PISA with bebras,&#x0201D;</article-title> in <source>Proceedings of the 9th Workshop in Primary and Secondary Computing Education</source>, WiPSCE &#x00027;14 (New York, NY: Association for Computing Machinery), <fpage>128</fpage>&#x02013;<lpage>129</lpage>. <pub-id pub-id-type="doi">10.1145/2670757.2670759</pub-id><pub-id pub-id-type="pmid">25712180</pub-id></citation></ref>
<ref id="B39">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Jabrayilov</surname> <given-names>R.</given-names></name> <name><surname>Emons</surname> <given-names>W. H. M.</given-names></name> <name><surname>Sijtsma</surname> <given-names>K.</given-names></name></person-group> (<year>2016</year>). <article-title>Comparison of classical test theory and item response theory in individual change assessment</article-title>. <source>Appl. Psychol. Meas.</source> <volume>40</volume>, <fpage>559</fpage>&#x02013;<lpage>572</lpage>. <pub-id pub-id-type="doi">10.1177/0146621616664046</pub-id><pub-id pub-id-type="pmid">29881070</pub-id></citation></ref>
<ref id="B40">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kahraman</surname> <given-names>N.</given-names></name></person-group> (<year>2013</year>). <article-title>Unidimensional interpretations for multidimensional test items</article-title>. <source>J. Educ. Meas.</source> <volume>50</volume>, <fpage>227</fpage>&#x02013;<lpage>246</lpage>. <pub-id pub-id-type="doi">10.1111/jedm.12012</pub-id></citation>
</ref>
<ref id="B41">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kim</surname> <given-names>H.-Y.</given-names></name></person-group> (<year>2013</year>). <article-title>Statistical notes for clinical researchers: assessing normal distribution (2) using skewness and kurtosis</article-title>. <source>Restor. Dent. Endod.</source> <volume>38</volume>, <fpage>52</fpage>&#x02013;<lpage>54</lpage>. <pub-id pub-id-type="doi">10.5395/rde.2013.38.1.52</pub-id><pub-id pub-id-type="pmid">23495371</pub-id></citation></ref>
<ref id="B42">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Knapp</surname> <given-names>T. R.</given-names></name></person-group> (<year>2016</year>). <article-title>Why is the one-group pretest&#x02013;posttest design still used?</article-title> <source>Clin. Nurs. Res.</source> <volume>25</volume>, <fpage>467</fpage>&#x02013;<lpage>472</lpage>. <pub-id pub-id-type="doi">10.1177/1054773816666280</pub-id><pub-id pub-id-type="pmid">27558917</pub-id></citation></ref>
<ref id="B43">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kong</surname> <given-names>S.-C.</given-names></name> <name><surname>Lai</surname> <given-names>M.</given-names></name></person-group> (<year>2022</year>). <article-title>Validating a computational thinking concepts test for primary education using item response theory: an analysis of students&#x00027; responses</article-title>. <source>Comput. Educ.</source> <volume>187</volume>:<fpage>104562</fpage>. <pub-id pub-id-type="doi">10.1016/j.compedu.2022.104562</pub-id></citation>
</ref>
<ref id="B44">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Korkmaz</surname> <given-names>&#x000D6;.</given-names></name> <name><surname>&#x000C7;akir</surname> <given-names>R.</given-names></name> <name><surname>&#x000D6;zden</surname> <given-names>M. Y.</given-names></name></person-group> (<year>2017</year>). <article-title>A validity and reliability study of the computational thinking scales (CTS)</article-title>. <source>Comput. Hum. Behav.</source> <volume>72</volume>, <fpage>558</fpage>&#x02013;<lpage>569</lpage>. <pub-id pub-id-type="doi">10.1016/j.chb.2017.01.005</pub-id></citation>
</ref>
<ref id="B45">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kyriazos</surname> <given-names>T. A.</given-names></name></person-group> (<year>2018</year>). <article-title>Applied psychometrics: writing-up a factor analysis construct validation study with examples</article-title>. <source>Psychology</source> <volume>9</volume>, <fpage>2503</fpage>&#x02013;<lpage>2530</lpage>. <pub-id pub-id-type="doi">10.4236/psych.2018.911144</pub-id></citation>
</ref>
<ref id="B46">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lakens</surname> <given-names>D.</given-names></name></person-group> (<year>2013</year>). <article-title>Calculating and reporting effect sizes to facilitate cumulative science: a practical primer for <italic>t</italic>-tests and ANOVAs</article-title>. <source>Front. Psychol.</source> <volume>4</volume>:<fpage>863</fpage>. <pub-id pub-id-type="doi">10.3389/fpsyg.2013.00863</pub-id><pub-id pub-id-type="pmid">24324449</pub-id></citation></ref>
<ref id="B47">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>Y.</given-names></name> <name><surname>Schoenfeld</surname> <given-names>A. H.</given-names></name> <name><surname>diSessa</surname> <given-names>A. A.</given-names></name> <name><surname>Graesser</surname> <given-names>A. C.</given-names></name> <name><surname>Benson</surname> <given-names>L. C.</given-names></name> <name><surname>English</surname> <given-names>L. D.</given-names></name> <etal/></person-group>. (<year>2020</year>). <article-title>Computational thinking is more about thinking than computing</article-title>. <source>J. STEM Educ. Res.</source> <volume>3</volume>, <fpage>1</fpage>&#x02013;<lpage>18</lpage>. <pub-id pub-id-type="doi">10.1007/s41979-020-00030-2</pub-id><pub-id pub-id-type="pmid">32838129</pub-id></citation></ref>
<ref id="B48">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>Y.</given-names></name> <name><surname>Xu</surname> <given-names>S.</given-names></name> <name><surname>Liu</surname> <given-names>J.</given-names></name></person-group> (<year>2021</year>). <article-title>Development and validation of computational thinking assessment of Chinese elementary school students</article-title>. <source>J. Pacific Rim Psychol.</source> <volume>15</volume>:<fpage>18344909211010240</fpage>. <pub-id pub-id-type="doi">10.1177/18344909211010240</pub-id></citation>
</ref>
<ref id="B49">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lye</surname> <given-names>S. Y.</given-names></name> <name><surname>Koh</surname> <given-names>J. H. L.</given-names></name></person-group> (<year>2014</year>). <article-title>Review on teaching and learning of computational thinking through programming: what is next for K-12?</article-title> <source>Comput. Hum. Behav.</source> <volume>41</volume>, <fpage>51</fpage>&#x02013;<lpage>61</lpage>. <pub-id pub-id-type="doi">10.1016/j.chb.2014.09.012</pub-id></citation>
</ref>
<ref id="B50">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Mannila</surname> <given-names>L.</given-names></name> <name><surname>Dagiene</surname> <given-names>V.</given-names></name> <name><surname>Demo</surname> <given-names>B.</given-names></name> <name><surname>Grgurina</surname> <given-names>N.</given-names></name> <name><surname>Mirolo</surname> <given-names>C.</given-names></name> <name><surname>Rolandsson</surname> <given-names>L.</given-names></name> <etal/></person-group>. (<year>2014</year>). <article-title>&#x0201C;Computational thinking in K-9 education,&#x0201D;</article-title> in <source>ITiCSE-WGR &#x00027;14: Proceedings of the Working Group Reports of the 2014 on Innovation &#x00026; Technology in Computer Science Education Conference</source> (<publisher-loc>Uppsala</publisher-loc>: <publisher-name>ACM</publisher-name>), <fpage>1</fpage>&#x02013;<lpage>29</lpage>. <pub-id pub-id-type="doi">10.1145/2713609.2713610</pub-id></citation>
</ref>
<ref id="B51">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Marais</surname> <given-names>I.</given-names></name></person-group> (<year>2012</year>). <article-title>&#x0201C;Local dependence,&#x0201D;</article-title> in <source>Rasch Models in Health</source>, eds K. B. Christensen, S. Kreiner, and M. Mesbah (Hoboken, NJ: John Wiley &#x00026; Sons, Ltd.), <fpage>111</fpage>&#x02013;<lpage>130</lpage>. <pub-id pub-id-type="doi">10.1002/9781118574454.ch7</pub-id></citation>
</ref>
<ref id="B52">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Montuori</surname> <given-names>C.</given-names></name> <name><surname>Ronconi</surname> <given-names>L.</given-names></name> <name><surname>Vardanega</surname> <given-names>T.</given-names></name> <name><surname>Arf&#x000E9;</surname> <given-names>B.</given-names></name></person-group> (<year>2022</year>). <article-title>Exploring gender differences in coding at the beginning of primary school</article-title>. <source>Front. Psychol.</source> <volume>13</volume>:<fpage>887280</fpage>. <pub-id pub-id-type="doi">10.3389/fpsyg.2022.887280</pub-id><pub-id pub-id-type="pmid">36211854</pub-id></citation></ref>
<ref id="B53">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Moreno-Le&#x000F3;n</surname> <given-names>J.</given-names></name> <name><surname>Robles</surname> <given-names>G.</given-names></name></person-group> (<year>2015</year>). <article-title>&#x0201C;Dr. Scratch: a web tool to automatically evaluate Scratch projects,&#x0201D;</article-title> in <source>Proceedings of the Workshop in Primary and Secondary Computing Education, WiPSCE &#x00027;15</source> (<publisher-loc>New York, NY</publisher-loc>: <publisher-name>ACM</publisher-name>), <fpage>132</fpage>&#x02013;<lpage>133</lpage>. <pub-id pub-id-type="doi">10.1145/2818314.2818338</pub-id></citation>
</ref>
<ref id="B54">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Nasir</surname> <given-names>J.</given-names></name> <name><surname>Kothiyal</surname> <given-names>A.</given-names></name> <name><surname>Bruno</surname> <given-names>B.</given-names></name> <name><surname>Dillenbourg</surname> <given-names>P.</given-names></name></person-group> (<year>2021</year>). <article-title>Many are the ways to learn: identifying multi-modal behavioral profiles of collaborative learning in constructivist activities</article-title>. <source>Int. J. Comput.-Support. Collab. Learn.</source> <volume>16</volume>, <fpage>485</fpage>&#x02013;<lpage>523</lpage>. <pub-id pub-id-type="doi">10.1007/s11412-022-09368-8</pub-id></citation>
</ref>
<ref id="B55">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ockey</surname> <given-names>G. J.</given-names></name> <name><surname>Choi</surname> <given-names>I.</given-names></name></person-group> (<year>2015</year>). <article-title>Structural equation modeling reporting practices for language assessment</article-title>. <source>Lang. Assess. Quart. Int. J.</source> <volume>12</volume>, <fpage>305</fpage>&#x02013;<lpage>319</lpage>. <pub-id pub-id-type="doi">10.1080/15434303.2015.1050101</pub-id></citation>
</ref>
<ref id="B56">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Parker</surname> <given-names>M.</given-names></name> <name><surname>Kao</surname> <given-names>Y.</given-names></name> <name><surname>Saito-Stehberger</surname> <given-names>D.</given-names></name> <name><surname>Franklin</surname> <given-names>D.</given-names></name> <name><surname>Krause</surname> <given-names>S.</given-names></name> <name><surname>Richardson</surname> <given-names>D.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>&#x0201C;Development and preliminary validation of the assessment of computing for elementary students (ACES),&#x0201D;</article-title> in <source>SIGCSE &#x00027;21: Proceedings of the 52nd ACM Technical Symposium on Computer Science Education</source> (<publisher-loc>Virtual Event</publisher-loc>), <fpage>10</fpage>&#x02013;<lpage>16</lpage>. <pub-id pub-id-type="doi">10.1145/3408877.3432376</pub-id></citation>
</ref>
<ref id="B57">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Piatti</surname> <given-names>A.</given-names></name> <name><surname>Adorni</surname> <given-names>G.</given-names></name> <name><surname>El-Hamamsy</surname> <given-names>L.</given-names></name> <name><surname>Negrini</surname> <given-names>L.</given-names></name> <name><surname>Assaf</surname> <given-names>D.</given-names></name> <name><surname>Gambardella</surname> <given-names>L.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>The CT-cube: a framework for the design and the assessment of computational thinking activities</article-title>. <source>Comput. Hum. Behav. Rep.</source> <volume>5</volume>:<fpage>100166</fpage>. <pub-id pub-id-type="doi">10.1016/j.chbr.2021.100166</pub-id></citation>
</ref>
<ref id="B58">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Prudon</surname> <given-names>P.</given-names></name></person-group> (<year>2015</year>). <article-title>Confirmatory factor analysis as a tool in research using questionnaires: a critique</article-title>. <source>Comprehens. Psychol.</source> <volume>4</volume>:<fpage>03.CP.4.10</fpage>. <pub-id pub-id-type="doi">10.2466/03.CP.4.10</pub-id></citation>
</ref>
<ref id="B59">
<citation citation-type="journal"><person-group person-group-type="author"><collab>R Core Team</collab></person-group> (<year>2019</year>). <source>R: A Language and Environment for Statistical Computing</source>. <publisher-loc>Vienna</publisher-loc>: <publisher-name>R Foundation for Statistical Computing</publisher-name>.</citation>
</ref>
<ref id="B60">
<citation citation-type="thesis"><person-group person-group-type="author"><name><surname>Rajlic</surname> <given-names>G.</given-names></name></person-group> (<year>2019</year>). <source>Violations of Unidimensionality and Local Independence in Measures Intended as Unidimensional: Assessing Levels of Violations and the Accuracy in Unidimensional IRT Model Estimates</source>. Ph.D. thesis, University of British Columbia.</citation>
</ref>
<ref id="B61">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Relkin</surname> <given-names>E.</given-names></name> <name><surname>Bers</surname> <given-names>M.</given-names></name></person-group> (<year>2021</year>). <article-title>&#x0201C;TechCheck-K: a measure of computational thinking for kindergarten children,&#x0201D;</article-title> in <source>2021 IEEE Global Engineering Education Conference (EDUCON)</source> (<publisher-loc>Vienna</publisher-loc>), <fpage>1696</fpage>&#x02013;<lpage>1702</lpage>. <pub-id pub-id-type="doi">10.1109/EDUCON46332.2021.9453926</pub-id></citation></ref>
<ref id="B62">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Relkin</surname> <given-names>E.</given-names></name> <name><surname>de Ruiter</surname> <given-names>L.</given-names></name> <name><surname>Bers</surname> <given-names>M. U.</given-names></name></person-group> (<year>2020</year>). <article-title>TechCheck: development and validation of an unplugged assessment of computational thinking in early childhood education</article-title>. <source>J. Sci. Educ. Technol.</source> <volume>29</volume>, <fpage>482</fpage>&#x02013;<lpage>498</lpage>. <pub-id pub-id-type="doi">10.1007/s10956-020-09831-x</pub-id></citation>
</ref>
<ref id="B63">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Revelle</surname> <given-names>W.</given-names></name></person-group> (<year>2021</year>). <source>psych: Procedures for Psychological, Psychometric, and Personality Research</source>. <publisher-loc>Evanston, IL</publisher-loc>: <publisher-name>Northwestern University</publisher-name>.</citation>
</ref>
<ref id="B64">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rom&#x000E1;n-Gonz&#x000E1;lez</surname> <given-names>M.</given-names></name> <name><surname>Moreno-Le&#x000F3;n</surname> <given-names>J.</given-names></name> <name><surname>Robles</surname> <given-names>G.</given-names></name></person-group> (<year>2019</year>). <article-title>&#x0201C;Combining assessment tools for a comprehensive evaluation of computational thinking interventions,&#x0201D;</article-title> in <source>Computational Thinking Education</source>, eds S.-C. Kong and H. Abelson (<publisher-loc>Singapore</publisher-loc>: <publisher-name>Springer</publisher-name>), <fpage>79</fpage>&#x02013;<lpage>98</lpage>. <pub-id pub-id-type="doi">10.1007/978-981-13-6528-7_6</pub-id></citation>
</ref>
<ref id="B65">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rom&#x000E1;n-Gonz&#x000E1;lez</surname> <given-names>M.</given-names></name> <name><surname>P&#x000E9;rez-Gonz&#x000E1;lez</surname> <given-names>J.-C.</given-names></name> <name><surname>Jim&#x000E9;nez-Fern&#x000E1;ndez</surname> <given-names>C.</given-names></name></person-group> (<year>2017</year>). <article-title>Which cognitive abilities underlie computational thinking? Criterion validity of the computational thinking test</article-title>. <source>Comput. Hum. Behav.</source> <volume>72</volume>, <fpage>678</fpage>&#x02013;<lpage>691</lpage>. <pub-id pub-id-type="doi">10.1016/j.chb.2016.08.047</pub-id></citation>
</ref>
<ref id="B66">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rom&#x000E1;n-Gonz&#x000E1;lez</surname> <given-names>M.</given-names></name> <name><surname>P&#x000E9;rez-Gonz&#x000E1;lez</surname> <given-names>J.-C.</given-names></name> <name><surname>Moreno-Le&#x000F3;n</surname> <given-names>J.</given-names></name> <name><surname>Robles</surname> <given-names>G.</given-names></name></person-group> (<year>2018</year>). <article-title>Can computational talent be detected? Predictive validity of the computational thinking test</article-title>. <source>Int. J. Child Comput. Interact.</source> <volume>18</volume>, <fpage>47</fpage>&#x02013;<lpage>58</lpage>. <pub-id pub-id-type="doi">10.1016/j.ijcci.2018.06.004</pub-id></citation>
</ref>
<ref id="B67">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rosseel</surname> <given-names>Y.</given-names></name></person-group> (<year>2012</year>). <article-title>lavaan: an R package for structural equation modeling</article-title>. <source>J. Stat. Softw.</source> <volume>48</volume>, <fpage>1</fpage>&#x02013;<lpage>36</lpage>. <pub-id pub-id-type="doi">10.18637/jss.v048.i02</pub-id><pub-id pub-id-type="pmid">25601849</pub-id></citation></ref>
<ref id="B68">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rosseel</surname> <given-names>Y.</given-names></name></person-group> (<year>2020</year>). <source>Structural Equation Modeling with lavaan [PowerPoint Slides].</source> Department of Data Analysis, Ghent University.</citation>
</ref>
<ref id="B69">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rowe</surname> <given-names>E.</given-names></name> <name><surname>Almeda</surname> <given-names>M. V.</given-names></name> <name><surname>Asbell-Clarke</surname> <given-names>J.</given-names></name> <name><surname>Scruggs</surname> <given-names>R.</given-names></name> <name><surname>Baker</surname> <given-names>R.</given-names></name> <name><surname>Bardar</surname> <given-names>E.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>Assessing implicit computational thinking in Zoombinis puzzle gameplay</article-title>. <source>Comput. Hum. Behav.</source> <volume>120</volume>:<fpage>106707</fpage>. <pub-id pub-id-type="doi">10.1016/j.chb.2021.106707</pub-id></citation>
</ref>
<ref id="B70">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sahin</surname> <given-names>A.</given-names></name> <name><surname>Anil</surname> <given-names>D.</given-names></name></person-group> (<year>2017</year>). <article-title>The effects of test length and sample size on item parameters in item response theory</article-title>. <source>Educ. Sci. Theory Pract.</source> <volume>17</volume>, <fpage>321</fpage>&#x02013;<lpage>335</lpage>. <pub-id pub-id-type="doi">10.12738/estp.2017.1.0270</pub-id></citation>
</ref>
<ref id="B71">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Schweizer</surname> <given-names>K.</given-names></name> <name><surname>Ren</surname> <given-names>X.</given-names></name> <name><surname>Wang</surname> <given-names>T.</given-names></name></person-group> (<year>2015</year>). <article-title>&#x0201C;A comparison of confirmatory factor analysis of binary data on the basis of tetrachoric correlations and of probability-based covariances: a simulation study,&#x0201D;</article-title> in <source>Quantitative Psychology Research, Springer Proceedings in Mathematics &#x00026; Statistics</source>, eds R. E. Millsap, D. M. Bolt, L. A. van der Ark, and W.-C. Wang (Cham: Springer International Publishing), <fpage>273</fpage>&#x02013;<lpage>292</lpage>. <pub-id pub-id-type="doi">10.1007/978-3-319-07503-7_17</pub-id></citation>
</ref>
<ref id="B72">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Selby</surname> <given-names>C.</given-names></name> <name><surname>Woollard</surname> <given-names>J.</given-names></name></person-group> (<year>2013</year>). <article-title>&#x0201C;Computational thinking: the developing definition,&#x0201D;</article-title> in <source>Special Interest Group on Computer Science Education (SIGCSE) 2014</source> (<publisher-loc>Atlanta, GA</publisher-loc>).</citation>
</ref>
<ref id="B73">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Taherdoost</surname> <given-names>H.</given-names></name></person-group> (<year>2016</year>). <article-title>Validity and reliability of the research instrument; how to test the validation of a questionnaire/survey in a research</article-title>. <source>SSRN Electr. J.</source> <volume>5</volume>, <fpage>28</fpage>&#x02013;<lpage>36</lpage>. <pub-id pub-id-type="doi">10.2139/ssrn.3205040</pub-id></citation>
</ref>
<ref id="B74">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Tang</surname> <given-names>X.</given-names></name> <name><surname>Yin</surname> <given-names>Y.</given-names></name> <name><surname>Lin</surname> <given-names>Q.</given-names></name> <name><surname>Hadad</surname> <given-names>R.</given-names></name> <name><surname>Zhai</surname> <given-names>X.</given-names></name></person-group> (<year>2020</year>). <article-title>Assessing computational thinking: a systematic review of empirical studies</article-title>. <source>Comput. Educ.</source> <volume>148</volume>:<fpage>103798</fpage>. <pub-id pub-id-type="doi">10.1016/j.compedu.2019.103798</pub-id><pub-id pub-id-type="pmid">35121855</pub-id></citation></ref>
<ref id="B75">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Tikva</surname> <given-names>C.</given-names></name> <name><surname>Tambouris</surname> <given-names>E.</given-names></name></person-group> (<year>2021</year>). <article-title>Mapping computational thinking through programming in K-12 education: a conceptual model based on a systematic literature review</article-title>. <source>Comput. Educ.</source> <volume>162</volume>:<fpage>104083</fpage>. <pub-id pub-id-type="doi">10.1016/j.compedu.2020.104083</pub-id></citation>
</ref>
<ref id="B76">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Tsarava</surname> <given-names>K.</given-names></name> <name><surname>Moeller</surname> <given-names>K.</given-names></name> <name><surname>Rom&#x000E1;n-Gonz&#x000E1;lez</surname> <given-names>M.</given-names></name> <name><surname>Golle</surname> <given-names>J.</given-names></name> <name><surname>Leifheit</surname> <given-names>L.</given-names></name> <name><surname>Butz</surname> <given-names>M. V.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>A cognitive definition of computational thinking in primary education</article-title>. <source>Comput. Educ.</source> <volume>179</volume>:<fpage>104425</fpage>. <pub-id pub-id-type="doi">10.1016/j.compedu.2021.104425</pub-id></citation>
</ref>
<ref id="B77">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Varma</surname> <given-names>S.</given-names></name></person-group> (<year>2006</year>). <source>Preliminary Item Statistics Using Point-Biserial Correlation and p-Values</source>. <publisher-loc>Morgan Hill, CA</publisher-loc>: <publisher-name>Educational Data Systems Inc</publisher-name>.</citation>
</ref>
<ref id="B78">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Vincent</surname> <given-names>W.</given-names></name> <name><surname>Shanmugam</surname> <given-names>S. K. S.</given-names></name></person-group> (<year>2020</year>). <article-title>The role of classical test theory to determine the quality of classroom teaching test items</article-title>. <source>Pedagog. J. Pendid.</source> <volume>9</volume>, <fpage>5</fpage>&#x02013;<lpage>34</lpage>. <pub-id pub-id-type="doi">10.21070/pedagogia.v9i1.123</pub-id></citation>
</ref>
<ref id="B79">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wainer</surname> <given-names>H.</given-names></name> <name><surname>Thissen</surname> <given-names>D.</given-names></name></person-group> (<year>2001</year>). <article-title>&#x0201C;True score theory: The traditional method,&#x0201D;</article-title> in <source>Test Scoring</source>, eds D. Thissen and H. Wainer (Mahwah, NJ: Lawrence Erlbaum Associates), <fpage>35</fpage>&#x02013;<lpage>84</lpage>.</citation>
</ref>
<ref id="B80">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Weintrop</surname> <given-names>D.</given-names></name></person-group> (<year>2016</year>). <article-title>Defining computational thinking for mathematics and science classrooms</article-title>. <source>J. Sci. Educ. Technol.</source> <volume>25</volume>, <fpage>127</fpage>&#x02013;<lpage>147</lpage>. <pub-id pub-id-type="doi">10.1007/s10956-015-9581-5</pub-id></citation>
</ref>
<ref id="B81">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Weintrop</surname> <given-names>D.</given-names></name> <name><surname>Rutstein</surname> <given-names>D.</given-names></name> <name><surname>Bienkowski</surname> <given-names>M.</given-names></name> <name><surname>McGee</surname> <given-names>S.</given-names></name></person-group> (<year>2021a</year>). <article-title>&#x0201C;Assessment of computational thinking,&#x0201D;</article-title> in <source>Computational Thinking in Education, 1st Edn.</source> (<publisher-loc>New York, NY</publisher-loc>: <publisher-name>Routledge</publisher-name>), <fpage>90</fpage>&#x02013;<lpage>111</lpage>. <pub-id pub-id-type="doi">10.4324/9781003102991-6</pub-id></citation>
</ref>
<ref id="B82">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Weintrop</surname> <given-names>D.</given-names></name> <name><surname>Wise Rutstein</surname> <given-names>D.</given-names></name> <name><surname>Bienkowski</surname> <given-names>M.</given-names></name> <name><surname>McGee</surname> <given-names>S.</given-names></name></person-group> (<year>2021b</year>). <article-title>Assessing computational thinking: an overview of the field</article-title>. <source>Comput. Sci. Educ.</source> <volume>31</volume>, <fpage>113</fpage>&#x02013;<lpage>116</lpage>. <pub-id pub-id-type="doi">10.1080/08993408.2021.1918380</pub-id><pub-id pub-id-type="pmid">33016456</pub-id></citation></ref>
<ref id="B83">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Werner</surname> <given-names>L.</given-names></name> <name><surname>Denner</surname> <given-names>J.</given-names></name> <name><surname>Campe</surname> <given-names>S.</given-names></name> <name><surname>Kawamoto</surname> <given-names>D. C.</given-names></name></person-group> (<year>2012</year>). <article-title>&#x0201C;The fairy performance assessment: measuring computational thinking in middle school,&#x0201D;</article-title> in <source>SIGCSE &#x00027;12: The 43rd ACM Technical Symposium on Computer Science Education</source> (<publisher-loc>Raleigh, NC</publisher-loc>), <fpage>215</fpage>&#x02013;<lpage>220</lpage>. <pub-id pub-id-type="doi">10.1145/2157136.2157200</pub-id></citation>
</ref>
<ref id="B84">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wheaton</surname> <given-names>B.</given-names></name> <name><surname>Muthen</surname> <given-names>B.</given-names></name> <name><surname>Alwin</surname> <given-names>D. F.</given-names></name> <name><surname>Summers</surname> <given-names>G. F.</given-names></name></person-group> (<year>1977</year>). <article-title>Assessing reliability and stability in panel models</article-title>. <source>Sociol. Methodol.</source> <volume>8</volume>, <fpage>84</fpage>&#x02013;<lpage>136</lpage>. <pub-id pub-id-type="doi">10.2307/270754</pub-id></citation>
</ref>
<ref id="B85">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Willse</surname> <given-names>J. T.</given-names></name></person-group> (<year>2018</year>). <source>CTT: Classical Test Theory Functions</source>. R Package Version 2.3.3.</citation>
</ref>
<ref id="B86">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wing</surname> <given-names>J. M.</given-names></name></person-group> (<year>2006</year>). <article-title>Computational thinking</article-title>. <source>Commun. ACM</source> <volume>49</volume>, <fpage>33</fpage>&#x02013;<lpage>35</lpage>. <pub-id pub-id-type="doi">10.1145/1118178.1118215</pub-id></citation>
</ref>
<ref id="B87">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Xia</surname> <given-names>Y.</given-names></name> <name><surname>Yang</surname> <given-names>Y.</given-names></name></person-group> (<year>2019</year>). <article-title>RMSEA, CFI, and TLI in structural equation modeling with ordered categorical data: the story they tell depends on the estimation methods</article-title>. <source>Behav. Res. Methods</source> <volume>51</volume>, <fpage>409</fpage>&#x02013;<lpage>428</lpage>. <pub-id pub-id-type="doi">10.3758/s13428-018-1055-2</pub-id><pub-id pub-id-type="pmid">29869222</pub-id></citation></ref>
<ref id="B88">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Xie</surname> <given-names>B.</given-names></name> <name><surname>Davidson</surname> <given-names>M. J.</given-names></name> <name><surname>Li</surname> <given-names>M.</given-names></name> <name><surname>Ko</surname> <given-names>A. J.</given-names></name></person-group> (<year>2019</year>). <article-title>&#x0201C;An item response theory evaluation of a language-independent CS1 knowledge assessment,&#x0201D;</article-title> in <source>Proceedings of the 50th ACM Technical Symposium on Computer Science Education</source> (<publisher-loc>Minneapolis, MN</publisher-loc>: <publisher-name>ACM</publisher-name>), <fpage>699</fpage>&#x02013;<lpage>705</lpage>. <pub-id pub-id-type="doi">10.1145/3287324.3287370</pub-id></citation>
</ref>
<ref id="B89">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yagci</surname> <given-names>M.</given-names></name></person-group> (<year>2019</year>). <article-title>A valid and reliable tool for examining computational thinking skills</article-title>. <source>Educ. Inf. Technol.</source> <volume>24</volume>, <fpage>929</fpage>&#x02013;<lpage>951</lpage>. <pub-id pub-id-type="doi">10.1007/s10639-018-9801-8</pub-id></citation>
</ref>
<ref id="B90">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yen</surname> <given-names>W. M.</given-names></name></person-group> (<year>1984</year>). <article-title>Effects of local item dependence on the fit and equating performance of the three-parameter logistic model</article-title>. <source>Appl. Psychol. Meas.</source> <volume>8</volume>, <fpage>125</fpage>&#x02013;<lpage>145</lpage>. <pub-id pub-id-type="doi">10.1177/014662168400800201</pub-id></citation>
</ref>
<ref id="B91">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zapata-C&#x000E1;ceres</surname> <given-names>M.</given-names></name> <name><surname>Fanchamps</surname> <given-names>N.</given-names></name></person-group> (<year>2021</year>). <article-title>&#x0201C;Using the beginners computational thinking test to measure development on computational concepts among preschoolers,&#x0201D;</article-title> in <source>Proceedings of the 5th APSCE International Computational Thinking and STEM in Education Conference 2021</source> (<publisher-loc>Delft</publisher-loc>: <publisher-name>Asia-Pacific Society for Computers in Education</publisher-name>), <fpage>32</fpage>&#x02013;<lpage>37</lpage>.</citation>
</ref>
<ref id="B92">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zapata-C&#x000E1;ceres</surname> <given-names>M.</given-names></name> <name><surname>Mart&#x000ED;n-Barroso</surname> <given-names>E.</given-names></name></person-group> (<year>2021</year>). <article-title>Applying game learning analytics to a voluntary video game: intrinsic motivation, persistence, and rewards in learning to program at an early age</article-title>. <source>IEEE Access</source> <volume>9</volume>, <fpage>123588</fpage>&#x02013;<lpage>123602</lpage>. <pub-id pub-id-type="doi">10.1109/ACCESS.2021.3110475</pub-id></citation>
</ref>
<ref id="B93">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zapata-C&#x000E1;ceres</surname> <given-names>M.</given-names></name> <name><surname>Mart&#x000ED;n-Barroso</surname> <given-names>E.</given-names></name> <name><surname>Rom&#x000E1;n-Gonz&#x000E1;lez</surname> <given-names>M.</given-names></name></person-group> (<year>2020</year>). <article-title>&#x0201C;Computational thinking test for beginners: design and content validation,&#x0201D;</article-title> in <source>2020 IEEE Global Engineering Education Conference (EDUCON)</source> (<publisher-loc>Porto</publisher-loc>), <fpage>1905</fpage>&#x02013;<lpage>1914</lpage>. <pub-id pub-id-type="doi">10.1109/EDUCON45650.2020.9125368</pub-id></citation></ref>
</ref-list>


</back>
</article> 