<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Archiving and Interchange DTD v2.3 20070202//EN" "archivearticle.dtd">
<article xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="methods-article">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Psychol.</journal-id>
<journal-title>Frontiers in Psychology</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Psychol.</abbrev-journal-title>
<issn pub-type="epub">1664-1078</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fpsyg.2025.1506320</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Psychology</subject>
<subj-group>
<subject>Methods</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>A multidimensional Bayesian IRT method for discovering misconceptions from concept test data</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name><surname>Segado</surname> <given-names>Martin</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x0002A;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/2858796/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Adair</surname> <given-names>Aaron</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/2860392/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Stewart</surname> <given-names>John</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/1331164/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Ma</surname> <given-names>Yunfei</given-names></name>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Drury</surname> <given-names>Byron</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Pritchard</surname> <given-names>David</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/2318739/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
</contrib-group>
<aff id="aff1"><sup>1</sup><institution>Department of Mechanical Engineering, Massachusetts Institute of Technology</institution>, <addr-line>Cambridge, MA</addr-line>, <country>United States</country></aff>
<aff id="aff2"><sup>2</sup><institution>Department of Physics, Massachusetts Institute of Technology</institution>, <addr-line>Cambridge, MA</addr-line>, <country>United States</country></aff>
<aff id="aff3"><sup>3</sup><institution>Department of Physics and Astronomy, West Virginia University</institution>, <addr-line>Morgantown, WV</addr-line>, <country>United States</country></aff>
<aff id="aff4"><sup>4</sup><institution>Department of Electrical Engineering and Computer Science, Massachusetts Institute of Technology</institution>, <addr-line>Cambridge, MA</addr-line>, <country>United States</country></aff>
<author-notes>
<fn fn-type="edited-by"><p>Edited by: Georgios Sideridis, Harvard Medical School, United States</p></fn>
<fn fn-type="edited-by"><p>Reviewed by: Jason Morphew, Purdue University, United States</p>
<p>Trevor Smith, Rowan University, United States</p></fn>
<corresp id="c001">&#x0002A;Correspondence: Martin Segado <email>msegado&#x00040;mit.edu</email></corresp>
</author-notes>
<pub-date pub-type="epub">
<day>29</day>
<month>01</month>
<year>2025</year>
</pub-date>
<pub-date pub-type="collection">
<year>2025</year>
</pub-date>
<volume>16</volume>
<elocation-id>1506320</elocation-id>
<history>
<date date-type="received">
<day>04</day>
<month>10</month>
<year>2024</year>
</date>
<date date-type="accepted">
<day>06</day>
<month>01</month>
<year>2025</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x000A9; 2025 Segado, Adair, Stewart, Ma, Drury and Pritchard.</copyright-statement>
<copyright-year>2025</copyright-year>
<copyright-holder>Segado, Adair, Stewart, Ma, Drury and Pritchard</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p></license>
</permissions>
<abstract>
<p>We present an exploratory method for discovering likely misconceptions from multiple-choice concept test data, as well as preliminary evidence that this method recovers known misconceptions from real student responses. Our procedure is based on a Bayesian implementation of the Multidimensional Nominal Categories IRT model (MNCM) combined with standard factor-analytic rotation methods; by analyzing student responses at the level of individual distractors rather than at the level of entire questions, this approach is able to highlight multiple likely misconceptions for subsequent investigation without requiring any manual labeling of test content. We explore the performance of the Bayesian MNCM on synthetic data and find that it is able to recover multidimensional item parameters consistently at achievable sample sizes. These studies demonstrate the method&#x00027;s robustness to overfitting and ability to perform automatic dimensionality assessment and selection. The method also compares favorably to existing IRT software implementing marginal maximum likelihood estimation which we use as a validation benchmark. We then apply our method to approximately 10,000 students&#x00027; responses to a research-designed concept test: the Force Concept Inventory. In addition to a broad first dimension strongly correlated with overall test score, we discover thirteen additional dimensions which load on smaller sets of distractors; we discuss two as examples, showing that these are consistent with already-known misconceptions in Newtonian mechanics. 
While work remains to validate our findings, our hope is that future applications of this method could aid in the refinement of existing concept inventories or the development of new ones, enable the discovery of previously-unknown student misconceptions across a variety of disciplines, and&#x02014;by leveraging the method&#x00027;s ability to quantify the prevalence of particular misconceptions&#x02014;provide opportunities for targeted instruction at both the individual and classroom level.</p></abstract>
<kwd-group>
<kwd>item response theory</kwd>
<kwd>student misconceptions</kwd>
<kwd>multiple-choice questions</kwd>
<kwd>distractor analysis</kwd>
<kwd>multidimensional nominal categories model</kwd>
<kwd>mean-field variational inference</kwd>
<kwd>hierarchical priors</kwd>
</kwd-group>
<counts>
<fig-count count="4"/>
<table-count count="3"/>
<equation-count count="13"/>
<ref-count count="30"/>
<page-count count="15"/>
<word-count count="11838"/>
</counts>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Quantitative Psychology and Measurement</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="s1">
<title>1 Introduction</title>
<p>Research-designed multiple-choice concept tests commonly have wrong answer choices (&#x0201C;distractors&#x0201D;) that reflect typical incorrect student responses; these are often discerned from research or by first administering items in open-response format and identifying commonalities in the answers. Earlier work with P&#x000E9;rez-Lemonche et al. (<xref ref-type="bibr" rid="B18">2019</xref>) showed that even students whose raw score was below chance strongly favored particular incorrect responses on such tests, implying that systematic mental processes rather than random guessing underlie the selection of distractors. Various theories of these mental processes have been proposed, including knowledge in fragments (diSessa, <xref ref-type="bibr" rid="B10">1993</xref>), ontological categories (Chi and Slotta, <xref ref-type="bibr" rid="B9">1993</xref>), mixed ontological categories or models (Adair, <xref ref-type="bibr" rid="B1">2013</xref>), dual process theories (Gette et al., <xref ref-type="bibr" rid="B11">2018</xref>), misconceptions (a better definition is &#x0201C;an alternative hypothesis [to the current paradigm]&#x0201D;), and specific misunderstandings (e.g., &#x0201C;cannot interpret graphs&#x0201D; or inconsistent errors in applying Newton&#x00027;s 3rd law). The present work is based on the misconception/misunderstanding viewpoint, wherein common wrong answers&#x02014;and the resulting research-determined distractors&#x02014;often result from and thus encode common student misconceptions or misunderstandings.</p>
<p>When a research-designed test is administered, student misconceptions (whether already known to the researchers or not) manifest as an increased likelihood of a student co-selecting sets of distractors consistent with their (mis)understandings of the domain. The misconceptions may present in differing degrees for different students, with stronger misconceptions leading students to endorse a higher fraction of those distractors consistent with their incorrect belief. Notably, distractors in any one item may reflect distinct misconceptions, either alone or in combination&#x02014;in a physics question involving a skydiver, for example, one distractor might reflect a common misunderstanding about acceleration, another might encode an incorrect mental model of air resistance, and a third might combine elements of both. That is, <italic>misconceptions are encoded at the level of individual response categories, and less at the level of whole items</italic>. Thus discovering them requires an analysis capable of capturing multidimensionality within categories, not only within items.</p>
<p>In this work, we present an exploratory analysis procedure for discovering the types of misconceptions discussed above. Our methods are most appropriate for research-designed concept tests with distractors based on common wrong answers, but they may also prove suitable for examining multiple-choice assessments developed using other research-based approaches (such as think-aloud protocols). Longitudinal studies using such instruments may yield insights about how student reasoning manifests and develops over time <italic>in situ</italic>, providing clues about how various mental models emerge (Brown, <xref ref-type="bibr" rid="B6">2014</xref>) and ultimately about how people transition from novices to experts (Burkholder et al., <xref ref-type="bibr" rid="B7">2020</xref>).</p>
<p>We base our approach on a flexible multidimensional IRT model for multiple-choice data known as the Multidimensional Nominal Categories Model (MNCM, discussed in more detail in Section 3). In contrast with unidimensional IRT&#x02014;which ranks testees on a single monotonic scale corresponding to ability or some other psychological trait of interest&#x02014;multidimensional models such as the MNCM rank testees along <italic>several</italic> distinct dimensions, each capturing different aspects of the interaction between the latent mindsets of the testees and the constructs of the test. A combination of parameter constraints and standard factor-analytic rotation methods then aids in finding a representation of these dimensions that allows insightful interpretation. This approach is, in essence, a form of item factor analysis (Bock et al., <xref ref-type="bibr" rid="B5">1988</xref>), though it is perhaps better understood as &#x0201C;category factor analysis&#x0201D; given the flexibility of the MNCM to capture within-category multidimensionality.</p>
<p>While our methodological choices were made with an eye to identifying student misconceptions from concept-test data, we note that the method itself is agnostic to the meaning of any traits it discovers. Some of these may indeed be misconceptions&#x02014;and we have seen preliminary evidence that many are&#x02014;but others might represent misunderstandings of the questions themselves or even factors outside the intended scope of the test (such as graphical literacy on a test of Newtonian physics). Ultimately, any discovered traits will require interpretation and eventually validation, and in this sense we view the method as exploratory and complementary to other modes of research (on misconceptions or otherwise).</p>
<p>This paper begins with a brief introduction to a classic IRT model for multiple-choice questions&#x02014;the Nominal Categories Model (Bock, <xref ref-type="bibr" rid="B4">1972</xref>)&#x02014;followed by the MNCM which can be understood as one of its most general multidimensional extensions. We then present a Bayesian implementation of the MNCM based on a variational inference approach with hierarchical priors, which we find to be robust to small sample sizes while not requiring careful tuning of item prior widths to match the dataset. Using simulated data, we validate this implementation in its unidimensional limit against existing open-source software implementing the marginal maximum likelihood method for nominal responses. We also study how the number and quality of recoverable dimensions depends on the sample size and the strengths of the item-testee interactions in each dimension, and discuss an emergent dimensionality self-selection property of the method.</p>
<p>We then present some preliminary results from the application of this procedure to &#x0007E;10,000 students&#x00027; responses to the Force Concept Inventory (FCI), the original research-designed multiple-choice concept test in STEM (Hestenes et al., <xref ref-type="bibr" rid="B14">1992</xref>). The Bayesian MNCM method extracts 14 dimensions from these data, and we choose an exploratory bi-factor rotation method (which promotes sparse loadings in all but one dimension) to yield interpretable results. In addition to identifying a prominent general dimension highly correlated with the raw test score, we find that some of the sparse dimensions are identifiable misconceptions familiar from the literature on student misconceptions in introductory Newtonian mechanics. Two illustrative examples are discussed in this work, with a more comprehensive analysis reserved for a forthcoming paper (in preparation). While additional research is needed to establish the broader validity of these findings, they nevertheless serve as a promising indicator of the value of our method.</p>
<p>We conclude by stressing the usefulness of methods such as ours for both formative assessment and research. Although the techniques of our approach are solidly within the purview of IRT, our application of these to study details of not-knowing within a domain is relatively novel in a discipline traditionally devoted to measuring or certifying knowing within a domain. Future iterations of these methods could provide guidance to teachers (by informing them of the particularly severe misconceptions of their students) or help researchers design and improve other assessments, especially where student misconceptions are not so well studied as in mechanics. Our work invites further exploration of the similarities of misconceptions across different universities or skill levels of the students and application to pre- and post-test data to reveal the effects of instruction and the changes in student thinking it might catalyze.</p></sec>
<sec id="s2">
<title>2 The nominal categories model</title>
<p>On a multiple-choice test, each question (&#x0201C;item&#x0201D; in IRT parlance) contains a limited set of possible response alternatives (&#x0201C;categories&#x0201D;), of which testees may choose only one. In some cases, these categories have an inherent ordering; an assessment of anxiety symptoms may use categories ranging from &#x0201C;never describes me&#x0201D; to &#x0201C;always describes me,&#x0201D; with various gradations in between. However, many multiple-choice tests contain items in which categories are qualitative with no inherent ordering, or in which an ordering may not be known <italic>a priori</italic>. Responses to such items only encode <italic>which</italic> category was selected&#x02014;without any associated ranking or quantitative value&#x02014;and are referred to as nominal (in contrast to ordinal) responses.</p>
<p>While nominal multiple-choice questions are sometimes graded as simply &#x0201C;correct&#x0201D; or &#x0201C;incorrect&#x0201D; in an educational context, doing so discards information about the items and testees conveyed by the specific distractors selected. This is true even in a unidimensional case where our concern is ranking students along a single ability scale: even here, some distractors may be &#x0201C;more wrong&#x0201D; than others.</p>
<p>An attractive alternative to dichotomous grading is to use a model specifically intended for nominal data. Perhaps the best known of these is the Nominal Categories Model (NCM; Bock, <xref ref-type="bibr" rid="B4">1972</xref>), also often called the Nominal Response Model in IRT literature and software. The NCM assigns each student a &#x0201C;response tendency&#x0201D; for each category in an item, with the probability of selecting a particular response category related to these by the multinomial logistic function (the exponent of the tendency divided by the sum of the exponents of the tendencies of all categories in the item). Mathematically, the probability that student <italic>s</italic> will select category <italic>c</italic> as their response <inline-formula><mml:math id="M1"><mml:msubsup><mml:mrow><mml:mi>r</mml:mi></mml:mrow><mml:mrow><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:math></inline-formula> to item <italic>i</italic> is</p>
<disp-formula id="E1"><label>(1)</label><mml:math id="M2"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>p</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msubsup><mml:mrow><mml:mi>r</mml:mi></mml:mrow><mml:mrow><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:mi>c</mml:mi><mml:mo>&#x02223;</mml:mo><mml:msubsup><mml:mrow><mml:mstyle mathvariant="bold-italic"><mml:mi>t</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mo class="qopname">exp</mml:mo><mml:msubsup><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>c</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:mrow><mml:mrow><mml:mstyle displaystyle="true"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:msup><mml:mrow><mml:mi>c</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x02032;</mml:mi></mml:mrow></mml:msup><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>C</mml:mi></mml:mrow></mml:munderover></mml:mstyle><mml:mo class="qopname">exp</mml:mo><mml:msubsup><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:msup><mml:mrow><mml:mi>c</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x02032;</mml:mi></mml:mrow></mml:msup></mml:mrow><mml:mo 
stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:mrow></mml:mfrac><mml:mo>,</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>where <inline-formula><mml:math id="M3"><mml:msubsup><mml:mrow><mml:mstyle mathvariant="bold-italic"><mml:mi>t</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:mrow><mml:mo stretchy="false">[</mml:mo><mml:mrow><mml:msubsup><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:msubsup><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mn>2</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:mo>&#x02026;</mml:mo><mml:mo>,</mml:mo><mml:msubsup><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>C</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:mrow><mml:mo stretchy="false">]</mml:mo></mml:mrow></mml:math></inline-formula> is a vector of the aforementioned response tendencies for the student and item and <italic>C</italic> is the number of categories. For the NCM, these tendencies are given by linear functions of some latent student ability <italic>&#x003B8;</italic><sub><italic>s</italic></sub>,</p>
<disp-formula id="E2"><label>(2)</label><mml:math id="M4"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msubsup><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>c</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:msub><mml:mrow><mml:mi>&#x003B8;</mml:mi></mml:mrow><mml:mrow><mml:mi>s</mml:mi></mml:mrow></mml:msub><mml:msup><mml:mrow><mml:mi>a</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>c</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msup><mml:mo>&#x0002B;</mml:mo><mml:msup><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>c</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msup><mml:mo>,</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>where <italic>a</italic><sup>(<italic>i,c</italic>)</sup> is a slope parameter for category <italic>c</italic> of item <italic>i</italic>, and <italic>b</italic><sup>(<italic>i,c</italic>)</sup> is an intercept parameter for the same.<xref ref-type="fn" rid="fn0001"><sup>1</sup></xref></p>
<p>In addition to being substantially more flexible than a dichotomous model, the NCM also has a plausible psychological interpretation which makes it well suited to our use case: it may be understood as approximating a comparative choice process in which each student assigns some (unobserved) preferences to all response categories in an item, then chooses the category for which their preference is greatest.<xref ref-type="fn" rid="fn0002"><sup>2</sup></xref> An excellent exposition of this topic is presented by Thissen et al. (<xref ref-type="bibr" rid="B26">2010</xref>, pp. 49&#x02013;50 and 66&#x02013;70). This understanding forms the basis for the theoretical explainability of the model parameters in <xref ref-type="disp-formula" rid="E2">Equation 2</xref>.</p>
<p>The latent ability parameter <italic>&#x003B8;</italic><sub><italic>s</italic></sub> has a straightforward meaning which matches that of dichotomous IRT: it measures a student&#x00027;s overall skill level in the test domain relative to that of other students. The slope parameters, <italic>a</italic><sup>(<italic>i,c</italic>)</sup>, require more care to explain due to the non-linear nature of <xref ref-type="disp-formula" rid="E1">Equation 1</xref>, especially since the tendencies for <italic>all</italic> categories in an item are present in the denominator of the response probability (a form of normalization which is necessary to ensure that the probabilities sum to one across all possible responses). Despite the apparent similarity to a dichotomous two-parameter logistic (2PL) model in slope-intercept form, the coupling between category probabilities introduced by normalization means that the <italic>a</italic><sup>(<italic>i,c</italic>)</sup> terms in the NCM cannot be thought of as discriminations. Rather, they provide a relative measure of the association between latent ability and each of the response categories, and as such serve to indicate the empirical ordering of the categories in an item.</p>
<p>The intercept parameters, <italic>b</italic><sup>(<italic>i,c</italic>)</sup>, may be understood as a measure of how inherently attractive each category is to a student with ability <italic>&#x003B8;</italic><sub><italic>s</italic></sub> &#x0003D; 0; this corresponds to an &#x0201C;average student&#x0201D; under the typical (albeit arbitrary) IRT convention of fixing the population mean of the abilities to zero. Even if this convention is assumed, though, we must stress that &#x02018;attractiveness to the average student&#x02019; is <italic>not</italic> the same as &#x02018;average attractiveness to students,&#x02019; and that these two are not even guaranteed to be monotonically related. Consequently, we view these terms as somewhat less suited to direct interpretation compared to <italic>&#x003B8;</italic><sub><italic>s</italic></sub> and <italic>a</italic><sup>(<italic>i,c</italic>)</sup>.</p>
<p>Nevertheless, the intercept parameters contribute essential flexibility to the model. For items with three or more categories, the combination of the normalization step in <xref ref-type="disp-formula" rid="E1">Equation 1</xref> and the per-category intercepts in <xref ref-type="disp-formula" rid="E2">Equation 2</xref> permits the NCM to model category response curves with intermediate maxima&#x02014;that is, those having a response probability which peaks at some finite <italic>&#x003B8;</italic> and decays to zero in both limits as <italic>&#x003B8;</italic> &#x02192; &#x000B1;&#x0221E;. Such curves frequently occur in real test data (P&#x000E9;rez-Lemonche et al., <xref ref-type="bibr" rid="B18">2019</xref>; Stewart et al., <xref ref-type="bibr" rid="B23">2021</xref>), and are produced by the NCM whenever <italic>a</italic><sup>(<italic>i,c</italic>)</sup> for some category <italic>c</italic> lies between <italic>a</italic><sup>(<italic>i</italic>,<italic>c</italic>&#x02032;)</sup> and <italic>a</italic><sup>(<italic>i</italic>,<italic>c</italic>&#x02033;)</sup>, where <italic>c</italic>&#x02032; and <italic>c</italic>&#x02033; are other categories in the same item. This is clearly not possible with dichotomous models such as the 2PL, in which all incorrect responses are lumped together and have a response probability which decreases monotonically with increasing ability.</p></sec>
<sec id="s3">
<title>3 The multidimensional nominal categories model</title>
<p>Since different distractors on research-validated instruments typically reflect different misconceptions, and since we want to interpret the different dimensions of the latent ability space in correspondence with different (mis)conceptions, we need a multidimensional model where different dimensions indicate different sets of wrong responses. Such an extension of IRT to <italic>D</italic> dimensions would allow each response category in an item to have a unique slope and intercept term for each dimension, thus allowing categories to have distinct directions in <italic>D</italic>-dimensional space. The tendencies in such a model would then become:</p>
<disp-formula id="E3"><label>(3)</label><mml:math id="M5"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msubsup><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>c</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:mstyle displaystyle="true"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>d</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>D</mml:mi></mml:mrow></mml:munderover></mml:mstyle><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:msubsup><mml:mrow><mml:mi>a</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>c</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:msubsup><mml:mrow><mml:mi>&#x003B8;</mml:mi></mml:mrow><mml:mrow><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>&#x0002B;</mml:mo><mml:msubsup><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>c</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:mrow><mml:mo>]</mml:mo></mml:mrow><mml:mo>.</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>Having unique intercepts for each dimension is redundant, however, as these combine into a single constant in the tendency expression. Denoting this as <italic>b</italic><sup>(<italic>i,c</italic>)</sup> (with no subscript) yields the typical form of the Multidimensional Nominal Categories Model (MNCM) first introduced by Takane and de Leeuw (<xref ref-type="bibr" rid="B24">1987</xref>) in which the tendency for student <italic>s</italic> to give response <italic>c</italic> to question <italic>i</italic> is</p>
<disp-formula id="E4"><label>(4)</label><mml:math id="M6"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msubsup><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>c</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:mstyle displaystyle="true"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>d</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>D</mml:mi></mml:mrow></mml:munderover></mml:mstyle><mml:msubsup><mml:mrow><mml:mi>&#x003B8;</mml:mi></mml:mrow><mml:mrow><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:msubsup><mml:mrow><mml:mi>a</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>c</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>&#x0002B;</mml:mo><mml:msup><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>c</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msup><mml:mo>.</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>Alternative parameterizations of the MNCM have also been proposed in more recent literature, most notably by Thissen et al. (<xref ref-type="bibr" rid="B26">2010</xref>). In place of the slope parameters, they use a product of a <italic>D</italic>-dimensional &#x0201C;overall discrimination&#x0201D; vector (which is shared across all categories in an item) with a set of <italic>C</italic> scalar &#x0201C;scoring function values&#x0201D; (shared across all dimensions) which dictate the relative ordering of the categories. Subsequent presentations also permit these scoring function values to be multidimensional (Thissen and Cai, <xref ref-type="bibr" rid="B25">2016</xref>). Such parameterizations have the advantage of providing an intuitive measure of overall item discrimination and direction, much like a multidimensional 2PL model. As our approach to identifying misconceptions relies on examining the relationships between individual distractors and each latent ability dimension, we will use the more traditional parameterization in <xref ref-type="disp-formula" rid="E4">Equation 4</xref> which provides more directly interpretable parameters for this application.</p>
<p>In summary, the MNCM has several important properties that make it well suited to the task of misconception analysis:</p>
<list list-type="bullet">
<list-item><p>It has a plausible psychological basis, providing a theoretical foundation for its use in understanding student thought processes.</p></list-item>
<list-item><p>It is designed for nominal multiple-choice data. No inherent ordering is imposed on the response categories <italic>a priori</italic>, and the predicted probabilities across possible responses to an item always sum to one, reflecting the constraint that students can select only one of the available choices.</p></list-item>
<list-item><p>It is multidimensional at the level of individual categories. Each distractor can have its own direction (vector) in the multi-dimensional ability space, such that different distractors in the same item may reflect different misconceptions.</p></list-item>
</list></sec>
<sec id="s4">
<title>4 The MNCM-Bayes method</title>
<p>Fitting the MNCM to real data is challenging due to the large number of free model parameters and the small fraction of response patterns that are ever observed (e.g., a 30-item multiple choice test like the FCI with five categories per item has approximately 10<sup>21</sup> distinct ways in which the questions can be answered). This can lead to large errors when using maximum-likelihood-based fitting methods as some parameters may have little data informing their estimates, especially with smaller sample sizes.</p>
<p>In order to better recover model parameters at small sample sizes, we take a Bayesian approach to fitting the MNCM as suggested by Revuelta and Xim&#x000E9;nez (<xref ref-type="bibr" rid="B21">2017</xref>). This section describes the details of our resulting method, which we will refer to in this paper as <italic>MNCM-Bayes</italic>, as well as providing relevant background about various elements of our approach. We will begin with a discussion of the invariances present in the MNCM and our procedure for imposing identification constraints; while this is the final step in our method procedurally, it is helpful to introduce it first as it aids in understanding some aspects of the earlier steps.</p>
<sec>
<title>4.1 A note on notation</title>
<p>Throughout this section we will make use of matrices (i.e., two dimensional arrays of parameters) to simplify our explanations. We will denote such matrices by bold, italicized capital letters&#x02014;for example, <italic><bold>A</bold></italic> for a matrix containing all slope parameters&#x02014;and denote their individual elements by italicized lower case letters as we have done thus far, with row indices indicated by subscripts and column indices indicated by parenthesized superscripts. Bold lower case letters will indicate row or column vectors.</p>
<p>In the case of matrices with dimensions of size <italic>IC</italic> (corresponding to the set of all response categories on all items), we use the index pair (<italic>i,c</italic>) as a shorthand for the index corresponding to category <italic>c</italic> of item <italic>i</italic>. The definitions and shapes of several key matrices are provided in <xref ref-type="table" rid="T1">Table 1</xref> for reference.</p>
<table-wrap position="float" id="T1">
<label>Table 1</label>
<caption><p>Symbols and dimensions for key matrix-valued quantities.</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:#919498;color:#ffffff">
<th valign="top" align="left"><bold>Symbol</bold></th>
<th valign="top" align="center"><bold>Description</bold></th>
<th valign="top" align="center"><bold>Dimensions</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left"><italic><bold>R</bold></italic></td>
<td valign="top" align="center">Responses (observed)</td>
<td valign="top" align="center"><italic>S</italic>&#x000D7;<italic>I</italic></td>
</tr> <tr>
<td valign="top" align="left"><italic><bold>&#x00398;</bold></italic></td>
<td valign="top" align="center">Student abilities/latent traits</td>
<td valign="top" align="center"><italic>S</italic>&#x000D7;<italic>D</italic></td>
</tr> <tr>
<td valign="top" align="left"><italic><bold>A</bold></italic></td>
<td valign="top" align="center">Category slopes</td>
<td valign="top" align="center"><italic>D</italic>&#x000D7;<italic>IC</italic></td>
</tr> <tr>
<td valign="top" align="left"><italic><bold>b</bold></italic></td>
<td valign="top" align="center">Category intercepts</td>
<td valign="top" align="center">1 &#x000D7; <italic>IC</italic></td>
</tr></tbody>
</table>
</table-wrap>
</sec>
<sec>
<title>4.2 Identifying the MNCM</title>
<p>Specifying a procedure to uniquely identify the MNCM is required because the model has a number of symmetries that result in invariances&#x02014;mathematical transformations to the model parameters that leave the predicted probabilities unchanged. Consequently, varying a set of free parameters to find the best fit to a data set does not uniquely determine those parameters, which are arbitrary within variations that honor the invariances. In order to compare results from different data sets&#x02014;or even from different subsets of the same data&#x02014;we must impose additional constraints on the parameters in order to <italic>identify</italic> (uniquely specify) the model. A familiar example from dichotomous 2PL IRT is that shifting item difficulties and student abilities by the same amount does not change the predicted probability for student <italic>s</italic> to answer item <italic>i</italic> correctly; typically, the corresponding identification constraint for this invariance is to set the mean ability to zero.</p>
<p>The greater mathematical complexity of the MNCM results in a greater number of symmetries, and hence a larger number of identification constraints are necessary to specify reproducible results. Additionally, for a multidimensional model in which the probabilities depend on a scalar product (like the MNCM; see <xref ref-type="disp-formula" rid="E4">Equation 4</xref>), rotations of the vector space leave the probabilities unchanged; we exploit several affordances of this when fitting data (both synthetic data and actual student data).</p>
<p>Our identification procedure below is a specification of certain constraints on model parameters (or on properties of sets of parameters such as means or standard deviations), and the parameter transformations we use to effect these constraints. Since by definition the identification procedures do not change the probabilities (and often do not even change the tendencies), they can be applied either while fitting the model to data as an integral part of that process, or afterward as a separate post-processing step on the estimated parameters.</p>
<p>Note that in order to leave the predicted probabilities unchanged, enforcing identification constraints on some parameters often requires making corresponding changes to other parameters linked by the model invariances.</p>
<sec>
<title>4.2.1 Identification procedure</title>
<p>We center <italic><bold>&#x00398;</bold></italic> at zero over the sample (standard practice in IRT) for every dimension by subtracting the mean ability <inline-formula><mml:math id="M7"><mml:msub><mml:mrow><mml:mrow><mml:mo>&#x02329;</mml:mo><mml:mrow><mml:msup><mml:mrow><mml:mstyle mathvariant="bold-italic"><mml:mi>&#x003B8;</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msup></mml:mrow><mml:mo>&#x0232A;</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mi>s</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> from each <inline-formula><mml:math id="M8"><mml:msubsup><mml:mrow><mml:mi>&#x003B8;</mml:mi></mml:mrow><mml:mrow><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:math></inline-formula>; in order to keep the tendencies (<xref ref-type="disp-formula" rid="E4">Equation 4</xref>) and thus also the probabilities (<xref ref-type="disp-formula" rid="E1">Equation 1</xref>) unchanged, this requires that we also shift each <italic>b</italic><sup>(<italic>i,c</italic>)</sup> by an amount</p>
<disp-formula id="E5"><label>(5)</label><mml:math id="M9"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mtext>&#x00394;</mml:mtext><mml:msup><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>c</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msup><mml:mo>=</mml:mo><mml:mstyle displaystyle="true"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>d</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>D</mml:mi></mml:mrow></mml:munderover></mml:mstyle><mml:msubsup><mml:mrow><mml:mi>a</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>c</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:msub><mml:mrow><mml:mrow><mml:mo>&#x02329;</mml:mo><mml:mrow><mml:msup><mml:mrow><mml:mstyle mathvariant="bold-italic"><mml:mi>&#x003B8;</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msup></mml:mrow><mml:mo>&#x0232A;</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mi>s</mml:mi></mml:mrow></mml:msub><mml:mo>.</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>We then shift the item parameters <italic><bold>A</bold></italic> and <italic><bold>b</bold></italic> in one of two ways depending on the nature of the data being analyzed. For data derived from multiple-choice assessments with a &#x0201C;correct answer&#x0201D; category, we set both <inline-formula><mml:math id="M10"><mml:msubsup><mml:mrow><mml:mi>a</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mstyle class="text"><mml:mtext class="textrm" mathvariant="normal">correct</mml:mtext></mml:mstyle></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:math></inline-formula> and <italic>b</italic><sup>(<italic>i</italic>, correct)</sup> to zero for each item (by means of a suitable opposite shift of all <italic>a</italic><sup>(<italic>i,c</italic>)</sup> and <italic>b</italic><sup>(<italic>i,c</italic>)</sup>); identifying in this way highlights the distractors that are most different from the correct answer, which is desirable for misconception analysis. 
When no obvious reference category exists (such as in our synthetic data studies), we set the means <inline-formula><mml:math id="M11"><mml:msub><mml:mrow><mml:mrow><mml:mo>&#x02329;</mml:mo><mml:mrow><mml:msubsup><mml:mrow><mml:mi>a</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>c</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:mrow><mml:mo>&#x0232A;</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mi>c</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> and <inline-formula><mml:math id="M12"><mml:msub><mml:mrow><mml:mrow><mml:mo>&#x02329;</mml:mo><mml:mrow><mml:msup><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>c</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msup></mml:mrow><mml:mo>&#x0232A;</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mi>c</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> to zero instead. These two approaches are sometimes known as <italic>simple constraints</italic> and <italic>deviation constraints</italic>, respectively (see Revuelta and Xim&#x000E9;nez, <xref ref-type="bibr" rid="B21">2017</xref>, p. 2). In either case, these changes shift the tendencies (<xref ref-type="disp-formula" rid="E4">Equation 4</xref>) by the same amount across all categories in an item, which multiplies all terms in <xref ref-type="disp-formula" rid="E1">Equation 1</xref> by the same factor and hence leaves the probabilities unchanged.</p>
<p>Additional invariances occur because the tendencies given by <xref ref-type="disp-formula" rid="E4">Equation 4</xref> contain a scalar product of <italic><bold>A</bold></italic> and <italic><bold>&#x00398;</bold></italic>, the slope matrix and the student abilities respectively. In consequence, <italic><bold>A</bold></italic> and <italic><bold>&#x00398;</bold></italic> can be transformed by any invertible linear mapping: this includes any combination of scaling (e.g., increasing <italic><bold>a</bold></italic><sub><italic>d</italic></sub> and decreasing <italic><bold>&#x003B8;</bold></italic><sub><italic>d</italic></sub>), rotation, shearing, reflection (sign reversal of both <italic><bold>a</bold></italic><sub><italic>d</italic></sub> and <italic><bold>&#x003B8;</bold></italic><sub><italic>d</italic></sub>), or permutation of dimensions.</p>
<p>The scaling invariance allows us to constrain the variance of the <italic><bold>&#x00398;</bold></italic>-distribution to be unity in each dimension, as frequently done in IRT. We initially identify rotation and shearing by specifying that <italic><bold>&#x00398;</bold></italic> and <italic><bold>A</bold></italic> have diagonal covariance matrices&#x02014;the data therefore determines the scale of the estimated <italic><bold>a</bold></italic><sub><italic>d</italic></sub> vectors, which we rank in order of decreasing variance by default to identify permutation. These last identification constraints are often short-lived, though, as when analyzing real data it is common to further transform model parameters to maximize interpretability (e.g., by applying factor-analytic rotation methods as we discuss in Section 6.1).</p></sec></sec>
<sec>
<title>4.3 Bayesian modeling of the MNCM</title>
<p>As mentioned earlier, we take a Bayesian approach to fitting the MNCM. In the Bayesian paradigm, the probability of a student selecting a particular response category is still modeled by <xref ref-type="disp-formula" rid="E1">Equations 1</xref>, <xref ref-type="disp-formula" rid="E4">4</xref>; however, every model parameter [each individual <inline-formula><mml:math id="M13"><mml:msubsup><mml:mrow><mml:mi>a</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>c</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:math></inline-formula>, <italic>b</italic><sup>(<italic>i,c</italic>)</sup>, and <inline-formula><mml:math id="M14"><mml:msubsup><mml:mrow><mml:mi>&#x003B8;</mml:mi></mml:mrow><mml:mrow><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:math></inline-formula>] is treated as having an entire probability <italic>distribution</italic> over possible values rather than a single &#x0201C;optimal&#x0201D; value. This provides a principled way of modeling the effects of parameter uncertainty. In addition, Bayesian methods allow us to incorporate reasonable prior beliefs about how parameters will be distributed. For example, it is typical to assume <italic>a priori</italic> that population values will be more-or-less normally distributed. These two qualities make Bayesian modeling well suited to the challenges of fitting the real data described earlier.</p>
<p>Our choice of prior probability distribution is based on the work of Natesan et al. (<xref ref-type="bibr" rid="B17">2016</xref>), who recommended the use of hierarchical models for Bayesian IRT:</p>
<disp-formula id="E6"><label>(6)</label><mml:math id="M15"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mtable style="text-align:axis;" equalrows="false" columnlines="none" equalcolumns="false" class="array"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mi>&#x003B1;</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mi>&#x003B2;</mml:mi><mml:mo>&#x0007E;</mml:mo><mml:mtext>HalfCauchy</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>5</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:msubsup><mml:mrow><mml:mi>a</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>c</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>&#x0007E;</mml:mo><mml:mtext>Normal</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>&#x003B1;</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:msup><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>c</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msup><mml:mo>&#x0007E;</mml:mo><mml:mtext>Normal</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:mi>&#x003B2;</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:msubsup><mml:mrow><mml:mi>&#x003B8;</mml:mi></mml:mrow><mml:mrow><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mo 
stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>&#x0007E;</mml:mo><mml:mtext>Normal</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>In a hierarchical model, some of the prior probability distributions are not fully specified but are instead parameterized by additional &#x0201C;higher level&#x0201D; random variables [here, &#x003B1;<sub><italic>d</italic></sub> and &#x003B2;, which serve as scale parameters for the priors of <inline-formula><mml:math id="M16"><mml:msubsup><mml:mrow><mml:mi>a</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>c</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:math></inline-formula> and <italic>b</italic><sup>(<italic>i,c</italic>)</sup> respectively]. These variables are then learned from the data. Such an approach allows item parameters with high-confidence estimates to inform the scale of the priors, which in turn better stabilizes estimates for the parameters which have less supporting data.</p>
<p>Note that all prior means for both ability and item parameters are fixed at zero. Due to the invariances of the model, this does not result in any loss of generality. Instead, it serves as a &#x0201C;soft&#x0201D; identification constraint which stabilizes the location of the parameters during the fitting process. Similarly, the scale of <italic><bold>&#x00398;</bold></italic> (and therefore of <italic><bold>A</bold></italic>) is identified by the fixed variance of the <inline-formula><mml:math id="M17"><mml:msubsup><mml:mrow><mml:mi>&#x003B8;</mml:mi></mml:mrow><mml:mrow><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:math></inline-formula> prior, and the rotation and shearing of the model are partly identified by the use of independent priors for each dimension (which implicitly results in diagonal covariance matrices for both <italic><bold>A</bold></italic> and <italic><bold>&#x00398;</bold></italic>, again without any loss of generality). The signs and ordering of dimensions are left unidentified until after the fitting process, though this does not seem to adversely affect convergence due to our choice of fitting method discussed below.</p></sec>
<sec>
<title>4.4 Approximate Bayesian inference</title>
<p>The process of fitting a Bayesian model to data is called <italic>inference</italic>, and its output is an updated joint probability distribution over all model parameters called the <italic>posterior</italic>. Given a set of observed responses <italic><bold>R</bold></italic>, a prior probability distribution <inline-formula><mml:math id="M18"><mml:mi>p</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mrow><mml:mi mathvariant="script">&#x001B5;</mml:mi></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula> over parameters <inline-formula><mml:math id="M19"><mml:mrow><mml:mi mathvariant="script">&#x001B5;</mml:mi></mml:mrow><mml:mo>&#x02261;</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:mstyle mathvariant="bold-italic"><mml:mi>&#x00398;</mml:mi></mml:mstyle><mml:mo>,</mml:mo><mml:mstyle mathvariant="bold-italic"><mml:mi>A</mml:mi></mml:mstyle><mml:mo>,</mml:mo><mml:mstyle mathvariant="bold-italic"><mml:mi>b</mml:mi></mml:mstyle><mml:mo>,</mml:mo><mml:mstyle mathvariant="bold-italic"><mml:mi>&#x003B1;</mml:mi></mml:mstyle><mml:mo>,</mml:mo><mml:mi>&#x003B2;</mml:mi></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:math></inline-formula>, and a response model <inline-formula><mml:math id="M20"><mml:mi>p</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mstyle mathvariant="bold-italic"><mml:mi>R</mml:mi></mml:mstyle><mml:mo>&#x02223;</mml:mo><mml:mrow><mml:mi mathvariant="script">&#x001B5;</mml:mi></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula> describing the probabilities of the observations given the parameters (<xref ref-type="disp-formula" rid="E1">Equations 1</xref>, <xref ref-type="disp-formula" rid="E4">4</xref> for the MNCM), then the posterior <inline-formula><mml:math id="M21"><mml:mi>p</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mrow><mml:mi 
mathvariant="script">&#x001B5;</mml:mi></mml:mrow><mml:mo>&#x02223;</mml:mo><mml:mstyle mathvariant="bold-italic"><mml:mi>R</mml:mi></mml:mstyle></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula> may be found by applying Bayes&#x00027; rule:</p>
<disp-formula id="E7"><label>(7)</label><mml:math id="M22"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>p</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mrow><mml:mi mathvariant="script">&#x001B5;</mml:mi></mml:mrow><mml:mo>&#x02223;</mml:mo><mml:mstyle mathvariant="bold-italic"><mml:mi>R</mml:mi></mml:mstyle></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mi>p</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mstyle mathvariant="bold-italic"><mml:mi>R</mml:mi></mml:mstyle><mml:mo>&#x02223;</mml:mo><mml:mrow><mml:mi mathvariant="script">&#x001B5;</mml:mi></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mi>p</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mrow><mml:mi mathvariant="script">&#x001B5;</mml:mi></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mi>p</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mstyle mathvariant="bold-italic"><mml:mi>R</mml:mi></mml:mstyle></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:mfrac></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>This equation, however, is deceptive in its simplicity; except in special cases, it is not possible to find a closed-form analytical expression for the posterior, and it must be approximated numerically.</p>
<p>Our work uses a variational inference (VI) approach in order to approximately solve <xref ref-type="disp-formula" rid="E7">Equation 7</xref>. VI methods work by re-framing Bayesian inference as an optimization problem: given an approximate (but analytically tractable) parameterized probability distribution, a numerical optimizer searches for the parameters which bring this approximant closest to the true posterior. Surprisingly, it is possible to do this without ever computing the true posterior by instead maximizing a surrogate objective function known as the Evidence Lower Bound (ELBO); further details are widely available in the academic literature (e.g., Blei et al., <xref ref-type="bibr" rid="B3">2017</xref>). In addition to being considerably faster than the more typical Markov Chain Monte Carlo methods used for Bayesian inference, this optimization-based approach allows VI to converge to just one of the many (arbitrary) permutations of dimensions and signs in an under-identified model.</p>
<p>While the form of the approximate posterior distribution in VI may be arbitrarily complex, a popular and quite effective simplification is to treat each random variable in the model as having its own independent univariate posterior distribution. This is known as the mean-field approximation. For all real-valued model variables (<inline-formula><mml:math id="M23"><mml:msubsup><mml:mrow><mml:mi>&#x003B8;</mml:mi></mml:mrow><mml:mrow><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:math></inline-formula>, <inline-formula><mml:math id="M24"><mml:msubsup><mml:mrow><mml:mi>a</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>c</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:math></inline-formula>, and <italic>b</italic><sup>(<italic>i,c</italic>)</sup>), we choose as the approximate posterior a normal distribution parameterized by a mean and a standard deviation, both of which may be freely varied by the optimizer.</p>
<p>The approximate posteriors for the higher-level random variables &#x003B1;<sub><italic>d</italic></sub> and &#x003B2; require additional care since these variables cannot take negative values. This constraint is handled by introducing surrogate real-valued variables&#x02014;with posteriors modeled by unconstrained normal distributions as above&#x02014;and mapping these to positive numbers using an appropriate bijection (e.g., the softplus function <italic>x</italic> &#x021A6; log[1 &#x0002B; exp(<italic>x</italic>)]).</p></sec>
<sec>
<title>4.5 Dimensionality assessment</title>
<p>When selecting the dimensionality of a multidimensional IRT model, it is important to balance improvements in predictive ability from additional dimensions against the added degrees of freedom thereby introduced. Not doing so inevitably leads one to select a model which performs well only on the specific data used to estimate its parameters, but fails to explain new data generated from the same underlying statistical process or yields psychologically meaningless parameter estimates. This problem is known as <italic>overfactoring</italic>&#x02014;a type of overfitting&#x02014;and is often dealt with in a classical maximum-likelihood context by using likelihood-ratio tests or information criteria to compare models with different dimensionalities (van der Linden, <xref ref-type="bibr" rid="B27">2016</xref>, chs. 17 &#x00026; 18).</p>
<p>Bayesian methods are not entirely immune to overfactoring, though their principled inclusion of parameter uncertainty does provide some built-in protection against it. Several approaches to Bayesian dimensionality assessment were recently compared by Revuelta and Xim&#x000E9;nez (<xref ref-type="bibr" rid="B21">2017</xref>) for non-hierarchical MNCM models with up to three dimensions estimated using Markov Chain Monte Carlo methods. The authors recommended the use of a standardized generalized dimensionality discrepancy measure (SGDDM) in this context, noting that alternatives based on discrepancy measures struggled to correctly identify the dimensionality of synthetic data.</p>
<p>In our work, we find that using a hierarchical model in combination with variational inference performs simultaneous parameter estimation and dimensionality selection: even when a <italic>D</italic>-dimensional model is specified, some of those dimensions may be &#x0201C;turned off&#x0201D; during inference (by setting the corresponding &#x003B1;<sub><italic>d</italic></sub> and <italic><bold>a</bold></italic><sub><italic>d</italic></sub> to &#x0007E;0) when the observed data provide insufficient evidence to confidently estimate their slopes. This results in considerable robustness to overfactoring as illustrated by our simulation study in Section 5.</p></sec>
<sec>
<title>4.6 Implementation details</title>
<p>The method described above was implemented in the Python programming language (version 3.11). We leveraged the NumPyro probabilistic programming framework (Phan et al., <xref ref-type="bibr" rid="B19">2019</xref>) to define the probabilistic model, automatically generate a corresponding mean-field normal approximating posterior (including surrogate variables and bijections for &#x003B1;<sub><italic>d</italic></sub> and &#x003B2;), and perform approximate inference using NumPyro&#x00027;s built-in Stochastic Variational Inference (SVI) algorithm.</p>
<p>As the name might suggest, SVI relies in part on random sampling to evaluate and optimize the variational objective function, and must therefore be paired with a noise-tolerant optimization routine. We use the Adam optimizer (Kingma and Ba, <xref ref-type="bibr" rid="B16">2014</xref>) configured with a variable &#x0201C;learning rate&#x0201D; parameter which is programmed to decrease from 0.05 to zero on a predetermined 30,000-step schedule.<xref ref-type="fn" rid="fn0003"><sup>3</sup></xref></p>
<p>While SVI will often converge even if the posterior means of all variables are randomly initialized, we attempt to provide a more reasonable starting point for optimization by computing initial guesses of these using a fast IRT approximation (Zhang et al., <xref ref-type="bibr" rid="B30">2020</xref>),<xref ref-type="fn" rid="fn0004"><sup>4</sup></xref> resorting to random initialization only when this approach does not yield a solution. The posterior standard deviations of all variables are initialized to a fixed value of 0.1, which is the default value for mean-field normal posteriors in NumPyro.</p>
<p>The raw, post-optimization outputs of the SVI algorithm consist of an estimated mean and standard deviation for each variable in the approximate mean-field posterior&#x02014;that is, for each individual <inline-formula><mml:math id="M26"><mml:msubsup><mml:mrow><mml:mi>a</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>c</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:math></inline-formula>, <italic>b</italic><sup>(<italic>i,c</italic>)</sup>, <inline-formula><mml:math id="M27"><mml:msubsup><mml:mrow><mml:mi>&#x003B8;</mml:mi></mml:mrow><mml:mrow><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:math></inline-formula>, &#x003B1;<sub><italic>d</italic></sub>, and &#x003B2;. However, these raw outputs are only weakly identified by the Bayesian prior during inference, and require post-processing to exactly impose the identification constraints described in Section 4.2. We perform this step analytically, using the posterior standard deviations to account for uncertainty when finding the covariance matrices of <italic><bold>A</bold></italic> and <italic><bold>&#x00398;</bold></italic> (which are needed to fully identify the scale and rotation of the model) and the variance of <italic><bold>b</bold></italic> (which is included for completeness). The standard deviations are not used further in this work; we examine only the means of the identified parameters, which correspond to an <italic>expected a posteriori</italic> (EAP) solution.</p>
<p>Lastly, any dimensions inactivated during inference (Section 4.5) are omitted from the final estimates. These dimensions may be easily detected by computing the magnitudes of the estimated slope vectors and applying a simple threshold criterion; we use ||<italic><bold>a</bold></italic><sub><italic>d</italic></sub>|| &#x02264; 0.005.</p>
<p>The final identified outputs of our method therefore include:</p>
<list list-type="bullet">
<list-item><p><inline-formula><mml:math id="M28"><mml:mover accent="false"><mml:mrow><mml:mi>D</mml:mi></mml:mrow><mml:mo>^</mml:mo></mml:mover></mml:math></inline-formula>, the total number of retained dimensions (with <inline-formula><mml:math id="M29"><mml:mover accent="false"><mml:mrow><mml:mi>D</mml:mi></mml:mrow><mml:mo>^</mml:mo></mml:mover><mml:mo>&#x02264;</mml:mo><mml:mi>D</mml:mi></mml:math></inline-formula>);</p></list-item>
<list-item><p><inline-formula><mml:math id="M30"><mml:mover accent="false"><mml:mrow><mml:mstyle mathvariant="bold-italic"><mml:mi>A</mml:mi></mml:mstyle></mml:mrow><mml:mo>^</mml:mo></mml:mover></mml:math></inline-formula>, the matrix of EAP category slope estimates;</p></list-item>
<list-item><p><inline-formula><mml:math id="M31"><mml:mover accent="false"><mml:mrow><mml:mstyle mathvariant="bold-italic"><mml:mi>b</mml:mi></mml:mstyle></mml:mrow><mml:mo>^</mml:mo></mml:mover></mml:math></inline-formula>, the vector of EAP category intercept estimates;</p></list-item>
<list-item><p><inline-formula><mml:math id="M32"><mml:mover accent="false"><mml:mrow><mml:mstyle mathvariant="bold-italic"><mml:mi>&#x00398;</mml:mi></mml:mstyle></mml:mrow><mml:mo>^</mml:mo></mml:mover></mml:math></inline-formula>, the matrix of EAP ability estimates;</p></list-item>
<list-item><p><inline-formula><mml:math id="M33"><mml:mover accent="false"><mml:mrow><mml:mtext>Cov</mml:mtext><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mstyle mathvariant="bold-italic"><mml:mi>A</mml:mi></mml:mstyle></mml:mrow><mml:mo>]</mml:mo></mml:mrow></mml:mrow><mml:mo>^</mml:mo></mml:mover></mml:math></inline-formula>, the estimated <inline-formula><mml:math id="M34"><mml:mover accent="false"><mml:mrow><mml:mi>D</mml:mi></mml:mrow><mml:mo>^</mml:mo></mml:mover><mml:mo>&#x000D7;</mml:mo><mml:mover accent="false"><mml:mrow><mml:mi>D</mml:mi></mml:mrow><mml:mo>^</mml:mo></mml:mover></mml:math></inline-formula> posterior covariance matrix of all the slopes; and</p></list-item>
<list-item><p><inline-formula><mml:math id="M35"><mml:mover accent="false"><mml:mrow><mml:mtext>Var</mml:mtext><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mstyle mathvariant="bold-italic"><mml:mi>b</mml:mi></mml:mstyle></mml:mrow><mml:mo>]</mml:mo></mml:mrow></mml:mrow><mml:mo>^</mml:mo></mml:mover></mml:math></inline-formula>, the estimated posterior variance of all the intercepts.</p></list-item>
</list></sec></sec>
<sec id="s5">
<title>5 Tests with synthetic data</title>
<p>In this section we apply the MNCM-Bayes method to synthetic response data (generated using the MNCM as the true underlying model) and study its ability to recover the multidimensional slope parameters used in the synthesis. We explore how this parameter recovery performance varies across a range of sample sizes and <italic><bold>a</bold></italic><sub><italic>d</italic></sub>-vector scales, and also compare the method&#x00027;s results to those obtained with established IRT software in a limiting unidimensional case.</p>
<sec>
<title>5.1 The synthesized data</title>
<p>Our synthesized datasets each comprised a set of synthetic student and item parameters <italic><bold>&#x00398;</bold></italic>, <italic><bold>A</bold></italic>, and <italic><bold>b</bold></italic> (all identified according to the constraints in Section 4.2) and a corresponding synthetic response matrix <italic><bold>R</bold></italic>. All datasets used <italic>I</italic> &#x0003D; 30 items and <italic>C</italic> &#x0003D; 5 categories per item, matching the structure of the real data we will analyze in Section 6. The standard deviation of the <italic><bold>b</bold></italic> vector was fixed at 1.5, which is also consistent with that found later for the real dataset; this yielded synthetic data with a range of observed response fractions across the categories in each item.</p>
<p>For the multidimensional simulation study, datasets were generated with <italic>D</italic> &#x0003D; 9 synthesized dimensions. Six sample size levels <italic>S</italic> &#x02208; {50, 100, 200, 600, 2,000, 10,000} and two <italic><bold>A</bold></italic>-matrix covariance structures (described below) were explored according to a fully-crossed design. For each condition, 100 replications were generated with different pseudo-random parameter values for each replication, yielding a total of 1,200 datasets.</p>
<p>The invariances of the MNCM afford us substantial flexibility in choosing a covariance structure for the synthesized <italic><bold>A</bold></italic> matrices. Of note, <italic>all</italic> possible covariance matrices for <italic><bold>&#x00398;</bold></italic> and <italic><bold>A</bold></italic>&#x02014;even those with arbitrary structure and multiple highly-correlated dimensions&#x02014;are expressible as diagonal matrices in <italic>some</italic> choice of reference frame due to these invariances,<xref ref-type="fn" rid="fn0005"><sup>5</sup></xref> which allows us to synthesize <italic><bold>A</bold></italic> to have uncorrelated dimensions without any loss of generality. The standard deviations of these dimensions were selected to span a fairly wide gamut in order to explore the limits of parameter recovery using our method; we fix the standard deviation of the first dimension to 1.0 and specify that each subsequent dimension is smaller than the previous one by a factor &#x003B3; &#x02208; {0.8, 0.512}:</p>
<disp-formula id="E8"><label>(8)</label><mml:math id="M39"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>S</mml:mi><mml:mi>t</mml:mi><mml:mi>d</mml:mi><mml:mi>D</mml:mi><mml:mi>e</mml:mi><mml:mi>v</mml:mi><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mstyle mathvariant="bold-italic"><mml:mi>a</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>]</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mn>1</mml:mn><mml:mo>.</mml:mo><mml:mn>0</mml:mn><mml:mo>&#x000D7;</mml:mo><mml:msup><mml:mrow><mml:mi>&#x003B3;</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>d</mml:mi><mml:mo>-</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msup><mml:mo>.</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>The first condition, &#x003B3; &#x0003D; 0.8, yields data in which the smallest dimension accounts for approximately 1% of the overall variance in the synthesized tendencies. The second condition yields data with a much more rapid decrease in variance (as might be expected in a real dataset with fewer significant factors), with the specific choice of &#x003B3; &#x0003D; 0.8<sup>3</sup> &#x0003D; 0.512 intended to facilitate comparison across the two conditions: the standard deviations of dimensions 1&#x02013;3 in the &#x003B3; &#x0003D; 0.512 data exactly match those of dimensions 1, 4, and 7 in the &#x003B3; &#x0003D; 0.8 data.</p>
<p>For the unidimensional simulation study comparing the Bayesian method to an established software package implementing Marginal Maximum Likelihood Estimation (MMLE), datasets were generated with <italic>D</italic> &#x0003D; 1, sample sizes <italic>S</italic> &#x02208; {50, 100, 200, 600, 2,000, 10,000}, and a standard deviation of 1.0 for the sole slope vector <italic><bold>a</bold></italic><sub>1</sub>. Again, 100 replications were generated per condition, yielding an additional 600 datasets for this second study.</p>
<sec>
<title>5.1.1 Student parameter synthesis</title>
<p>The <italic><bold>&#x00398;</bold></italic> matrices were synthesized by drawing samples from a <italic>D</italic>-dimensional standard normal distribution and then standardizing this set of samples to have zero mean in all dimensions and an identity covariance matrix. This added standardization step (and those applied to the item parameters below) served two goals: imposing exact identification constraints to facilitate later comparison with the recovered parameters, and reducing the sample-to-sample variation in the scale of the model due to random sampling variation at smaller sample sizes.</p>
<p>Within each replication, a &#x0201C;nested&#x0201D; structure was used for the student parameters, such that the matrix for each sample size included those of all smaller sample sizes as subsets. That is, <italic><bold>&#x00398;</bold></italic>|<sub><italic>S</italic> &#x0003D; 100</sub> consisted of <italic><bold>&#x00398;</bold></italic>|<sub><italic>S</italic> &#x0003D; 50</sub> concatenated with an additional (and separately standardized) 50 rows, <italic><bold>&#x00398;</bold></italic>|<sub><italic>S</italic> &#x0003D; 200</sub> included <italic><bold>&#x00398;</bold></italic>|<sub><italic>S</italic> &#x0003D; 100</sub> plus 100 new rows, and so on. This was done both to reduce variance across conditions and to permit direct comparisons between parameter estimates if desired.</p></sec>
<sec>
<title>5.1.2 Item parameter synthesis</title>
<p>The <italic><bold>A</bold></italic> matrices and <italic><bold>b</bold></italic> vectors were generated using a similar procedure to <italic><bold>&#x00398;</bold></italic>. In order to allow the covariance between slopes and attractivenesses to be exactly specified, these two variables were initially treated as a single column-wise concatenated matrix [<italic><bold>A</bold></italic><sup>&#x022BA;</sup>|<italic><bold>b</bold></italic><sup>&#x022BA;</sup>] having dimensions <italic>IC</italic> &#x000D7; (<italic>D</italic> &#x0002B; 1), with rows drawn from a (<italic>D</italic> &#x0002B; 1)-dimensional standard normal distribution. As in Section 4.2, the combined parameters were identified such that each group of rows corresponding to a given item had zero mean in all <italic>D</italic> &#x0002B; 1 columns. The entire matrix was then standardized to have an identity covariance matrix, scaled to achieve the desired standard deviations for each column, and finally split and transposed into the individual variables <italic><bold>A</bold></italic> and <italic><bold>b</bold></italic>.</p></sec>
<sec>
<title>5.1.3 Response synthesis</title>
<p>The response matrices <italic><bold>R</bold></italic> were generated by sampling each element from a categorical distribution with probabilities given by <xref ref-type="disp-formula" rid="E1">Equations 1</xref>, <xref ref-type="disp-formula" rid="E4">4</xref>. As with the student parameters, a nested sampling approach was used, such that the response matrix for each sample size in a given replication included as subsets the response matrices of all smaller sample sizes.</p></sec></sec>
<sec>
<title>5.2 Aligning recovered and synthesized parameters</title>
<p>Our interest in finding misconceptions means that, when studying real data, we will need to rotate the coordinate system of our results to associate each dimension with an interpretable concept (common practice in exploratory analysis methods; see Section 6.1). We are therefore interested in evaluating how well the method recovers the <italic>basis-independent</italic> information present in our synthesized parameters and especially in the <italic><bold>A</bold></italic> matrix, rather than the particular coordinate system in which it initially extracts this information (which is arbitrary and does not affect the response probabilities due to the rotational invariance property of the MNCM). We achieve this by aligning the coordinate systems of the synthesized and recovered parameters prior to computing any evaluation metrics.</p>
<p>Note that using the same identification criteria for both sets of parameters does somewhat succeed in aligning their coordinate systems. Even so, the identified coordinate directions may themselves be sensitive to small errors in the parameters; this procedure may therefore lead to inflated errors which are due more to (arbitrary) differences in rotation than to differences which actually affect the response probabilities.</p>
<p>In this work, we use an orthogonal Procrustes procedure (Sch&#x000F6;nemann, <xref ref-type="bibr" rid="B22">1966</xref>) to align the recovered and synthesized parameters while respecting the invariances of the model. We first find an orthogonal matrix <italic><bold>Q</bold></italic> which best maps <italic><bold>&#x00398;</bold></italic> to <inline-formula><mml:math id="M40"><mml:mover accent="false"><mml:mrow><mml:mstyle mathvariant="bold-italic"><mml:mi>&#x00398;</mml:mi></mml:mstyle></mml:mrow><mml:mo>^</mml:mo></mml:mover></mml:math></inline-formula> in the least-squares sense, i.e., which minimizes</p>
<disp-formula id="E9"><label>(9)</label><mml:math id="M41"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mo>||</mml:mo><mml:mstyle mathvariant="bold-italic"><mml:mi>&#x00398;</mml:mi></mml:mstyle><mml:mstyle mathvariant="bold-italic"><mml:mi>Q</mml:mi></mml:mstyle><mml:mo>-</mml:mo><mml:mover accent="false"><mml:mrow><mml:mstyle mathvariant="bold-italic"><mml:mi>&#x00398;</mml:mi></mml:mstyle></mml:mrow><mml:mo>^</mml:mo></mml:mover><mml:msubsup><mml:mrow><mml:mo>||</mml:mo></mml:mrow><mml:mrow><mml:mi>F</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msubsup><mml:mtext>&#x02003;</mml:mtext><mml:mtext class="textrm" mathvariant="normal">s.t.&#x02003;</mml:mtext><mml:mstyle mathvariant="bold-italic"><mml:mi>Q</mml:mi></mml:mstyle><mml:msup><mml:mrow><mml:mstyle mathvariant="bold-italic"><mml:mi>Q</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:mo>&#x022BA;</mml:mo></mml:mrow></mml:msup><mml:mo>=</mml:mo><mml:mstyle mathvariant="bold-italic"><mml:mi>I</mml:mi></mml:mstyle></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>where <inline-formula><mml:math id="M42"><mml:mo>||</mml:mo><mml:mo>&#x000B7;</mml:mo><mml:msubsup><mml:mrow><mml:mo>||</mml:mo></mml:mrow><mml:mrow><mml:mi>F</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msubsup></mml:math></inline-formula> is the squared Frobenius norm (equal to the sum of squares of all matrix elements). This matrix is then used to rotate the synthesized parameters to allow direct comparisons with the estimates:</p>
<disp-formula id="E10"><label>(10)</label><mml:math id="M43"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mtable style="text-align:axis;" equalrows="false" columnlines="none" equalcolumns="false" class="array"><mml:mtr><mml:mtd><mml:msup><mml:mrow><mml:mstyle mathvariant="bold-italic"><mml:mi>&#x00398;</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:mo>&#x022C6;</mml:mo></mml:mrow></mml:msup><mml:mo>=</mml:mo><mml:mstyle mathvariant="bold-italic"><mml:mi>&#x00398;</mml:mi></mml:mstyle><mml:mstyle mathvariant="bold-italic"><mml:mi>Q</mml:mi></mml:mstyle></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:msup><mml:mrow><mml:mstyle mathvariant="bold-italic"><mml:mi>A</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:mo>&#x022C6;</mml:mo></mml:mrow></mml:msup><mml:mo>=</mml:mo><mml:msup><mml:mrow><mml:mstyle mathvariant="bold-italic"><mml:mi>Q</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:mstyle mathvariant="bold"><mml:mtext>&#x022BA;</mml:mtext></mml:mstyle></mml:mrow></mml:msup><mml:mstyle mathvariant="bold-italic"><mml:mi>A</mml:mi></mml:mstyle></mml:mtd></mml:mtr></mml:mtable></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>Note that such rotations do not alter the predicted probabilities in <xref ref-type="disp-formula" rid="E1">Equations 1</xref>, <xref ref-type="disp-formula" rid="E4">4</xref>.</p>
<p>The choice to align parameters to the reference frame of the estimates&#x02014;as opposed to that of the synthesized parameters&#x02014;is a deliberate one. Because of the self-limiting dimensionality of the MNCM-Bayes method, the number of dimensions extracted may be smaller than the number of dimensions synthesized. Remaining in the reference frame of the estimates allows us to limit our analysis to only these <inline-formula><mml:math id="M44"><mml:mover accent="false"><mml:mrow><mml:mi>D</mml:mi></mml:mrow><mml:mo>^</mml:mo></mml:mover><mml:mo>&#x02264;</mml:mo><mml:mi>D</mml:mi></mml:math></inline-formula> extracted dimensions, and also allows us to examine parameter recovery metrics for each extracted dimension individually. In the reference frame of the synthesized parameters, however, there is no longer a one-to-one correspondence between these extracted dimensions and individual coordinate directions, and limiting the analysis to the <inline-formula><mml:math id="M45"><mml:mover accent="false"><mml:mrow><mml:mi>D</mml:mi></mml:mrow><mml:mo>^</mml:mo></mml:mover></mml:math></inline-formula>-dimensional subspace of extracted dimensions becomes difficult or impossible.</p></sec>
<sec>
<title>5.3 Results and discussion</title>
<p>For this paper, we focus exclusively on the recovery of the slope parameters <inline-formula><mml:math id="M46"><mml:msubsup><mml:mrow><mml:mi>a</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>c</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:math></inline-formula> as these provide the most information about the structure and content of an assessment instrument (which is our current research focus). We leave exploration of <italic><bold>&#x00398;</bold></italic> and <italic><bold>b</bold></italic> for future work.</p>
<p>We evaluate parameter recovery separately for each dimension. Our metric of choice is the squared Pearson correlation coefficient&#x02014;also known as the coefficient of determination&#x02014;computed between the synthesized, Procrustes-aligned <inline-formula><mml:math id="M47"><mml:msubsup><mml:mrow><mml:mi>a</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mo>&#x022C6;</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>c</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:math></inline-formula> values and their estimated counterparts <inline-formula><mml:math id="M48"><mml:msubsup><mml:mrow><mml:mover accent="false"><mml:mrow><mml:mi>a</mml:mi></mml:mrow><mml:mo>^</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>c</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:math></inline-formula>:</p>
<disp-formula id="E11"><label>(11)</label><mml:math id="M49"><mml:mrow><mml:msup><mml:mi>r</mml:mi><mml:mn>2</mml:mn></mml:msup><mml:mtext>&#x0200B;</mml:mtext><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:msubsup><mml:mrow><mml:mstyle mathvariant="bold-italic"><mml:mi>a</mml:mi></mml:mstyle></mml:mrow><mml:mi>d</mml:mi><mml:mo>&#x022C6;</mml:mo></mml:msubsup><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mover accent='true'><mml:mrow><mml:mstyle mathvariant="bold-italic"><mml:mi>a</mml:mi></mml:mstyle></mml:mrow><mml:mo stretchy='true'>&#x0005E;</mml:mo></mml:mover></mml:mrow><mml:mi>d</mml:mi></mml:msub></mml:mrow><mml:mo>]</mml:mo></mml:mrow><mml:mo>&#x02261;</mml:mo><mml:mfrac><mml:mrow><mml:msup><mml:mrow><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mstyle displaystyle='true'><mml:msub><mml:mo>&#x02211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>c</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msubsup><mml:mrow><mml:mstyle mathvariant="bold-italic"><mml:mi>a</mml:mi></mml:mstyle></mml:mrow><mml:mi>d</mml:mi><mml:mrow><mml:mo>&#x022C6;</mml:mo><mml:mo stretchy='false'>(</mml:mo><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>c</mml:mi><mml:mo stretchy='false'>)</mml:mo></mml:mrow></mml:msubsup><mml:mtext>&#x0200B;</mml:mtext><mml:mo>&#x02212;</mml:mo><mml:msub><mml:mrow><mml:mo>&#x02329;</mml:mo><mml:msubsup><mml:mrow><mml:mstyle mathvariant="bold-italic"><mml:mi>a</mml:mi></mml:mstyle></mml:mrow><mml:mi>d</mml:mi><mml:mo>&#x022C6;</mml:mo></mml:msubsup><mml:mo>&#x0232A;</mml:mo></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>c</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:mstyle><mml:mtext>&#x0200B;</mml:mtext><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msubsup><mml:mover accent='true'><mml:mrow><mml:mstyle mathvariant="bold-italic"><mml:mi>a</mml:mi></mml:mstyle></mml:mrow><mml:mo>&#x0005E;</mml:mo></mml:mover><mml:mi>d</mml:mi><mml:mrow><mml:mo 
stretchy='false'>(</mml:mo><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>c</mml:mi><mml:mo stretchy='false'>)</mml:mo></mml:mrow></mml:msubsup><mml:mtext>&#x0200B;</mml:mtext><mml:mo>&#x02212;</mml:mo><mml:msub><mml:mrow><mml:mo>&#x02329;</mml:mo><mml:msub><mml:mrow><mml:mover accent='true'><mml:mrow><mml:mstyle mathvariant="bold-italic"><mml:mi>a</mml:mi></mml:mstyle></mml:mrow><mml:mo stretchy='true'>&#x0005E;</mml:mo></mml:mover></mml:mrow><mml:mi>d</mml:mi></mml:msub><mml:mo>&#x0232A;</mml:mo></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>c</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow><mml:mo>]</mml:mo></mml:mrow></mml:mrow><mml:mn>2</mml:mn></mml:msup></mml:mrow><mml:mrow><mml:msup><mml:mrow><mml:mstyle displaystyle='true'><mml:msub><mml:mo>&#x02211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>c</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msubsup><mml:mrow><mml:mstyle mathvariant="bold-italic"><mml:mi>a</mml:mi></mml:mstyle></mml:mrow><mml:mi>d</mml:mi><mml:mrow><mml:mo>&#x022C6;</mml:mo><mml:mo stretchy='false'>(</mml:mo><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>c</mml:mi><mml:mo stretchy='false'>)</mml:mo></mml:mrow></mml:msubsup><mml:mtext>&#x0200B;</mml:mtext><mml:mo>&#x02212;</mml:mo><mml:msub><mml:mrow><mml:mo>&#x02329;</mml:mo><mml:msubsup><mml:mrow><mml:mstyle mathvariant="bold-italic"><mml:mi>a</mml:mi></mml:mstyle></mml:mrow><mml:mi>d</mml:mi><mml:mo>&#x022C6;</mml:mo></mml:msubsup><mml:mo>&#x0232A;</mml:mo></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>c</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:mstyle></mml:mrow><mml:mn>2</mml:mn></mml:msup><mml:msup><mml:mrow><mml:mstyle 
displaystyle='true'><mml:msub><mml:mo>&#x02211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>c</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msubsup><mml:mover accent='true'><mml:mstyle mathvariant="bold-italic"><mml:mi>a</mml:mi></mml:mstyle><mml:mo>&#x0005E;</mml:mo></mml:mover><mml:mi>d</mml:mi><mml:mrow><mml:mo stretchy='false'>(</mml:mo><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>c</mml:mi><mml:mo stretchy='false'>)</mml:mo></mml:mrow></mml:msubsup><mml:mtext>&#x0200B;</mml:mtext><mml:mo>&#x02212;</mml:mo><mml:msub><mml:mrow><mml:mo>&#x02329;</mml:mo><mml:msub><mml:mrow><mml:mover accent='true'><mml:mstyle mathvariant="bold-italic"><mml:mi>a</mml:mi></mml:mstyle><mml:mo stretchy='true'>&#x0005E;</mml:mo></mml:mover></mml:mrow><mml:mi>d</mml:mi></mml:msub><mml:mo>&#x0232A;</mml:mo></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>c</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:mstyle></mml:mrow><mml:mn>2</mml:mn></mml:msup></mml:mrow></mml:mfrac><mml:mo>.</mml:mo></mml:mrow></mml:math></disp-formula>
<p>We intentionally use a correlation-based metric instead of the more common root mean squared error (RMSE) in order to forgive differences in overall scale, which do not affect the subjective interpretation of the recovered parameters.</p>
<p>The <italic>r</italic><sup>2</sup> metric ranges from 0 to 1 and may be understood as the fraction of variation in the slope estimates attributable to variation in the (rotated) ground-truth values. This definition implies that values of <italic>r</italic><sup>2</sup> are analogous to reliability coefficients, except that these apply to recovered slopes rather than test scores. We therefore suggest similar norms be used when evaluating <italic>r</italic><sup>2</sup>: values greater than 0.9 suggest sufficient accuracy for interpreting individual slopes, while those as low as 0.7 may still provide value when interpreting multiple slopes in aggregate.</p>
<sec>
<title>5.3.1 Multidimensional slope recovery</title>
<p>The <italic>r</italic><sup>2</sup> coefficients for the recovered slopes in the nine-dimensional simulation study are summarized in <xref ref-type="fig" rid="F1">Figure 1</xref>. Recovery ranged from excellent (<italic>r</italic><sup>2</sup>&#x0003E;0.9 for most dimensions at <italic>S</italic> &#x0003D; 10,000) to poor (only marginally-acceptable performance in the first dimension for <italic>S</italic> &#x0003D; 50, with questionable results beyond this), though this was largely expected given the span of sample sizes tested.</p>
<fig id="F1" position="float">
<label>Figure 1</label>
<caption><p>Per-dimension coefficients of determination for recovered vs. synthesized slopes, after Procrustes alignment, for all multidimensional synthesized datasets. Points show results from individual replications, with jitter and horizontal offsetting used to reduce overplotting. Solid lines and shaded bands show median and interquartile ranges, respectively, whenever five or more replications are present for a given &#x003B3;, <italic>S</italic>, and <italic>d</italic>. To facilitate comparison between the two &#x003B3; conditions, the superscripts &#x0002A;, &#x02020; and &#x02021; are used to indicate dimensions with equal standard deviations.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpsyg-16-1506320-g0001.tif"/>
</fig>
<p>Perhaps the most conspicuous feature of the results is the self-limiting dimensionality of the MNCM-Bayes method. At the largest sample size tested (<italic>S</italic> &#x0003D; 10,000), the method was able to recover coefficients of all synthesized dimensions in the &#x003B3; &#x0003D; 0.8 data across the majority of experimental replications. As sample size decreases, however, the effects of smaller dimensions become increasingly difficult to distinguish from those of random noise in the data, increasing the risk that extracted parameters will differ from those of the true underlying model. Rather than yielding meaningless results for these dimensions, we find that the method settles on solutions in which only a subset of the <italic>D</italic> &#x0003D; 9 estimated dimensions are used, with the remainder having EAP estimates set close to zero&#x02014;effectively pruning them from the final model.<xref ref-type="fn" rid="fn0006"><sup>6</sup></xref> This behavior provides considerable robustness to both overfactoring and overfitting: in general, the method appears to include dimensions only when it is confident that the resulting estimates contain real information about the underlying slopes, not simply when doing so would increase the likelihood of the observed responses on a particular dataset.</p>
<p>At the same time, we recognize that merely containing information is too low a bar when it comes to meaningful interpretation of model parameters. In this sense, our results underscore the critical role that large sample sizes play in allowing us to draw conclusions about the slope coefficients in lower-variance dimensions, especially if we wish to interpret these individually rather than in aggregate.</p>
<p>Comparing the results across the two levels of &#x003B3;, we find that recovery performance is quite similar for dimensions with similar overall standard deviations (indicated by matching superscripts on the dimension numbers in the two subplots). For example, the results for <italic>S</italic> &#x0003D; 2,000 show similar performance (<italic>r</italic><sup>2</sup>&#x02248;0.8) in dimension 7 of the &#x003B3; &#x0003D; 0.8 data and dimension 3 of the &#x003B3; &#x0003D; 0.512 data&#x02014;both of which were synthesized with standard deviations of 0.262. This is true despite there being a greater <italic>number</italic> of dimensions having at least this standard deviation in the &#x003B3; &#x0003D; 0.8 data, suggesting that the dimensionality of the data plays a relatively minor role in determining parameter-recovery accuracy in any given dimension compared to the scale of the underlying <italic><bold>a</bold></italic><sub><italic>d</italic></sub> vector (and of course the sample size).</p>
<p>The number of dimensions retained by the method also seems to depend primarily on the scales of the <italic><bold>a</bold></italic><sub><italic>d</italic></sub> vectors at any given sample size. At <italic>S</italic> &#x0003D; 600 for example, the method yielded models with up to seven dimensions in the &#x003B3; &#x0003D; 0.8 data and only three dimensions in the &#x003B3; &#x0003D; 0.512 data, but in either case the smallest recovered dimension had StdDev[<italic><bold>a</bold></italic><sub><italic>d</italic></sub>]&#x02248;0.26. This result also implies that sample size should not be seen as limiting the <italic>number</italic> of dimensions that can be recovered by the method, but rather the <italic>smallest</italic> dimension that can be reliably extracted given the limited information about the model parameters provided by each response.</p></sec>
<sec>
<title>5.3.2 Agreement with established IRT software</title>
<p>We also compared the slope recovery of our method to those obtained with the widely adopted &#x0201C;mirt&#x0201D; package for the R language (Chalmers, <xref ref-type="bibr" rid="B8">2012</xref>; R Core Team, <xref ref-type="bibr" rid="B20">2021</xref>) using default settings, which corresponded to MMLE with a standard-normal ability prior and no item parameter priors. This comparison was limited to a unidimensional model since, as of the date of submission, &#x0201C;mirt&#x0201D; only supports the more restricted (Thissen et al., <xref ref-type="bibr" rid="B26">2010</xref>) form of the MNCM in which all category slopes for a given item are assumed to share the same direction in the latent ability space. In the unidimensional case, the general and restricted models become equivalent, with both simply reducing to the NRM and differing only in their parameterization.</p>
<p>As shown in <xref ref-type="fig" rid="F2">Figure 2</xref>, the MNCM-Bayes procedure performs quite favorably in this comparison, especially at smaller sample sizes, with some improvements to robustness visible up to sample sizes of <italic>S</italic> &#x0003D; 2,000. These results are somewhat expected given the lack of item-parameter priors in the benchmark method, but still provide additional confirmation of the correctness and utility of our implementation.</p>
<fig id="F2" position="float">
<label>Figure 2</label>
<caption><p>Coefficients of determination for recovered vs. synthesized slopes using unidimensional synthetic data, comparing MNCM-Bayes (blue, offset left) to established IRT software implementing MMLE (orange, offset right) with default settings. Points show results from individual replications, with jitter used to reduce overplotting.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpsyg-16-1506320-g0002.tif"/>
</fig>
<p>Nevertheless, these results do help underscore a key advantage of hierarchical priors: improved performance for novice users. Even when IRT packages do support the use of item priors, it is typically up to the user to manually configure the distributions and widths of these priors, and later evaluate the results to determine whether their choices were adequate. If too wide a prior distribution is used, some of the benefits of increased robustness will be lost. Too narrow a prior, on the other hand, may excessively bias parameter estimates by allowing insufficient flexibility to fit the observed data. Understandably, this requires more experience than simply fitting a model to data with the default options. In contrast, a hierarchical Bayesian model requires little or no manual tuning to achieve substantially-improved results, as the prior widths are themselves learned from the data.</p>
<p>Finally, an astute observer may note that MNCM-Bayes exhibits slightly better performance in <xref ref-type="fig" rid="F2">Figure 2</xref> compared to the results for <italic>d</italic> &#x0003D; 1 in <xref ref-type="fig" rid="F1">Figure 1</xref>, especially at lower sample sizes. This is likely attributable to the model being better specified here: i.e., we are fitting a unidimensional model to a unidimensional dataset. In contrast, any synthesized dimensions left unextracted when fitting the multidimensional datasets serve as a source of unmodeled noise, which may reduce the method&#x00027;s accuracy when recovering the slopes in the remaining dimensions.</p></sec></sec></sec>
<sec id="s6">
<title>6 Real data example: the force concept inventory</title>
<p>To demonstrate the promise of our method in a more realistic application, we present some preliminary results from applying the MNCM-Bayes method to data obtained from administrations of the Force Concept Inventory. While these results do not yet meet the standard of rigor required to serve as standalone research findings (which we hope to provide in a follow-up paper), they do show that the method can identify real misconceptions in real concept-test data.</p>
<p>The FCI, which grew out of work by David Hestenes&#x00027; group in the mid-1980s, was first published by Hestenes et al. (<xref ref-type="bibr" rid="B14">1992</xref>) and later revised by Halloun et al. (<xref ref-type="bibr" rid="B12">1995</xref>). This popular assessment asked straightforward questions about simple physical situations that were covered in the introductory weeks of typical college-level Newtonian mechanics courses, but that were known from research to reveal student misconceptions (e.g., &#x0201C;what forces act on a ball that is thrown vertically upwards?&#x0201D;). College teachers predicted that their students would score very highly on this instrument and were doubly shocked. Not only did their students score a mere 55% post-instruction, but they had already scored just over 40% on the pretest: thus the teachers taught their students less than a quarter of the important mechanics concepts that they didn&#x00027;t already know on day one of the course.</p>
<p>Our FCI dataset comprises post-instruction responses from <italic>S</italic> &#x0003D; 10,039 students at a state university in the Southwestern part of the United States. All administrations used the &#x0201C;v95&#x0201D; revised version of the instrument (Halloun et al., <xref ref-type="bibr" rid="B12">1995</xref>). We restricted our analysis to students responding to all 30 questions on the test in the expectation that these students were more likely to respond thoughtfully, which excluded 433 students from the sample. The MNCM-Bayes method was configured to allow a maximum of 16 dimensions, and returned a 14-dimensional fit for this dataset.</p>
<sec>
<title>6.1 Rotating results for interpretability</title>
<p>A key challenge that arises in applying multi-dimensional IRT models like the MNCM to real data is that of ascribing meaning to the dimensions thereby discovered. As with principal component analysis, the dimensions extracted by MNCM-Bayes are initially identified in a principal basis. Such solutions are generally not easy to interpret, as each dimension loads on many categories across many items.</p>
<p>The standard approach for increasing the interpretability of such results (both in multidimensional IRT and in classical factor analysis techniques) is to find a transformation which increases some measure of &#x0201C;simplicity&#x0201D; while not changing the modeled probabilities. Many such measures exist, as do standard methods for transforming solutions to maximize them given a matrix of slopes or factor loadings. For the sake of brevity, we will forgo a principled evaluation of these methods for now and present only one as an example: a bi-factor rotation method proposed by Jennrich and Bentler (<xref ref-type="bibr" rid="B15">2011</xref>) and implemented in the GPArotation R package (Bernaards and Jennrich, <xref ref-type="bibr" rid="B2">2005</xref>).</p>
<p>Bi-factor rotation methods such as the one above transform the coordinate system of <italic><bold>&#x00398;</bold></italic> and <italic><bold>A</bold></italic> such that each distractor has non-negligible slopes along only a small number of dimensions (ideally just two). The first of these dimensions is always a &#x0201C;general&#x0201D; factor on which all distractors load&#x02014;we have found the ability in this dimension to be strongly associated with students&#x00027; overall test score (Spearman rank correlation coefficient &#x003C1;<sub>s</sub> &#x0003D; 0.98). The remainder are &#x0201C;group&#x0201D; factors associated with only a small set of distractors. We call the slope vectors for each of these <italic>sparse distractor vectors</italic>, and define their positive directions as those which yield a positive Spearman correlation between the corresponding rows of <italic><bold>&#x00398;</bold></italic> and the test scores. (This sign determination method is sometimes marginal as some dimensions do not correlate strongly with score; for those that do, however, this causes the slope components which most characterize the non-Newtonian nature of each distractor to have negative signs).</p>
<p>To demonstrate the results of this transformation, we plot in <xref ref-type="fig" rid="F3">Figure 3</xref> the first two principal and sparse distractor vectors found for the large post-instruction data set described above. We display the components of each vector as a pattern of dots on an <italic>I</italic> &#x000D7; <italic>C</italic> grid, each dot having a size and intensity proportional to the magnitude of that component and being colored red when negative (blue if positive).</p>

<fig id="F3" position="float">
<label>Figure 3</label>
<caption><p>Examples of principal and sparse distractor vectors (<inline-formula><mml:math id="M50"><mml:msub><mml:mrow><mml:mover accent="false"><mml:mrow><mml:mstyle mathvariant="bold-italic"><mml:mi>a</mml:mi></mml:mstyle></mml:mrow><mml:mo>^</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> coefficients) for FCI post-test data. Dots are colored red when negative (blue if positive) with size and intensity showing magnitude relative to the largest coefficient in a given panel; correct answer choices are marked with a &#x0201C;&#x0002B;&#x0201D; and always have zero slope due to model identification constraints. The top two vectors are from the first dimension&#x02014;labeled &#x0201C;P1&#x0201D; for principal and &#x0201C;Bf1&#x0201D; for bi-factor rotated. They are correlated with each other (Pearson uncentered) at 0.998 and appear so visually similar that we carefully checked for accidental duplication. The second principal vector, &#x0201C;P2,&#x0201D; is obviously much denser than the second dimension of the rotated vector, &#x0201C;Bf2,&#x0201D; which has just a few large components that all deviate from zero in the negative (anti-Newtonian) direction.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpsyg-16-1506320-g0003.tif"/>
</fig>
</sec>
<sec>
<title>6.2 Some dimensions represent misconceptions</title>
<p>Each sparse distractor vector loads heavily on just a few distractors. To determine whether a particular vector represents a misconception, we examine the most heavily weighted distractors (typically the largest &#x0007E;half-dozen assuming these stand out prominently from the background when plotted as in <xref ref-type="fig" rid="F3">Figure 3</xref>) and see if selecting them would indicate consistent application of some alternate hypothesis to Newtonian mechanics. This process is admittedly quite subjective and could likely be improved upon in future work (e.g., by using outlier analysis to differentiate between prominent and background values), but nevertheless identifies several clear examples of misconceptions in our present results.</p>
<sec>
<title>6.2.1 Impetus force along curved path</title>
<p>As an example, consider the distractor vector labeled &#x0201C;Bf2&#x0201D; in <xref ref-type="fig" rid="F3">Figure 3</xref>. Its six largest distractor components are on items 5 and 18, whose corresponding text is shown in <xref ref-type="table" rid="T2">Table 2</xref>. All six highly loaded distractors involve &#x0201C;force in the direction of motion.&#x0201D; Importantly, we note that the path is curved in both of these items, so we call this dimension <italic>Impetus Force Along Curved Path</italic>.</p>
<table-wrap position="float" id="T2">
<label>Table 2</label>
<caption><p>Most heavily loaded response categories for second dimension of bi-factor rotated FCI post-test results (shown as &#x0201C;Bf2&#x0201D; in <xref ref-type="fig" rid="F3">Figure 3</xref>). These choices are consistent with a student belief that an impetus force exists along curved paths.</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:#919498;color:#ffffff">
<th valign="top" align="left"><bold>Item</bold></th>
<th valign="top" align="left"><bold>Choice</bold></th>
<th valign="top" align="left"><bold>Text</bold></th>
<th valign="top" align="center"><bold>Slope <inline-formula><mml:math id="M51"><mml:mover accent="false"><mml:mrow><mml:mi>a</mml:mi></mml:mrow><mml:mo>^</mml:mo></mml:mover></mml:math></inline-formula></bold></th>
</tr>
</thead>
<tbody>
<tr style="background-color:#dee1e1;">
<td valign="top" align="left" colspan="4"><italic><bold>Forces on a ball traveling in a circular track</bold></italic></td>
</tr> <tr>
<td valign="top" align="left">5</td>
<td valign="top" align="left">C</td>
<td valign="top" align="left">A force in the direction of motion</td>
<td valign="top" align="center">&#x02212;1.19</td>
</tr>
 <tr>
<td/>
<td valign="top" align="left">D</td>
<td valign="top" align="left">&#x02026;and a centripetal force</td>
<td valign="top" align="center">&#x02212;1.62</td>
</tr>
 <tr>
<td/>
<td valign="top" align="left">E</td>
<td valign="top" align="left">&#x02026;and a centrifugal force</td>
<td valign="top" align="center">&#x02212;1.93</td>
</tr> <tr style="background-color:#dee1e1;">
<td valign="top" align="left" colspan="4"><italic><bold>Forces on a boy swinging on a swing</bold></italic></td>
</tr> <tr>
<td valign="top" align="left">18</td>
<td valign="top" align="left">C</td>
<td valign="top" align="left">A force in the direction of the boy&#x00027;s motion</td>
<td valign="top" align="center">&#x02212;1.18</td>
</tr>
 <tr>
<td/>
<td valign="top" align="left">D</td>
<td valign="top" align="left">&#x02026;and a centripetal force</td>
<td valign="top" align="center">&#x02212;1.96</td>
</tr>
 <tr>
<td/>
<td valign="top" align="left">E</td>
<td valign="top" align="left">&#x02026;and a centrifugal force</td>
<td valign="top" align="center">&#x02212;2.34</td>
</tr></tbody>
</table>
</table-wrap>
<p>Impetus is best described as the Arabic and medieval physics concept that the force from the thrower imparts not only immediate motion to the projectile (as Aristotle said) but also a kind of &#x0201C;internalized force&#x0201D; that continues pushing it forward after it is no longer in contact with the thrower. We stress that this dimension applies impetus force to circular motion but less so to rectilinear motion as shown by the lesser loading on distractors 11B &#x00026; C and 13B &#x00026; C, which involve impetus force in linear motion.</p></sec>
<sec>
<title>6.2.2 Last force determines motion</title>
<p>Another example is distractor vector &#x0201C;Bf4&#x0201D; (i.e., the fourth dimension of the bi-factor rotated slopes), which is shown in <xref ref-type="fig" rid="F4">Figure 4</xref>. The dominant components of this vector are described in <xref ref-type="table" rid="T3">Table 3</xref>.</p>
<fig id="F4" position="float">
<label>Figure 4</label>
<caption><p>Fourth sparse distractor vector (&#x0201C;Bf4&#x0201D;) from bi-factor rotated FCI post-test results, encoding a <italic>Last Force Determines Motion</italic> misconception. Dots are colored red when negative (blue if positive) with size and intensity showing relative magnitude; correct answer choices are marked with a &#x0201C;&#x0002B;.&#x0201D; <xref ref-type="table" rid="T3">Table 3</xref> summarizes the dominant response choices.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpsyg-16-1506320-g0004.tif"/>
</fig>
<table-wrap position="float" id="T3">
<label>Table 3</label>
<caption><p>Most heavily loaded response categories for fourth dimension of bi-factor rotated FCI post-test results (shown in <xref ref-type="fig" rid="F4">Figure 4</xref>).</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:#919498;color:#ffffff">
<th valign="top" align="left"><bold>Item</bold></th>
<th valign="top" align="left"><bold>Choice</bold></th>
<th valign="top" align="left"><bold>Text</bold></th>
<th valign="top" align="center"><bold>Slope <inline-formula><mml:math id="M52"><mml:mover accent="false"><mml:mrow><mml:mi>a</mml:mi></mml:mrow><mml:mo>^</mml:mo></mml:mover></mml:math></inline-formula></bold></th>
</tr>
</thead>
<tbody>
<tr style="background-color:#dee1e1;">
<td valign="top" align="left" colspan="4"><italic><bold>Puck moving along</bold> <italic>x</italic> <bold>is kicked by foot moving along</bold> <italic>y</italic></italic></td>
</tr> <tr>
<td valign="top" align="left">8</td>
<td valign="top" align="left">A</td>
<td valign="top" align="left">Puck goes in direction of kick</td>
<td valign="top" align="center">&#x02212;1.46</td>
</tr> <tr style="background-color:#dee1e1;">
<td valign="top" align="left" colspan="4"><italic><bold>Rocket moving along</bold> <italic>x</italic> <bold>pointing along</bold> <italic>y</italic> <bold>starts firing</bold></italic></td>
</tr>
<tr>
<td valign="top" align="left">21</td>
<td valign="top" align="left">B</td>
<td valign="top" align="left">Rocket goes straight along <italic>y</italic> axis</td>
<td valign="top" align="center">&#x02212;2.44</td>
</tr> <tr style="background-color:#dee1e1;">
<td valign="top" align="left" colspan="4"><italic><bold>Rocket engine is now turned off</bold></italic></td>
</tr> <tr>
<td valign="top" align="left">23</td>
<td valign="top" align="left">A</td>
<td valign="top" align="left">Rocket goes straight along <italic>x</italic> axis</td>
<td valign="top" align="center">&#x02212;1.51</td>
</tr>
 <tr>
<td/>
<td valign="top" align="left">C</td>
<td valign="top" align="left">Rocket goes straight along <italic>y</italic> axis</td>
<td valign="top" align="center">&#x02212;1.97</td>
</tr></tbody>
</table>
</table-wrap>
<p>This dimension is mostly aligned with the (known) misconception that when a new force is applied to a moving object, the direction of motion immediately aligns with that new force, ignoring inertia and motion from previously applied forces&#x02014;described as the &#x0201C;last force to act determines motion&#x0201D; view by Hestenes and Jackson (<xref ref-type="bibr" rid="B13">2010</xref>) who identify categories 8A, 9B, 21B, and 23C as exemplars. This misconception is usually understood to include an expectation that the new motion persists at least initially after a force has ceased to act. Thus the inclusion in this misconception of 23A, where a rocket reverts to its <italic>original</italic> motion once its engine is turned off, is surprising and suggests further confusion about motion when forces <italic>stop</italic> being applied (a topic well outside the scope of this paper).</p></sec></sec>
<sec>
<title>6.3 Summary of results</title>
<p>The limited results we just discussed are clear-cut examples showing that the combination of the MNCM-Bayes method followed by rotations to find sparse distractor vectors can discover known misconceptions&#x02014;indeed, the two just discussed are among the top three found by Wheatley et al. (<xref ref-type="bibr" rid="B29">2022</xref>) using modified module analysis (Wells et al., <xref ref-type="bibr" rid="B28">2019</xref>). Our results also illustrate potential improvements to our understanding of existing misconceptions, for example by showing that the impetus concept applies more strongly for motion in a curved path than in a straight path, or that the belief that only the last force applied determines an object&#x00027;s motion is also associated with a peculiar view of what happens when such forces stop acting.</p>
<p>The remainder of our results in this example application are included in the <xref ref-type="supplementary-material" rid="SM1">supplemental materials</xref> for this paper (both as figures and as tabulated slope coefficients). While several additional dimensions in these results seem to have precedents in prior misconceptions research, we postpone further discussion to a later application-focused paper with a more thorough analysis of our full FCI dataset (&#x0007E;34,000 exams, including pre- and post-instruction data from eight colleges and universities).</p></sec></sec>
<sec id="s7">
<title>7 Concluding remarks</title>
<p>This work makes two primary contributions to the fields of psychometrics and education research:</p>
<list list-type="order">
<list-item><p>We present a Bayesian approach to fitting the very general Multidimensional Nominal Categories Model which combines several recommendations in recent IRT literature&#x02014;including the use of variational inference, hierarchical priors, and a fast approximate parameter initialization method. Using synthetic data, we explore the parameter recovery performance of our procedure, compare it to established IRT software in a limiting one-dimensional case, and demonstrate its robustness and self-limiting dimensionality behavior.</p></list-item>
<list-item><p>We present a fully-exploratory method aimed at discovering student misconceptions from multiple-choice concept test data. This method combines our general MNCM implementation with subsequent dimensional transformations to create sparse loadings which are usefully interpretable. Our findings suggest that this method is most likely to provide useful insights at large sample sizes (10,000&#x0002B;), and a real-data example provides preliminary evidence that this method can recover known misconceptions from student responses to the Force Concept Inventory, a pioneering research-developed concept test for Newtonian mechanics.</p></list-item>
</list>
<p>Overall, this synthesis of modern Bayesian methods with classical IRT and factor analysis techniques shows great promise for discovering student misconceptions from large sets of concept-test response data. While further work is needed to refine and validate our approach, we expect methods such as the one presented here to find broad applications in education, whether for conducting research on misconceptions, developing and refining concept inventories, or improving learning through targeted instruction.</p>
<sec>
<title>7.1 Future work</title>
<p>Many opportunities exist for improving the MNCM-Bayes method or extending its capabilities. One that we are already investigating is the choice of factor rotation method, which must ideally balance ease of interpretation with consistency of the discovered misconceptions. By comparing results obtained using different rotations across data from several colleges, we hope to better inform this choice in future versions of our method. Another modification would be to allow partially-specified patterns of loadings. This would allow us to manually associate some dimensions with particular known misconceptions (by constraining any irrelevant slopes to zero) and could aid in steering any unconstrained dimensions toward novel misconceptions instead of existing ones.</p>
<p>On the applications side, a compelling first question for future study is whether the various misconceptions that we found for the FCI are consistent across different colleges, and whether similar misconceptions are as robustly found in pre-instruction data rather than post-instruction data; we plan to address both topics in a forthcoming paper (in preparation). One could also study how student ability scores for each misconception depend on factors such as gender, preparation, or background. And, by comparing results from pre- and post-instruction data, one might determine which instructional approaches are most effective in reducing the persistence of various misconceptions.</p>
<p>As our methods are applied to other research-designed instruments, it seems likely that new misconceptions&#x02014;or re-contextualized versions of existing ones&#x02014;will be discovered, especially in subject areas where few studies of student misconceptions exist. Finally, we need not limit ourselves to data from traditional concept tests: modified versions of this method could be applied to entire online courses where frequently-given wrong answers have already been mined from student data (as is done by e.g., <italic><ext-link ext-link-type="uri" xlink:href="https://MasteringPhysics.com">MasteringPhysics.com</ext-link></italic> and <italic><ext-link ext-link-type="uri" xlink:href="https://ExpertTA.com">ExpertTA.com</ext-link></italic>) and could be treated as distractors. Further applications await.</p></sec></sec>
</body>
<back>
<sec sec-type="data-availability" id="s8">
<title>Data availability statement</title>
<p>The data analyzed in this study is subject to the following licenses/restrictions: permission from originating institution(s) required to access. Requests to access these datasets should be directed to David Pritchard, <email>dpritch&#x00040;mit.edu</email>.</p></sec>
<sec sec-type="ethics-statement" id="s9">
<title>Ethics statement</title>
<p>Ethical approval was not required for the study involving humans in accordance with the local legislation and institutional requirements. Written informed consent to participate in this study was not required from the participants or the participants&#x00027; legal guardians/next of kin in accordance with the national legislation and the institutional requirements.</p>
</sec>
<sec sec-type="author-contributions" id="s10">
<title>Author contributions</title>
<p>MS: Conceptualization, Data curation, Formal analysis, Investigation, Methodology, Software, Visualization, Writing &#x02013; review &#x00026; editing. AA: Conceptualization, Data curation, Formal analysis, Investigation, Methodology, Software, Visualization, Writing &#x02013; review &#x00026; editing. JS: Data curation, Writing &#x02013; review &#x00026; editing. YM: Conceptualization, Formal analysis, Methodology, Software, Writing &#x02013; original draft. BD: Conceptualization, Data curation, Formal analysis, Resources, Software, Writing &#x02013; original draft. DP: Conceptualization, Data curation, Funding acquisition, Investigation, Methodology, Project administration, Resources, Supervision, Visualization, Writing &#x02013; original draft.</p>
</sec>
<sec sec-type="funding-information" id="s11">
<title>Funding</title>
<p>The author(s) declare financial support was received for the research, authorship, and/or publication of this article. We are grateful for support from MIT and the Department of Physics at MIT as well as from the Center for Ultracold Atoms at MIT and Harvard.</p>
</sec>
<ack><p>We acknowledge help in getting data from H. Dedic, M. Dugdale, A. Fuchs, N. Lasry, S. Osborne Popp, and J. Stewart. We thank Apurva Chauvan for help with calculations. We acknowledge Mohamed Abdulhafez, Elaine Christman, Rachael Henderson, &#x000C1;ngel P&#x000E9;rez-Lemonche, Alexander Shvonski, and James Wells for informative discussions. This work made use of computational resources provided by subMIT at MIT Physics.</p>
</ack>
<sec sec-type="COI-statement" id="conf1">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="ai-statement" id="s12">
<title>Generative AI statement</title>
<p>The author(s) declare that no Gen AI was used in the creation of this manuscript.</p></sec>
<sec sec-type="disclaimer" id="s13">
<title>Publisher&#x00027;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec sec-type="supplementary-material" id="s14">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fpsyg.2025.1506320/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fpsyg.2025.1506320/full#supplementary-material</ext-link></p>
<supplementary-material xlink:href="Data_Sheet_1.csv" id="SM1" mimetype="text/csv" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Data_Sheet_2.pdf" id="SM2" mimetype="application/pdf" xmlns:xlink="http://www.w3.org/1999/xlink"/></sec>
<fn-group>
<fn id="fn0001"><p><sup>1</sup>Note, this use of <italic>b</italic> should not be conflated with the difficulty parameter in the two-parameter logistic IRT model; despite sharing a variable name, the two have distinctly different meanings. Many IRT sources avoid this conflict by instead using <italic>c</italic> for intercepts, though the use of <italic>b</italic> is not without precedent (cf. Takane and de Leeuw, <xref ref-type="bibr" rid="B24">1987</xref>, Eq. 19). We argue that reserving &#x0201C;<italic>c</italic>&#x0201D; as an index variable for categories improves overall readability since the latter appears in formulas far more often than the former.</p></fn>
<fn id="fn0002"><p><sup>2</sup>In the context of an educational assessment, one would assume these &#x0201C;preference values&#x0201D; are derived from a student&#x00027;s perceived likelihood of each option being the correct response.</p></fn>
<fn id="fn0003"><p><sup>3</sup>The &#x0201C;learning rate&#x0201D; is the Adam analog of a step size in other gradient descent optimization methods.</p></fn>
<fn id="fn0004"><p><sup>4</sup>Specifically, we adapt &#x0201C;Algorithm 1&#x0201D; of Zhang et al. (<xref ref-type="bibr" rid="B30">2020</xref>, pp. 359&#x02013;360) to work with nominal data by setting their inverse link function &#x0201C;<italic>f</italic>&#x0201D; to our <xref ref-type="disp-formula" rid="E1">Equation 1</xref> and their input response matrix &#x0201C;<italic><bold>Y</bold></italic>&#x0201D; to an <italic>S</italic> &#x000D7; <italic>IC</italic> indicator matrix with components derived from our (nominal) matrix <italic><bold>R</bold></italic> according to</p>
<p><disp-formula id="E12"><mml:math id="M25"><mml:mrow><mml:msubsup><mml:mi>y</mml:mi><mml:mi>s</mml:mi><mml:mrow><mml:mo stretchy='false'>(</mml:mo><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>c</mml:mi><mml:mo stretchy='false'>)</mml:mo></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:mtable columnalign='left'><mml:mtr columnalign='left'><mml:mtd columnalign='left'><mml:mn>1</mml:mn></mml:mtd><mml:mtd columnalign='left'><mml:mrow><mml:mtext>if&#x000A0;</mml:mtext><mml:msubsup><mml:mi>r</mml:mi><mml:mi>s</mml:mi><mml:mrow><mml:mo stretchy='false'>(</mml:mo><mml:mi>i</mml:mi><mml:mo stretchy='false'>)</mml:mo></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:mi>c</mml:mi><mml:mo>,</mml:mo></mml:mrow></mml:mtd></mml:mtr><mml:mtr columnalign='left'><mml:mtd columnalign='left'><mml:mn>0</mml:mn></mml:mtd><mml:mtd columnalign='left'><mml:mrow><mml:mtext>otherwise</mml:mtext><mml:mo>.</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:mrow></mml:mrow></mml:math></disp-formula></p>
<p>The number of factors &#x0201C;<italic>K</italic>&#x0201D; and truncation parameter &#x0201C;&#x003F5;&#x0201D; in Algorithm 1 are set to <italic>D</italic> and 10<sup>&#x02212;4</sup>, respectively. The outputs of the algorithm serve as initial guesses for the posterior means of <italic><bold>&#x00398;</bold></italic>, <italic><bold>A</bold></italic>, and <italic><bold>b</bold></italic>, with the corresponding initial guesses for <bold>&#x003B1;</bold> and &#x003B2; computed by taking the standard deviations of <italic><bold>A</bold></italic> (separately for each dimension) and <italic><bold>b</bold></italic>.</p></fn>
<fn id="fn0005"><p><sup>5</sup>This may be seen by factoring the inner product of <italic><bold>&#x00398;</bold></italic> and <italic><bold>A</bold></italic> via compact singular value decomposition followed by some algebraic manipulation,</p>
<p><disp-formula id="E13"><mml:math id="M36"><mml:mtable columnalign="left"><mml:mtr><mml:mtd><mml:mstyle mathvariant="bold-italic"><mml:mi>&#x00398;</mml:mi></mml:mstyle><mml:mstyle mathvariant="bold-italic"><mml:mi>A</mml:mi></mml:mstyle><mml:mo>=</mml:mo><mml:mstyle mathvariant="bold-italic"><mml:mi>U</mml:mi></mml:mstyle><mml:mstyle mathvariant="bold-italic"><mml:mi>&#x003A3;</mml:mi></mml:mstyle><mml:msup><mml:mrow><mml:mstyle mathvariant="bold-italic"><mml:mi>V</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:mo>&#x022BA;</mml:mo></mml:mrow></mml:msup><mml:mo>=</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mstyle mathvariant="bold-italic"><mml:mi>U</mml:mi></mml:mstyle><mml:msqrt><mml:mrow><mml:mi>S</mml:mi></mml:mrow></mml:msqrt></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mstyle mathvariant="bold-italic"><mml:mi>&#x003A3;</mml:mi></mml:mstyle><mml:msup><mml:mrow><mml:mstyle mathvariant="bold-italic"><mml:mi>V</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:mo>&#x022BA;</mml:mo></mml:mrow></mml:msup><mml:mstyle mathsize="1.19em"><mml:mrow><mml:mo>/</mml:mo></mml:mrow></mml:mstyle><mml:msqrt><mml:mrow><mml:mi>S</mml:mi></mml:mrow></mml:msqrt></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:msup><mml:mrow><mml:mstyle mathvariant="bold-italic"><mml:mi>&#x00398;</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:mo>&#x022C6;</mml:mo></mml:mrow></mml:msup><mml:msup><mml:mrow><mml:mstyle mathvariant="bold-italic"><mml:mi>A</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:mo>&#x022C6;</mml:mo></mml:mrow></mml:msup><mml:mo>,</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula></p>
<p>where <italic><bold>U</bold></italic> &#x02208; &#x0211D;<sup><italic>S</italic> &#x000D7; <italic>D</italic></sup> and <italic><bold>V</bold></italic><sup>&#x022BA;</sup> &#x02208; &#x0211D;<sup><italic>D</italic> &#x000D7; <italic>IC</italic></sup> are both semi-orthogonal and <bold>&#x003A3;</bold> &#x02208; &#x0211D;<sup><italic>D</italic> &#x000D7; <italic>D</italic></sup> is diagonal. The transformed variables <inline-formula><mml:math id="M37"><mml:msup><mml:mrow><mml:mstyle mathvariant="bold-italic"><mml:mi>&#x00398;</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:mo>&#x022C6;</mml:mo></mml:mrow></mml:msup><mml:mo>&#x02261;</mml:mo><mml:mstyle mathvariant="bold-italic"><mml:mi>U</mml:mi></mml:mstyle><mml:msqrt><mml:mrow><mml:mi>S</mml:mi></mml:mrow></mml:msqrt></mml:math></inline-formula> and <inline-formula><mml:math id="M38"><mml:msup><mml:mrow><mml:mstyle mathvariant="bold-italic"><mml:mi>A</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:mo>&#x022C6;</mml:mo></mml:mrow></mml:msup><mml:mo>&#x02261;</mml:mo><mml:mstyle mathvariant="bold-italic"><mml:mi>&#x003A3;</mml:mi></mml:mstyle><mml:msup><mml:mrow><mml:mstyle mathvariant="bold-italic"><mml:mi>V</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:mo>&#x022BA;</mml:mo></mml:mrow></mml:msup><mml:mo>/</mml:mo><mml:msqrt><mml:mrow><mml:mi>S</mml:mi></mml:mrow></mml:msqrt></mml:math></inline-formula> then have diagonal covariance matrices Cov[<italic><bold>&#x00398;</bold></italic><sup>&#x022C6;</sup>] &#x0003D; <italic><bold>I</bold></italic> and Cov[<italic><bold>A</bold></italic><sup>&#x022C6;</sup>] &#x0003D; <bold>&#x003A3;</bold><sup>2</sup>/(<italic>SIC</italic>), with the entries of the latter conventionally arranged in descending order.</p></fn>
<fn id="fn0006"><p><sup>6</sup>These &#x0201C;unused&#x0201D; dimensions did still seem to contribute a small but consistent amount of noise to the computed tendencies, despite the fact that an additive noise term should not be necessary in a logistic model like the MNCM. We have not yet determined whether this behavior reflects the &#x0201C;true&#x0201D; posterior distribution of the model parameters or is merely an artifact arising from our use of approximate inference techniques.</p></fn>
</fn-group>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="thesis"><person-group person-group-type="author"><name><surname>Adair</surname> <given-names>A. M.</given-names></name></person-group> (<year>2013</year>). <source>Student Misconceptions about Newtonian Mechanics: Origins and Solutions through Changes to Instruction</source>. PhD thesis, <publisher-name>The Ohio State University</publisher-name>.</citation>
</ref>
<ref id="B2">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bernaards</surname> <given-names>C. A.</given-names></name> <name><surname>Jennrich</surname> <given-names>R. I.</given-names></name></person-group> (<year>2005</year>). <article-title>Gradient projection algorithms and software for arbitrary rotation criteria in factor analysis</article-title>. <source>Educ. Psychol. Meas</source>. <volume>65</volume>, <fpage>676</fpage>&#x02013;<lpage>696</lpage>. <pub-id pub-id-type="doi">10.1177/0013164404272507</pub-id></citation>
</ref>
<ref id="B3">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Blei</surname> <given-names>D. M.</given-names></name> <name><surname>Kucukelbir</surname> <given-names>A.</given-names></name> <name><surname>McAuliffe</surname> <given-names>J. D.</given-names></name></person-group> (<year>2017</year>). <article-title>Variational inference: a review for statisticians</article-title>. <source>J. Am. Stat. Assoc</source>. <volume>112</volume>, <fpage>859</fpage>&#x02013;<lpage>877</lpage>. <pub-id pub-id-type="doi">10.1080/01621459.2017.1285773</pub-id></citation>
</ref>
<ref id="B4">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bock</surname> <given-names>R. D.</given-names></name></person-group> (<year>1972</year>). <article-title>Estimating item parameters and latent ability when responses are scored in two or more nominal categories</article-title>. <source>Psychometrika</source> <volume>37</volume>, <fpage>29</fpage>&#x02013;<lpage>51</lpage>. <pub-id pub-id-type="doi">10.1007/BF02291411</pub-id></citation>
</ref>
<ref id="B5">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bock</surname> <given-names>R. D.</given-names></name> <name><surname>Gibbons</surname> <given-names>R.</given-names></name> <name><surname>Muraki</surname> <given-names>E.</given-names></name></person-group> (<year>1988</year>). <article-title>Full-information item factor analysis</article-title>. <source>Appl. Psychol. Meas</source>. <volume>12</volume>, <fpage>261</fpage>&#x02013;<lpage>280</lpage>. <pub-id pub-id-type="doi">10.1177/014662168801200305</pub-id></citation>
</ref>
<ref id="B6">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Brown</surname> <given-names>D. E.</given-names></name></person-group> (<year>2014</year>). <article-title>Students&#x00027; conceptions as dynamically emergent structures</article-title>. <source>Sci. Educ</source>. <volume>23</volume>, <fpage>1463</fpage>&#x02013;<lpage>1483</lpage>. <pub-id pub-id-type="doi">10.1007/s11191-013-9655-9</pub-id></citation>
</ref>
<ref id="B7">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Burkholder</surname> <given-names>E.</given-names></name> <name><surname>Blackmon</surname> <given-names>L.</given-names></name> <name><surname>Wieman</surname> <given-names>C.</given-names></name></person-group> (<year>2020</year>). <article-title>Characterizing the mathematical problem-solving strategies of transitioning novice physics students</article-title>. <source>Phys. Rev. Phys. Educ. Res</source>. <volume>16</volume>:<fpage>020134</fpage>. <pub-id pub-id-type="doi">10.1103/PhysRevPhysEducRes.16.020134</pub-id></citation>
</ref>
<ref id="B8">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chalmers</surname> <given-names>R. P.</given-names></name></person-group> (<year>2012</year>). <article-title>Mirt: a multidimensional item response theory package for the R environment</article-title>. <source>J. Stat. Softw</source>. <volume>48</volume>, <fpage>1</fpage>&#x02013;<lpage>29</lpage>. <pub-id pub-id-type="doi">10.18637/jss.v048.i06</pub-id></citation>
</ref>
<ref id="B9">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chi</surname> <given-names>M. T.</given-names></name> <name><surname>Slotta</surname> <given-names>J. D.</given-names></name></person-group> (<year>1993</year>). <article-title>The ontological coherence of intuitive physics</article-title>. <source>Cogn. Instr</source>. <volume>10</volume>, <fpage>249</fpage>&#x02013;<lpage>260</lpage>. <pub-id pub-id-type="doi">10.1080/07370008.1985.9649011</pub-id></citation>
</ref>
<ref id="B10">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>diSessa</surname> <given-names>A. A.</given-names></name></person-group> (<year>1993</year>). <article-title>Toward an epistemology of physics</article-title>. <source>Cogn. Instr</source>. <volume>10</volume>, <fpage>105</fpage>&#x02013;<lpage>225</lpage>. <pub-id pub-id-type="doi">10.1080/07370008.1985.9649008</pub-id></citation>
</ref>
<ref id="B11">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gette</surname> <given-names>C. R.</given-names></name> <name><surname>Kryjevskaia</surname> <given-names>M.</given-names></name> <name><surname>Stetzer</surname> <given-names>M. R.</given-names></name> <name><surname>Heron</surname> <given-names>P. R. L.</given-names></name></person-group> (<year>2018</year>). <article-title>Probing student reasoning approaches through the lens of dual-process theories: a case study in buoyancy</article-title>. <source>Phys. Rev. Phys. Educ. Res</source>. <volume>14</volume>:<fpage>010113</fpage>. <pub-id pub-id-type="doi">10.1103/PhysRevPhysEducRes.14.010113</pub-id></citation>
</ref>
<ref id="B12">
<citation citation-type="web"><person-group person-group-type="author"><name><surname>Halloun</surname> <given-names>I.</given-names></name> <name><surname>Hake</surname> <given-names>R.</given-names></name> <name><surname>Mosca</surname> <given-names>E.</given-names></name> <name><surname>Hestenes</surname> <given-names>D.</given-names></name></person-group> (<year>1995</year>). <source>Force concept inventory, revised version (v95)</source>. Available at: <ext-link ext-link-type="uri" xlink:href="https://www.physport.org/assessments/FCI">https://www.physport.org/assessments/FCI</ext-link> (accessed January 15, 2025).</citation>
</ref>
<ref id="B13">
<citation citation-type="web"><person-group person-group-type="author"><name><surname>Hestenes</surname> <given-names>D.</given-names></name> <name><surname>Jackson</surname> <given-names>J.</given-names></name></person-group> (<year>2010</year>). <source>Table II for the force concept inventory (revised form 081695R)</source>. Available at: <ext-link ext-link-type="uri" xlink:href="https://modeling.asu.edu/R&#x00026;E/FCI-RevisedTable-II_2010.pdf">https://modeling.asu.edu/R&#x00026;E/FCI-RevisedTable-II_2010.pdf</ext-link> (accessed January 15, 2025).</citation>
</ref>
<ref id="B14">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hestenes</surname> <given-names>D.</given-names></name> <name><surname>Wells</surname> <given-names>M.</given-names></name> <name><surname>Swackhamer</surname> <given-names>G.</given-names></name></person-group> (<year>1992</year>). <article-title>Force concept inventory</article-title>. <source>Phys. Teach</source>. <volume>30</volume>, <fpage>141</fpage>&#x02013;<lpage>158</lpage>. <pub-id pub-id-type="doi">10.1119/1.2343497</pub-id></citation>
</ref>
<ref id="B15">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Jennrich</surname> <given-names>R. I.</given-names></name> <name><surname>Bentler</surname> <given-names>P. M.</given-names></name></person-group> (<year>2011</year>). <article-title>Exploratory bi-factor analysis</article-title>. <source>Psychometrika</source> <volume>76</volume>, <fpage>537</fpage>&#x02013;<lpage>549</lpage>. <pub-id pub-id-type="doi">10.1007/s11336-011-9218-4</pub-id><pub-id pub-id-type="pmid">22232562</pub-id></citation></ref>
<ref id="B16">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kingma</surname> <given-names>D. P.</given-names></name> <name><surname>Ba</surname> <given-names>J.</given-names></name></person-group> (<year>2014</year>). <article-title>Adam: A method for stochastic optimization</article-title>. <source>arXiv Preprint arXiv:1412.6980</source>.</citation>
</ref>
<ref id="B17">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Natesan</surname> <given-names>P.</given-names></name> <name><surname>Nandakumar</surname> <given-names>R.</given-names></name> <name><surname>Minka</surname> <given-names>T.</given-names></name> <name><surname>Rubright</surname> <given-names>J. D.</given-names></name></person-group> (<year>2016</year>). <article-title>Bayesian prior choice in IRT estimation using MCMC and variational Bayes</article-title>. <source>Front. Psychol</source>. <volume>7</volume>:<fpage>1422</fpage>. <pub-id pub-id-type="doi">10.3389/fpsyg.2016.01422</pub-id><pub-id pub-id-type="pmid">27729878</pub-id></citation></ref>
<ref id="B18">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>P&#x000E9;rez-Lemonche</surname> <given-names>A.</given-names></name> <name><surname>Stewart</surname> <given-names>J.</given-names></name> <name><surname>Drury</surname> <given-names>B.</given-names></name> <name><surname>Henderson</surname> <given-names>R.</given-names></name> <name><surname>Shvonski</surname> <given-names>A.</given-names></name> <name><surname>Pritchard</surname> <given-names>D. E.</given-names></name></person-group> (<year>2019</year>). <article-title>&#x0201C;Mining students pre-instruction beliefs for improved learning,&#x0201D;</article-title> in <source>Proceedings of the Sixth (2019) ACM Conference on Learning &#x00040; Scale, L&#x00040;S &#x00027;19</source> (<publisher-name>Association for Computing Machinery</publisher-name>). <pub-id pub-id-type="doi">10.1145/3330430.3333637</pub-id></citation>
</ref>
<ref id="B19">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Phan</surname> <given-names>D.</given-names></name> <name><surname>Pradhan</surname> <given-names>N.</given-names></name> <name><surname>Jankowiak</surname> <given-names>M.</given-names></name></person-group> (<year>2019</year>). <article-title>Composable effects for flexible and accelerated probabilistic programming in NumPyro</article-title>. <source>arXiv Preprint arXiv:1912.11554</source>.</citation>
</ref>
<ref id="B20">
<citation citation-type="book"><person-group person-group-type="author"><collab>R Core Team</collab></person-group> (<year>2021</year>). <source>R: A Language and Environment for Statistical Computing</source>. <publisher-loc>Vienna, Austria</publisher-loc>: <publisher-name>R Foundation for Statistical Computing</publisher-name>.</citation>
</ref>
<ref id="B21">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Revuelta</surname> <given-names>J.</given-names></name> <name><surname>Xim&#x000E9;nez</surname> <given-names>C.</given-names></name></person-group> (<year>2017</year>). <article-title>Bayesian dimensionality assessment for the multidimensional nominal response model</article-title>. <source>Front. Psychol</source>. <volume>8</volume>:<fpage>961</fpage>. <pub-id pub-id-type="doi">10.3389/fpsyg.2017.00961</pub-id><pub-id pub-id-type="pmid">28670291</pub-id></citation></ref>
<ref id="B22">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sch&#x000F6;nemann</surname> <given-names>P. H.</given-names></name></person-group> (<year>1966</year>). <article-title>A generalized solution of the orthogonal Procrustes problem</article-title>. <source>Psychometrika</source> <volume>31</volume>, <fpage>1</fpage>&#x02013;<lpage>10</lpage>. <pub-id pub-id-type="doi">10.1007/BF02289451</pub-id></citation>
</ref>
<ref id="B23">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Stewart</surname> <given-names>J.</given-names></name> <name><surname>Drury</surname> <given-names>B.</given-names></name> <name><surname>Wells</surname> <given-names>J.</given-names></name> <name><surname>Adair</surname> <given-names>A.</given-names></name> <name><surname>Henderson</surname> <given-names>R.</given-names></name> <name><surname>Ma</surname> <given-names>Y.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>Examining the relation of correct knowledge and misconceptions using the nominal response model</article-title>. <source>Phys. Rev. Phys. Educ. Res</source>. <volume>17</volume>:<fpage>010122</fpage>. <pub-id pub-id-type="doi">10.1103/PhysRevPhysEducRes.17.010122</pub-id></citation>
</ref>
<ref id="B24">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Takane</surname> <given-names>Y.</given-names></name> <name><surname>de Leeuw</surname> <given-names>J.</given-names></name></person-group> (<year>1987</year>). <article-title>On the relationship between item response theory and factor analysis of discretized variables</article-title>. <source>Psychometrika</source> <volume>52</volume>, <fpage>393</fpage>&#x02013;<lpage>408</lpage>. <pub-id pub-id-type="doi">10.1007/BF02294363</pub-id></citation>
</ref>
<ref id="B25">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Thissen</surname> <given-names>D.</given-names></name> <name><surname>Cai</surname> <given-names>L.</given-names></name></person-group> (<year>2016</year>). <article-title>&#x0201C;Nominal categories models,&#x0201D;</article-title> in <source>Handbook of Item Response Theory</source> (<publisher-name>Chapman and Hall/CRC</publisher-name>).</citation>
</ref>
<ref id="B26">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Thissen</surname> <given-names>D.</given-names></name> <name><surname>Cai</surname> <given-names>L.</given-names></name> <name><surname>Bock</surname> <given-names>R. D.</given-names></name></person-group> (<year>2010</year>). <article-title>&#x0201C;The nominal categories item response model,&#x0201D;</article-title> in <source>Handbook of Polytomous Item Response Theory Models</source> (<publisher-loc>London</publisher-loc>: <publisher-name>Routledge</publisher-name>).</citation>
</ref>
<ref id="B27">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>van der Linden</surname> <given-names>W. J.</given-names></name></person-group> (<year>2016</year>). <source>Handbook of Item Response Theory, Volume 2: Statistical Tools</source>. <publisher-loc>New York</publisher-loc>: <publisher-name>CRC Press, Taylor &#x00026; Francis Group</publisher-name>. <pub-id pub-id-type="doi">10.1201/9781315374512</pub-id></citation>
</ref>
<ref id="B28">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wells</surname> <given-names>J.</given-names></name> <name><surname>Henderson</surname> <given-names>R.</given-names></name> <name><surname>Stewart</surname> <given-names>J.</given-names></name> <name><surname>Stewart</surname> <given-names>G.</given-names></name> <name><surname>Yang</surname> <given-names>J.</given-names></name> <name><surname>Traxler</surname> <given-names>A.</given-names></name></person-group> (<year>2019</year>). <article-title>Exploring the structure of misconceptions in the force concept inventory with modified module analysis</article-title>. <source>Phys. Rev. Phys. Educ. Res</source>. <volume>15</volume>:<fpage>020122</fpage>. <pub-id pub-id-type="doi">10.1103/PhysRevPhysEducRes.15.020122</pub-id></citation>
</ref>
<ref id="B29">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wheatley</surname> <given-names>C.</given-names></name> <name><surname>Wells</surname> <given-names>J.</given-names></name> <name><surname>Pritchard</surname> <given-names>D. E.</given-names></name> <name><surname>Stewart</surname> <given-names>J.</given-names></name></person-group> (<year>2022</year>). <article-title>Comparing conceptual understanding across institutions with module analysis</article-title>. <source>Phys. Rev. Phys. Educ. Res</source>. <volume>18</volume>:<fpage>020132</fpage>. <pub-id pub-id-type="doi">10.1103/PhysRevPhysEducRes.18.020132</pub-id><pub-id pub-id-type="pmid">39022924</pub-id></citation></ref>
<ref id="B30">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>H.</given-names></name> <name><surname>Chen</surname> <given-names>Y.</given-names></name> <name><surname>Li</surname> <given-names>X.</given-names></name></person-group> (<year>2020</year>). <article-title>A note on exploratory item factor analysis by singular value decomposition</article-title>. <source>Psychometrika</source> <volume>85</volume>, <fpage>358</fpage>&#x02013;<lpage>372</lpage>. <pub-id pub-id-type="doi">10.1007/s11336-020-09704-7</pub-id><pub-id pub-id-type="pmid">32451743</pub-id></citation></ref>
</ref-list>
</back>
</article> 