<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Archiving and Interchange DTD v2.3 20070202//EN" "archivearticle.dtd">
<article xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="methods-article">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Psychol.</journal-id>
<journal-title>Frontiers in Psychology</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Psychol.</abbrev-journal-title>
<issn pub-type="epub">1664-1078</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fpsyg.2025.1463790</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Psychology</subject>
<subj-group>
<subject>Methods</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Mixture Multilevel SEM vs. Multilevel SEM for comparing structural relations across groups in presence of measurement non-invariance</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name><surname>Zhao</surname> <given-names>Hongwei</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x0002A;</sup></xref>
<xref ref-type="author-notes" rid="fn002"><sup>&#x02020;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/2790855/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Vermunt</surname> <given-names>Jeroen K.</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="author-notes" rid="fn003"><sup>&#x02020;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/1534706/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>De Roover</surname> <given-names>Kim</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="author-notes" rid="fn004"><sup>&#x02020;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/135023/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
</contrib>
</contrib-group>
<aff id="aff1"><sup>1</sup><institution>Quantitative Psychology and Individual Differences, KU Leuven</institution>, <addr-line>Leuven</addr-line>, <country>Belgium</country></aff>
<aff id="aff2"><sup>2</sup><institution>Department of Methodology and Statistics, Tilburg University</institution>, <addr-line>Tilburg</addr-line>, <country>Netherlands</country></aff>
<author-notes>
<fn fn-type="edited-by"><p>Edited by: Holmes Finch, Ball State University, United States</p></fn>
<fn fn-type="edited-by"><p>Reviewed by: Carmen Xim&#x000E9;nez, Autonomous University of Madrid, Spain</p>
<p>Zhenwei Dai, Peking University Institute of Mental Health, China</p></fn>
<corresp id="c001">&#x0002A;Correspondence: Hongwei Zhao <email>hongwei.zhao&#x00040;kuleuven.be</email></corresp>
<fn fn-type="other" id="fn002"><p>&#x02020;ORCID: Hongwei Zhao <ext-link ext-link-type="uri" xlink:href="https://orcid.org/0009-0008-9372-4986">orcid.org/0009-0008-9372-4986</ext-link></p></fn>
<fn fn-type="other" id="fn003"><p>Jeroen K. Vermunt <ext-link ext-link-type="uri" xlink:href="https://orcid.org/0000-0001-9053-9330">orcid.org/0000-0001-9053-9330</ext-link></p></fn>
<fn fn-type="other" id="fn004"><p>Kim De Roover <ext-link ext-link-type="uri" xlink:href="https://orcid.org/0000-0002-0299-0648">orcid.org/0000-0002-0299-0648</ext-link></p></fn></author-notes>
<pub-date pub-type="epub">
<day>28</day>
<month>07</month>
<year>2025</year>
</pub-date>
<pub-date pub-type="collection">
<year>2025</year>
</pub-date>
<volume>16</volume>
<elocation-id>1463790</elocation-id>
<history>
<date date-type="received">
<day>12</day>
<month>07</month>
<year>2024</year>
</date>
<date date-type="accepted">
<day>26</day>
<month>06</month>
<year>2025</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x000A9; 2025 Zhao, Vermunt and De Roover.</copyright-statement>
<copyright-year>2025</copyright-year>
<copyright-holder>Zhao, Vermunt and De Roover</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p></license>
</permissions>
<abstract>
<p>Structural equation modeling (SEM) is commonly used to explore relations between latent variables, such as beliefs and attitudes. However, comparing structural relations across a large number of groups, such as countries or classrooms, can be challenging. Existing SEM approaches may fall short, especially when measurement non-invariance is present. In this paper, we propose Mixture Multilevel SEM (MixML-SEM), a novel approach to comparing relationships between latent variables across many groups. MixML-SEM gathers groups with the same structural relations in a cluster, while accounting for measurement non-invariance in a parsimonious way by means of random effects. Specifically, MixML-SEM captures measurement non-invariance using multilevel confirmatory factor analysis and, then, it estimates the structural relations and mixture clustering of the groups by means of the structural-after-measurement approach. In this way, MixML-SEM ensures that the clustering is focused on structural relations and unaffected by differences in measurement. In contrast, Multilevel SEM (ML-SEM) estimates measurement and structural models simultaneously, and both with random effects. In comparison to ML-SEM, MixML-SEM provides better estimates of the structural relations, especially when (some of) the groups are large. This is because combining information from multiple groups within a cluster leads to more accurate estimates of the structural relations, whereas, in case of ML-SEM, these estimates are affected by shrinkage bias. We demonstrate the advantages of MixML-SEM through simulations and an empirical example on how social pressure to be happy relates to life satisfaction across 40 countries.</p></abstract>
<kwd-group>
<kwd>structural equation modeling (SEM)</kwd>
<kwd>measurement invariance (MI)</kwd>
<kwd>multilevel modeling</kwd>
<kwd>mixture modeling</kwd>
<kwd>multielvel SEM</kwd>
</kwd-group>
<counts>
<fig-count count="7"/>
<table-count count="5"/>
<equation-count count="23"/>
<ref-count count="57"/>
<page-count count="22"/>
<word-count count="17301"/>
</counts>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Quantitative Psychology and Measurement</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="s1">
<title>Introduction</title>
<p>Social science research often aims to gain insight into complex human behavior by studying the relations between constructs (e.g., satisfaction, emotions), often quantified by regression coefficients. Comparing these relations across groups helps reveal how they differ across populations, cultures, or contexts. For instance, Kuppens et al. (<xref ref-type="bibr" rid="B33">2008</xref>) explored the association between life satisfaction and positive and negative emotions across 46 countries, while Pakarinen et al. (<xref ref-type="bibr" rid="B43">2020</xref>) investigated how emotional support from teachers related to the development of social competence in children across 47 preschool classrooms.</p>
<p>Structural Equation Modeling (SEM; Bollen, <xref ref-type="bibr" rid="B6">1989</xref>; Hoyle, <xref ref-type="bibr" rid="B27">2012</xref>) is the state-of-the-art technique for analyzing relations among several constructs, referred to as &#x0201C;structural relations&#x0201D; in this framework. In studies involving multiple groups, Multigroup SEM allows researchers to estimate a SEM model for all groups and test whether the structural relations are consistent across groups. When many groups are involved, differences in structural relations are more likely to emerge. Conducting pairwise comparisons of the group-specific relations can help identify group differences and similarities, but this quickly becomes infeasible as the number of groups increases (e.g., 1,035 pairwise comparisons for 46 groups). Multilevel SEM (ML-SEM) can parsimoniously capture differences in structural relations across many groups by means of normally distributed random effects, but this does not help in achieving our primary goal of pinpointing which groups differ and how. Adding group-level moderators to the structural relations may still fail to explain the differences (e.g., Brandt et al., <xref ref-type="bibr" rid="B7">2021</xref>).</p>
<p>In scenarios with many groups, some groups likely have equivalent structural relations&#x02014;for example, due to a shared cultural background&#x02014;so that &#x0201C;latent classes&#x0201D; or &#x0201C;clusters&#x0201D; of groups with common relations arise. Identifying these clusters would thus be an intuitive and efficient alternative to pairwise comparisons, which can be done by means of mixture modeling (McLachlan and Peel, <xref ref-type="bibr" rid="B35">2000</xref>).</p>
<p>Before clustering groups on structural relations, we need to consider whether these relations can be validly compared across the groups. In social sciences, the constructs of interest are typically &#x0201C;latent&#x0201D; variables that are measured indirectly through indicator variables, like questionnaire items, which contain measurement error. In SEM, the latent nature of the constructs&#x02014;also called &#x0201C;factors&#x0201D;&#x02014;is accounted for by estimating a measurement model (MM) for each construct (capturing how it is measured by observed indicators), as well as the relations between the constructs&#x02014;which are part of the structural model (SM). For ensuring comparability of constructs across groups, some degree of &#x0201C;measurement invariance&#x0201D; (MI) should hold, meaning that the constructs are measured equivalently across groups so that differences in the (relations between) constructs are not due to differences in how they are measured.</p>
<p>Different levels of MI pertain to different subsets of measurement model parameters. Configural invariance concerns equivalence of the factor loading structure (i.e., the number of factors and the pattern of zero and non-zero loadings) across groups, where factor loadings capture the factor-indicator relations so that configural invariance implies that a construct is measured by the same set of items in all groups. Metric invariance concerns equality of the factor loadings (i.e., the strength and direction of the factor-indicator relations). Scalar invariance requires equality of item intercepts. Lastly, residual invariance implies equality of the items&#x00027; residual or &#x0201C;unique&#x0201D; variances (i.e., not explained by the factor). To make accurate between-group comparisons of construct relations, metric invariance (Davidov et al., <xref ref-type="bibr" rid="B13">2012</xref>) should hold across groups. With many groups, however, measurement non-invariance is often encountered (e.g., Boer et al., <xref ref-type="bibr" rid="B5">2018</xref>; Rutkowski and Svetina, <xref ref-type="bibr" rid="B50">2014</xref>). If equality does not hold for all factor loadings, partial metric invariance still enables valid comparisons of structural relations, meaning that some loadings are invariant while others differ (Byrne et al., <xref ref-type="bibr" rid="B9">1989</xref>; Pokropek et al., <xref ref-type="bibr" rid="B46">2019</xref>). Non-invariance of item intercepts and unique variances do not invalidate the comparison of structural relations. To avoid incorrect (biased) estimates of structural relations, differences in loadings, intercepts and unique variances must be accounted for within the SEM model (Chen, <xref ref-type="bibr" rid="B11">2008</xref>; Guenole and Brown, <xref ref-type="bibr" rid="B23">2014</xref>; Pokropek et al., <xref ref-type="bibr" rid="B46">2019</xref>).</p>
<p>From this reflection on MI, we conclude that differences in structural relations are not the only between-group differences we can encounter in SEM, but they are the only differences we want to capture by clustering. However, traditional mixture SEM methods (Arminger and Stein, <xref ref-type="bibr" rid="B2">1997</xref>; Dolan and van der Maas, <xref ref-type="bibr" rid="B22">1998</xref>; Jedidi et al., <xref ref-type="bibr" rid="B30">1997</xref>) capture differences in all SEM parameters with clustering&#x02014;including MM parameters (loadings, intercepts, and unique variances) as well as SM parameters (structural relations and factor means)&#x02014;except for parameters that are constrained to be equal across clusters. As a result, this method may need many clusters to capture both measurement non-invariances and structural differences, or it may mix up the two sources of differences or capture the most dominant differences only, failing to isolate differences in structural relations.</p>
<p>To solve this shortcoming, Perez Alonso and colleagues (Perez Alonso et al., <xref ref-type="bibr" rid="B44">2024</xref>) proposed Mixture Multigroup SEM (MixMG-SEM), which uses mixture modeling to cluster groups based on their structural relations while accounting for potential measurement non-invariance by keeping the MM partially group-specific. Unlike other mixture SEM approaches, MixMG-SEM ensures that the clustering is solely based on structural relations rather than (also) on differences in how the constructs are measured. However, when the sample size per group is small (say 50 or less, or barely higher than the number of variables), taking a multigroup approach to capturing measurement non-invariance may not converge or result in less accurate and inefficient estimates of the group-specific measurement parameters, which may propagate into the SM and affect the estimates of the structural relations as well as the clustering based on these relations. Therefore, in this paper, we propose a more parsimonious alternative: Mixture Multilevel SEM (MixML-SEM). It uses the multilevel approach to capturing measurement non-invariance, while comparing structural relations across groups by means of a mixture clustering. Hence, it accounts for between-group differences in measurement parameters by means of random effects at the group-level, reducing the number of parameters and thus resulting in more efficient estimates (Hox et al., <xref ref-type="bibr" rid="B26">2017</xref>). Note that at least 30 or 50 groups are needed to obtain valid estimates of the random effects (Leitg&#x000F6;b et al., <xref ref-type="bibr" rid="B34">2023</xref>), however.</p>
<p>While the multilevel approach is commonly favored for modeling differences among many groups, in MixML-SEM, we employ it specifically for the MM but not for the structural relations. This distinguishes MixML-SEM from ML-SEM. As mentioned above, modeling variance in structural relations does not help in pinpointing similarities and differences across groups. Furthermore, group-specific estimates derived from random effects are systematically biased toward the overall mean parameter value, especially when the sample size per group are small (Hox et al., <xref ref-type="bibr" rid="B26">2017</xref>), impairing the comparisons of group-specific regression coefficients resulting from ML-SEM. In contrast, MixML-SEM provides more accurate estimates of the structural relations by combining information from multiple groups within a cluster when estimating the regression coefficients (i.e., they are directly estimated as cluster-specific parameters), which alleviates the effect of having small sample size per group (as long as the clusters are still sufficiently separated).</p>
<p>For estimating MixML-SEM, we build on the &#x0201C;Structural-After-Measurement&#x0201D; (SAM) framework presented by Rosseel and Loh (<xref ref-type="bibr" rid="B49">2024</xref>), which decouples the estimation of the measurement and structural parts of the SEM model. For the estimation of MixML-SEM, we adopt a tailored variant of the local SAM approach, where the estimation of structural relations operates directly on the covariances between factors and, thus, no longer involves the measurement parameters. Specifically, a multilevel factor analysis is performed per factor, while accounting for potential measurement non-invariances with random effects. Factor scores (i.e., estimated latent variable scores) are extracted for each factor. Then, these factor scores are used as scores on a single indicator for the factor, for which measurement parameters are derived, and Croon&#x00027;s correction (Croon, <xref ref-type="bibr" rid="B12">2002</xref>) is applied to compute the factor covariances. These factor covariances are the input for the final step, which boils down to a mixture multigroup path model, estimating the mixture clustering of the groups and the cluster-specific regression relations among the factors.</p>
<p>To conclude, this paper presents MixML-SEM for efficiently comparing structural relations across many groups (by means of mixture modeling), while handling measurement non-invariances with a low number of parameters (by means of multilevel modeling). The paper is organized as follows: Firstly, we provide a comprehensive description of the specification and estimation of MixML-SEM. Next, we present simulation studies assessing the performance of MixML-SEM in terms of model estimation and model selection, comparing it to ML-SEM. We then demonstrate the empirical value of MixML-SEM using data on how the perceived social pressure to be happy relates to people&#x00027;s life satisfaction. Finally, we summarize the main findings and discuss limitations of the study as well as directions for future research.</p></sec>
<sec id="s2">
<title>Specification and estimation of MixML-SEM</title>
<p>In Step 1 of MixML-SEM, the measurement model (MM) is estimated by performing a Multilevel Confirmatory Factor Analysis (ML-CFA) per factor (i.e., per construct). In Step 2, the factor scores obtained in Step 1 are used as scores on a single indicator (with fixed measurement parameters) for each respective factor and Croon&#x00027;s correction (Croon, <xref ref-type="bibr" rid="B12">2002</xref>) is applied to obtain bias-corrected factor covariances. In Step 3, the structural model (SM)&#x02014;including the clustering of the groups and the cluster-specific structural relations&#x02014;is estimated using an Expectation-Maximization (Dempster et al., <xref ref-type="bibr" rid="B17">1977</xref>) algorithm. Below, we elaborate on each step, including the relevant model specifications. Then, we discuss how to determine an essential aspect of the model specification: the number of clusters.</p>
<sec>
<title>Step 1: ML-CFA with measurement non-invariances</title>
<p>The MM captures how the indicators (observed variables) relate to the constructs of interest (latent variables). For each construct (factor) <italic>q</italic> (<italic>q</italic> &#x0003D; 1, &#x02026;, <italic>Q</italic>), <bold>x</bold><sub><italic>n</italic><sub><italic>g</italic></sub></sub> denotes the observed scores on the <italic>J</italic><sub><italic>q</italic></sub> indicator items for individual <italic>n</italic> nested within group <italic>g</italic> (<italic>g</italic> &#x0003D; 1, &#x02026;, <italic>G</italic>), which are modeled as follows:</p>
<disp-formula id="E1"><label>(1)</label><mml:math id="M1"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mstyle mathvariant="bold"><mml:mtext>x</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>n</mml:mi></mml:mrow><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mrow><mml:mstyle mathvariant="bold"><mml:mi>&#x003C4;</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:msub><mml:mrow><mml:mstyle mathvariant="bold"><mml:mi>&#x003BB;</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub><mml:msub><mml:mrow><mml:mi>&#x003B7;</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>n</mml:mi></mml:mrow><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:msub><mml:mrow><mml:mstyle mathvariant="bold"><mml:mi>&#x003F5;</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>n</mml:mi></mml:mrow><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub><mml:mtext>&#x000A0;</mml:mtext><mml:mi>w</mml:mi><mml:mi>i</mml:mi><mml:mi>t</mml:mi><mml:mi>h</mml:mi><mml:mtext>&#x000A0;</mml:mtext><mml:msub><mml:mrow><mml:mstyle mathvariant="bold"><mml:mi>&#x003F5;</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>n</mml:mi></mml:mrow><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub><mml:mo>&#x0007E;</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:mi>M</mml:mi><mml:mi>V</mml:mi><mml:mi>N</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mstyle mathvariant="bold"><mml:mtext>0</mml:mtext></mml:mstyle><mml:mstyle mathvariant="bold"><mml:mo>,</mml:mo></mml:mstyle><mml:msub><mml:mrow><mml:mstyle mathvariant="bold"><mml:mi>&#x00398;</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>where <bold>&#x003C4;</bold><sub><italic>g</italic></sub> is a <italic>J</italic><sub><italic>q</italic></sub>-dimensional vector of intercepts for group <italic>g</italic>, <bold>&#x003BB;</bold><sub><italic>g</italic></sub> denotes a <italic>J</italic><sub><italic>q</italic></sub>-dimensional vector of factor loadings for group <italic>g</italic>, quantifying the expected change in the item scores due to a one-unit change in the latent variable score &#x003B7;<sub><italic>n</italic><sub><italic>g</italic></sub></sub>, and <bold>&#x003F5;</bold><sub><italic>n</italic><sub><italic>g</italic></sub></sub> is a <italic>J</italic><sub><italic>q</italic></sub>-dimensional vector of residuals, where the diagonal of <bold>&#x00398;</bold><sub><italic>g</italic></sub> contains the items&#x00027; unique variances in group <italic>g</italic>, representing variance that is unexplained by the underlying construct. To set the scale of the latent variables, we adopt the marker variable approach, where one loading per factor is fixed to one for each group, so that a one-unit change of a factor has the same meaning in all groups.</p>
<p>Note that we formulated the MM for each factor separately, in line with the &#x0201C;measurement blocks&#x0201D; concept introduced by Rosseel and Loh (<xref ref-type="bibr" rid="B49">2024</xref>) in their SAM approach. They recommend estimating the MM of each latent variable separately, which corresponds to having one factor within each measurement block. This strategy streamlines computational efficiency by avoiding estimating one larger model with more parameters and it enhances the model&#x00027;s robustness against potential misspecifications, such as unmodeled cross-loadings.</p>
<p>Multigroup confirmatory factor analysis (MG-CFA; Meredith and Teresi, <xref ref-type="bibr" rid="B37">2006</xref>; S&#x000F6;rbom, <xref ref-type="bibr" rid="B52">1974</xref>) allows fitting MMs for multiple groups and determining which parameters are invariant. The invariance of a subset of measurement parameters holds when imposing their equality across groups (e.g., <bold>&#x003BB;</bold><sub><italic>g</italic></sub><bold>&#x0003D;&#x003BB;</bold> for <italic>g</italic> &#x0003D; 1, &#x02026;, <italic>G</italic>) does not significantly worsen the fit of the model. In MG-CFA, a non-invariant measurement parameter is estimated separately for each group, which results in a large number of parameters to be estimated. In multilevel terminology, this is referred to as the &#x0201C;fixed effect approach,&#x0201D; which is known to result in less efficient parameter estimates, meaning that the group-specific parameters are estimated with greater uncertainty, especially for small groups. Small-sample bias may also make the group-specific parameter estimates less accurate. Therefore, ML-CFA has gained prominence as a parsimonious alternative for capturing heterogeneity in parameters across many groups (Kim et al., <xref ref-type="bibr" rid="B32">2016</xref>; Muth&#x000E9;n, <xref ref-type="bibr" rid="B39">1991</xref>, <xref ref-type="bibr" rid="B40">1994</xref>), a trend supported by the availability of statistical software like Mplus (Muth&#x000E9;n and Muth&#x000E9;n, <xref ref-type="bibr" rid="B42">1998</xref>). Instead of estimating separate measurement parameters for each group, ML-CFA estimates a single MM for all groups, allowing measurement parameters to vary randomly across groups. This variation is modeled through random effects, which essentially impose a certain distribution on the variation in parameters. For these random effects, only the mean and variance are estimated as parameters, which results in more efficient estimation (Hox et al., <xref ref-type="bibr" rid="B26">2017</xref>). As mentioned in the Introduction, group-specific estimates can be derived from the random effects; however, these estimates are subject to shrinkage bias toward the overall mean.</p>
<p>When estimating MixML-SEM, we start by performing a separate ML-CFA for each factor using the Bayes estimator (Asparouhov and Muth&#x000E9;n, <xref ref-type="bibr" rid="B3">2012</xref>). Note that, since our primary focus is the comparison of structural relations and not of latent means, the mean structure of the data (i.e., the group-specific means) is removed by centering the observed scores per item within each group (therefore, <bold>&#x003C4;</bold><sub><italic>g</italic></sub> &#x0003D; <bold>0</bold> for <italic>g</italic> &#x0003D; 1, &#x02026;, <italic>G</italic>). This also lowers computational demands. For individual <italic>n</italic> in group <italic>g</italic>, the ML-CFA model for a single factor with random loadings is then expressed as follows:</p>
<p>Level-1 Model:</p>
<disp-formula id="E2"><label>(2)</label><mml:math id="M2"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mstyle mathvariant="bold"><mml:mtext>x</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>n</mml:mi></mml:mrow><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mrow><mml:mstyle mathvariant="bold"><mml:mi>&#x003BB;</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub><mml:msub><mml:mrow><mml:mi>&#x003B7;</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>n</mml:mi></mml:mrow><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:msub><mml:mrow><mml:mstyle mathvariant="bold"><mml:mi>&#x003F5;</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>n</mml:mi></mml:mrow><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub><mml:mtext>&#x000A0;</mml:mtext><mml:mi>w</mml:mi><mml:mi>i</mml:mi><mml:mi>t</mml:mi><mml:mi>h</mml:mi><mml:mtext>&#x000A0;</mml:mtext><mml:msub><mml:mrow><mml:mstyle mathvariant="bold"><mml:mi>&#x003F5;</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>n</mml:mi></mml:mrow><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub><mml:mo>&#x0007E;</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:mi>M</mml:mi><mml:mi>V</mml:mi><mml:mi>N</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mstyle mathvariant="bold"><mml:mtext>0</mml:mtext></mml:mstyle><mml:mstyle mathvariant="bold"><mml:mo>,</mml:mo></mml:mstyle><mml:msub><mml:mrow><mml:mstyle mathvariant="bold"><mml:mi>&#x00398;</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>Level-2 Model:</p>
<disp-formula id="E3"><label>(3)</label><mml:math id="M3"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mi>&#x003BB;</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi><mml:mi>g</mml:mi></mml:mrow></mml:msub></mml:mtd><mml:mtd><mml:mo>=</mml:mo></mml:mtd><mml:mtd><mml:msub><mml:mrow><mml:mi>&#x003B3;</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>&#x003BB;</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:msub><mml:mrow><mml:mi>u</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>&#x003BB;</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi><mml:mi>g</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub><mml:mtext>&#x000A0;</mml:mtext><mml:mi>w</mml:mi><mml:mi>i</mml:mi><mml:mi>t</mml:mi><mml:mi>h</mml:mi><mml:mtext>&#x000A0;</mml:mtext><mml:msub><mml:mrow><mml:mi>u</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>&#x003BB;</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi><mml:mi>g</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub><mml:mo>&#x0007E;</mml:mo><mml:mi>N</mml:mi><mml:mrow><mml:mo stretchy="true">(</mml:mo><mml:mrow><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:msub><mml:mrow><mml:mi>&#x003C3;</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>&#x003BB;</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="true">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<disp-formula id="E4"><label>(4)</label><mml:math id="M4"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mi>&#x003B8;</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi><mml:mi>g</mml:mi></mml:mrow></mml:msub></mml:mtd><mml:mtd><mml:mo>=</mml:mo></mml:mtd><mml:mtd><mml:msub><mml:mrow><mml:mi>&#x003B3;</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>&#x003B8;</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:msub><mml:mrow><mml:mi>u</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>&#x003B8;</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi><mml:mi>g</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub><mml:mtext>&#x000A0;</mml:mtext><mml:mi>w</mml:mi><mml:mi>i</mml:mi><mml:mi>t</mml:mi><mml:mi>h</mml:mi><mml:mtext>&#x000A0;</mml:mtext><mml:mi>l</mml:mi><mml:mi>o</mml:mi><mml:mi>g</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>&#x003B8;</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi><mml:mi>g</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>&#x0007E;</mml:mo><mml:mi>N</mml:mi><mml:mrow><mml:mo stretchy="true">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>&#x003B3;</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>&#x003B8;</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:msub><mml:mrow><mml:mi>&#x003C3;</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>&#x003B8;</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="true">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<disp-formula id="E5"><label>(5)</label><mml:math id="M5"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mi>&#x003D5;</mml:mi></mml:mrow><mml:mrow><mml:mi>q</mml:mi><mml:mi>g</mml:mi></mml:mrow></mml:msub></mml:mtd><mml:mtd><mml:mo>=</mml:mo></mml:mtd><mml:mtd><mml:msub><mml:mrow><mml:mi>&#x003B3;</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>&#x003D5;</mml:mi></mml:mrow><mml:mrow><mml:mi>q</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:msub><mml:mrow><mml:mi>u</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>&#x003D5;</mml:mi></mml:mrow><mml:mrow><mml:mi>q</mml:mi><mml:mi>g</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub><mml:mtext>&#x000A0;</mml:mtext><mml:mi>w</mml:mi><mml:mi>i</mml:mi><mml:mi>t</mml:mi><mml:mi>h</mml:mi><mml:mtext>&#x000A0;</mml:mtext><mml:mi>l</mml:mi><mml:mi>o</mml:mi><mml:mi>g</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>&#x003D5;</mml:mi></mml:mrow><mml:mrow><mml:mi>q</mml:mi><mml:mi>g</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>&#x0007E;</mml:mo><mml:mi>N</mml:mi><mml:mrow><mml:mo stretchy="true">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>&#x003B3;</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>&#x003D5;</mml:mi></mml:mrow><mml:mrow><mml:mi>q</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:msub><mml:mrow><mml:mi>&#x003C3;</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>&#x003D5;</mml:mi></mml:mrow><mml:mrow><mml:mi>q</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="true">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>At Level-1, <bold>&#x003BB;</bold><sub><italic>g</italic></sub> refers to the <italic>J</italic><sub><italic>q</italic></sub>-dimensional vector of factor loadings for group <italic>g</italic>, &#x003B7;<sub><italic>n</italic><sub><italic>g</italic></sub></sub> is the latent variable score and <bold>&#x003F5;</bold><sub><italic>n</italic><sub><italic>g</italic></sub></sub> the within-level error term for individual <italic>n</italic><sub><italic>g</italic></sub>. At Level-2, random effects are included for each non-invariant parameter (i.e., for each random loading &#x003BB;<sub><italic>jg</italic></sub>, random unique variance &#x003B8;<sub><italic>jg</italic></sub>, and random factor variance &#x003D5;<sub><italic>qg</italic></sub>). Random effects for loadings are also referred to as &#x0201C;random slopes.&#x0201D; Random loadings &#x003BB;<sub><italic>jg</italic></sub> are assumed to be normally distributed. In <xref ref-type="disp-formula" rid="E3">Equation 3</xref>, &#x003B3;<sub>&#x003BB;<sub><italic>j</italic></sub></sub> refers to the average slope, and <italic>u</italic><sub>&#x003BB;<sub><sub><italic>j</italic></sub><sub><italic>g</italic></sub></sub></sub> is the group-specific deviation from the average slope. While only the mean (&#x003B3;<sub>&#x003BB;<sub><italic>j</italic></sub></sub>) and the variance (&#x003C3;<sub>&#x003BB;<sub><italic>j</italic></sub></sub>) of the random slopes are estimated as parameters, it is possible to obtain group-specific loading estimates from the posterior distributions when using the Bayes estimator (i.e., posterior mean estimates, e.g., Asparouhov and Muth&#x000E9;n, <xref ref-type="bibr" rid="B3">2012</xref>). For an invariant loading, <italic>u</italic><sub>&#x003BB;<sub><italic>jg</italic></sub></sub> becomes 0 and &#x003BB;<sub><italic>jg</italic></sub><bold>&#x0003D;</bold>&#x003BB;<sub><italic>j</italic></sub> for all groups. As highlighted in the Introduction, at least partial metric invariance should hold for the between-group comparisons of structural relations to be valid. This implies that at least some loadings should be invariant across the groups, so that the vector of loadings contains both invariant and non-invariant ones. If full metric invariance holds, no random slopes are needed and the vector of loadings is fully equal across groups, so that <bold>&#x003BB;</bold><sub><italic>g</italic></sub> &#x0003D; <bold>&#x003BB;</bold> for <italic>g</italic> &#x0003D; 1, &#x02026;, <italic>G</italic>.</p>
<p>Differences in unique variances should also be captured by random effects (<xref ref-type="disp-formula" rid="E4">Equation 4</xref>), so that <bold>&#x00398;</bold><sub><italic>g</italic></sub> can also contain a combination of invariant and non-invariant unique variances&#x02014;depending on the results of the MI testing. Additionally, it is reasonable to expect differences in factor variances across groups. Thus, one should also specify random factor variances, &#x003D5;<sub><italic>qg</italic></sub>, when necessary (<xref ref-type="disp-formula" rid="E5">Equation 5</xref>). Since it is not suitable to assume random variances (i.e., unique variances &#x003B8;<sub><italic>jg</italic></sub> and factor variances &#x003D5;<sub><italic>qg</italic></sub>) following a normal distribution, the log of the variance is modeled by a normal distribution (e.g., &#x0201C;logv,&#x0201D; see Muth&#x000E9;n and Asparouhov, <xref ref-type="bibr" rid="B41">2023</xref>). Throughout the paper, we assumed the factor loadings and unique variances to be partially group-specific and factor variances to be fully group-specific, so we always refer to them with a subscript <italic>g</italic>.</p>
<p>As mentioned above, it is necessary to determine beforehand which measurement parameters should be specified as non-invariant (i.e., with random effects) and which ones as invariant (i.e., without random effects). Hence, MI testing should precede MixML-SEM. Even though MG-CFA is the most commonly used method, the MI test can also be performed with ML-CFA (Kim et al., <xref ref-type="bibr" rid="B31">2017</xref>; Leitg&#x000F6;b et al., <xref ref-type="bibr" rid="B34">2023</xref>). To evaluate MI, one can use the random effects directly. To assess whether (partial) metric invariance holds, the factor loadings are specified as random across groups (as in <xref ref-type="disp-formula" rid="E3">Equation 3</xref>) and, then, one can test&#x02014;for each loading&#x02014;whether the variance of the random loadings &#x003C3;<sub>&#x003BB;<sub><italic>j</italic></sub></sub> is non-zero (Asparouhov and Muth&#x000E9;n, <xref ref-type="bibr" rid="B3">2012</xref>; Leitg&#x000F6;b et al., <xref ref-type="bibr" rid="B34">2023</xref>), which implies that the corresponding loading is non-invariant. For unique variances and factor variances, one can also test whether the variance of their random effect is non-zero. Note that the MI testing method from Jak et al. (<xref ref-type="bibr" rid="B29">2013</xref>) is not applicable in our context, as we removed the mean structure and thus the between-level (co)variances.</p>
<p>Once the non-invariant parameters are identified and accounted for by random effects, we obtain the ML-CFA model that corresponds to the first step of MixML-SEM. Throughout this paper, Step 1 of MixML-SEM is performed by means of Mplus and the R-package MplusAutomation (Hallquist and Wiley, <xref ref-type="bibr" rid="B24">2018</xref>), using the Bayes estimator with default, non-informative priors. Upon estimating the MM for each factor, the posterior distributions (i.e., posterior means and standard deviations) of the factor scores (i.e., estimated latent variable scores) are appended to the data file. These values are used in Step 2. For more details, see <xref ref-type="supplementary-material" rid="SM1">Supplementary material S1</xref>.</p></sec>
<sec>
<title>Step 2: single-indicator approach to obtain group-specific factor covariances</title>
<p>The goal of Step 2 is to obtain group-specific factor covariances, denoted as <inline-formula><mml:math id="M6"><mml:msubsup><mml:mrow><mml:mstyle mathvariant="bold"><mml:mi>&#x003A6;</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:mi>g</mml:mi></mml:mrow><mml:mrow><mml:mi>s</mml:mi><mml:mn>2</mml:mn></mml:mrow></mml:msubsup></mml:math></inline-formula>. Factor scores are estimates of the true latent variable scores that contain error, so when they are used in regression or path analysis as if they were the true latent variable scores, the regression estimates may be biased (Devlieger and Rosseel, <xref ref-type="bibr" rid="B18">2017</xref>, <xref ref-type="bibr" rid="B19">2020</xref>). Croon developed a method to correct for the bias (Croon, <xref ref-type="bibr" rid="B12">2002</xref>). In a multigroup setting, Croon&#x00027;s formula describes the relation between the factor score covariances (<italic>cov</italic>(<bold>F</bold><sub><italic>g</italic></sub>)) and the true latent variable covariances (<italic>cov</italic>(<bold>&#x003B7;</bold><sub><italic>g</italic></sub>)) as follows:</p>
<disp-formula id="E6"><label>(6)</label><mml:math id="M7"><mml:mrow><mml:mi>c</mml:mi><mml:mi>o</mml:mi><mml:mi>v</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>F</mml:mi></mml:mstyle><mml:mi>g</mml:mi></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:msub><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>A</mml:mi></mml:mstyle><mml:mi>g</mml:mi></mml:msub><mml:msub><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>&#x0039B;</mml:mi></mml:mstyle><mml:mi>g</mml:mi></mml:msub><mml:mi>c</mml:mi><mml:mi>o</mml:mi><mml:mi>v</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi>&#x003B7;</mml:mi><mml:mi>g</mml:mi></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:msubsup><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>&#x0039B;</mml:mi></mml:mstyle><mml:mi>g</mml:mi><mml:mo>&#x02032;</mml:mo></mml:msubsup><mml:msubsup><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>A</mml:mi></mml:mstyle><mml:mi>g</mml:mi><mml:mo>&#x00027;</mml:mo></mml:msubsup><mml:mo>+</mml:mo><mml:msub><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>A</mml:mi></mml:mstyle><mml:mi>g</mml:mi></mml:msub><mml:msub><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>&#x00398;</mml:mi></mml:mstyle><mml:mi>g</mml:mi></mml:msub><mml:msubsup><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>A</mml:mi></mml:mstyle><mml:mi>g</mml:mi><mml:mo>&#x00027;</mml:mo></mml:msubsup></mml:mrow></mml:math></disp-formula>
<p>where <bold>F</bold><sub><italic>g</italic></sub> is the matrix containing the factor scores for all individuals of group <italic>g</italic>, <bold>A</bold><sub><italic>g</italic></sub> is the group-specific factor score matrix, containing the coefficients needed to convert the item scores into factor scores, <bold>&#x0039B;</bold><sub><italic>g</italic></sub> is the factor loading matrix for group <italic>g</italic>, and <bold>&#x00398;</bold><sub><italic>g</italic></sub> is the unique variance matrix for group <italic>g</italic>.</p>
<p>As mentioned in the Introduction, random effect estimates for the group-specific factor loadings in <bold>&#x0039B;</bold><sub><italic>g</italic></sub> and unique variances in <bold>&#x00398;</bold><sub><italic>g</italic></sub> can be biased, especially when the within-group sample size is small. Therefore, we opt to use only the estimated factor scores from Step 1, which contain all necessary information to proceed (Vermunt, <xref ref-type="bibr" rid="B56">2024</xref>). The factor scores can serve as a single &#x0201C;observed&#x0201D; indicator of the factor, which reduces the data&#x00027;s dimensionality. We can derive the measurement parameters of these single indicators from the estimated factor scores and their standard deviations (see below). The single-indicator approach is similar to the factor score regression approach with Croon&#x00027;s correction (Croon, <xref ref-type="bibr" rid="B12">2002</xref>; Vermunt, <xref ref-type="bibr" rid="B56">2024</xref>), with the difference that we now no longer use the measurement parameters of the observed indicators to perform the correction in <xref ref-type="disp-formula" rid="E6">Equation 6</xref>. To make sure that the mean of the estimated factor scores is exactly zero per group, we centered them per group.</p>
<p>The group-specific loading for factor <italic>q</italic> in group <italic>g</italic>, denoted as &#x003BB;<sub><italic>qg</italic></sub>, is equal to the reliability of the factor scores. The reliability is defined as the ratio of the variance of the factor scores within group <italic>g</italic> (i.e., the variance explained by the items) to the group-specific factor variance (i.e., total variance of the factor):</p>
<disp-formula id="E7"><label>(7)</label><mml:math id="M8"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mi>&#x003BB;</mml:mi></mml:mrow><mml:mrow><mml:mi>q</mml:mi><mml:mi>g</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mi>v</mml:mi><mml:mi>a</mml:mi><mml:mi>r</mml:mi><mml:mrow><mml:mo stretchy="true">(</mml:mo><mml:mrow><mml:mi>E</mml:mi><mml:mrow><mml:mo stretchy="true">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>q</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>n</mml:mi></mml:mrow><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="true">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="true">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>&#x003D5;</mml:mi></mml:mrow><mml:mrow><mml:mi>q</mml:mi><mml:mi>g</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mfrac></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>In the Bayesian framework, the factor score for each individual <italic>n</italic><sub><italic>g</italic></sub> is considered a random variable with a distribution. For factor <italic>q</italic>, <italic>E</italic>(<italic>f</italic><sub><italic>q</italic><sub><sub><italic>n</italic></sub><sub><italic>g</italic></sub></sub></sub>) represents the posterior mean of this distribution for individual <italic>n</italic><sub><italic>g</italic></sub> (i.e., the mean of the posterior distribution)&#x02014;and corresponds to the estimated factor score&#x02014;and <italic>var</italic>(<italic>E</italic>(<italic>f</italic><sub><italic>q</italic><sub><sub><italic>n</italic></sub><sub><italic>g</italic></sub></sub></sub>)) stands for the variance of the estimated factor scores across all individuals <italic>n</italic> within group <italic>g</italic>. The group-specific unique variance for factor <italic>q</italic> is set to &#x003D5;<sub><italic>qg</italic></sub>&#x003BB;<sub><italic>qg</italic></sub>(1&#x02212;&#x003BB;<sub><italic>qg</italic></sub>). The group-specific factor variance &#x003D5;<sub><italic>qg</italic></sub> can be obtained using the posterior means and variances of the factor scores as follows:</p>
<disp-formula id="E8"><label>(8)</label><mml:math id="M9"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mi>&#x003D5;</mml:mi></mml:mrow><mml:mrow><mml:mi>q</mml:mi><mml:mi>g</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mi>v</mml:mi><mml:mi>a</mml:mi><mml:mi>r</mml:mi><mml:mrow><mml:mo stretchy="true">(</mml:mo><mml:mrow><mml:mi>E</mml:mi><mml:mrow><mml:mo stretchy="true">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>q</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>n</mml:mi></mml:mrow><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="true">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="true">)</mml:mo></mml:mrow><mml:mo>&#x0002B;</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:mi>E</mml:mi><mml:mrow><mml:mo stretchy="true">(</mml:mo><mml:mrow><mml:mi>v</mml:mi><mml:mi>a</mml:mi><mml:mi>r</mml:mi><mml:mrow><mml:mo stretchy="true">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>q</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>n</mml:mi></mml:mrow><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="true">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="true">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>where <italic>var</italic>(<italic>f</italic><sub><italic>q</italic><sub><sub><italic>n</italic></sub><sub><italic>g</italic></sub></sub></sub>) represents the variance of this distribution (i.e., the square of the estimated standard deviation obtained from Step 1) for individual <italic>n</italic><sub><italic>g</italic></sub> and <italic>E</italic>(<italic>var</italic>(<italic>f</italic><sub><italic>q</italic><sub><sub><italic>n</italic></sub><sub><italic>g</italic></sub></sub></sub>)) represents the mean of the variance across all individuals <italic>n</italic> within group <italic>g</italic>. Technically, the group-specific factor variance &#x003D5;<sub><italic>qg</italic></sub> can also be obtained directly from the random effects of the factor variances in Step 1, but when using these random effects estimates in <xref ref-type="disp-formula" rid="E7">Equation 7</xref>, &#x003BB;<sub><italic>qg</italic></sub> can become larger than one, leading to a negative unique variance. Therefore, we use <xref ref-type="disp-formula" rid="E8">Equation 8</xref> to derive group-specific factor variances, &#x003D5;<sub><italic>qg</italic></sub>.</p>
<p>By gathering these parameters for all factors, we obtain the <italic>Q</italic>&#x000D7;<italic>Q</italic> group-specific factor loadings <inline-formula><mml:math id="M10"><mml:mover accent="false"><mml:mrow><mml:msub><mml:mrow><mml:mstyle mathvariant="bold"><mml:mi>&#x0039B;</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>^</mml:mo></mml:mover></mml:math></inline-formula> and <italic>Q</italic>&#x000D7;<italic>Q</italic> group-specific unique variances <inline-formula><mml:math id="M11"><mml:mover accent="false"><mml:mrow><mml:msub><mml:mrow><mml:mstyle mathvariant="bold"><mml:mi>&#x00398;</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>^</mml:mo></mml:mover></mml:math></inline-formula> for the factor scores as single indicators. Note that <inline-formula><mml:math id="M12"><mml:mover accent="false"><mml:mrow><mml:msub><mml:mrow><mml:mstyle mathvariant="bold"><mml:mi>&#x0039B;</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>^</mml:mo></mml:mover></mml:math></inline-formula> and <inline-formula><mml:math id="M13"><mml:mover accent="false"><mml:mrow><mml:msub><mml:mrow><mml:mstyle mathvariant="bold"><mml:mi>&#x00398;</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>^</mml:mo></mml:mover></mml:math></inline-formula> are equivalent to <bold>A</bold><sub><italic>g</italic></sub><bold>&#x0039B;</bold><sub><italic>g</italic></sub> and <bold>A</bold><sub><italic>g</italic></sub><bold>&#x00398;</bold><sub><italic>g</italic></sub><bold>A</bold><sub><italic>g</italic></sub> in <xref ref-type="disp-formula" rid="E6">Equation 6</xref>, respectively (Vermunt, <xref ref-type="bibr" rid="B56">2024</xref>). The group-specific factor covariance matrices <inline-formula><mml:math id="M14"><mml:msubsup><mml:mrow><mml:mstyle mathvariant="bold"><mml:mi>&#x003A6;</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:mi>g</mml:mi></mml:mrow><mml:mrow><mml:mi>s</mml:mi><mml:mn>2</mml:mn></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:mi>c</mml:mi><mml:mi>o</mml:mi><mml:mi>v</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mstyle mathvariant="bold"><mml:mi>&#x003B7;</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula>, which serve as the input for Step 3, can thus be derived as follows:</p>
<disp-formula id="E9"><label>(9)</label><mml:math id="M15"><mml:mrow><mml:mtable columnalign='left'><mml:mtr columnalign='left'><mml:mtd columnalign='left'><mml:mrow><mml:msubsup><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>&#x003A6;</mml:mi></mml:mstyle><mml:mi>g</mml:mi><mml:mrow><mml:mi>s</mml:mi><mml:mn>2</mml:mn></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:mo>&#x000A0;</mml:mo><mml:mover accent='true'><mml:mrow><mml:msubsup><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>&#x0039B;</mml:mi></mml:mstyle><mml:mi>g</mml:mi><mml:mrow><mml:mo>&#x02212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msubsup></mml:mrow><mml:mo stretchy='true'>&#x0005E;</mml:mo></mml:mover><mml:mo stretchy='false'>(</mml:mo><mml:mi>c</mml:mi><mml:mi>o</mml:mi><mml:mi>v</mml:mi><mml:mo stretchy='false'>(</mml:mo><mml:msub><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>F</mml:mi></mml:mstyle><mml:mi>g</mml:mi></mml:msub><mml:mo stretchy='false'>)</mml:mo><mml:mo>&#x02212;</mml:mo><mml:mover accent='true'><mml:mrow><mml:msub><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>&#x00398;</mml:mi></mml:mstyle><mml:mi>g</mml:mi></mml:msub></mml:mrow><mml:mo stretchy='true'>&#x0005E;</mml:mo></mml:mover><mml:mo stretchy='false'>)</mml:mo><mml:msup><mml:mrow><mml:mo stretchy='false'>(</mml:mo><mml:mover accent='true'><mml:mrow><mml:msubsup><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>&#x0039B;</mml:mi></mml:mstyle><mml:mi>g</mml:mi><mml:mo>&#x02032;</mml:mo></mml:msubsup></mml:mrow><mml:mo stretchy='true'>&#x0005E;</mml:mo></mml:mover><mml:mo stretchy='false'>)</mml:mo></mml:mrow><mml:mrow><mml:mo>&#x02212;</mml:mo><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mn>1</mml:mn></mml:mstyle></mml:mrow></mml:msup></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:math></disp-formula>
<p>Note that, instead of these factor covariances, it is theoretically possible to use the factor scores themselves as the input for Step 3, with measurement parameters that are fixed to <inline-formula><mml:math id="M16"><mml:mover accent="false"><mml:mrow><mml:msub><mml:mrow><mml:mstyle mathvariant="bold"><mml:mi>&#x0039B;</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>^</mml:mo></mml:mover></mml:math></inline-formula> and <inline-formula><mml:math id="M17"><mml:mover accent="false"><mml:mrow><mml:msub><mml:mrow><mml:mstyle mathvariant="bold"><mml:mi>&#x00398;</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>^</mml:mo></mml:mover></mml:math></inline-formula>. This would be a global SAM version of Step 3, which is computationally slow despite the dimension reduction due to the single-indicator approach. Therefore, we apply this intermediate Step 2 to obtain group-specific factor covariance matrices so that we no longer need to work with the measurement parameters in Step 3 (i.e., the local SAM approach).</p></sec>
<sec>
<title>Step 3: structural model with mixture clustering of the groups</title>
<p>This step corresponds to the second step of the MixMG-SEM method introduced by Perez Alonso et al. (<xref ref-type="bibr" rid="B44">2024</xref>). It aims to find the underlying clusters of groups and their cluster-specific structural relations. Thus, the SM is conditional on the cluster membership <italic>z</italic><sub><italic>gk</italic></sub>, which indicates whether group <italic>g</italic> belongs to cluster <italic>k</italic>:</p>
<disp-formula id="E10"><label>(10)</label><mml:math id="M18"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mstyle mathvariant="bold"><mml:mi>&#x003B7;</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>n</mml:mi></mml:mrow><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub><mml:mo>|</mml:mo><mml:msub><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mi>g</mml:mi><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mo>]</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:msub><mml:mrow><mml:mstyle mathvariant="bold"><mml:mtext>B</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:msub><mml:mrow><mml:mstyle mathvariant="bold"><mml:mi>&#x003B7;</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>n</mml:mi></mml:mrow><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub><mml:mstyle mathvariant="bold"><mml:mo>&#x0002B;</mml:mo></mml:mstyle><mml:msub><mml:mrow><mml:mstyle mathvariant="bold"><mml:mi>&#x003B6;</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>n</mml:mi></mml:mrow><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>where <bold>B</bold><sub><italic>k</italic></sub> contains the cluster-specific regression coefficients between latent variables, and <bold>&#x003B6;</bold><sub><italic>n</italic><sub><italic>g</italic></sub></sub> indicates the disturbances of these regressions. Under the assumption <italic>E</italic>(<bold>&#x003B6;</bold><sub><italic>n</italic><sub><italic>g</italic></sub></sub>)<bold>&#x0003D;0</bold> and <italic>cov</italic>(<bold>&#x003B6;</bold><sub><italic>n</italic><sub><italic>g</italic></sub></sub>)<bold>&#x0003D;&#x003A8;</bold><sub><italic>g</italic></sub>, the model-implied factor covariance matrix is computed as:</p>
<disp-formula id="E11"><label>(11)</label><mml:math id="M19"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mstyle mathvariant="bold"><mml:mi>&#x003A6;</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:mi>g</mml:mi><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msup><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mstyle mathvariant="bold"><mml:mtext>I</mml:mtext></mml:mstyle><mml:mo>&#x02212;</mml:mo><mml:msub><mml:mrow><mml:mstyle mathvariant="bold"><mml:mtext>B</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mo>&#x02212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msup><mml:msub><mml:mrow><mml:mstyle mathvariant="bold"><mml:mtext>&#x003A8;</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mi>g</mml:mi><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:msup><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mstyle mathvariant="bold"><mml:mtext>I</mml:mtext></mml:mstyle><mml:mo>&#x02212;</mml:mo><mml:msub><mml:mrow><mml:mstyle mathvariant="bold"><mml:mtext>B</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mo>&#x02212;</mml:mo><mml:msup><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>&#x02032;</mml:mi></mml:mrow></mml:msup></mml:mrow></mml:msup></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>Note that the residual factor covariances <bold>&#x003A8;</bold><sub><italic>gk</italic></sub> are specified as both group- and cluster-specific as they depend on the cluster-specific regression coefficients <bold>B</bold><sub><italic>k</italic></sub> but should not affect the cluster memberships. Estimating the SM involves minimizing the discrepancy between the group-specific factor covariance matrices obtained in Step 2, <inline-formula><mml:math id="M20"><mml:msubsup><mml:mrow><mml:mstyle mathvariant="bold"><mml:mi>&#x003A6;</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:mi>g</mml:mi></mml:mrow><mml:mrow><mml:mi>s</mml:mi><mml:mn>2</mml:mn></mml:mrow></mml:msubsup></mml:math></inline-formula>, and their corresponding model-implied reconstructions, <bold>&#x003A6;</bold><sub><italic>gk</italic></sub>. Note that the latter will differ from the former when the SM is not saturated.</p>
<p>MixML-SEM assumes that latent variable scores <bold>&#x003B7;</bold><sub><italic>n</italic><sub><italic>g</italic></sub></sub> are sampled from a mixture of <italic>K</italic> multivariate normal distributions where all latent variable scores of a group (gathered in <bold>H</bold><sub><italic>g</italic></sub>) are assumed to be sampled from the same distribution. More specifically, the MixML-SEM for group <italic>g</italic> is written as follows:</p>
<disp-formula id="E12"><label>(12)</label><mml:math id="M21"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>f</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mstyle mathvariant="bold"><mml:mtext>H</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub><mml:mo>;</mml:mo><mml:mi>&#x003C5;</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mstyle displaystyle="true"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>k</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>K</mml:mi></mml:mrow></mml:munderover></mml:mstyle><mml:msub><mml:mrow><mml:mi>&#x003C0;</mml:mi></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:mtext>&#x000A0;</mml:mtext><mml:mstyle displaystyle="true"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x0220F;</mml:mo></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>n</mml:mi></mml:mrow><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>N</mml:mi></mml:mrow><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:munderover></mml:mstyle><mml:mi>M</mml:mi><mml:mi>V</mml:mi><mml:mi>N</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mstyle mathvariant="bold"><mml:mi>&#x003B7;</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>n</mml:mi></mml:mrow><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub><mml:mo>;</mml:mo><mml:msub><mml:mrow><mml:mstyle mathvariant="bold"><mml:mi>&#x003B1;</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub><mml:msub><mml:mrow><mml:mo>,</mml:mo><mml:mstyle mathvariant="bold"><mml:mi>&#x003A6;</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:mi>g</mml:mi><mml:mi>k</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>Here, <italic>f</italic> represents the total population density function, &#x003C5; is the set of population parameters. &#x003C0;<sub><italic>k</italic></sub> stands for the prior probability of a group <italic>g</italic> belonging to cluster <italic>k</italic> (with <inline-formula><mml:math id="M22"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>k</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>K</mml:mi></mml:mrow></mml:munderover><mml:msub><mml:mrow><mml:mi>&#x003C0;</mml:mi></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> = 1). The mean vector <bold>&#x003B1;</bold><sub><italic>g</italic></sub> is <bold>0</bold> due to centering and covariance matrix <bold>&#x003A6;</bold><sub><italic>gk</italic></sub> is decomposed as in <xref ref-type="disp-formula" rid="E11">Equation 11</xref>.</p>
<p>The unknown parameters &#x003C5; are estimated by maximizing the following log-likelihood function:</p>
<disp-formula id="E13"><label>(13)</label><mml:math id="M24"><mml:mtable columnalign='left'><mml:mtr><mml:mtd><mml:mi>log</mml:mi><mml:msub><mml:mi>L</mml:mi><mml:mi>&#x003B7;</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mi>log</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mstyle displaystyle='true'><mml:munderover><mml:mo>&#x0220F;</mml:mo><mml:mrow><mml:mi>g</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>G</mml:mi></mml:munderover><mml:mrow><mml:mstyle displaystyle='true'><mml:munderover><mml:mo>&#x02211;</mml:mo><mml:mrow><mml:mi>k</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>K</mml:mi></mml:munderover><mml:mrow><mml:msub><mml:mi>&#x003C0;</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mfrac><mml:mn>1</mml:mn><mml:mrow><mml:msup><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mn>2</mml:mn><mml:mi>&#x003C0;</mml:mi></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mfrac><mml:mi>Q</mml:mi><mml:mn>2</mml:mn></mml:mfrac></mml:mrow></mml:msup><mml:mo>&#x0007C;</mml:mo><mml:mo stretchy='false'>(</mml:mo><mml:msub><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>&#x003A6;</mml:mi></mml:mstyle><mml:mrow><mml:mi>g</mml:mi><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:mo stretchy='false'>)</mml:mo><mml:msup><mml:mo>&#x0007C;</mml:mo><mml:mrow><mml:mfrac><mml:mn>1</mml:mn><mml:mn>2</mml:mn></mml:mfrac></mml:mrow></mml:msup></mml:mrow></mml:mfrac><mml:mi>exp</mml:mi><mml:msup><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mo>&#x02212;</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mn>2</mml:mn></mml:mfrac><mml:mi>t</mml:mi><mml:mi>r</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msubsup><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>&#x003A6;</mml:mi></mml:mstyle><mml:mi>g</mml:mi><mml:mrow><mml:mi>s</mml:mi><mml:mn>2</mml:mn></mml:mrow></mml:msubsup><mml:msubsup><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>&#x003A6;</mml:mi></mml:mstyle><mml:mrow><mml:mi>g</mml:mi><mml:mi>k</mml:mi></mml:mrow><mml:mrow><mml:mo>&#x02212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msubsup></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:msub><mml:mi>N</mml:mi><mml:mi>g</mml:mi></mml:msub></mml:mrow></mml:msup></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mtext>&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;</mml:mtext><mml:mo>=</mml:mo><mml:mstyle displaystyle='true'><mml:munderover><mml:mo>&#x02211;</mml:mo><mml:mrow><mml:mi>g</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>G</mml:mi></mml:munderover><mml:mrow><mml:mi>log</mml:mi></mml:mrow></mml:mstyle><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mstyle displaystyle='true'><mml:munderover><mml:mo>&#x02211;</mml:mo><mml:mrow><mml:mi>k</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>K</mml:mi></mml:munderover><mml:mrow><mml:msub><mml:mi>&#x003C0;</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mfrac><mml:mn>1</mml:mn><mml:mrow><mml:msup><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mn>2</mml:mn><mml:mi>&#x003C0;</mml:mi></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mfrac><mml:mi>Q</mml:mi><mml:mn>2</mml:mn></mml:mfrac></mml:mrow></mml:msup><mml:mo>&#x0007C;</mml:mo><mml:mo stretchy='false'>(</mml:mo><mml:msub><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>&#x003A6;</mml:mi></mml:mstyle><mml:mrow><mml:mi>g</mml:mi><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:mo stretchy='false'>)</mml:mo><mml:msup><mml:mo>&#x0007C;</mml:mo><mml:mrow><mml:mfrac><mml:mn>1</mml:mn><mml:mn>2</mml:mn></mml:mfrac></mml:mrow></mml:msup></mml:mrow></mml:mfrac><mml:mi>exp</mml:mi><mml:msup><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mo>&#x02212;</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mn>2</mml:mn></mml:mfrac><mml:mi>t</mml:mi><mml:mi>r</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msubsup><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>&#x003A6;</mml:mi></mml:mstyle><mml:mi>g</mml:mi><mml:mrow><mml:mi>s</mml:mi><mml:mn>2</mml:mn></mml:mrow></mml:msubsup><mml:msubsup><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>&#x003A6;</mml:mi></mml:mstyle><mml:mrow><mml:mi>g</mml:mi><mml:mi>k</mml:mi></mml:mrow><mml:mrow><mml:mo>&#x02212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msubsup></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:msub><mml:mi>N</mml:mi><mml:mi>g</mml:mi></mml:msub></mml:mrow></mml:msup></mml:mrow></mml:mstyle></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>where <inline-formula><mml:math id="M25"><mml:msubsup><mml:mrow><mml:mstyle mathvariant="bold"><mml:mi>&#x003A6;</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:mi>g</mml:mi></mml:mrow><mml:mrow><mml:mi>s</mml:mi><mml:mn>2</mml:mn></mml:mrow></mml:msubsup></mml:math></inline-formula> is the group-specific factor covariance matrix from Step 2 (<xref ref-type="disp-formula" rid="E9">Equation 9</xref>), and <bold>&#x003A6;</bold><sub><italic>gk</italic></sub> is the group- and cluster-specific factor covariance matrix from Step 3 (<xref ref-type="disp-formula" rid="E11">Equation 11</xref>). We use an Expectation-Maximization (EM; Dempster et al., <xref ref-type="bibr" rid="B17">1977</xref>) algorithm to optimize this log-likelihood function. In the E-step, the algorithm estimates the expected values of the cluster memberships of the groups given the current parameter estimates; that is, the classification probabilities <inline-formula><mml:math id="M26"><mml:msub><mml:mrow><mml:mover accent="false"><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mo>^</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>g</mml:mi><mml:mi>k</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula>. In the M-step, the algorithm maximizes the unknown parameters &#x003C5; given the expected cluster memberships from the E-step by calling lavaan (Rosseel, <xref ref-type="bibr" rid="B48">2012</xref>). Note that the M-step includes a bias correction procedure to get <bold>&#x003A8;</bold><sub><italic>gk</italic></sub>. Readers can consult Perez Alonso and colleagues&#x00027; paper (Perez Alonso et al., <xref ref-type="bibr" rid="B44">2024</xref>), Appendix A, for a deeper dive into the technical details of Step 3. The E- and M-steps are iterated until convergence is reached, which is when the change in log-likelihood between iterations becomes sufficiently small (e.g., &#x0003C;1 &#x000D7; 10<sup>&#x02212;6</sup>). A multi-start procedure, starting from multiple random partitions, is used to avoid convergence to local maxima. The solution with the highest log-likelihood is selected as the final result.</p></sec>
<sec>
<title>Model selection</title>
<p>Because the number of clusters underlying the data is unknown in real life, we compare models with different numbers of clusters using the following methods: Bayesian Information Criterion (BIC; Schwarz, <xref ref-type="bibr" rid="B51">1978</xref>), Akaike Information Criterion (AIC; Akaike, <xref ref-type="bibr" rid="B1">1974</xref>), and the convex hull procedure (CHull; Ceulemans and Kiers, <xref ref-type="bibr" rid="B10">2006</xref>). BIC combines the model&#x00027;s log-likelihood with a penalty based on the number of parameters:</p>
<disp-formula id="E14"><label>(14)</label><mml:math id="M27"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>B</mml:mi><mml:mi>I</mml:mi><mml:mi>C</mml:mi><mml:mo>=</mml:mo><mml:mo>-</mml:mo><mml:mn>2</mml:mn><mml:mi>l</mml:mi><mml:mi>o</mml:mi><mml:mi>g</mml:mi><mml:mi>L</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mi>P</mml:mi><mml:mo class="qopname">log</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>S</mml:mi><mml:mi>S</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>Here, <italic>P</italic> is the number of free parameters and <italic>SS</italic> is the sample size. The model with the smallest BIC value is selected. For MixML-SEM, <italic>P</italic> is the sum of the number of mixing proportions (minus one restriction), the number of cluster-specific regression coefficients, the number of group- and cluster-specific factor (co-)variances (counting only one set per group, since the model assumes each group to belong to one cluster only), and the number of measurement parameters. Recall that, from Step 2 onwards, the factor scores are used as single indicators for the latent variables. Therefore, we include the number of loadings <inline-formula><mml:math id="M28"><mml:mover accent="false"><mml:mrow><mml:msub><mml:mrow><mml:mstyle mathvariant="bold"><mml:mi>&#x0039B;</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>^</mml:mo></mml:mover></mml:math></inline-formula> and unique variances <inline-formula><mml:math id="M29"><mml:mover accent="false"><mml:mrow><mml:msub><mml:mrow><mml:mstyle mathvariant="bold"><mml:mi>&#x00398;</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>^</mml:mo></mml:mover></mml:math></inline-formula> for the factor scores as the number of measurement parameters in <italic>P</italic>. In simulation studies involving the mixture multigroup approach (De Roover, <xref ref-type="bibr" rid="B14">2021</xref>; De Roover et al., <xref ref-type="bibr" rid="B15">2022</xref>; Perez Alonso et al., <xref ref-type="bibr" rid="B45">2025</xref>), it was found that the BIC performed better when <italic>SS</italic> is equal to the number of groups <italic>G</italic> (BIC<sub><italic>G</italic></sub>) rather than the total number of observations <italic>N</italic> (BIC<sub><italic>N</italic></sub>), which is why we focus on BIC<sub><italic>G</italic></sub> throughout the paper.</p>
<p>In case of small sample sizes and low cluster separation, AIC was found to outperform BIC for some related methods (De Roover et al., <xref ref-type="bibr" rid="B15">2022</xref>; Kim et al., <xref ref-type="bibr" rid="B31">2017</xref>), but not all (De Roover, <xref ref-type="bibr" rid="B14">2021</xref>). AIC penalizes model complexity as follows:</p>
<disp-formula id="E15"><label>(15)</label><mml:math id="M30"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>A</mml:mi><mml:mi>I</mml:mi><mml:mi>C</mml:mi><mml:mo>=</mml:mo><mml:mo>-</mml:mo><mml:mn>2</mml:mn><mml:mi>l</mml:mi><mml:mi>o</mml:mi><mml:mi>g</mml:mi><mml:mi>L</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mn>2</mml:mn><mml:mi>P</mml:mi></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>Moreover, the CHull has been shown to be a valuable alternative to BIC and AIC (Bulteel et al., <xref ref-type="bibr" rid="B8">2013</xref>; De Roover, <xref ref-type="bibr" rid="B14">2021</xref>; De Roover et al., <xref ref-type="bibr" rid="B15">2022</xref>). It balances the logL and the number of free parameters by means of a generalized scree test, selecting the model with the highest scree ratio. Note that a limitation of CHull is that it always selects at least two clusters, because the scree ratio cannot be computed for a one-cluster solution, but visual inspection of the CHull plot can help identify whether a clear elbow is present. If not, an underlying clustering is less likely.</p>
<p>Since we use the estimated factor scores as the single &#x0201C;observed&#x0201D; indicator in Steps 2 and 3, we use the following loglikelihood in BIC, AIC, and CHull:</p>
<disp-formula id="E16"><label>(16)</label><mml:math id="M31"><mml:mtable columnalign='left'><mml:mtr><mml:mtd><mml:mi>log</mml:mi><mml:mi>L</mml:mi><mml:mo>=</mml:mo><mml:mi>log</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mstyle displaystyle='true'><mml:munderover><mml:mo>&#x0220F;</mml:mo><mml:mrow><mml:mi>g</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>G</mml:mi></mml:munderover><mml:mrow><mml:mstyle displaystyle='true'><mml:munderover><mml:mo>&#x02211;</mml:mo><mml:mrow><mml:mi>k</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>K</mml:mi></mml:munderover><mml:mrow><mml:msub><mml:mi>&#x003C0;</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle><mml:mstyle displaystyle='true'><mml:munderover><mml:mo>&#x0220F;</mml:mo><mml:mrow><mml:msub><mml:mi>n</mml:mi><mml:mi>g</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:msub><mml:mi>N</mml:mi><mml:mi>g</mml:mi></mml:msub></mml:mrow></mml:munderover><mml:mrow><mml:mfrac><mml:mn>1</mml:mn><mml:mrow><mml:msup><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mn>2</mml:mn><mml:mi>&#x003C0;</mml:mi></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mfrac><mml:mi>Q</mml:mi><mml:mn>2</mml:mn></mml:mfrac></mml:mrow></mml:msup><mml:mo>&#x0007C;</mml:mo><mml:mo stretchy='false'>(</mml:mo><mml:mover accent='true'><mml:mrow><mml:msub><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>&#x003A3;</mml:mi></mml:mstyle><mml:mrow><mml:mi>g</mml:mi><mml:mi>k</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy='true'>&#x0005E;</mml:mo></mml:mover><mml:mtext>&#x000A0;</mml:mtext><mml:mo stretchy='false'>)</mml:mo><mml:msup><mml:mo>&#x0007C;</mml:mo><mml:mrow><mml:mfrac><mml:mn>1</mml:mn><mml:mn>2</mml:mn></mml:mfrac></mml:mrow></mml:msup></mml:mrow></mml:mfrac></mml:mrow></mml:mstyle></mml:mrow></mml:mrow></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mtext>&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;</mml:mtext><mml:mi>exp</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mo>&#x02212;</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mn>2</mml:mn></mml:mfrac><mml:mo stretchy='false'>(</mml:mo><mml:msub><mml:mi>f</mml:mi><mml:mrow><mml:msub><mml:mi>n</mml:mi><mml:mi>g</mml:mi></mml:msub></mml:mrow></mml:msub><mml:mo>&#x02212;</mml:mo><mml:msub><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mover accent='true'><mml:mi>f</mml:mi><mml:mo>&#x000AF;</mml:mo></mml:mover></mml:mstyle><mml:mi>g</mml:mi></mml:msub><mml:msup><mml:mo stretchy='false'>)</mml:mo><mml:mo>&#x02032;</mml:mo></mml:msup><mml:msup><mml:mrow><mml:mover accent='true'><mml:mrow><mml:msub><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>&#x003A3;</mml:mi></mml:mstyle><mml:mrow><mml:mi>g</mml:mi><mml:mi>k</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy='true'>&#x0005E;</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mo>&#x02212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msup><mml:mo stretchy='false'>(</mml:mo><mml:msub><mml:mi>f</mml:mi><mml:mrow><mml:msub><mml:mi>n</mml:mi><mml:mi>g</mml:mi></mml:msub></mml:mrow></mml:msub><mml:mo>&#x02212;</mml:mo><mml:msub><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mover accent='true'><mml:mi>f</mml:mi><mml:mo>&#x000AF;</mml:mo></mml:mover></mml:mstyle><mml:mi>g</mml:mi></mml:msub><mml:mo stretchy='false'>)</mml:mo><mml:mo stretchy='false'>)</mml:mo></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mtext>&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;</mml:mtext><mml:mo>=</mml:mo><mml:mstyle displaystyle='true'><mml:munderover><mml:mo>&#x02211;</mml:mo><mml:mrow><mml:mi>g</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>G</mml:mi></mml:munderover><mml:mrow><mml:mi>log</mml:mi></mml:mrow></mml:mstyle><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mstyle displaystyle='true'><mml:munderover><mml:mo>&#x02211;</mml:mo><mml:mrow><mml:mi>k</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>K</mml:mi></mml:munderover><mml:mrow><mml:msub><mml:mi>&#x003C0;</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:mrow></mml:mstyle><mml:mstyle displaystyle='true'><mml:munderover><mml:mo>&#x0220F;</mml:mo><mml:mrow><mml:msub><mml:mi>n</mml:mi><mml:mi>g</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:msub><mml:mi>N</mml:mi><mml:mi>g</mml:mi></mml:msub></mml:mrow></mml:munderover><mml:mrow><mml:mfrac><mml:mn>1</mml:mn><mml:mrow><mml:msup><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mn>2</mml:mn><mml:mi>&#x003C0;</mml:mi></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mfrac><mml:mi>Q</mml:mi><mml:mn>2</mml:mn></mml:mfrac></mml:mrow></mml:msup><mml:mo>&#x0007C;</mml:mo><mml:mo stretchy='false'>(</mml:mo><mml:mover accent='true'><mml:mrow><mml:msub><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>&#x003A3;</mml:mi></mml:mstyle><mml:mrow><mml:mi>g</mml:mi><mml:mi>k</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy='true'>&#x0005E;</mml:mo></mml:mover><mml:mtext>&#x000A0;</mml:mtext><mml:mo stretchy='false'>)</mml:mo><mml:msup><mml:mo>&#x0007C;</mml:mo><mml:mrow><mml:mfrac><mml:mn>1</mml:mn><mml:mn>2</mml:mn></mml:mfrac></mml:mrow></mml:msup></mml:mrow></mml:mfrac></mml:mrow></mml:mstyle></mml:mrow></mml:mrow></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mtext>&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;</mml:mtext><mml:mi>exp</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mo>&#x02212;</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mn>2</mml:mn></mml:mfrac><mml:mo stretchy='false'>(</mml:mo><mml:msub><mml:mi>f</mml:mi><mml:mrow><mml:msub><mml:mi>n</mml:mi><mml:mi>g</mml:mi></mml:msub></mml:mrow></mml:msub><mml:mo>&#x02212;</mml:mo><mml:msub><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mover accent='true'><mml:mi>f</mml:mi><mml:mo>&#x000AF;</mml:mo></mml:mover></mml:mstyle><mml:mi>g</mml:mi></mml:msub><mml:msup><mml:mo stretchy='false'>)</mml:mo><mml:mo>&#x02032;</mml:mo></mml:msup><mml:msup><mml:mrow><mml:mover accent='true'><mml:mrow><mml:msub><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>&#x003A3;</mml:mi></mml:mstyle><mml:mrow><mml:mi>g</mml:mi><mml:mi>k</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy='true'>&#x0005E;</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mo>&#x02212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msup><mml:mo stretchy='false'>(</mml:mo><mml:msub><mml:mi>f</mml:mi><mml:mrow><mml:msub><mml:mi>n</mml:mi><mml:mi>g</mml:mi></mml:msub></mml:mrow></mml:msub><mml:mo>&#x02212;</mml:mo><mml:msub><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mover accent='true'><mml:mi>f</mml:mi><mml:mo>&#x000AF;</mml:mo></mml:mover></mml:mstyle><mml:mi>g</mml:mi></mml:msub><mml:mo stretchy='false'>)</mml:mo><mml:mo stretchy='false'>)</mml:mo></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>where <bold>f</bold><sub><italic>n</italic><sub><italic>g</italic></sub></sub> refers to the <italic>Q</italic>-dimensional vector of estimated factor scores of individual <italic>n</italic><sub><italic>g</italic></sub>, <inline-formula><mml:math id="M33"><mml:msub><mml:mrow><mml:mstyle mathvariant="bold"><mml:mover accent="false" class="mml-overline"><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mo accent="true">&#x000AF;</mml:mo></mml:mover></mml:mstyle></mml:mrow><mml:mrow><mml:mstyle class="text"><mml:mtext class="textit" mathvariant="italic">g</mml:mtext></mml:mstyle></mml:mrow></mml:msub></mml:math></inline-formula> refers to the mean of the estimated factor scores for each group, which equals zero due to centering, <inline-formula><mml:math id="M34"><mml:mover accent="false"><mml:mrow><mml:msub><mml:mrow><mml:mstyle mathvariant="bold"><mml:mi>&#x003A3;</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:mi>g</mml:mi><mml:mi>k</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>^</mml:mo></mml:mover></mml:math></inline-formula> refers to the model-implied covariance matrix of the <italic>Q</italic> single indicators, which is equal to:</p>
<disp-formula id="E17"><label>(17)</label><mml:math id="M35"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mover accent="false"><mml:mrow><mml:msub><mml:mrow><mml:mstyle mathvariant="bold"><mml:mi>&#x003A3;</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:mi>g</mml:mi><mml:mi>k</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>^</mml:mo></mml:mover><mml:mo>=</mml:mo><mml:mover accent="false"><mml:mrow><mml:msub><mml:mrow><mml:mstyle mathvariant="bold"><mml:mi>&#x0039B;</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>^</mml:mo></mml:mover><mml:msub><mml:mrow><mml:mstyle mathvariant="bold"><mml:mi>&#x003A6;</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:mi>g</mml:mi><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:mover accent="false"><mml:mrow><mml:msub><mml:mrow><mml:mstyle mathvariant="bold"><mml:mi>&#x0039B;</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>^</mml:mo></mml:mover><mml:mo>&#x0002B;</mml:mo><mml:mover accent="false"><mml:mrow><mml:msub><mml:mrow><mml:mstyle mathvariant="bold"><mml:mi>&#x00398;</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>^</mml:mo></mml:mover><mml:mtext>&#x000A0;</mml:mtext><mml:mo>=</mml:mo><mml:msup><mml:mrow><mml:mover accent="false"><mml:mrow><mml:msub><mml:mrow><mml:mstyle mathvariant="bold"><mml:mi>&#x0039B;</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>^</mml:mo></mml:mover><mml:mtext>&#x000A0;</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mstyle mathvariant="bold"><mml:mtext>I</mml:mtext></mml:mstyle><mml:mo>&#x02212;</mml:mo><mml:msub><mml:mrow><mml:mstyle mathvariant="bold"><mml:mtext>B</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mo>&#x02212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msup><mml:msub><mml:mrow><mml:mstyle mathvariant="bold"><mml:mtext>&#x003A8;</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mi>g</mml:mi><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:msup><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mstyle mathvariant="bold"><mml:mtext>I</mml:mtext></mml:mstyle><mml:mo>&#x02212;</mml:mo><mml:msub><mml:mrow><mml:mstyle mathvariant="bold"><mml:mtext>B</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mo>&#x02212;</mml:mo><mml:msup><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>&#x02032;</mml:mi></mml:mrow></mml:msup></mml:mrow></mml:msup><mml:mover accent="false"><mml:mrow><mml:msub><mml:mrow><mml:mstyle mathvariant="bold"><mml:mi>&#x0039B;</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>^</mml:mo></mml:mover><mml:mo>&#x0002B;</mml:mo><mml:mover accent="false"><mml:mrow><mml:msub><mml:mrow><mml:mstyle mathvariant="bold"><mml:mi>&#x00398;</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>^</mml:mo></mml:mover></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>Note that using the loglikelihood for the observed items would be too complicated since obtaining a valid loglikelihood requires integrating out the random effects for the non-invariant measurement parameters and the factor variances (see Step 1).</p></sec></sec>
<sec id="s3">
<title>Simulation studies</title>
<p>In Simulation Study 1, we evaluated the performance of MixML-SEM when the number of clusters is assumed to be known, and compared it to ML-SEM. Then, in Simulation Study 2, we investigated whether the correct number of clusters is selected for MixML-SEM by BIC<sub><italic>G</italic></sub>, AIC, and/or CHull.</p>
<sec>
<title>Simulation study 1</title>
<p>The goal of the Simulation Study 1 was two-fold: Firstly, we aimed to evaluate the performance of MixML-SEM in terms of parameter and cluster recovery when the number of clusters is known. Secondly, we compared it to ML-SEM, where group-specific regression coefficients were derived from random effects. Specifically, we performed ML-SEM with Mplus and the R-package MplusAutomation (Hallquist and Wiley, <xref ref-type="bibr" rid="B24">2018</xref>), where the SM and MM were estimated simultaneously with random effects for capturing differences. The following factors were manipulated:</p>
<list list-type="order">
<list-item><p>Total number of groups <italic>G</italic> (2 levels): 48, 96;</p></list-item>
<list-item><p>Number of clusters <italic>K</italic> (2 levels): 2, 4;</p></list-item>
<list-item><p>Small groups <italic>N</italic><sub><italic>g</italic></sub> (2 levels): 25, 50;</p></list-item>
<list-item><p>Small groups proportion (5 levels): 0, 0.25, 0.5, 0.75, 1;</p></list-item>
<list-item><p>Large groups <italic>N</italic><sub><italic>g</italic></sub> (2 levels): 100, 200;</p></list-item>
<list-item><p>Size of regression parameters &#x003B2; (3 levels): 0.2, 0.3, 0.4;</p></list-item>
<list-item><p>Reliability (2 levels): high, low;</p></list-item>
<list-item><p>Within-group samples: fixed, random.</p></list-item>
</list>
<p>We included two levels of the total number of groups <italic>G</italic>, with a minimum of 48 groups, based on the recommendation that at least 30 or 50 groups are needed to obtain valid estimates of random effects (Leitg&#x000F6;b et al., <xref ref-type="bibr" rid="B34">2023</xref>). Because more groups imply more information on cluster-specific regression estimates (i.e., a larger within-cluster sample size), we hypothesize that the performance of MixML-SEM will improve with a higher number of groups.</p>
<p>We considered two levels of the number of clusters <italic>K</italic> underlying the groups: two or four. A higher number of clusters lowers the within-cluster sample size and is thus expected to lower the performance of MixML-SEM. Additionally, it raises the complexity of determining the cluster memberships (i.e., more posterior classification probabilities) for each group, making the recovery of clusters more intricate. Here, we focused on balanced cluster sizes, where all clusters contained an equal number of groups. In practice, cluster recovery is likely to be more challenging when cluster sizes are unbalanced, as was demonstrated by Perez Alonso et al. (<xref ref-type="bibr" rid="B44">2024</xref>).</p>
<p>As mentioned in the Introduction, an advantage of MixML-SEM is combining information from multiple groups within a cluster when estimating the (cluster-specific) regression coefficients. In this way, the regression estimates for small groups benefit from the presence of large groups within the same cluster, if any. If only small groups are combined in a cluster and their cluster memberships are very uncertain (i.e., classification probabilities &#x0003C;1), this may affect the estimation of the cluster-specific regression estimates. Therefore, we generated data with a mix of small and large groups, which is also a realistic setting. To this end, we initially randomly selected a specific number of groups per cluster which were assigned a small <italic>N</italic><sub><italic>g</italic></sub> of either 25 or 50, where this number of groups was determined by the small groups proportion of 0, 0.25, 0.5, 0.75, or 1. Note that this proportion is applied to each cluster, so that the equality of the within-cluster sample sizes is preserved. Subsequently, the other groups were assigned a large <italic>N</italic><sub><italic>g</italic></sub> of either 100 or 200. A larger proportion of small groups lowers the within-cluster sample size, and is thus expected to lower the performance. To summarize, the group sizes are determined by three factors: the large <italic>N</italic><sub><italic>g</italic></sub>, the small <italic>N</italic><sub><italic>g</italic></sub>, and the small groups proportion. Note that the within-cluster sample sizes are determined by all the abovementioned factors.</p>
<p>The data were generated by a SEM model with four latent variables, each measured by five items (see <xref ref-type="fig" rid="F1">Figure 1</xref>), as was also used by Perez Alonso et al. (<xref ref-type="bibr" rid="B44">2024</xref>). As mentioned above, we assume the latent variable scores for group <italic>g</italic> follow a multivariate normal distribution with covariance matrix <bold>&#x003A3;</bold><sub><italic>gk</italic></sub><bold>, </bold> which is determined by the parameters: <bold>B</bold><sub><italic>k</italic></sub>, <bold>&#x003A8;</bold><sub><italic>gk</italic></sub>, <bold>&#x0039B;</bold><sub><italic>g</italic></sub> and <bold>&#x00398;</bold><sub><italic>g</italic></sub>. We first defined the cluster-specific regression parameters <bold>B</bold><sub><italic>k</italic></sub>. As illustrated in <xref ref-type="fig" rid="F2">Figure 2</xref>, for each cluster, one of them was set to zero, while the other regression parameters were set equal to the size of regression parameters &#x003B2;. Therefore, for each pair of clusters, the difference between them pertains to two regression coefficients and the size of each difference is equal to &#x003B2;. We considered three levels of regression parameters. The larger the size of regression parameters &#x003B2;, the more separated the clusters become and the easier the cluster recovery will be.</p>
<fig id="F1" position="float">
<label>Figure 1</label>
<caption><p>The model used for the data generation. F1 and F2 are exogenous variables, F3 and F4 are endogenous variables.</p></caption>
<alt-text>Flowchart depicting connections between latent variables and observed variables. Two circles labeled F1 and F2 are connected, with inputs V1 to V5 for F1 and V6 to V10 for F2. F3 and F4 link to observed variables V11 to V15 and V16 to V20. Arrows indicate relationships, and F1 and F2 are exogenous variables, F3 and F4 are endogenous variables.</alt-text>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpsyg-16-1463790-g0001.tif"/>
</fig><fig id="F2" position="float">
<label>Figure 2</label>
<caption><p>The regression parameters between the latent variables depending on the cluster.</p></caption>
<alt-text>Four cluster diagrams labeled Cluster 1 to Cluster 4. Each diagram contains four circular nodes labeled F1, F2, F3, and F4, connected by arrows and lines indicating relationships with labels &#x003B2;1, &#x003B2;2, &#x003B2;3, and &#x003B2;4. Some connections are marked with &#x0201C;0&#x0201D; indicating the absence of a relationship. Arrows indicate directionality between nodes.</alt-text>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpsyg-16-1463790-g0002.tif"/>
</fig><p>Secondly, we generated the group- and cluster-specific residual factor covariances <bold>&#x003A8;</bold><sub><italic>gk</italic></sub>. For the exogenous variables <italic>F</italic>1 and <italic>F</italic>2, we sampled the group-specific covariances (<italic>Cov</italic>(<italic>F</italic>1, <italic>F</italic>2)<sub><italic>g</italic></sub>) from a Wishart distribution, with variances (<italic>Var</italic>(<italic>F</italic>1)<sub><italic>g</italic></sub>, <italic>Var</italic>(<italic>F</italic>2)<sub><italic>g</italic></sub>) varying around 1 and their covariance varying around 0. Across groups within simulated data sets, <italic>Var</italic>(<italic>F</italic>1) and <italic>Var</italic>(<italic>F</italic>2) varied from 0.608 to 1.389 (mean = 0.952, <italic>SD</italic> = 0.166), whereas <italic>Cov</italic>(<italic>F</italic>1, <italic>F</italic>2) varied across groups from &#x02212;0.279 to 0.279 (mean = 0.000, <italic>SD</italic> = 0.117). For the endogenous variables <italic>F</italic>3 and <italic>F</italic>4, the total variances (<italic>Var</italic>(<italic>F</italic>3)<sub><italic>g</italic></sub>, <italic>Var</italic>(<italic>F</italic>4)<sub><italic>g</italic></sub>) were sampled separately from a log-normal distribution (with the mean on the log scale set to 0). Their residual variances depended on the regression parameters. For <italic>F</italic>3, it was <inline-formula><mml:math id="M37"><mml:mi>V</mml:mi><mml:mi>a</mml:mi><mml:mi>r</mml:mi><mml:msub><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>F</mml:mi><mml:mn>3</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub><mml:mo>-</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msubsup><mml:mrow><mml:mi>&#x003B2;</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msubsup><mml:mi>V</mml:mi><mml:mi>a</mml:mi><mml:mi>r</mml:mi><mml:msub><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>F</mml:mi><mml:mn>1</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:msubsup><mml:mrow><mml:mi>&#x003B2;</mml:mi></mml:mrow><mml:mrow><mml:mn>3</mml:mn><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msubsup><mml:mi>V</mml:mi><mml:mi>a</mml:mi><mml:mi>r</mml:mi><mml:msub><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>F</mml:mi><mml:mn>2</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:mn>2</mml:mn><mml:msub><mml:mrow><mml:mi>&#x003B2;</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:msub><mml:mrow><mml:mi>&#x003B2;</mml:mi></mml:mrow><mml:mrow><mml:mn>3</mml:mn><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:msub><mml:mrow><mml:mi>C</mml:mi><mml:mi>o</mml:mi><mml:mi>v</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>F</mml:mi><mml:mn>1</mml:mn><mml:mo>,</mml:mo><mml:mi>F</mml:mi><mml:mn>2</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula>. For <italic>F</italic>4, it was<inline-formula><mml:math id="M38"><mml:mi>V</mml:mi><mml:mi>a</mml:mi><mml:mi>r</mml:mi><mml:msub><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>F</mml:mi><mml:mn>4</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub><mml:mo>-</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msubsup><mml:mrow><mml:mi>&#x003B2;</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msubsup><mml:mi>V</mml:mi><mml:mi>a</mml:mi><mml:mi>r</mml:mi><mml:msub><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>F</mml:mi><mml:mn>1</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:msubsup><mml:mrow><mml:mi>&#x003B2;</mml:mi></mml:mrow><mml:mrow><mml:mn>4</mml:mn><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msubsup><mml:mi>V</mml:mi><mml:mi>a</mml:mi><mml:mi>r</mml:mi><mml:msub><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>F</mml:mi><mml:mn>3</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:mn>2</mml:mn><mml:msub><mml:mrow><mml:mi>&#x003B2;</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:msub><mml:mrow><mml:mi>&#x003B2;</mml:mi></mml:mrow><mml:mrow><mml:mn>4</mml:mn><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>&#x003B2;</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:mi>V</mml:mi><mml:mi>a</mml:mi><mml:mi>r</mml:mi><mml:msub><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>F</mml:mi><mml:mn>1</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:msub><mml:mrow><mml:mi>&#x003B2;</mml:mi></mml:mrow><mml:mrow><mml:mn>3</mml:mn><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:mi>C</mml:mi><mml:mi>o</mml:mi><mml:mi>v</mml:mi><mml:msub><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>F</mml:mi><mml:mn>1</mml:mn><mml:mo>,</mml:mo><mml:mi>F</mml:mi><mml:mn>2</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub><mml:mtext>&#x000A0;</mml:mtext></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula>.</p>
<p>Thirdly, we specified the group-specific loading matrices <bold>&#x0039B;</bold><sub><italic>g</italic></sub> and unique variances <bold>&#x00398;</bold><sub><italic>g</italic></sub>, based on the reliability. The first loading of each latent variable was fixed to 1 to set the scale of the latent variable. Per latent variable, the loadings and unique variances of the second and third indicator were set to be non-invariant. When the reliability level was high, the invariant loadings were set to <inline-formula><mml:math id="M39"><mml:msqrt><mml:mrow><mml:mn>0</mml:mn><mml:mo>.</mml:mo><mml:mn>6</mml:mn></mml:mrow></mml:msqrt></mml:math></inline-formula> and their unique variances to 0.4; when the reliability was low, the invariant loadings were set to <inline-formula><mml:math id="M40"><mml:msqrt><mml:mrow><mml:mn>0</mml:mn><mml:mo>.</mml:mo><mml:mn>4</mml:mn></mml:mrow></mml:msqrt></mml:math></inline-formula> and their unique variances to 0.6. Meanwhile, the non-invariant loadings were sampled from a normal distribution with a mean of either <inline-formula><mml:math id="M41"><mml:msqrt><mml:mrow><mml:mn>0</mml:mn><mml:mo>.</mml:mo><mml:mn>6</mml:mn></mml:mrow></mml:msqrt></mml:math></inline-formula> or <inline-formula><mml:math id="M42"><mml:msqrt><mml:mrow><mml:mn>0</mml:mn><mml:mo>.</mml:mo><mml:mn>4</mml:mn></mml:mrow></mml:msqrt></mml:math></inline-formula> and a variance of 0.1 for all groups. The non-invariant unique variances were sampled from a log-normal distribution (with the standard deviation on the log scale set to 0.25 and the mean to &#x02212;0.948 or &#x02212;0.542 to generate unique variances around 0.4 and 0.6, respectively, for the high and low reliability conditions). When the reliability is higher, we expect a better recovery of the MM in MixML-SEM, potentially leading to a better cluster recovery.</p>
<p>Finally, after defining all the necessary parameters, data were sampled from a multivariate normal distribution <italic>MVN</italic><bold>(0, &#x003A3;</bold><sub><italic>gk</italic></sub><bold>)</bold> for each group, either with fixed or random within-group samples. This was operationalized using the &#x0201C;empirical&#x0201D; argument in the mvrnorm function from the MASS package (Venables and Ripley, <xref ref-type="bibr" rid="B55">2002</xref>). With empirical = TRUE, the covariance matrix of the sampled data exactly matches the specified <bold>&#x003A3;</bold><sub><italic>gk</italic></sub>. This setting corresponds to the empirical situation where all individuals nested within groups are included in the sample (e.g., including all pupils of a classroom), or when only the specific set of individuals in the sample is of interest, without intending to draw conclusions about the broader population of individuals within a group. In contrast, with empirical = FALSE, the within-group samples are regarded as a random sample from a larger population within a certain group (e.g., inhabitants of a country) and one intends to draw conclusions about that entire population. In the latter case, the sample&#x00027;s covariance matrix will differ from <bold>&#x003A3;</bold><italic><sub>gk</sub></italic> due to sampling fluctuations, and more so for smaller group sizes. Thus, we expect the recovery of the clusters and parameters to be more challenging in the random conditions, especially when (more) groups are smaller.</p>
<p>We generated 50 replications per cell of the design, yielding 48,000 data sets in total, using R version 4.4 (R Core Team, <xref ref-type="bibr" rid="B47">2022</xref>). All data sets were analyzed using MixML-SEM with 50 random starts, and ML-SEM. For both methods, the measurement non-invariances were correctly specified as group-specific parameters. The analyses were performed on a supercomputer consisting of 2 Intel Xeon Platinum 8468 CPUs (Sapphire Rapids). The average computation time for MixML-SEM with the correct number of clusters was 1.8 min for Step 1, 0.4 s for Step 2 and 4.6 min for Step 3 (with 50 random starts). Note that the average computation time of Step 1 was mainly influenced by the number of groups: 1.5 min for 48 groups, 2.0 min for 96 groups. For Step 3, the computation time varied depending on all simulation conditions. The lowest average was 0.4 min for &#x0201C;easy&#x0201D; conditions (e.g., <italic>K</italic> &#x0003D; 2, &#x003B2; &#x0003D; 0.4, with only large groups and fixed within-group samples), while the highest average was 24.5 min for &#x0201C;hard&#x0201D; conditions (e.g., <italic>K</italic> &#x0003D; 4, &#x003B2; &#x0003D; 0.2, with only small groups and random within-group samples).</p>
<sec>
<title>Results</title>
<sec>
<title>MixML-SEM results</title>
<sec>
<title>Recovery of the measurement model</title>
<p>For the invariant loadings (excluding the fixed marker variable loadings), on average across simulated data sets, the estimated values amounted to 0.775 (<italic>SD</italic> = 0.005) and 0.634 (<italic>SD</italic> = 0.007) for the two reliability levels, closely matching the data generating values of <inline-formula><mml:math id="M43"><mml:msqrt><mml:mrow><mml:mn>0</mml:mn><mml:mo>.</mml:mo><mml:mn>6</mml:mn></mml:mrow></mml:msqrt></mml:math></inline-formula> and <inline-formula><mml:math id="M44"><mml:msqrt><mml:mrow><mml:mn>0</mml:mn><mml:mo>.</mml:mo><mml:mn>4</mml:mn></mml:mrow></mml:msqrt></mml:math></inline-formula>, respectively. Recall that, for the non-invariant loadings, only the mean and variance of the random effects are estimated as parameters. In case of high reliability, on average across simulated data sets (and the two non-invariant loadings), the estimated mean of the loadings was 0.775 (<italic>SD</italic> = 0.021), with a variance of 0.100 (<italic>SD</italic> = 0.012). For the low reliability conditions, the estimated mean was 0.634 (<italic>SD</italic> = 0.021) with the same variance of 0.100 (<italic>SD</italic> = 0.013). This closely matches the data generating values for the random loadings&#x00027; distribution, with a mean of <inline-formula><mml:math id="M45"><mml:msqrt><mml:mrow><mml:mn>0</mml:mn><mml:mo>.</mml:mo><mml:mn>6</mml:mn></mml:mrow></mml:msqrt></mml:math></inline-formula> or <inline-formula><mml:math id="M46"><mml:msqrt><mml:mrow><mml:mn>0</mml:mn><mml:mo>.</mml:mo><mml:mn>4</mml:mn></mml:mrow></mml:msqrt></mml:math></inline-formula> and a variance of 0.1. No large effects were found for the other manipulated factors.</p>
<p>For the non-invariant loadings, we also assessed the accuracy of the group-specific loading estimates derived from the random effects. To this end, we computed the Root Mean Squared Error (RMSE):</p>
<disp-formula id="E18"><label>(18)</label><mml:math id="M47"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>R</mml:mi><mml:mi>M</mml:mi><mml:mi>S</mml:mi><mml:msub><mml:mrow><mml:mi>E</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x003BB;</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msqrt><mml:mrow><mml:mfrac><mml:mrow><mml:mstyle displaystyle="true"><mml:msubsup><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>g</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>G</mml:mi></mml:mrow></mml:msubsup></mml:mstyle><mml:msup><mml:mrow><mml:mstyle displaystyle="true"><mml:msubsup><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>j</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>J</mml:mi></mml:mrow></mml:msubsup></mml:mstyle><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mover accent="false"><mml:mrow><mml:mi>&#x003BB;</mml:mi></mml:mrow><mml:mo>^</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>j</mml:mi><mml:mi>g</mml:mi></mml:mrow></mml:msub><mml:mo>-</mml:mo><mml:msub><mml:mrow><mml:mi>&#x003BB;</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi><mml:mi>g</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:mrow><mml:mrow><mml:mi>G</mml:mi><mml:mi>J</mml:mi></mml:mrow></mml:mfrac></mml:mrow></mml:msqrt></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>where &#x003BB;<sub><italic>jg</italic></sub> is the true group-specific loading of item <italic>j</italic>, and <inline-formula><mml:math id="M48"><mml:msub><mml:mrow><mml:mover accent="false"><mml:mrow><mml:mi>&#x003BB;</mml:mi></mml:mrow><mml:mo>^</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>j</mml:mi><mml:mi>g</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> is the corresponding estimate. Only non-invariant loadings are included in this computation. On average across all data sets, <italic>RMSE</italic><sub>&#x003BB;</sub> amounted to 0.079 (<italic>SD</italic> = 0.041). It was mainly influenced by group sizes and whether within-group samples were fixed or random. In particular, for fixed within-group samples, the largest average <italic>RMSE</italic><sub>&#x003BB;</sub> was 0.080 when <italic>N</italic><sub><italic>g</italic></sub> &#x0003D; 25 for all groups, and the smallest average was 0.019 when <italic>N</italic><sub><italic>g</italic></sub> &#x0003D; 200 for all groups. For random within-group samples, the largest average <italic>RMSE</italic><sub>&#x003BB;</sub> was 0.162 when <italic>N</italic><sub><italic>g</italic></sub> &#x0003D; 25 for all groups, decreasing to 0.064 when <italic>N</italic><sub><italic>g</italic></sub> &#x0003D; 200 for all groups.</p>
<p>For the invariant unique variances, the mean parameter values were 0.395 (<italic>SD</italic> = 0.005) and 0.593 (<italic>SD</italic> = 0.008) for the two reliability levels, closely matching the data generating values of 0.4 and 0.6, respectively. For the non-invariant, random unique variances, the estimated means were, on average, equal to &#x02212;0.968 and &#x02212;0.563 (<italic>SD</italic> = 0.025) on the log scale, closely matching the data generating values of &#x02212;0.948 and &#x02212;0.542. Again, no large effects were found for the other manipulated factors. We also evaluated the group-specific estimates derived from the random effects, with a similar RMSE as for the non-invariant loadings. Across all data sets, <italic>RMSE</italic><sub>&#x003B8;</sub> amounted to 0.073 (<italic>SD</italic> = 0.024) on average, mainly affected by group sizes and whether within-group samples were fixed or random: In fixed conditions, the average <italic>RMSE</italic><sub>&#x003B8;</sub> was 0.089 with <italic>N</italic><sub><italic>g</italic></sub> &#x0003D; 25 for all groups, and 0.026 with <italic>N</italic><sub><italic>g</italic></sub> &#x0003D; 200 for all groups. In random conditions, <italic>RMSE</italic><sub>&#x003B8;</sub> was on average 0.113 when <italic>N</italic><sub><italic>g</italic></sub> &#x0003D; 25 for all groups and 0.058 when <italic>N</italic><sub><italic>g</italic></sub> &#x0003D; 200 for all groups.</p>
<p>To conclude, the invariant measurement parameters and the random effects for the non-invariant ones were recovered very well. As expected, for the non-invariant parameters, the group-specific estimates derived from the random effects were biased for smaller groups, especially in case of random within-group samples, indicating the shrinkage of the group-specific estimates toward the mean.</p></sec>
<sec>
<title>Sensitivity to local maxima</title>
<p>To check how often (Step 3 of) MixML-SEM converged to a local maximum, we compared the final log-likelihood to a &#x0201C;proxy&#x0201D; of the global maximum likelihood solution. This proxy was obtained by starting Step 3 with the true clustering instead of a random clustering (Perez Alonso et al., <xref ref-type="bibr" rid="B44">2024</xref>). When the final log-likelihood (i.e., the highest one resulting from the 50 random starts) was more than 0.001 smaller than the log-likelihood of the proxy, it was considered a local maximum. By this definition, MixML-SEM converged to a local maximum for 0.1% of all data sets.</p></sec>
<sec>
<title>Cluster recovery</title>
<p>To evaluate the cluster recovery, we made use of the Adjusted Rand Index (ARI; Hubert and Arabie, <xref ref-type="bibr" rid="B28">1985</xref>), which measures the agreement between two partitions, where 1 indicates complete agreement and 0 the level of agreement one would find for two random partitions. For computing the ARI, we transformed the estimated cluster memberships into a hard partition, by assigning each group to the cluster with the highest classification probability, and then compared it to the true clustering. To get a better feeling of how many of the groups were clustered (in)correctly, we also evaluated the correct clustering rate (%CC), defined as the percentage of correctly clustered groups for each data set. To evaluate whether a worse cluster recovery concurred with a higher classification uncertainty, we inspected the highest classification probability for each group (<inline-formula><mml:math id="M49"><mml:msubsup><mml:mrow><mml:mover accent="false"><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mo>^</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>g</mml:mi><mml:mi>k</mml:mi></mml:mrow><mml:mrow><mml:mo class="qopname">max</mml:mo></mml:mrow></mml:msubsup></mml:math></inline-formula>), where this probability being smaller than 1 would indicate uncertainty. Hence, classification uncertainty was quantified as <inline-formula><mml:math id="M50"><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>1</mml:mn><mml:mo>-</mml:mo><mml:msubsup><mml:mrow><mml:mover accent="false"><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mo>^</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>g</mml:mi><mml:mi>k</mml:mi></mml:mrow><mml:mrow><mml:mo class="qopname">max</mml:mo></mml:mrow></mml:msubsup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula> for each group, which was then averaged across groups per data set. The ARI, %CC, and classification uncertainty are reported in <xref ref-type="table" rid="T1">Table 1</xref>.</p>
<table-wrap position="float" id="T1">
<label>Table 1</label>
<caption><p>MixML-SEM cluster recovery.</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:#727779;color:#ffffff">
<th valign="top" align="left"><bold>Factor</bold></th>
<th valign="top" align="center"><bold>Level</bold></th>
<th valign="top" align="center"><bold>ARI</bold></th>
<th valign="top" align="center"><bold>%CC</bold></th>
<th valign="top" align="center"><bold>Uncertainty</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left" rowspan="2"><italic>G</italic></td>
<td valign="top" align="left">48</td>
<td valign="top" align="center">0.735 (0.339)</td>
<td valign="top" align="center">0.876 (0.192)</td>
<td valign="top" align="center">0.088 (0.132)</td>
</tr>
 <tr>
<td valign="top" align="left">96</td>
<td valign="top" align="center">0.755 (0.319)</td>
<td valign="top" align="center">0.887 (0.182)</td>
<td valign="top" align="center">0.092 (0.137)</td>
</tr>
 <tr>
<td valign="top" align="left" rowspan="2"><italic>K</italic></td>
<td valign="top" align="left">2</td>
<td valign="top" align="center">0.805 (0.280)</td>
<td valign="top" align="center">0.937 (0.104)</td>
<td valign="top" align="center">0.053 (0.075)</td>
</tr>
 <tr>
<td valign="top" align="left">4</td>
<td valign="top" align="center">0.685 (0.363)</td>
<td valign="top" align="center">0.826 (0.230)</td>
<td valign="top" align="center">0.127 (0.167)</td>
</tr> <tr>
<td valign="top" align="left" rowspan="2">Large <italic>N</italic><sub><italic>g</italic></sub></td>
<td valign="top" align="left">100</td>
<td valign="top" align="center">0.714 (0.345)</td>
<td valign="top" align="center">0.864 (0.200)</td>
<td valign="top" align="center">0.098 (0.142)</td>
</tr>
 <tr>
<td valign="top" align="left">200</td>
<td valign="top" align="center">0.776 (0.310)</td>
<td valign="top" align="center">0.898 (0.172)</td>
<td valign="top" align="center">0.083 (0.126)</td>
</tr> <tr>
<td valign="top" align="left" rowspan="2">Small <italic>N</italic><sub><italic>g</italic></sub></td>
<td valign="top" align="center">25</td>
<td valign="top" align="center">0.698 (0.358)</td>
<td valign="top" align="center">0.856 (0.206)</td>
<td valign="top" align="center">0.113 (0.148)</td>
</tr>
 <tr>
<td valign="top" align="left">50</td>
<td valign="top" align="center">0.791 (0.291)</td>
<td valign="top" align="center">0.907 (0.162)</td>
<td valign="top" align="center">0.067 (0.115)</td>
</tr> <tr>
<td valign="top" align="left" rowspan="5">Small groups proportion</td>
<td valign="top" align="left">0</td>
<td valign="top" align="center">0.899 (0.186)</td>
<td valign="top" align="center">0.961 (0.083)</td>
<td valign="top" align="center">0.021 (0.033)</td>
</tr>
 <tr>
<td valign="top" align="left">0.25</td>
<td valign="top" align="center">0.839 (0.228)</td>
<td valign="top" align="center">0.938 (0.102)</td>
<td valign="top" align="center">0.052 (0.052)</td>
</tr>
 <tr>
<td valign="top" align="left">0.5</td>
<td valign="top" align="center">0.779 (0.277)</td>
<td valign="top" align="center">0.911 (0.130)</td>
<td valign="top" align="center">0.084 (0.084)</td>
</tr>
 <tr>
<td valign="top" align="left">0.75</td>
<td valign="top" align="center">0.693 (0.345)</td>
<td valign="top" align="center">0.858 (0.194)</td>
<td valign="top" align="center">0.122 (0.139)</td>
</tr>
 <tr>
<td valign="top" align="left">1</td>
<td valign="top" align="center">0.512 (0.412)</td>
<td valign="top" align="center">0.738 (0.268)</td>
<td valign="top" align="center">0.171 (0.216)</td>
</tr> <tr>
<td valign="top" align="left" rowspan="3">&#x003B2;</td>
<td valign="top" align="left">0.2</td>
<td valign="top" align="center">0.571 (0.397)</td>
<td valign="top" align="center">0.785 (0.240)</td>
<td valign="top" align="center">0.175 (0.171)</td>
</tr>
 <tr>
<td valign="top" align="left">0.3</td>
<td valign="top" align="center">0.776 (0.288)</td>
<td valign="top" align="center">0.902 (0.160)</td>
<td valign="top" align="center">0.071 (0.110)</td>
</tr>
 <tr>
<td valign="top" align="left">0.4</td>
<td valign="top" align="center">0.887 (0.181)</td>
<td valign="top" align="center">0.957 (0.080)</td>
<td valign="top" align="center">0.023 (0.030)</td>
</tr> <tr>
<td valign="top" align="left" rowspan="2">Reliability</td>
<td valign="top" align="left">high</td>
<td valign="top" align="center">0.760 (0.321)</td>
<td valign="top" align="center">0.889 (0.183)</td>
<td valign="top" align="center">0.092 (0.135)</td>
</tr>
 <tr>
<td valign="top" align="left">low</td>
<td valign="top" align="center">0.729 (0.337)</td>
<td valign="top" align="center">0.874 (0.191)</td>
<td valign="top" align="center">0.088 (0.134)</td>
</tr> <tr>
<td valign="top" align="left" rowspan="2">Within-group samples</td>
<td valign="top" align="center">fixed</td>
<td valign="top" align="center">0.916 (0.275)</td>
<td valign="top" align="center">0.942 (0.193)</td>
<td valign="top" align="center">0.107 (0.180)</td>
</tr>
 <tr>
<td valign="top" align="left">random</td>
<td valign="top" align="center">0.573 (0.287)</td>
<td valign="top" align="center">0.820 (0.160)</td>
<td valign="top" align="center">0.073 (0.056)</td>
</tr> <tr>
<td valign="top" align="left">Total</td>
<td/>
<td valign="top" align="center">0.745 (0.329)</td>
<td valign="top" align="center">0.881 (0.187)</td>
<td valign="top" align="center">0.090 (0.135)</td>
</tr></tbody>
</table>
<table-wrap-foot>
<p>The average ARI, correct clustering rate (%CC), and classification uncertainty for MixML-SEM per level of each manipulated factor. The standard deviation is shown in brackets.</p>
</table-wrap-foot>
</table-wrap>
<p>On average, across all simulated conditions, the ARI amounted to 0.745 and the correct clustering rate was 88.1% (<xref ref-type="table" rid="T1">Table 1</xref>). To check which main and interaction effects of the manipulated factors significantly influenced the ARI, we performed an analysis of variance (ANOVA) by means of the <italic>aov</italic> function in R. To keep the results comprehensible, we only included two-way interaction effects. The ANOVA results table is provided in <xref ref-type="supplementary-material" rid="SM1">Supplementary material S2</xref>. Firstly, we see that all main effects were significant at the &#x003B1; &#x0003D; 0.01 level, and that the effects of <italic>K</italic>, &#x003B2;, small groups proportion and fixed/random within-group samples each had partial &#x003B7;<sup>2</sup> values larger than 0.10, indicating that each of them accounted for a relatively large proportion of variance in the ARI after accounting for all other effects. From <xref ref-type="table" rid="T1">Table 1</xref>, we see that larger <italic>G</italic>, smaller <italic>K</italic>, larger groups (and a larger proportion thereof), larger &#x003B2; (and, thus, larger differences between clusters), higher reliability, and fixed (rather than random) within-group samples all contributed to a better recovery of the clusters. Secondly, we see that all two-way interaction effects involving &#x003B2; or fixed/random within-group samples were significant and that most interaction effects involving the small groups proportion or <italic>K</italic> were significant. The interaction between these four manipulated factors is thus interesting to inspect further and, given the inclusion of the small groups proportion, including the sample size of the small and large groups is also informative. Therefore, the interaction effect of these six manipulated factors is shown in <xref ref-type="fig" rid="F3">Figure 3</xref> for ARI. Specifically, the combination of group sizes of 200 and 50 showed the best recovery of clusters, while 100 and 25 showed the worst. In case of fewer clusters, fixed within-group samples and a &#x003B2; of 0.3 or 0.4, the performance was less sensitive to the group sizes. According to Steinley (<xref ref-type="bibr" rid="B54">2004</xref>), ARI values &#x0003E;0.80 indicate a good recovery of the clusters. Using this rule-of-thumb, for fixed within-group samples, the cluster recovery was good when &#x003B2; &#x0003D; 0.4, or when &#x003B2; &#x0003D; 0.3 and at least 25% of the groups were large, or when &#x003B2; &#x0003D; 0.2 and at least 50% of the groups were large. For random within-group samples, the cluster recovery was (generally) good when &#x003B2; &#x0003D; 0.4 and at least 75% of the groups were large, or when &#x003B2; &#x0003D; 0.3 and at least 75% of the groups were large with <italic>N</italic><sub><italic>g</italic></sub> &#x0003D; 200.</p>
<fig id="F3" position="float">
<label>Figure 3</label>
<caption><p>The ARI for MixML-SEM. The ARI and associated error bars for MixML-SEM in function of the within-group sample sizes for large and small groups, proportion of small groups, number of clusters, and size of regression parameters. <bold>(Top)</bold> Fixed within-group samples. <bold>(Bottom)</bold> Random within-group samples. Note that the standard errors are too small for the error bars to be clearly visible in the plots, the largest observed standard error was 0.020, corresponding to ARI values ranging from 0.850 to 0.890 for conditions with a sample size of 25 for half of the groups and 100 for the other half, <italic>K</italic> &#x0003D; 4, &#x003B2; &#x0003D; 0.2, and fixed within-group samples. &#x0201C;combN&#x0201D; refers to the combination of large and small groups.</p></caption>
<alt-text>The ARI and associated error bars for MixML-SEM in function of the within-group sample sizes for large and small groups, proportion of small groups, number of clusters, and size of regression parameters. Top: Fixed within-group samples. Bottom: Random within-group samples. The charts compare different sample combinations, indicated by varying colors, showing trends in cluster recovery decreasing as the proportion of small groups increases.</alt-text>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpsyg-16-1463790-g0003.tif"/>
</fig>
<p>From <xref ref-type="fig" rid="F3">Figure 3</xref>, it is clear that the cluster recovery was the worst when the proportion of small groups was 1, and that the effect of the small groups proportion was different for fixed (<xref ref-type="fig" rid="F3">Figure 3</xref>, top) than for random (<xref ref-type="fig" rid="F3">Figure 3</xref>, bottom) within-group samples. For fixed within-group samples, when the proportion of small groups was 1, the average ARI was 0.670, but decreasing the proportion from 1 to 0.75 already resulted in a remarkable improvement in cluster recovery, with an average ARI of 0.919. We get a better picture of what this implies in terms of correctly clustered groups by linking this to the %CC. Specifically, the %CC was 77.3%, when all groups were small and 94.2% when 75% of the groups were small. For random within-group samples, the average ARI was only 0.354 when all groups were small, which still corresponds to 70.3% of the groups being clustered correctly. Decreasing the proportion from 1 to 0.75 improved the ARI to 0.467 and the %CC to 77.5%, which is a less dramatic improvement than for the fixed within-group samples. Indeed, in the bottom panel of <xref ref-type="fig" rid="F3">Figure 3</xref>, we see a more gradual improvement when the proportion of small groups decreases.</p>
<p>Note that, in the hard partition, it can occur that all (or most) groups are clustered into one cluster, which results in a very low ARI. In our simulations, all groups ended up in one cluster for 1,736 data sets. All of these cases occurred in fixed within-group samples, 1,348 occurred in case of four clusters, 1,547 occurred when the proportion of small groups was 1, and 1,374 occurred when this proportion was combined with &#x003B2; &#x0003D; 0.2. For the remaining 3,253 data sets with a small groups proportion of 1 in fixed within-group samples (where the groups did not end up in one cluster), the average ARI was 0.988, which indicated that the much lower ARI for a small groups proportion of 1 (as opposed to 0.75) is largely explained by the one-cluster solutions. This one-cluster issue may be explained by a high classification uncertainty. Throughout the iterative estimation process of Step 3, high classification uncertainty results in more similar cluster-specific regression coefficients, because then all groups&#x02014;to some extent&#x02014;affect their estimation, which is based on a weighted sum of the group-specific factor covariances with the cluster memberships serving as the weights (Perez Alonso et al., <xref ref-type="bibr" rid="B44">2024</xref>). This may re-enforce the uncertainty in the next iteration. As such, it may happen that the cluster-specific regression coefficients become nearly identical, eventually resulted in one cluster after hard partitioning.</p>
<p>We also checked whether a worse cluster recovery (i.e., lower ARI and %CC) concurred with a higher classification uncertainty. From <xref ref-type="table" rid="T1">Table 1</xref>, we see that the main effects of the following factors on classification uncertainty were opposite to their main effects on ARI and CC%: <italic>G</italic>, reliability, and within-group samples. Note that the effects of <italic>G</italic> and reliability varied depending on the within-group samples being fixed or random. For fixed within-group samples, increasing <italic>G</italic> slightly reduced classification uncertainty (from 0.108 at <italic>G</italic> &#x0003D; 48 to 0.106 at <italic>G</italic> &#x0003D; 96), which concurred with a slight increase in ARI (from 0.915 to 0.917) and %CC (from 0.942 to 0.943). Classification uncertainty remained the same across the two reliability levels (0.107), as did ARI (0.916) and %CC (0.942). In contrast, for random within-group samples, classification uncertainty was lower in general, but increased with both larger <italic>G</italic> (from 0.069 to 0.077) and higher reliability (from 0.069 to 0.078), whereas both larger <italic>G</italic> and higher reliability led to a better cluster recovery (i.e., a better ARI and %CC). A possible explanation is that, in random within-group samples, additional differences were introduced across groups due to sampling fluctuations, making the groups appear more separated (even within clusters), which can lead to lower classification uncertainty, even when it leads to groups being misclassified at the same time. Conversely, a larger <italic>G</italic> and higher reliability help to recover the clustering, but the sampling fluctuations still lead to (slightly more) classification uncertainty. The other factors showed main effects that were consistent with expectations, with a lower classification uncertainty concurring with a better cluster recovery. Smaller <italic>K</italic>, larger groups, and larger &#x003B2; all contributed directly to a lower classification uncertainty, since less clusters imply less cluster memberships to estimate, larger groups lower the sampling fluctuations and larger &#x003B2; implies larger between-cluster differences that stand out over sampling fluctuations. In <xref ref-type="supplementary-material" rid="SM1">Supplementary material S3</xref>, <xref ref-type="fig" rid="F1">Figure 1</xref>, we depicted the same interaction effect as the one we explored for the ARI. It clearly shows that classification uncertainty was generally higher for fixed within-group samples. Specifically, for fixed within-group samples, the classification uncertainty was the highest when the proportion of small groups was 1. Decreasing the proportion of small groups to 0.75 not only improved cluster recovery (as mentioned above), but also lowered the mean uncertainty from 0.252 to 0.143. Since the classification uncertainty was generally lower in random than fixed within-group samples, it does not explain poor cluster recovery in these conditions. For random within-group samples, the poor cluster recovery is instead explained by sampling fluctuations and the fact that sampling fluctuations tend to be larger especially for smaller groups, which means that the observed data are less representative of the larger population. Therefore, a larger group size is required for better cluster recovery in random within-group samples.</p></sec>
<sec>
<title>Regression parameter recovery</title>
<p>To evaluate the recovery of the regression parameters, we computed the Root Mean Squared Error (RMSE) for each regression parameter (i.e., for &#x003B2;<sub>1</sub>, &#x003B2;<sub>2</sub>, &#x003B2;<sub>3</sub>, and &#x003B2;<sub>4</sub>) separately:</p>
<disp-formula id="E19"><label>(19)</label><mml:math id="M51"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>R</mml:mi><mml:mi>M</mml:mi><mml:mi>S</mml:mi><mml:msub><mml:mrow><mml:mi>E</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x003B2;</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msqrt><mml:mrow><mml:mfrac><mml:mrow><mml:mstyle displaystyle="true"><mml:msubsup><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>k</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>K</mml:mi></mml:mrow></mml:msubsup></mml:mstyle><mml:msup><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mover accent="false"><mml:mrow><mml:mi>&#x003B2;</mml:mi></mml:mrow><mml:mo>^</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:mo>-</mml:mo><mml:msub><mml:mrow><mml:mi>&#x003B2;</mml:mi></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:mrow><mml:mrow><mml:mi>K</mml:mi></mml:mrow></mml:mfrac></mml:mrow></mml:msqrt></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>where &#x003B2;<sub><italic>k</italic></sub> and <inline-formula><mml:math id="M52"><mml:msub><mml:mrow><mml:mover accent="false"><mml:mrow><mml:mi>&#x003B2;</mml:mi></mml:mrow><mml:mo>^</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> are the true and estimated values of the regression coefficient in cluster <italic>k</italic>, respectively. The main effects of the manipulated factors on each <italic>RMSE</italic><sub>&#x003B2;</sub> are summarized in <xref ref-type="table" rid="T2">Table 2</xref>. On average, <italic>RMSE</italic><sub>&#x003B2;</sub> was 0.033, 0.030, 0.028, and 0.040 for &#x003B2;<sub>1</sub>, &#x003B2;<sub>2</sub>, &#x003B2;<sub>3</sub>, and &#x003B2;<sub>4</sub>, respectively. Note that differences in &#x003B2;<sub>1</sub> and &#x003B2;<sub>2</sub> were manipulated across all conditions while differences in &#x003B2;<sub>3</sub> and &#x003B2;<sub>4</sub> were only manipulated when <italic>K</italic> &#x0003D; 4. When <italic>K</italic> &#x0003D; 2, the average <italic>RMSE</italic><sub>&#x003B2;</sub> was 0.022, 0.021, 0.017, and 0.028 for &#x003B2;<sub>1</sub>, &#x003B2;<sub>2</sub>, &#x003B2;<sub>3</sub>, and &#x003B2;<sub>4</sub>, respectively; when <italic>K</italic> &#x0003D; 4, the average <italic>RMSE</italic><sub>&#x003B2;</sub> was 0.044, 0.040, 0.039, 0.052 for these regression coefficients respectively. Thus, the parameter recovery of &#x003B2;<sub>1</sub> and &#x003B2;<sub>4</sub> was generally worse than that of &#x003B2;<sub>2</sub> and &#x003B2;<sub>3</sub>. This is consistent with previous findings that the further away the parameters are from the exogenous latent variables (i.e., <italic>F</italic>1 and <italic>F</italic>2), the worse their recovery (Devlieger and Rosseel, <xref ref-type="bibr" rid="B18">2017</xref>; Guenole and Brown, <xref ref-type="bibr" rid="B23">2014</xref>; Perez Alonso et al., <xref ref-type="bibr" rid="B44">2024</xref>). In our model, &#x003B2;<sub>4</sub> is not directly connected to any of the exogenous factors and the estimation of &#x003B2;<sub>1</sub> relies on that of &#x003B2;4 since they both pertain to regression effects on the same variable (<italic>F</italic>4). This may explain why &#x003B2;<sub>1</sub> and &#x003B2;<sub>4</sub> were less accurately recovered than &#x003B2;<sub>2</sub> and &#x003B2;<sub>3</sub>.</p><table-wrap position="float" id="T2">
<label>Table 2</label>
<caption><p>MixML-SEM regression parameter recovery: <italic>RMSE</italic><sub>&#x003B2;</sub>.</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:#727779;color:#ffffff">
<th valign="top" align="left"><bold>Factor</bold></th>
<th valign="top" align="center"><bold>Level</bold></th>
<th valign="top" align="center"><bold>&#x003B2;<sub>1</sub></bold></th>
<th valign="top" align="center"><bold>&#x003B2;<sub>2</sub></bold></th>
<th valign="top" align="center"><bold>&#x003B2;<sub>3</sub></bold></th>
<th valign="top" align="center"><bold>&#x003B2;<sub>4</sub></bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left" rowspan="2"><italic>G</italic></td>
<td valign="top" align="left">48</td>
<td valign="top" align="center">0.038 (0.055)</td>
<td valign="top" align="center">0.035 (0.050)</td>
<td valign="top" align="center">0.032 (0.049)</td>
<td valign="top" align="center">0.045 (0.051)</td>
</tr>
 <tr>
<td valign="top" align="center">96</td>
<td valign="top" align="center">0.028 (0.040)</td>
<td valign="top" align="center">0.026 (0.037)</td>
<td valign="top" align="center">0.024 (0.035)</td>
<td valign="top" align="center">0.035 (0.038)</td>
</tr> <tr>
<td valign="top" align="left" rowspan="2"><italic>K</italic></td>
<td valign="top" align="left">2</td>
<td valign="top" align="center">0.022 (0.030)</td>
<td valign="top" align="center">0.021 (0.028)</td>
<td valign="top" align="center">0.017 (0.024)</td>
<td valign="top" align="center">0.028 (0.026)</td>
</tr>
 <tr>
<td valign="top" align="center">4</td>
<td valign="top" align="center">0.044 (0.059)</td>
<td valign="top" align="center">0.040 (0.054)</td>
<td valign="top" align="center">0.039 (0.053)</td>
<td valign="top" align="center">0.052 (0.056)</td>
</tr> <tr>
<td valign="top" align="left" rowspan="2">Large <italic>N</italic><sub><italic>g</italic></sub></td>
<td valign="top" align="left">100</td>
<td valign="top" align="center">0.038 (0.052)</td>
<td valign="top" align="center">0.035 (0.047)</td>
<td valign="top" align="center">0.032 (0.044)</td>
<td valign="top" align="center">0.044 (0.048)</td>
</tr>
 <tr>
<td valign="top" align="center">200</td>
<td valign="top" align="center">0.028 (0.043)</td>
<td valign="top" align="center">0.026 (0.041)</td>
<td valign="top" align="center">0.024 (0.041)</td>
<td valign="top" align="center">0.036 (0.042)</td>
</tr> <tr>
<td valign="top" align="left" rowspan="2">Small <italic>N</italic><sub><italic>g</italic></sub></td>
<td valign="top" align="left">25</td>
<td valign="top" align="center">0.040 (0.059)</td>
<td valign="top" align="center">0.037 (0.054)</td>
<td valign="top" align="center">0.033 (0.052)</td>
<td valign="top" align="center">0.045 (0.056)</td>
</tr>
 <tr>
<td valign="top" align="center">50</td>
<td valign="top" align="center">0.026 (0.031)</td>
<td valign="top" align="center">0.024 (0.030)</td>
<td valign="top" align="center">0.023 (0.030)</td>
<td valign="top" align="center">0.034 (0.031)</td>
</tr> <tr>
<td valign="top" align="left" rowspan="5">Small groups proportion</td>
<td valign="top" align="left">0</td>
<td valign="top" align="center">0.016 (0.017)</td>
<td valign="top" align="center">0.014 (0.017)</td>
<td valign="top" align="center">0.014 (0.016)</td>
<td valign="top" align="center">0.026 (0.017)</td>
</tr>
 <tr>
<td valign="top" align="center">0.25</td>
<td valign="top" align="center">0.018 (0.022)</td>
<td valign="top" align="center">0.016 (0.020)</td>
<td valign="top" align="center">0.016 (0.020)</td>
<td valign="top" align="center">0.028 (0.020)</td>
</tr>
 <tr>
<td valign="top" align="center">0.5</td>
<td valign="top" align="center">0.024 (0.030)</td>
<td valign="top" align="center">0.021 (0.027)</td>
<td valign="top" align="center">0.021 (0.027)</td>
<td valign="top" align="center">0.032 (0.030)</td>
</tr>
 <tr>
<td valign="top" align="center">0.75</td>
<td valign="top" align="center">0.035 (0.045)</td>
<td valign="top" align="center">0.032 (0.041)</td>
<td valign="top" align="center">0.030 (0.040)</td>
<td valign="top" align="center">0.042 (0.045)</td>
</tr>
 <tr>
<td valign="top" align="center">1</td>
<td valign="top" align="center">0.073 (0.075)</td>
<td valign="top" align="center">0.068 (0.069)</td>
<td valign="top" align="center">0.059 (0.070)</td>
<td valign="top" align="center">0.071 (0.073)</td>
</tr> <tr>
<td valign="top" align="left" rowspan="3">&#x003B2;</td>
<td valign="top" align="left">0.2</td>
<td valign="top" align="center">0.045 (0.058)</td>
<td valign="top" align="center">0.043 (0.054)</td>
<td valign="top" align="center">0.037 (0.054)</td>
<td valign="top" align="center">0.043 (0.055)</td>
</tr>
 <tr>
<td valign="top" align="center">0.3</td>
<td valign="top" align="center">0.030 (0.047)</td>
<td valign="top" align="center">0.027 (0.044)</td>
<td valign="top" align="center">0.026 (0.041)</td>
<td valign="top" align="center">0.038 (0.045)</td>
</tr>
 <tr>
<td valign="top" align="center">0.4</td>
<td valign="top" align="center">0.024 (0.033)</td>
<td valign="top" align="center">0.020 (0.028)</td>
<td valign="top" align="center">0.020 (0.027)</td>
<td valign="top" align="center">0.038 (0.032)</td>
</tr> <tr>
<td valign="top" align="left" rowspan="2">Reliability</td>
<td valign="top" align="center">high</td>
<td valign="top" align="center">0.030 (0.041)</td>
<td valign="top" align="center">0.027 (0.039)</td>
<td valign="top" align="center">0.025 (0.038)</td>
<td valign="top" align="center">0.036 (0.039)</td>
</tr>
 <tr>
<td valign="top" align="center">low</td>
<td valign="top" align="center">0.037 (0.054)</td>
<td valign="top" align="center">0.034 (0.049)</td>
<td valign="top" align="center">0.031 (0.047)</td>
<td valign="top" align="center">0.043 (0.051)</td>
</tr> <tr>
<td valign="top" align="left" rowspan="2">Within-group samples</td>
<td valign="top" align="center">fixed</td>
<td valign="top" align="center">0.017 (0.026)</td>
<td valign="top" align="center">0.015 (0.026)</td>
<td valign="top" align="center">0.012 (0.023)</td>
<td valign="top" align="center">0.026 (0.023)</td>
</tr>
 <tr>
<td valign="top" align="center">random</td>
<td valign="top" align="center">0.049 (0.058)</td>
<td valign="top" align="center">0.046 (0.053)</td>
<td valign="top" align="center">0.044 (0.051)</td>
<td valign="top" align="center">0.054 (0.056)</td>
</tr> <tr>
<td valign="top" align="left">Total</td>
<td/>
<td valign="top" align="center">0.033 (0.048)</td>
<td valign="top" align="center">0.030 (0.044)</td>
<td valign="top" align="center">0.028 (0.043)</td>
<td valign="top" align="center">0.040 (0.045)</td>
</tr></tbody>
</table>
<table-wrap-foot>
<p>The average Root Mean Squared Error (RMSE) for MixML-SEM per level of each manipulated factor for each of the four estimated regression parameters. The standard deviation is shown in brackets.</p>
</table-wrap-foot>
</table-wrap>
<p><xref ref-type="fig" rid="F4">Figure 4</xref> displays the same interaction effect for <italic>RME</italic><sub>&#x003B2;1</sub> as the one previously explored for the ARI. Specifically, the largest <italic>RMSE</italic><sub>&#x003B2;1</sub> values were found when the proportion of small groups was 1, small group sample size was 25, <italic>K</italic> &#x0003D; 4, and &#x003B2; &#x0003D; 0.2, in random within-group samples. The interaction plots for <italic>RMSE</italic><sub>&#x003B2;2</sub> to <italic>RMSE</italic><sub>&#x003B2;4</sub>, showing similar patterns, are provided in <xref ref-type="supplementary-material" rid="SM1">Supplementary material S3</xref>, <xref ref-type="fig" rid="F2">Figures 2</xref>&#x02013;<xref ref-type="fig" rid="F4">4</xref>. The worse recovery of the regression coefficients in these conditions is explained by the worse cluster recovery in these conditions, since the estimation of the cluster-specific regression coefficients is directly affected by groups being clustered incorrectly and/or clustered with more classification uncertainty. For fixed within-group samples, the average <italic>RMSE</italic><sub>&#x003B2;</sub> values were 0.045, 0.043, 0.031 and 0.042 when all groups were small, which dropped to 0.018, 0.015, 0.012, and 0.026, respectively, when the proportion of small groups decreased from 1 to 0.75. For random within-group samples, <italic>RMSE</italic><sub>&#x003B2;</sub> was on average 0.101, 0.093, 0.088, and 0.102 when all groups were small, which dropped to 0.053, 0.049, 0.047, and 0.057, when the small groups proportion was 0.75. Other than that, we see that larger <italic>K</italic>, smaller &#x003B2; values, and smaller group sizes (i.e., the large group sample size being 100 and/or the small group sample size being 25), led to higher <italic>RMSE</italic><sub>&#x003B2;</sub> values.</p>
<fig id="F4" position="float">
<label>Figure 4</label>
<caption><p><italic>The RMSE</italic><sub>&#x003B2;</sub> for MixML-SEM. <italic>The RMSE</italic><sub>&#x003B2;1</sub> for MixML-SEM in function of the within-group sample sizes for large and small groups, proportion of small groups, number of clusters, and size of regression parameters. <bold>(Top)</bold> Fixed within-group samples. <bold>(Bottom)</bold> Random within-group samples. &#x0201C;combN&#x0201D; refers to the combination of large and small groups.</p></caption>
<alt-text>The RMSE (for Beta1) for MixML-SEM in function of the within-group sample sizes for large and small groups, proportion of small groups, number of clusters, and size of regression parameters. Top: Fixed within-group samples. Bottom: Random within-group samples. Lines represent different sample combinations: combN, 100-25, 100-50, 200-25, and 200-50. RMSE generally increases with a higher proportion of small groups.</alt-text>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpsyg-16-1463790-g0004.tif"/>
</fig></sec></sec>
<sec>
<title>Comparison to ML-SEM</title>
<p>ML-SEM estimates the MM and SM at the same time, with random effects for the four regression parameters, in addition to the random effects for the non-invariant measurement parameters. In comparison to MixML-SEM, the recovery of the measurement parameters was very similar, so we focus on the recovery of the regression coefficients. For each regression parameter, we computed the <italic>RMSE</italic><sub>&#x003B2;</sub> as follows:</p>
<disp-formula id="E20"><label>(20)</label><mml:math id="M53"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>R</mml:mi><mml:mi>M</mml:mi><mml:mi>S</mml:mi><mml:msub><mml:mrow><mml:mi>E</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x003B2;</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msqrt><mml:mrow><mml:mfrac><mml:mrow><mml:mstyle displaystyle="true"><mml:msubsup><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>g</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>G</mml:mi></mml:mrow></mml:msubsup></mml:mstyle><mml:msup><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mover accent="false"><mml:mrow><mml:mi>&#x003B2;</mml:mi></mml:mrow><mml:mo>^</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub><mml:mo>-</mml:mo><mml:msub><mml:mrow><mml:mi>&#x003B2;</mml:mi></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:mrow><mml:mrow><mml:mi>G</mml:mi></mml:mrow></mml:mfrac></mml:mrow></mml:msqrt></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>where we compared the group-specific estimates (<inline-formula><mml:math id="M54"><mml:msub><mml:mrow><mml:mover accent="false"><mml:mrow><mml:mi>&#x003B2;</mml:mi></mml:mrow><mml:mo>^</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula>) to the cluster-specific true values (&#x003B2;<sub><italic>k</italic></sub>) for the cluster the group truly belongs to.</p>
<p>On average, the <italic>RMSE</italic><sub>&#x003B2;</sub> values were 0.098, 0.096, 0.055, and 0.059 for &#x003B2;<sub>1</sub>, &#x003B2;<sub>2</sub>, &#x003B2;<sub>3</sub>, and &#x003B2;<sub>4</sub>, respectively (see <xref ref-type="table" rid="T3">Table 3</xref>). In comparison to MixML-SEM, we thus observed larger <italic>RMSE</italic><sub>&#x003B2;</sub> values, especially for &#x003B2;<sub>1</sub> and &#x003B2;<sub>2</sub>. This is due to the random effects being normally distributed so that the derived group-specific estimates are biased toward the overall mean parameter value. Indeed, when looking at the estimated parameter values, for example, for a data set with &#x003B2;<sub>1, <italic>k</italic> &#x0003D; 1</sub> &#x0003D; 0 and &#x003B2;<sub>1, <italic>k</italic> &#x0003D; 2</sub> &#x0003D; 0.3, the mean of the regression coefficients across the groups belonging to cluster one (<italic>k</italic> &#x0003D; 1) was 0.138 (<italic>SD</italic> &#x0003D; 0.010), and for cluster two (<italic>k</italic> &#x0003D; 2), it was 0.171 (<italic>SD</italic> &#x0003D; 0.011). The group-specific estimates were thus close to the mean true value of &#x003B2;<sub>1</sub> across clusters, which was 0.15.</p>
<table-wrap position="float" id="T3">
<label>Table 3</label>
<caption><p>ML-SEM regression parameter recovery: <italic>RMSE</italic><sub>&#x003B2;</sub>.</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:#727779;color:#ffffff">
<th valign="top" align="left"><bold>Factor</bold></th>
<th valign="top" align="center"><bold>Level</bold></th>
<th valign="top" align="center"><bold>&#x003B2;<sub>1</sub></bold></th>
<th valign="top" align="center"><bold>&#x003B2;<sub>2</sub></bold></th>
<th valign="top" align="center"><bold>&#x003B2;<sub>3</sub></bold></th>
<th valign="top" align="center"><bold>&#x003B2;<sub>4</sub></bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left" rowspan="2"><italic>G</italic></td>
<td valign="top" align="left">48</td>
<td valign="top" align="center">0.098 (0.027)</td>
<td valign="top" align="center">0.096 (0.026)</td>
<td valign="top" align="center">0.057 (0.042)</td>
<td valign="top" align="center">0.060 (0.042)</td>
</tr>
 <tr>
<td valign="top" align="left">96</td>
<td valign="top" align="center">0.098 (0.026)</td>
<td valign="top" align="center">0.095 (0.026)</td>
<td valign="top" align="center">0.053 (0.043)</td>
<td valign="top" align="center">0.057 (0.044)</td>
</tr> <tr>
<td valign="top" align="left" rowspan="2"><italic>K</italic></td>
<td valign="top" align="left">2</td>
<td valign="top" align="center">0.098 (0.028)</td>
<td valign="top" align="center">0.098 (0.027)</td>
<td valign="top" align="center">0.018 (0.016)</td>
<td valign="top" align="center">0.021 (0.016)</td>
</tr>
 <tr>
<td valign="top" align="left">4</td>
<td valign="top" align="center">0.098 (0.025)</td>
<td valign="top" align="center">0.093 (0.024)</td>
<td valign="top" align="center">0.092 (0.024)</td>
<td valign="top" align="center">0.097 (0.025)</td>
</tr> <tr>
<td valign="top" align="left" rowspan="2">Large <italic>N</italic><sub><italic>g</italic></sub></td>
<td valign="top" align="left">100</td>
<td valign="top" align="center">0.104 (0.024)</td>
<td valign="top" align="center">0.102 (0.023)</td>
<td valign="top" align="center">0.059 (0.044)</td>
<td valign="top" align="center">0.063 (0.045)</td>
</tr>
 <tr>
<td valign="top" align="left">200</td>
<td valign="top" align="center">0.092 (0.028)</td>
<td valign="top" align="center">0.089 (0.028)</td>
<td valign="top" align="center">0.051 (0.041)</td>
<td valign="top" align="center">0.055 (0.041)</td>
</tr> <tr>
<td valign="top" align="left" rowspan="2">Small <italic>N</italic><sub><italic>g</italic></sub></td>
<td valign="top" align="left">25</td>
<td valign="top" align="center">0.102 (0.029)</td>
<td valign="top" align="center">0.100 (0.029)</td>
<td valign="top" align="center">0.057 (0.045)</td>
<td valign="top" align="center">0.061 (0.045)</td>
</tr>
 <tr>
<td valign="top" align="left">50</td>
<td valign="top" align="center">0.094 (0.023)</td>
<td valign="top" align="center">0.091 (0.022)</td>
<td valign="top" align="center">0.052 (0.040)</td>
<td valign="top" align="center">0.057 (0.041)</td>
</tr> <tr>
<td valign="top" align="left" rowspan="5">Small groups proportion</td>
<td valign="top" align="left">0</td>
<td valign="top" align="center">0.076 (0.019)</td>
<td valign="top" align="center">0.072 (0.019)</td>
<td valign="top" align="center">0.043 (0.032)</td>
<td valign="top" align="center">0.048 (0.033)</td>
</tr>
 <tr>
<td valign="top" align="left">0.25</td>
<td valign="top" align="center">0.087 (0.017)</td>
<td valign="top" align="center">0.085 (0.016)</td>
<td valign="top" align="center">0.048 (0.037)</td>
<td valign="top" align="center">0.053 (0.037)</td>
</tr>
 <tr>
<td valign="top" align="left">0.5</td>
<td valign="top" align="center">0.098 (0.019)</td>
<td valign="top" align="center">0.095 (0.017)</td>
<td valign="top" align="center">0.054 (0.041)</td>
<td valign="top" align="center">0.058 (0.042)</td>
</tr>
 <tr>
<td valign="top" align="left">0.75</td>
<td valign="top" align="center">0.108 (0.021)</td>
<td valign="top" align="center">0.106 (0.020)</td>
<td valign="top" align="center">0.060 (0.045)</td>
<td valign="top" align="center">0.063 (0.046)</td>
</tr>
 <tr>
<td valign="top" align="left">1</td>
<td valign="top" align="center">0.121 (0.028)</td>
<td valign="top" align="center">0.120 (0.027)</td>
<td valign="top" align="center">0.070 (0.051)</td>
<td valign="top" align="center">0.073 (0.050)</td>
</tr> <tr>
<td valign="top" align="left" rowspan="3">&#x003B2;</td>
<td valign="top" align="left">0.2</td>
<td valign="top" align="center">0.081 (0.010)</td>
<td valign="top" align="center">0.081 (0.011)</td>
<td valign="top" align="center">0.047 (0.033)</td>
<td valign="top" align="center">0.048 (0.032)</td>
</tr>
 <tr>
<td valign="top" align="left">0.3</td>
<td valign="top" align="center">0.102 (0.022)</td>
<td valign="top" align="center">0.100 (0.022)</td>
<td valign="top" align="center">0.057 (0.044)</td>
<td valign="top" align="center">0.060 (0.043)</td>
</tr>
 <tr>
<td valign="top" align="left">0.4</td>
<td valign="top" align="center">0.111 (0.033)</td>
<td valign="top" align="center">0.106 (0.033)</td>
<td valign="top" align="center">0.060 (0.049)</td>
<td valign="top" align="center">0.068 (0.050)</td>
</tr> <tr>
<td valign="top" align="left" rowspan="2">Reliability</td>
<td valign="top" align="left">high</td>
<td valign="top" align="center">0.095 (0.026)</td>
<td valign="top" align="center">0.092 (0.025)</td>
<td valign="top" align="center">0.053 (0.042)</td>
<td valign="top" align="center">0.057 (0.042)</td>
</tr>
 <tr>
<td valign="top" align="left">low</td>
<td valign="top" align="center">0.101 (0.027)</td>
<td valign="top" align="center">0.099 (0.026)</td>
<td valign="top" align="center">0.057 (0.044)</td>
<td valign="top" align="center">0.061 (0.044)</td>
</tr> <tr>
<td valign="top" align="left" rowspan="2">Within-group samples</td>
<td valign="top" align="left">fixed</td>
<td valign="top" align="center">0.096 (0.029)</td>
<td valign="top" align="center">0.092 (0.028)</td>
<td valign="top" align="center">0.047 (0.046)</td>
<td valign="top" align="center">0.053 (0.046)</td>
</tr>
 <tr>
<td valign="top" align="left">random</td>
<td valign="top" align="center">0.101 (0.024)</td>
<td valign="top" align="center">0.099 (0.023)</td>
<td valign="top" align="center">0.063 (0.038)</td>
<td valign="top" align="center">0.065 (0.039)</td>
</tr> <tr>
<td valign="top" align="left">Total</td>
<td/>
<td valign="top" align="center">0.098 (0.027)</td>
<td valign="top" align="center">0.096 (0.026)</td>
<td valign="top" align="center">0.055 (0.043)</td>
<td valign="top" align="center">0.059 (0.043)</td>
</tr></tbody>
</table>
<table-wrap-foot>
<p>The average RMSE for ML-SEM per level of each manipulated factor for each of the four estimated regression parameters. The standard deviation is shown in brackets.</p>
</table-wrap-foot>
</table-wrap>
<p>We see that the <italic>RMSE</italic><sub>&#x003B2;</sub> values for ML-SEM were larger with smaller <italic>G</italic>, smaller groups, lower reliability, and random within-group samples (<xref ref-type="table" rid="T3">Table 3</xref>), as was also the case for MixML-SEM. The only differences were the effects of factors &#x003B2; and <italic>K</italic>. Specifically, <italic>RMSE</italic><sub>&#x003B2;</sub> was larger for larger &#x003B2; due to the shrinkage effect toward the overall mean parameter. Since the latter is a weighted average of &#x003B2; and zero, it deviates more from the true value of either &#x003B2; or 0 in case of a larger &#x003B2; value. Regarding the number of clusters, for fixed within-group samples, <italic>RMSE</italic><sub>&#x003B2;<sub>1</sub></sub> increased with more clusters, while <italic>RMSE</italic><sub>&#x003B2;<sub>2</sub></sub> decreased. For &#x003B2;<sub>2</sub>, in case of more clusters, the group-specific estimates gravitate toward a larger overall mean parameter, resulting in a smaller deviation between the estimated and true value for most groups and thus in a smaller RMSE value. For example, when <italic>G</italic> = 48, <italic>K</italic> &#x0003D; 2, and &#x003B2; &#x0003D; 0.4, the overall mean parameter for &#x003B2;<sub>2</sub> is the weighted average of &#x003B2;<sub>2, <italic>k</italic> &#x0003D; 1</sub> &#x0003D; 0.4 and &#x003B2;<sub>2, <italic>k</italic> &#x0003D; 2</sub> &#x0003D; 0, where each value applies to 24 groups. When <italic>K</italic> &#x0003D; 4, the overall mean parameter is larger, because, in that case, 36 groups (in Clusters 1, 3, and 4) have a &#x003B2;<sub>2</sub> of 0.4, whereas only 12 groups (in Cluster 2) have a &#x003B2;<sub>2</sub> of 0, resulting in smaller differences between estimates and true values for the former 36 groups. For &#x003B2;<sub>1</sub>, the overall mean parameter is the same as for &#x003B2;<sub>2</sub>, but its estimation is also influenced by that of &#x003B2;<sub>4</sub>, which may explain why the RMSE was slightly larger in case of more clusters. For random within-group samples, both <italic>RMSE</italic><sub>&#x003B2;<sub>1</sub></sub>and <italic>RMSE</italic><sub>&#x003B2;<sub>2</sub></sub> decreased with more clusters, likely influenced by the sampling variability and the fact that the within-cluster sample size is smaller when <italic>K</italic> is larger. For &#x003B2;<sub>3</sub> and &#x003B2;<sub>4</sub>, the RMSE values were larger when <italic>K</italic> &#x0003D; 4 in both fixed and random within-group samples, because they were only different across clusters in these conditions.</p></sec></sec></sec>
<sec>
<title>Simulation study 2</title>
<p>In Simulation Study 2, we evaluated MixML-SEM in terms of model selection. Specifically, we ran (Step 3 of) MixML-SEM with one to six clusters for the first 10 replications of each cell of the design of Simulation Study 1, excluding conditions with &#x0201C;high&#x0201D; reliability (i.e., for a total of 4,800 data sets). Then, the number of clusters was selected based on BIC<sub><italic>G</italic></sub>, AIC, and CHull.</p>
<sec>
<title>Results</title>
<p>For each data set, we verified whether the correct number of clusters was selected for MixML-SEM. BIC<sub><italic>G</italic></sub> correctly selected the number of clusters for 64.8% of the data sets whereas AIC did so for 69.1% and CHull for 71%. Note that for 5.0% of the data sets, CHull did not provide a solution and they were classified as incorrect. This occurred because the &#x0201C;observed&#x0201D; single-indicator log<italic>L</italic> did not increase monotonically with more clusters, which is attributed to the fact that, during Step 3 of the model estimation, we maximized log<italic>L</italic><sub>&#x003B7;</sub> (<xref ref-type="disp-formula" rid="E13">Equation 13</xref>) rather than the &#x0201C;observed&#x0201D; single-indicator log<italic>L</italic> (<xref ref-type="disp-formula" rid="E15">Equation 15</xref>). This causes the CHull procedure to exclude the concerned models from the selection. In practice, visual inspection of the CHull plot would alleviate the problem, since a clear elbow may still be present.</p>
<p>Both BIC<sub><italic>G</italic></sub> and AIC had a tendency to underestimate the number of clusters. Specifically, BIC<sub><italic>G</italic></sub> underestimated the number of clusters for 35.1% of the data sets and selected one cluster for 26.3%. Similarly, AIC selected too few clusters for 24.4% of the data sets and selected only one cluster for 17.6%. Note that these selections of one-cluster or too-few-clusters models could not be fully explained by MixML-SEM&#x00027;s tendency to assign all groups to one cluster in specific conditions when using the true <italic>K</italic>, which would make it harder to select the correct <italic>K</italic>. This occurred in only 11.0% and 14.2% of the data sets for which BIC<sub><italic>G</italic></sub> or AIC selected one cluster, respectively; and in only 8.7% and 11.4% of the data sets where BIC<sub><italic>G</italic></sub> or AIC selected too few clusters. Since CHull selects at least two clusters, we also examined the performance of BIC<sub><italic>G</italic></sub> and AIC when only considering two or more clusters, to make the comparison more fair. In this case, the overall accuracy of BIC<sub><italic>G</italic></sub> increased to 74.4%, and that of AIC increased to 79.4%, which are slightly better than CHull.</p>
<p>The main effects of the simulated conditions on the model selection accuracy are given in <xref ref-type="table" rid="T4">Table 4</xref>. For BIC<sub><italic>G</italic></sub> and AIC, larger regression coefficients, fewer clusters, a lower proportion of small groups, and random within-group samples, all contributed to a more accurate model selection. The worse performance for fixed within-group samples can only be partially explained by the occurrence of solutions where all groups were modally assigned to one cluster when using the true <italic>K</italic>, even though this only occurred in case of fixed within-group samples. After excluding the data sets for which one-cluster solutions occurred for fixed within-group samples (177 out of 2,400 data sets), the model selection accuracy increased to 0.630 for BIC<sub><italic>G</italic></sub> (which is still lower than the BIC<sub><italic>G</italic></sub> for random within-group samples), and 0.717 for AIC (which is now higher than the AIC for random within-group samples). Another potential explanation for the better performance in case of random within-group samples is that the tiny differences due to sampling fluctuations counter the tendency of BIC<sub><italic>G</italic></sub> and AIC to select too few clusters. Note that having a few large groups led to a marked increase of the model selection accuracy. Specifically, for BIC<sub><italic>G</italic></sub>, the correct selection rate increased from 28.0% to 56.8% when the proportion of small groups decreased from 1 to 0.75. For AIC, it increased from 39.1% to 66.3%.</p><table-wrap position="float" id="T4">
<label>Table 4</label>
<caption><p>The percentage of data sets for which the correct number of clusters for MixML-SEM was selected using BIC<sub><italic>G</italic></sub>, AIC and CHull, per level of each manipulated factor.</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:#727779;color:#ffffff">
<th valign="top" align="left"><bold>Factor</bold></th>
<th valign="top" align="center"><bold>Level</bold></th>
<th valign="top" align="center"><bold>BIC<italic><sub><italic>G</italic></sub></italic></bold></th>
<th valign="top" align="center"><bold>AIC</bold></th>
<th valign="top" align="center"><bold>CHull</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left" rowspan="2"><italic>G</italic></td>
<td valign="top" align="left">48</td>
<td valign="top" align="center">0.591 (0.492)</td>
<td valign="top" align="center">0.658 (0.475)</td>
<td valign="top" align="center">0.691 (0.462)</td>
</tr>
 <tr>
<td valign="top" align="left">96</td>
<td valign="top" align="center">0.705 (0.456)</td>
<td valign="top" align="center">0.724 (0.447)</td>
<td valign="top" align="center">0.729 (0.444)</td>
</tr> <tr>
<td valign="top" align="left" rowspan="2"><italic>K</italic></td>
<td valign="top" align="left">2</td>
<td valign="top" align="center">0.783 (0.412)</td>
<td valign="top" align="center">0.801 (0.400)</td>
<td valign="top" align="center">0.781 (0.413)</td>
</tr>
 <tr>
<td valign="top" align="left">4</td>
<td valign="top" align="center">0.510 (0.500)</td>
<td valign="top" align="center">0.579 (0.494)</td>
<td valign="top" align="center">0.638 (0.481)</td>
</tr> <tr>
<td valign="top" align="left" rowspan="2">Large <italic>N</italic><sub><italic>g</italic></sub></td>
<td valign="top" align="left">100</td>
<td valign="top" align="center">0.547 (0.498)</td>
<td valign="top" align="center">0.615 (0.487)</td>
<td valign="top" align="center">0.646 (0.478)</td>
</tr>
 <tr>
<td valign="top" align="left">200</td>
<td valign="top" align="center">0.748 (0.434)</td>
<td valign="top" align="center">0.765 (0.424)</td>
<td valign="top" align="center">0.773 (0.419)</td>
</tr> <tr>
<td valign="top" align="left" rowspan="2">Small <italic>N</italic><sub><italic>g</italic></sub></td>
<td valign="top" align="left">25</td>
<td valign="top" align="center">0.602 (0.490)</td>
<td valign="top" align="center">0.653 (0.476)</td>
<td valign="top" align="center">0.699 (0.459)</td>
</tr>
 <tr>
<td valign="top" align="left">50</td>
<td valign="top" align="center">0.689 (0.463)</td>
<td valign="top" align="center">0.725 (0.447)</td>
<td valign="top" align="center">0.720 (0.449)</td>
</tr> <tr>
<td valign="top" align="left" rowspan="5">Small groups proportion</td>
<td valign="top" align="left">0</td>
<td valign="top" align="center">0.846 (0.361)</td>
<td valign="top" align="center">0.809 (0.394)</td>
<td valign="top" align="center">0.756 (0.430)</td>
</tr>
 <tr>
<td valign="top" align="left">0.25</td>
<td valign="top" align="center">0.793 (0.405)</td>
<td valign="top" align="center">0.811 (0.391)</td>
<td valign="top" align="center">0.831 (0.375)</td>
</tr>
 <tr>
<td valign="top" align="left">0.5</td>
<td valign="top" align="center">0.722 (0.448)</td>
<td valign="top" align="center">0.758 (0.429)</td>
<td valign="top" align="center">0.815 (0.389)</td>
</tr>
 <tr>
<td valign="top" align="left">0.75</td>
<td valign="top" align="center">0.568 (0.496)</td>
<td valign="top" align="center">0.663 (0.473)</td>
<td valign="top" align="center">0.708 (0.455)</td>
</tr>
 <tr>
<td valign="top" align="left">1</td>
<td valign="top" align="center">0.280 (0.449)</td>
<td valign="top" align="center">0.391 (0.488)</td>
<td valign="top" align="center">0.424 (0.495)</td>
</tr> <tr>
<td valign="top" align="left" rowspan="3">&#x003B2;</td>
<td valign="top" align="left">0.2</td>
<td valign="top" align="center">0.334 (0.472)</td>
<td valign="top" align="center">0.456 (0.498)</td>
<td valign="top" align="center">0.551 (0.498)</td>
</tr>
 <tr>
<td valign="top" align="left">0.3</td>
<td valign="top" align="center">0.734 (0.442)</td>
<td valign="top" align="center">0.765 (0.424)</td>
<td valign="top" align="center">0.748 (0.435)</td>
</tr>
 <tr>
<td valign="top" align="left">0.4</td>
<td valign="top" align="center">0.904 (0.294)</td>
<td valign="top" align="center">0.871 (0.335)</td>
<td valign="top" align="center">0.846 (0.361)</td>
</tr> <tr>
<td valign="top" align="left">Reliability</td>
<td valign="top" align="left">low</td>
<td valign="top" align="center">0.648 (0.478)</td>
<td valign="top" align="center">0.691 (0.462)</td>
<td valign="top" align="center">0.710 (0.454)</td>
</tr> <tr>
<td valign="top" align="left" rowspan="2">Within-group samples</td>
<td valign="top" align="left">fixed</td>
<td valign="top" align="center">0.600 (0.490)</td>
<td valign="top" align="center">0.686 (0.464)</td>
<td valign="top" align="center">0.718 (0.450)</td>
</tr>
 <tr>
<td valign="top" align="left">random</td>
<td valign="top" align="center">0.702 (0.457)</td>
<td valign="top" align="center">0.696 (0.460)</td>
<td valign="top" align="center">0.701 (0.458)</td>
</tr> <tr>
<td valign="top" align="left">Total</td>
<td/>
<td valign="top" align="center">0.648 (0.478)</td>
<td valign="top" align="center">0.691 (0.462)</td>
<td valign="top" align="center">0.710 (0.454)</td>
</tr></tbody>
</table>
</table-wrap>
<p>The model selection performance of CHull showed similar trends to that of BIC<sub><italic>G</italic></sub> and AIC, but a difference is that CHull performed slightly better for fixed within-group samples (fixed: 71.8%, random: 70.1%). For random within-group samples, the correct model selection rate increased with a larger proportion of larger groups, while, for fixed within-group samples, the best performance occurred when the proportion of small groups was 0.5 rather than 0. This may be attributed to CHull selecting overly complex models when a more complex model barely resulted in a better model fit, leading to an artificially inflated scree ratio because the denominator approaches zero (Wilderjans et al., <xref ref-type="bibr" rid="B57">2013</xref>). An infinite scree ratio occurred only in fixed within-group samples (144 data sets), which is explained by the group-specific covariances being unaffected by sampling fluctuations. Also, it occurred more frequently with fewer clusters (125 data sets), more groups (81 data sets) and a smaller proportion of small groups (107 data sets). All 144 data sets with an infinite scree ratio selected an incorrect number of clusters. Recall that, in practice, researchers can visually inspect the CHull plot to identify the elbow and avoid an overly complex model being selected based on the scree ratio&#x00027;s alone. As an example, consider the scree ratio&#x00027;s and scree plot (<xref ref-type="fig" rid="F5">Figure 5</xref>) for one of the simulated data sets. This data set contained two clusters, 48 groups, a fixed sample of 200 per group and &#x003B2; &#x0003D; 0.3. CHull suggests three clusters with a ratio of infinity, whereas a very clear elbow is visible for two clusters (the only elbow in the plot), which is captured by the second largest scree ratio (8.292e&#x0002B;12). Given the impracticality of checking the CHull plot for each simulated data set, we examined the second largest scree ratio for all data sets with a maximal scree ratio of infinity and an incorrect model selection. By selecting the model with the second largest scree ratio, the number of clusters was correctly identified for all these data sets. In this way, the model selection accuracy for fixed within-group samples improved from 41.3%, 77.5%, 87.1%, 85.2%, and 67.9% to 41.9%, 78.3%, 88.8%, 89.8% and 90.2% for the proportion of small groups ranging from 1 to 0, respectively, indicating an improvement in model selection accuracy with a larger proportion of larger groups.</p>
<fig id="F5" position="float">
<label>Figure 5</label>
<caption><p>The CHull plot of an example of selecting an overly complex model for MixML-SEM based on the scree ratio&#x00027;s. The data set contained 48 groups, two clusters, balanced, a fixed sample of size 200 per group and &#x003B2; &#x0003D; 0.3.</p></caption>
<alt-text>Line graph titled &#x0201C;LogL per model&#x0201D; showing the relationship between the number of parameters (nr.par) on the x-axis ranging from six hundred twenty-eight to six hundred fifty-three and the log likelihood (LogL) on the y-axis ranging from negative four hundred ninety-one thousand six hundred to negative four hundred ninety thousand four hundred. The line rises steeply at the beginning and then levels off, reaching a plateau.</alt-text>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpsyg-16-1463790-g0005.tif"/>
</fig>
</sec>
</sec>
<sec>
<title>Conclusion</title>
<p>We assessed the performance of MixML-SEM and compared it to ML-SEM when the measurement non-invariances were correctly specified. When the true number of clusters was specified (Simulation Study 1), MixML-SEM performed well when the cluster separation was sufficiently large (for example &#x003B2; &#x0003D; 0.3) and/or when more large groups were involved, outperforming ML-SEM in terms of regression parameters recovery. The results suggest that the required group sizes for reliable performance depend on the degree of cluster separation and whether one wants to learn about structural relations for the specific within-group samples (fixed within-group samples) or aims to make inferences about a larger population with the groups (random within-group samples). In more challenging conditions with very small cluster differences (e.g., &#x003B2; &#x0003D; 0.2), a minimum of 50% group sizes of 100 was needed to achieve good performance under the current model setup in case of fixed within-group samples, while only 25% or even 0% of large groups was needed with better cluster separation (e.g., &#x003B2; &#x0003D; 0.3 <italic>or</italic> 0.4). For random within-group samples, larger group sizes (e.g., 200 or larger) and/or a larger proportion of large groups were required. Thus, applied researchers should keep in mind that, when (some) group sizes are smaller than 100, small differences in structural relations may not be captured, especially when they are masked by sampling fluctuations and one wants to draw conclusions about the larger population if the samples are not representative.</p>
<p>Despite difficulties in recovering the clustering when having only small groups combined with a low cluster separation, we observed a notable improvement in the performance of MixML-SEM when more larger groups were included alongside the small groups. This confirms the main advantage of MixML-SEM: combining information from multiple groups within a cluster leads to a better regression parameter (and cluster) recovery.</p>
<p>In contrast, in ML-SEM, the group-specific regression parameter estimates, derived from the random effects, were biased toward the overall mean parameter value. Hence, if researchers compare the group-specific estimates to draw conclusions about differences and similarities in structural relations, the differences are obfuscated due to the shrinkage bias. If they would opt for a mixture clustering based on such biased estimates to make the comparisons, the clustering will not be accurate either. Even when the clustering would be accurately recovered, the comparison of the corresponding cluster-specific regression coefficients would still be affected by the bias.</p>
<p>When it comes to selecting the number of clusters for MixML-SEM (Simulation Study 2), we conclude that BIC<sub><italic>G</italic></sub>, AIC, and CHull have comparable performance. BIC<sub><italic>G</italic></sub> and AIC tend to be more conservative, preferring models with fewer clusters, but they have the advantage over CHull that they can select one cluster. However, violations of distributional assumptions may lead to overselection in case of the BIC (and AIC) (e.g., Bauer, <xref ref-type="bibr" rid="B4">2007</xref>; McNeish and Harring, <xref ref-type="bibr" rid="B36">2017</xref>), making CHull potentially more suitable for empirical data. In CHull, an artificially inflated scree ratio may lead to overselection as well, but we can consider the two best models and/or visually inspect the CHull plot to confirm the presence of an elbow. Therefore, in empirical practice, we recommend combining BIC<sub><italic>G</italic></sub>, AIC, and CHull, along with visual inspection of the CHull scree plot.</p></sec></sec>
<sec id="s4">
<title>Empirical application</title>
<p>In this section, we demonstrate the empirical value of MixML-SEM using data from Dejonckheere et al. (<xref ref-type="bibr" rid="B16">2022</xref>), where they investigated how the perceived social emotion norm&#x02014;i.e., the social pressure to feel positive, and not to feel negative&#x02014;relates to people&#x00027;s subjective wellbeing across 40 countries and territories. For this illustration, we focus on the relation between participants&#x00027; perceived social pressure to be happy and life satisfaction. The perceived social pressure to be happy was assessed using the nine-item Social Expectancies about Happiness Scale (SEHS; Dejonckheere et al., <xref ref-type="bibr" rid="B16">2022</xref>), with items such as &#x0201C;I often feel a great deal of pressure from those around me to feel Happy.&#x0201D; Participants rated each item on a Nine-point Likert scale ranging from strongly disagree (one) to strongly agree (nine). Life satisfaction was assessed with the Five-item Satisfaction with Life Scale (SWLS; Diener et al., <xref ref-type="bibr" rid="B20">1985</xref>), including items like &#x0201C;The conditions of my life are excellent,&#x0201D; rated on a Seven-point Likert scale from strongly disagree (one) to strongly agree (seven). Dejonckheere et al. (<xref ref-type="bibr" rid="B16">2022</xref>) conducted multilevel regression analysis with random intercepts and slopes, based on mean scores computed for each construct. They found a fixed effect of &#x02212;0.05 and a random effect standard deviation of 0.11 for the regression coefficient of SEHS on SWLS, indicating substantial variability of this relations across the countries. Specifically, five countries had significantly positive relations between SEHS and SWLS, 10 had significantly negative relations, and 25 had null-relations. They used the world happiness index (WHI) as a country-level predictor to explain the between-country variability and concluded that SEHS was linked to lower SWLS in high WHI countries (&#x003B2; &#x0003D; &#x02212; 0.08).</p>
<p>By using mean scores, Dejonckheere et al. ignored (1) that the constructs are measured by items containing measurement error, and (2) that this measurement may be non-invariant across countries, both of which can result in biased regression estimates. Moreover, as mentioned in the Introduction, multilevel modeling is not ideal for identifying specific differences between the 40 countries. Specifically, it requires 780 pairwise comparisons of country-specific regression estimates, which are biased by the shrinkage toward the overall mean&#x02014;in addition to the bias by measurement error and potential non-invariances.</p>
<p>To solve all these issues, we applied MixML-SEM to the data. After removing observations with missing data for the variables of interest, we retained a sample of 6,775 participants from 40 countries. The mean structure was removed by centering the items per group. Before applying MixML-SEM, we evaluated MI for each construct separately using ML-CFA in Mplus. We first examined the variances of the measurement parameters when all of them were set to be random to see which ones have the largest variance. Then, we estimated several ML-CFA models, each time adding a random measurement parameter in the order of the magnitude of the random effect variance in the model where all measurement parameters were random. We compared these models by means of the deviance information criterion (DIC; Spiegelhalter et al., <xref ref-type="bibr" rid="B53">2002</xref>), which balances model fit and complexity for Bayesian models, based on the posterior mean estimate. The MI testing revealed three non-invariant factor loadings (of items 1, 6, and 9) for SEHS, in addition to the need to include residual covariances between certain items (items 1 and 2, and items 7 and 8), and no non-invariant loadings for SWLS, with both constructs modeled with random unique variances and factor variances. The testing procedure and final model specification for both constructs (Step 1 of MixML-SEM) can be found in <xref ref-type="supplementary-material" rid="SM1">Supplementary material S4</xref>.</p>
<p>Given that we do not know the true underlying number of clusters, we ran (Step 3 of) MixML-SEM with one to six clusters. The BIC<sub><italic>G</italic></sub> (<xref ref-type="fig" rid="F6">Figure 6</xref>, left) suggests that the model with three clusters provides the best model fit, and CHull (<xref ref-type="fig" rid="F6">Figure 6</xref>, right) also suggests three clusters (i.e., the plot levels off completely after three clusters). Therefore, we present the results for the Three-cluster model (<xref ref-type="table" rid="T5">Table 5</xref>).</p>
<fig id="F6" position="float">
<label>Figure 6</label>
<caption><p>The BIC<sub><italic>G</italic></sub> and CHull plot (based on the logL as a function of the number of parameters) for models with 1 to 6 clusters for the empirical data.</p></caption>
<alt-text>Two line graphs are displayed side by side. The left graph shows BIC values across clusters, which decreases sharply from 1 to 3 clusters, reaching a minimum at 3, and then increases gradually, indicating that the optimal number of clusters is 3. The right graph shows LogL values increasing rapidly for models with one to three clusters, stabilizing thereafter.</alt-text>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpsyg-16-1463790-g0006.tif"/>
</fig><table-wrap position="float" id="T5">
<label>Table 5</label>
<caption><p>The clustering of the countries based on the regression parameters between social pressure to be happy and life satisfaction (3-cluster model).</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:#727779;color:#ffffff">
<th valign="top" align="left"><bold>Model</bold></th>
<th valign="top" align="left"><bold>Countries</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Cluster 1 (8)</td>
<td valign="top" align="left">Chile (0.73), China, Hong Kong, Nigeria, Pakistan, Senegal, Uganda (0.84), Ukraine (0.83)</td>
</tr> <tr>
<td valign="top" align="left">Cluster 2 (15)</td>
<td valign="top" align="left">Brazil (0.88), Costa Rica (0.89), England, France, Germany, Latvia, Macedonia, Netherlands, Northern Ireland, Peru (0.66), Philippines, Poland, Scotland (0.70), South Korea (0.51), Wales</td>
</tr> <tr>
<td valign="top" align="left">Cluster 3 (17)</td>
<td valign="top" align="left">Australia, Belgium, Canada, Colombia (0.85), Estonia (0.71), Italy, Japan (0.86), Malaysia, New Zealand, Portugal (0.84), Singapore (0.84), Slovakia (0.74), South Africa, Spain, Thailand (0.89), Turkey, USA</td>
</tr></tbody>
</table>
<table-wrap-foot>
<p>The countries with a <inline-formula><mml:math id="M55"><mml:msub><mml:mrow><mml:mover accent="false"><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mo>^</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>g</mml:mi><mml:mi>k</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> lower than 0.90 are presented along with their corresponding posterior probabilities, enclosed in parentheses. For those countries with classification uncertainty, each consisted of a mix of either Cluster 1 or Cluster 2 with Cluster 3. For example, South Korea had the largest classification uncertainty: <inline-formula><mml:math id="M56"><mml:msub><mml:mrow><mml:mover accent="false"><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mo>^</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>g</mml:mi><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mn>0</mml:mn><mml:mo>.</mml:mo><mml:mn>51</mml:mn></mml:math></inline-formula> for Cluster 2 and 0.49 for Cluster 3, and Peru: <inline-formula><mml:math id="M57"><mml:msub><mml:mrow><mml:mover accent="false"><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mo>^</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>g</mml:mi><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mn>0</mml:mn><mml:mo>.</mml:mo><mml:mn>66</mml:mn></mml:math></inline-formula> for Cluster 2 and 0.34 for Cluster 3.</p>
</table-wrap-foot>
</table-wrap><p>The Three-cluster model consists of Cluster 1 with a regression coefficient of 0.184 and including 8 countries, Cluster 2 with a coefficient of &#x02212;0.254 and comprising 15 countries, and Cluster 3 with a coefficient of &#x02212;0.038 and including 17 countries. Geographically, most European countries were classified in either Cluster 2 or Cluster 3 except for Ukraine (<inline-formula><mml:math id="M58"><mml:msub><mml:mrow><mml:mover accent="false"><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mo>^</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>g</mml:mi><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mn>0</mml:mn><mml:mo>.</mml:mo><mml:mn>83</mml:mn></mml:math></inline-formula> for Cluster 1 and 0.17 for Cluster 3). Asia was also mainly distributed across two clusters, Cluster 1 and Cluster 3, except for Philippines and South Korea. South Korea was classified with a high classification uncertainty, however (<inline-formula><mml:math id="M59"><mml:msub><mml:mrow><mml:mover accent="false"><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mo>^</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>g</mml:mi><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mn>0</mml:mn><mml:mo>.</mml:mo><mml:mn>51</mml:mn></mml:math></inline-formula> for Cluster 2 and 0.49 for Cluster 3). Of the four African countries, three were classified into Cluster 1 and one (South Africa) in Cluster 3. For South-American countries, all of them exhibited a classification uncertainty &#x0003E;0.1 (each country comprised a mix of either Cluster 1 or Cluster 2 with Cluster 3). For North America, Canada and the USA were in Cluster 3, whereas Costa Rica was in Cluster 2. For Oceania, both Australia and New Zealand were in Cluster 3.</p>
<p>To some extent, our findings resemble those of Dejonckheere et al. (<xref ref-type="bibr" rid="B16">2022</xref>), where five countries showed positive relations (Senegal, Hong Kong, Nigeria, Pakistan, and China, all found in Cluster 1 of MixML-SEM), 10 negative (Northern Ireland, Peru, Latvia, Philippines, France, Macedonia, Germany, Netherlands, England, Poland, all found in Cluster 2), while the remaining 25 were close to 0 (17 countries in Cluster 3, three in Cluster 1, and five in Cluster 2). Note that the classification by Dejonckheere et al. (<xref ref-type="bibr" rid="B16">2022</xref>) was solely based on whether the regression coefficient was significantly positive or negative (disregarding the actual values of the coefficient), whereas, with MixML-SEM, we assume all countries in the same cluster to have the same values for the regression coefficients.</p>
<p>To explore the potential influence of country-level predictors on the relation between social pressure to be happy and life satisfaction, we examined two variables: World happiness index (WHI; as in Dejonckheere et al., <xref ref-type="bibr" rid="B16">2022</xref>) and individualism (from Hofstede, <xref ref-type="bibr" rid="B25">2001</xref>). <xref ref-type="fig" rid="F7">Figure 7</xref> (left) presents boxplots of WHI scores for the Three-cluster model. Countries in Cluster 1 generally exhibited lower WHI values compared to the other clusters, with the exception of Chile. Clusters 2 and 3 had relatively higher WHI values, but there were exceptions (e.g., Macedonia, in Cluster 2, and South Africa, in Cluster 3, have a low WHI). In conclusion, there was considerable overlap between the three clusters, suggesting that WHI may not be the only predictor of the relation. Note that the interaction effect found by Dejonckheere et al. (<xref ref-type="bibr" rid="B16">2022</xref>), where SEHS was linked to lower SWLS in high WHI countries (&#x003B2; &#x0003D; &#x02212;0.08), was also a weak one. To further examine whether this relation depends on individualism, <xref ref-type="fig" rid="F7">Figure 7</xref> (right) presents boxplots of the individualism scores for the three clusters, which was available for 31 of the countries. Overall, countries in Cluster 1 exhibited lower individualism, followed by countries in Clusters 2 and 3. However, the overlap between the clusters (especially between Clusters 2 and 3) suggests that other variables may also contribute to the relation between social pressure to be happy and life satisfaction.</p>
<fig id="F7" position="float">
<label>Figure 7</label>
<caption><p>The WHI and individualism scores for the three clusters obtained by MixML-SEM.</p></caption>
<alt-text>Two box plots compare data across three clusters. The left plot shows the World Happiness Index (2019), with cluster two and three having higher medians than cluster one. The right plot represents Individualism, with clusters two and three showing wider distributions and higher medians compared to cluster one.</alt-text>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpsyg-16-1463790-g0007.tif"/>
</fig>
<p>In summary, MixML-SEM revealed cross-national differences in the relation between social pressure to be happy and life satisfaction, captured by assigning the 40 countries to three clusters. Compared to the multilevel regression analysis, MixML-SEM allowed us to avoid a large number of pairwise comparisons of biased group-specific regression estimates, while taking the measurement error and the measurement non-invariances into account.</p></sec>
<sec sec-type="discussion" id="s5">
<title>Discussion</title>
<p>MixML-SEM is a novel method for comparing structural relations between latent variables across many groups, while parsimoniously addressing potential measurement non-invariances with random effects. Specifically, after accounting for measurement non-invariances with ML-CFA, MixML-SEM uses mixture clustering to gather groups with equivalent structural relations, thereby reducing the need for pairwise comparisons of the relations. For instance, in the empirical example on social pressure to be happy and life satisfaction, comparing only three cluster-specific regression coefficients efficiently exposed the differences among 40 countries.</p>
<p>One would expect the parsimonious measurement model of MixML-SEM to be especially advantageous in case of (some) small groups. Small groups are more prone to sampling fluctuations, however. The simulation study demonstrated a strong effect of the fixed vs. random within-group samples on MixML-SEM&#x00027;s performance. Thus, a critical question is whether the analysis aims to capture clusters regarding the regression relations in the observed within-group samples or regarding the relations in the larger within-group populations. Since smaller samples are more vulnerable to sampling variability, the observed data are less representative of the population, especially when the underlying population is relatively large. Thus, when applying MixML-SEM to empirical data on small groups, researchers should be cautious when generalizing the captured differences in structural relations to larger within-group populations, unless there is a strong justification that the observed samples are representative (for instance, when within-group samples are obtained within stratified sampling). Unlike ML-SEM, MixML-SEM does not adopt the multilevel approach for the structural model, which avoids comparing biased group-specific regression estimates derived from random effects. The added value of MixML-SEM over ML-SEM was demonstrated in simulation studies. Of course, the simulation studies were limited to conditions where a clustering was indeed underlying the structural relations. Future research could evaluate how MixML-SEM performs when this is not the case. For instance, it would be interesting to see how many and which clusters are captured with MixML-SEM when the differences in structural relations are in fact gradual and normally distributed.</p>
<p>Furthermore, in our simulation studies, we examined a model with four latent variables and four regressions among them, whereas, in the empirical study, we applied MixML-SEM to cluster on only one regression coefficient. In practice, more variables and thus more relations can be involved. Depending on the research question and the structural model, researchers can choose to cluster all relations together or separately. Note that clustering on multiple relations may require more clusters to capture all the differences, whereas clustering on each regression coefficient separately does not allow for one relation to be estimated conditional on another one. Note that the advantage of MixML-SEM is greater in case of more complex structural models, where multiple relations may vary across clusters. That is, a clustering based on one regression coefficients might also be found by ordering the group-specific relations in terms of their direction and size, but a clustering based on multiple regression coefficients is much harder to find based on comparisons of group-specific coefficients. When clustering groups on multiple relations, it would be interesting to combine MixML-SEM with hypothesis testing on the cluster-specific regressions to determine which relations are significantly different across which clusters.</p>
<p>A key feature of MixML-SEM is its stepwise estimation process, which should provide robustness against local model misspecifications, as was demonstrated for the SAM approach in general (Rosseel and Loh, <xref ref-type="bibr" rid="B49">2024</xref>). In contrast, simultaneous estimation of all parameters may allow misfit or uncertainty in one part of the model (e.g., the structural model) to propagate and distort estimation in another part (e.g., the measurement model), and vice versa. Future research could investigate the robustness of MixML-SEM against different forms of measurement model misspecification. This could involve scenarios such as disregarding cross-loadings or residual covariances between items, or ignoring measurement non-invariances. In the first step of MixML-SEM, the measurement model is estimated per latent variable, which corresponds to the &#x0201C;measurement block approach&#x0201D; recommended by Rosseel and Loh (Rosseel and Loh, <xref ref-type="bibr" rid="B49">2024</xref>). This approach drastically reduces computation time, but requires a sufficient number of (strong) indicators for each factor. It may thus be infeasible or inadvisable when the number of indicators is insufficient and/or when their reliability is low. In such cases, the ML-CFA needs to be estimated for all (or some) latent variables simultaneously (which corresponds to combining them in one measurement block), before continuing with the estimation of MixML-SEM as usual. In the future, it would be valuable to compare the performance of MixML-SEM when starting from one vs. multiple measurement blocks in different conditions and under different misspecifications.</p>
<p>The stepwise approach allows for a lot of flexibility, such as using a different estimator in each step. In particular, Bayesian estimation was applied in Step 1 for estimating ML-CFA, whereas maximum likelihood estimation was applied in Steps 2 and 3. Existing mixture-based SEM methods, like multilevel mixture SEM in Mplus (where &#x0201C;multilevel&#x0201D; indicates that the mixture operates at the group level), lack such flexibility. In fact, multilevel mixture SEM does not support including random loadings for the measurement model due to the unavailability of Bayesian estimation for this model type. Consequently, differences in the measurement model are either disregarded or captured by the same clustering along with the differences in structural relations. This limitation undermines the ability to disentangle different sources of variability across groups, making it impossible to cluster on the structural relations specifically.</p>
<p>The stepwise estimation also facilitates extensions of the method. One possible extension involves replacing the first step by alternative approaches for handling measurement non-invariances. In MixML-SEM, we used the multilevel approach to account for measurement non-invariances, but an interesting alternative is the Multigroup Bayesian CFA approach with approximate measurement invariance (Muth&#x000E9;n and Asparouhov, <xref ref-type="bibr" rid="B38">2013</xref>) which captures differences in measurement parameters by small-variance priors. The key difference between the two approaches lies in their underlying assumptions. For the invariant measurement parameters, ML-CFA assumes exact invariance, meaning identical parameters across all groups, whereas the non-invariant measurement parameters are captured with random effects with an unrestricted variance. Especially when dealing with a large number of groups, achieving exact invariance may not always be realistic, not even for some of the measurement parameters. Approximate invariance allows for small differences across groups for many, or even all, of the measurement parameters, where these differences are kept small by restricting their variance. The fact that the differences across groups are kept small maintains the comparability of structural relations across groups. In the future, it would be interesting to investigate the interplay between exact measurement invariance, approximate measurement invariance, and measurement non-invariance within the Mixture Multigroup/Multilevel SEM framework.</p>
<p>Currently, MixML-SEM combines Bayesian and maximum likelihood estimation, assuming continuous items. In empirical practice, we often encounter ordinal data like questionnaire items with a specific number of response categories. Having enough response categories (say five or more) allows for the items to be considered as continuous (Dolan, <xref ref-type="bibr" rid="B21">1994</xref>), but binary items or items with three or four response categories are common (e.g., in the Programme for International Student Assessment). Therefore, adapting the method to ordinal data would be another useful extension. To this end, the first step of MixML-SEM, using Bayesian estimation, should be adjusted to deal with ordinal data, whereas the rest of the estimation procedure remains unchanged, since the factor scores derived from the first step are still continuous.</p>
<p>As our primary focus was on comparing structural relations, our current approach disregarded the mean structure (i.e., the data were centered per group). Another possible extension is to incorporate the mean structure into the model. In addition to clustering based on regressions, we could also cluster groups based on their factor means if this is relevant to the research question. Note that comparing factor means across (clusters of) groups requires a higher level of MI&#x02014;that is, at least (partial) scalar invariance&#x02014;and thus a modification to the measurement model as well. Differences in factor means and structural relations will then be captured by the clusters, whereas differences in the measurement model are captured by random intercepts and/or random loadings. This combination of mixture modeling and random effects cannot be attained by existing mixture SEM methods in Mplus, because they do not allow for the required Bayesian estimation (as mentioned above).</p>
<p>To conclude, MixML-SEM provides a parsimonious way for accounting for measurement non-invariance (with random effects) while comparing structural relations across many groups (with mixture modeling). The combination of random effects and mixture modeling is handled in a stepwise manner, building on the SAM approach. This stepwise estimation approach is very flexible, which makes it easy to extend the method in many ways, including the use of alternative methods for addressing measurement non-invariances. MixML-SEM is thus not only a novel method for comparing structural relations among many groups, but also an important step toward a realm of possibilities for handling different types of measurement non-invariance while clustering the groups on their structural relations.</p></sec>
</body>
<back>
<sec sec-type="data-availability" id="s6">
<title>Data availability statement</title>
<p>The datasets presented in this study can be found in online repositories. The names of the repository/repositories and accession number(s) can be found below: <ext-link ext-link-type="uri" xlink:href="https://osf.io/rtp78/">https://osf.io/rtp78/</ext-link>.</p>
</sec>
<sec sec-type="author-contributions" id="s7">
<title>Author contributions</title>
<p>HZ: Conceptualization, Data curation, Formal analysis, Investigation, Methodology, Project administration, Software, Validation, Visualization, Writing &#x02013; original draft, Writing &#x02013; review &#x00026; editing. JV: Conceptualization, Methodology, Software, Supervision, Writing &#x02013; review &#x00026; editing. KD: Conceptualization, Funding acquisition, Methodology, Project administration, Software, Supervision, Writing &#x02013; review &#x00026; editing, Resources, Investigation.</p>
</sec>
<sec sec-type="funding-information" id="s8">
<title>Funding</title>
<p>The author(s) declare that financial support was received for the research and/or publication of this article. This study was funded by the European Union (ERC, PROCESSHETEROGENEITY, 101040754, awarded to Kim De Roover). Views and opinions expressed are however those of the author(s) only and do not necessarily reflect those of the European Union or European Research Council Executive Agency. Neither the European Union nor the granting authority can be held responsible for them. The resources and services used in this work were provided by the VSC (Flemish Supercomputer Center), funded by the Research Foundation&#x02014;Flanders (FWO) and the Flemish Government.</p>
<p><inline-graphic mimetype="image" mime-subtype="tiff" xlink:href="fpsyg-16-1463790-i0001.tif"/></p>
</sec>
<sec sec-type="COI-statement" id="conf1">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s9">
<title>Publisher&#x00027;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec><sec sec-type="supplementary-material" id="s10">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fpsyg.2025.1463790/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fpsyg.2025.1463790/full#supplementary-material</ext-link></p>
<supplementary-material xlink:href="Supplementary_file_1.docx" id="SM1" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document" xmlns:xlink="http://www.w3.org/1999/xlink"/></sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Akaike</surname> <given-names>H.</given-names></name></person-group> (<year>1974</year>). <article-title>A new look at the statistical model identification</article-title>. <source>IEEE Trans. Autom. Control</source> <volume>19</volume>, <fpage>716</fpage>&#x02013;<lpage>723</lpage>. <pub-id pub-id-type="doi">10.1109/TAC.1974.1100705</pub-id></citation>
</ref>
<ref id="B2">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Arminger</surname> <given-names>G.</given-names></name> <name><surname>Stein</surname> <given-names>P.</given-names></name></person-group> (<year>1997</year>). <article-title>Finite mixtures of covariance structure models with regressors: loglikelihood function, minimum distance estimation, fit indices, and a complex example</article-title>. <source>Sociol. Methods Res.</source> <volume>26</volume>, <fpage>148</fpage>&#x02013;<lpage>182</lpage>. <pub-id pub-id-type="doi">10.1177/0049124197026002002</pub-id></citation>
</ref>
<ref id="B3">
<citation citation-type="web"><person-group person-group-type="author"><name><surname>Asparouhov</surname> <given-names>T.</given-names></name> <name><surname>Muth&#x000E9;n</surname> <given-names>B.</given-names></name></person-group> (<year>2012</year>). <source>General Random Effect Latent Variable Modeling: Random Subjects, Items, Contexts, and Parameters</source>. Mplus. Available online at: <ext-link ext-link-type="uri" xlink:href="https://www.statmodel.com/download/NCME12.pdf">https://www.statmodel.com/download/NCME12.pdf</ext-link></citation>
</ref>
<ref id="B4">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bauer</surname> <given-names>D. J.</given-names></name></person-group> (<year>2007</year>). <article-title>Observations on the use of growth mixture models in psychological research</article-title>. <source>Multivariate Behav. Res.</source> <volume>42</volume>, <fpage>757</fpage>&#x02013;<lpage>786</lpage>. <pub-id pub-id-type="doi">10.1080/00273170701710338</pub-id></citation>
</ref>
<ref id="B5">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Boer</surname> <given-names>D.</given-names></name> <name><surname>Hanke</surname> <given-names>K.</given-names></name> <name><surname>He</surname> <given-names>J.</given-names></name></person-group> (<year>2018</year>). <article-title>On detecting systematic measurement error in cross-cultural research: a review and critical reflection on equivalence and invariance tests</article-title>. <source>J. Cross-Cultural Psychol.</source> <volume>49</volume>, <fpage>713</fpage>&#x02013;<lpage>734</lpage>. <pub-id pub-id-type="doi">10.1177/0022022117749042</pub-id></citation>
</ref>
<ref id="B6">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bollen</surname> <given-names>K. A.</given-names></name></person-group> (<year>1989</year>). <source>Structural Equations with Latent Variables</source>. Oxford: John Wiley and Sons. <pub-id pub-id-type="doi">10.1002/9781118619179</pub-id></citation>
</ref>
<ref id="B7">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Brandt</surname> <given-names>M. J.</given-names></name> <name><surname>Turner-Zwinkels</surname> <given-names>F. M.</given-names></name> <name><surname>Karapirinler</surname> <given-names>B.</given-names></name> <name><surname>Van Leeuwen</surname> <given-names>F.</given-names></name> <name><surname>Bender</surname> <given-names>M.</given-names></name> <name><surname>van Osch</surname> <given-names>Y.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>The association between threat and politics depends on the type of threat, the political domain, and the country</article-title>. <source>Pers. Soc. Psychol. Bull.</source> <volume>47</volume>, <fpage>324</fpage>&#x02013;<lpage>343</lpage>. <pub-id pub-id-type="doi">10.1177/0146167220946187</pub-id><pub-id pub-id-type="pmid">32842885</pub-id></citation></ref>
<ref id="B8">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bulteel</surname> <given-names>K.</given-names></name> <name><surname>Wilderjans</surname> <given-names>T. F.</given-names></name> <name><surname>Tuerlinckx</surname> <given-names>F.</given-names></name> <name><surname>Ceulemans</surname> <given-names>E.</given-names></name></person-group> (<year>2013</year>). <article-title>CHull as an alternative to AIC and BIC in the context of mixtures of factor analyzers</article-title>. <source>Behav. Res.</source> <volume>45</volume>, <fpage>782</fpage>&#x02013;<lpage>791</lpage>. <pub-id pub-id-type="doi">10.3758/s13428-012-0293-y</pub-id><pub-id pub-id-type="pmid">23307573</pub-id></citation></ref>
<ref id="B9">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Byrne</surname> <given-names>B. M.</given-names></name> <name><surname>Shavelson</surname> <given-names>R. J.</given-names></name> <name><surname>Muth&#x000E9;n</surname> <given-names>B.</given-names></name></person-group> (<year>1989</year>). <article-title>Testing for the equivalence of factor covariance and mean structures: the issue of partial measurement invariance</article-title>. <source>Psychol. Bull.</source> <volume>105</volume>, <fpage>456</fpage>&#x02013;<lpage>466</lpage>. <pub-id pub-id-type="doi">10.1037/0033-2909.105.3.456</pub-id></citation>
</ref>
<ref id="B10">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ceulemans</surname> <given-names>E.</given-names></name> <name><surname>Kiers</surname> <given-names>H. A. L.</given-names></name></person-group> (<year>2006</year>). <article-title>Selecting among three-mode principal component models of different types and complexities: a numerical convex hull based method</article-title>. <source>Br. J. Math. Stat. Psychol.</source> <volume>59</volume>, <fpage>133</fpage>&#x02013;<lpage>150</lpage>. <pub-id pub-id-type="doi">10.1348/000711005X64817</pub-id><pub-id pub-id-type="pmid">16709283</pub-id></citation></ref>
<ref id="B11">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chen</surname> <given-names>F. F.</given-names></name></person-group> (<year>2008</year>). <article-title>What happens if we compare chopsticks with forks? The impact of making inappropriate comparisons in cross-cultural research</article-title>. <source>J. Pers. Soc. Psychol.</source> <volume>95</volume>, <fpage>1005</fpage>&#x02013;<lpage>1018</lpage>. <pub-id pub-id-type="doi">10.1037/a0013193</pub-id><pub-id pub-id-type="pmid">18954190</pub-id></citation></ref>
<ref id="B12">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Croon</surname> <given-names>M. A.</given-names></name></person-group> (<year>2002</year>). <article-title>&#x0201C;Using predicted latent scores in general latent structure models,&#x0201D;</article-title> in <source>Latent Variable and Latent Structure Models</source>, eds. G. A. Marcoulides and I. Moustaki (<publisher-loc>Mahwah, NJ</publisher-loc>: <publisher-name>Lawrence Erlbaum</publisher-name>), <fpage>195</fpage>&#x02013;<lpage>224</lpage>.</citation>
</ref>
<ref id="B13">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Davidov</surname> <given-names>E.</given-names></name> <name><surname>D&#x000FC;lmer</surname> <given-names>H.</given-names></name> <name><surname>Schl&#x000FC;ter</surname> <given-names>E.</given-names></name> <name><surname>Schmidt</surname> <given-names>P.</given-names></name> <name><surname>Meuleman</surname> <given-names>B.</given-names></name></person-group> (<year>2012</year>). <article-title>Using a multilevel structural equation modeling approach to explain cross-cultural measurement noninvariance</article-title>. <source>J. Cross-Cultural Psychol.</source> <volume>43</volume>, <fpage>558</fpage>&#x02013;<lpage>575</lpage>. <pub-id pub-id-type="doi">10.1177/0022022112438397</pub-id></citation>
</ref>
<ref id="B14">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>De Roover</surname> <given-names>K.</given-names></name></person-group> (<year>2021</year>). <article-title>Finding clusters of groups with measurement invariance: unraveling intercept non-invariance with mixture multigroup factor analysis</article-title>. <source>Struct. Equ. Model. Multidiscipl. J.</source> <volume>28</volume>, <fpage>663</fpage>&#x02013;<lpage>683</lpage>. <pub-id pub-id-type="doi">10.1080/10705511.2020.1866577</pub-id></citation>
</ref>
<ref id="B15">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>De Roover</surname> <given-names>K.</given-names></name> <name><surname>Vermunt</surname> <given-names>J. K.</given-names></name> <name><surname>Ceulemans</surname> <given-names>E.</given-names></name></person-group> (<year>2022</year>). <article-title>Mixture multigroup factor analysis for unraveling factor loading noninvariance across many groups</article-title>. <source>Psychol. Methods</source> <volume>27</volume>, <fpage>281</fpage>&#x02013;<lpage>306</lpage>. <pub-id pub-id-type="doi">10.1037/met0000355</pub-id><pub-id pub-id-type="pmid">33271027</pub-id></citation></ref>
<ref id="B16">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Dejonckheere</surname> <given-names>E.</given-names></name> <name><surname>Rhee</surname> <given-names>J. J.</given-names></name> <name><surname>Baguma</surname> <given-names>P. K.</given-names></name> <name><surname>Barry</surname> <given-names>O.</given-names></name> <name><surname>Becker</surname> <given-names>M.</given-names></name> <name><surname>Bilewicz</surname> <given-names>M.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>Perceiving societal pressure to be happy is linked to poor well-being, especially in happy nations</article-title>. <source>Sci. Rep.</source> <volume>12</volume>:<fpage>1514</fpage>. <pub-id pub-id-type="doi">10.1038/s41598-021-04262-z</pub-id><pub-id pub-id-type="pmid">35177625</pub-id></citation></ref>
<ref id="B17">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Dempster</surname> <given-names>A. P.</given-names></name> <name><surname>Laird</surname> <given-names>N. M.</given-names></name> <name><surname>Rubin</surname> <given-names>D. B.</given-names></name></person-group> (<year>1977</year>). <article-title>Maximum likelihood from incomplete data via the EM Algorithm</article-title>. <source>J. R. Statist. Soc. Ser. B</source> <volume>39</volume>, <fpage>1</fpage>&#x02013;<lpage>22</lpage>. <pub-id pub-id-type="doi">10.1111/j.2517-6161.1977.tb01600.x</pub-id></citation>
</ref>
<ref id="B18">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Devlieger</surname> <given-names>I.</given-names></name> <name><surname>Rosseel</surname> <given-names>Y.</given-names></name></person-group> (<year>2017</year>). <article-title>Factor score path analysis: an alternative for SEM</article-title>. <source>Methodol. Euro. J. Res. Methods Behav. Soc. Sci.</source> <volume>13</volume>, <fpage>31</fpage>&#x02013;<lpage>38</lpage>. <pub-id pub-id-type="doi">10.1027/1614-2241/a000130</pub-id></citation>
</ref>
<ref id="B19">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Devlieger</surname> <given-names>I.</given-names></name> <name><surname>Rosseel</surname> <given-names>Y.</given-names></name></person-group> (<year>2020</year>). <article-title>Multilevel factor score regression</article-title>. <source>Multivariate Behav. Res.</source> <volume>55</volume>, <fpage>600</fpage>&#x02013;<lpage>624</lpage>. <pub-id pub-id-type="doi">10.1080/00273171.2019.1661817</pub-id><pub-id pub-id-type="pmid">31505988</pub-id></citation></ref>
<ref id="B20">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Diener</surname> <given-names>E.</given-names></name> <name><surname>Emmons</surname> <given-names>R. A.</given-names></name> <name><surname>Larsen</surname> <given-names>R. J.</given-names></name> <name><surname>Griffin</surname> <given-names>S.</given-names></name></person-group> (<year>1985</year>). <article-title>The satisfaction with life scale</article-title>. <source>J. Pers. Assess.</source> <volume>49</volume>, <fpage>71</fpage>&#x02013;<lpage>75</lpage>. <pub-id pub-id-type="doi">10.1207/s15327752jpa4901_13</pub-id><pub-id pub-id-type="pmid">16367493</pub-id></citation></ref>
<ref id="B21">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Dolan</surname> <given-names>C. V.</given-names></name></person-group> (<year>1994</year>). <article-title>Factor analysis of variables with 2, 3, 5 and 7 response categories: a comparison of categorical variable estimators using simulated data</article-title>. <source>Br. J. Math. Stat. Psychol.</source> <volume>47</volume>, <fpage>309</fpage>&#x02013;<lpage>326</lpage>. <pub-id pub-id-type="doi">10.1111/j.2044-8317.1994.tb01039.x</pub-id></citation>
</ref>
<ref id="B22">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Dolan</surname> <given-names>C. V.</given-names></name> <name><surname>van der Maas</surname> <given-names>H. L. J.</given-names></name></person-group> (<year>1998</year>). <article-title>Fitting multivariage normal finite mixtures subject to structural equation modeling</article-title>. <source>Psychometrika</source> <volume>63</volume>, <fpage>227</fpage>&#x02013;<lpage>253</lpage>. <pub-id pub-id-type="doi">10.1007/BF02294853</pub-id></citation>
</ref>
<ref id="B23">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Guenole</surname> <given-names>N.</given-names></name> <name><surname>Brown</surname> <given-names>A.</given-names></name></person-group> (<year>2014</year>). <article-title>The consequences of ignoring measurement invariance for path coefficients in structural equation models</article-title>. <source>Front. Psychol.</source> <volume>5</volume>:<fpage>980</fpage>. <pub-id pub-id-type="doi">10.3389/fpsyg.2014.00980</pub-id><pub-id pub-id-type="pmid">25278911</pub-id></citation></ref>
<ref id="B24">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hallquist</surname> <given-names>M. N.</given-names></name> <name><surname>Wiley</surname> <given-names>J. F.</given-names></name></person-group> (<year>2018</year>). <article-title>MplusAutomation: an R package for facilitating large-scale latent variable analyses in Mplus</article-title>. <source>Struct. Equ. Model. Multidiscipl. J.</source> <volume>25</volume>, <fpage>621</fpage>&#x02013;<lpage>638</lpage>. <pub-id pub-id-type="doi">10.1080/10705511.2017.1402334</pub-id><pub-id pub-id-type="pmid">30083048</pub-id></citation></ref>
<ref id="B25">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Hofstede</surname> <given-names>G.</given-names></name></person-group> (<year>2001</year>). <source>Culture&#x00027;s Consequences: Comparing Values, Behaviors, Institutions, and Organizations Across Nations, 2nd Edn</source>. <publisher-loc>Thousand Oaks, CA</publisher-loc>: <publisher-name>Sage</publisher-name>.</citation>
</ref>
<ref id="B26">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Hox</surname> <given-names>J.</given-names></name> <name><surname>Moerbeek</surname> <given-names>M.</given-names></name> <name><surname>van de Schoot</surname> <given-names>R.</given-names></name></person-group> (<year>2017</year>). <source>Multilevel Analysis: Techniques and Applications, 3rd Edn</source>. <publisher-loc>New York, NY</publisher-loc>: <publisher-name>Routledge</publisher-name>. <pub-id pub-id-type="doi">10.4324/9781315650982</pub-id></citation>
</ref>
<ref id="B27">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Hoyle</surname> <given-names>R. H.</given-names></name></person-group> (<year>2012</year>). <source>Handbook of Structural Equation Modeling</source>. <publisher-loc>New York, NY</publisher-loc>: <publisher-name>Guilford Press</publisher-name>.</citation>
</ref>
<ref id="B28">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hubert</surname> <given-names>L.</given-names></name> <name><surname>Arabie</surname> <given-names>P.</given-names></name></person-group> (<year>1985</year>). <article-title>Comparing partitions</article-title>. <source>J. Classification</source> <volume>2</volume>, <fpage>193</fpage>&#x02013;<lpage>218</lpage>. <pub-id pub-id-type="doi">10.1007/BF01908075</pub-id></citation>
</ref>
<ref id="B29">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Jak</surname> <given-names>S.</given-names></name> <name><surname>Oort</surname> <given-names>F. J.</given-names></name> <name><surname>Dolan</surname> <given-names>C. V. A.</given-names></name></person-group> (<year>2013</year>). <article-title>Test for cluster bias: detecting violations of measurement invariance across clusters in multilevel data</article-title>. <source>Struct. Equ. Model. Multidiscipl. J.</source> <volume>20</volume>, <fpage>265</fpage>&#x02013;<lpage>282</lpage>. <pub-id pub-id-type="doi">10.1080/10705511.2013.769392</pub-id></citation>
</ref>
<ref id="B30">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Jedidi</surname> <given-names>K.</given-names></name> <name><surname>Jagpal</surname> <given-names>H.</given-names></name> <name><surname>Desarbo</surname> <given-names>W.</given-names></name></person-group> (<year>1997</year>). <article-title>Finite-mixture structural equation models for response-based segmentation and unobserved heterogeneity</article-title>. <source>Market. Sci.</source> <volume>16</volume>, <fpage>39</fpage>&#x02013;<lpage>59</lpage>. <pub-id pub-id-type="doi">10.1287/mksc.16.1.39</pub-id><pub-id pub-id-type="pmid">19642375</pub-id></citation></ref>
<ref id="B31">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kim</surname> <given-names>E. S.</given-names></name> <name><surname>Cao</surname> <given-names>C.</given-names></name> <name><surname>Wang</surname> <given-names>Y.</given-names></name> <name><surname>Nguyen</surname> <given-names>D. T.</given-names></name></person-group> (<year>2017</year>). <article-title>Measurement invariance testing with many groups: a comparison of five approaches</article-title>. <source>Struct. Equ. Model. Multidiscipl. J.</source> <volume>24</volume>, <fpage>524</fpage>&#x02013;<lpage>544</lpage>. <pub-id pub-id-type="doi">10.1080/10705511.2017.1304822</pub-id></citation>
</ref>
<ref id="B32">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kim</surname> <given-names>E. S.</given-names></name> <name><surname>Joo</surname> <given-names>S.-H.</given-names></name> <name><surname>Lee</surname> <given-names>P.</given-names></name> <name><surname>Wang</surname> <given-names>Y.</given-names></name> <name><surname>Stark</surname> <given-names>S.</given-names></name></person-group> (<year>2016</year>). <article-title>Measurement invariance testing across between-level latent classes using multilevel factor mixture modeling</article-title>. <source>Struct. Equ. Model. Multidiscipl. J.</source> <volume>23</volume>, <fpage>870</fpage>&#x02013;<lpage>887</lpage>. <pub-id pub-id-type="doi">10.1080/10705511.2016.1196108</pub-id></citation>
</ref>
<ref id="B33">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kuppens</surname> <given-names>P.</given-names></name> <name><surname>Realo</surname> <given-names>A.</given-names></name> <name><surname>Diener</surname> <given-names>E.</given-names></name></person-group> (<year>2008</year>). <article-title>The role of positive and negative emotions in life satisfaction judgment across nations</article-title>. <source>J. Pers. Soc. Psychol.</source> <volume>95</volume>, <fpage>66</fpage>&#x02013;<lpage>75</lpage>. <pub-id pub-id-type="doi">10.1037/0022-3514.95.1.66</pub-id><pub-id pub-id-type="pmid">18605852</pub-id></citation></ref>
<ref id="B34">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Leitg&#x000F6;b</surname> <given-names>H.</given-names></name> <name><surname>Seddig</surname> <given-names>D.</given-names></name> <name><surname>Asparouhov</surname> <given-names>T.</given-names></name> <name><surname>Behr</surname> <given-names>D.</given-names></name> <name><surname>Davidov</surname> <given-names>E.</given-names></name> <name><surname>De Roover</surname> <given-names>K.</given-names></name> <etal/></person-group>. (<year>2023</year>). <article-title>Measurement invariance in the social sciences: historical development, methodological challenges, state of the art, and future perspectives</article-title>. <source>Soc. Sci. Res.</source> <volume>110</volume>:<fpage>102805</fpage>. <pub-id pub-id-type="doi">10.1016/j.ssresearch.2022.102805</pub-id><pub-id pub-id-type="pmid">36796989</pub-id></citation></ref>
<ref id="B35">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>McLachlan</surname> <given-names>G.</given-names></name> <name><surname>Peel</surname> <given-names>D.</given-names></name></person-group> (<year>2000</year>). <source>Finite Mixture Models, 1st Edn</source>. <publisher-loc>Hoboken, NJ</publisher-loc>: <publisher-name>Wiley</publisher-name>. <pub-id pub-id-type="doi">10.1002/0471721182</pub-id></citation>
</ref>
<ref id="B36">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>McNeish</surname> <given-names>D.</given-names></name> <name><surname>Harring</surname> <given-names>J. R.</given-names></name></person-group> (<year>2017</year>). <article-title>The effect of model misspecification on growth mixture model class enumeration</article-title>. <source>J. Classif.</source> <volume>34</volume>, <fpage>223</fpage>&#x02013;<lpage>248</lpage>. <pub-id pub-id-type="doi">10.1007/s00357-017-9233-y</pub-id></citation>
</ref>
<ref id="B37">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Meredith</surname> <given-names>W.</given-names></name> <name><surname>Teresi</surname> <given-names>J. A.</given-names></name></person-group> (<year>2006</year>). <article-title>An essay on measurement and factorial invariance</article-title>. <source>Medical Care</source> <volume>44</volume>, <fpage>S69</fpage>&#x02013;<lpage>S77</lpage>. <pub-id pub-id-type="doi">10.1097/01.mlr.0000245438.73837.89</pub-id><pub-id pub-id-type="pmid">17060838</pub-id></citation></ref>
<ref id="B38">
<citation citation-type="web"><person-group person-group-type="author"><name><surname>Muth&#x000E9;n</surname> <given-names>B.</given-names></name> <name><surname>Asparouhov</surname> <given-names>T.</given-names></name></person-group> (<year>2013</year>). <source>BSEM Measurement Invariance Analysis</source>. Mplus Web Notes: No. 17. Mplus. Available online at: <ext-link ext-link-type="uri" xlink:href="https://www.statmodel.com/examples/webnotes/webnote17.pdf">https://www.statmodel.com/examples/webnotes/webnote17.pdf</ext-link></citation>
</ref>
<ref id="B39">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Muth&#x000E9;n</surname> <given-names>B. O.</given-names></name></person-group> (<year>1991</year>). <article-title>Multilevel factor analysis of class and student achievement components</article-title>. <source>J. Educ. Measure.</source> <volume>28</volume>, <fpage>338</fpage>&#x02013;<lpage>354</lpage>. <pub-id pub-id-type="doi">10.1111/j.1745-3984.1991.tb00363.x</pub-id></citation>
</ref>
<ref id="B40">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Muth&#x000E9;n</surname> <given-names>B. O.</given-names></name></person-group> (<year>1994</year>). <article-title>Multilevel covariance structure analysis</article-title>. <source>Sociol. Methods Res.</source> <volume>22</volume>, <fpage>376</fpage>&#x02013;<lpage>398</lpage>. <pub-id pub-id-type="doi">10.1177/0049124194022003006</pub-id></citation>
</ref>
<ref id="B41">
<citation citation-type="web"><person-group person-group-type="author"><name><surname>Muth&#x000E9;n</surname> <given-names>B. O.</given-names></name> <name><surname>Asparouhov</surname> <given-names>T.</given-names></name></person-group> (<year>2023</year>). <source>Mplus Web Talk No. 6 - Using Mplus to do Dynamic Structural Equation modeling</source>. Mplus Web Talks. Available online at: <ext-link ext-link-type="uri" xlink:href="https://www.statmodel.com/Webtalk6.shtml">https://www.statmodel.com/Webtalk6.shtml</ext-link></citation>
</ref>
<ref id="B42">
<citation citation-type="web"><person-group person-group-type="author"><name><surname>Muth&#x000E9;n</surname> <given-names>L. K.</given-names></name> <name><surname>Muth&#x000E9;n</surname> <given-names>B.</given-names></name></person-group> (<year>1998</year>). <source>Mplus User&#x00027;s Guide: Statistical Analysis With Latent Variables, 8th Edn</source>. Muth&#x000E9;n &#x00026; Muth&#x000E9;n. Available online at: <ext-link ext-link-type="uri" xlink:href="https://www.statmodel.com/download/usersguide/MplusUserGuideVer_8.pdf&#x00023;::text=Following%20is%20the%20correct%20citation%20for%20this%20document%3A,Muth%C3%A9n%20%26%20Muth%C3%A9n%20Version%208%20April%202017%20">https://www.statmodel.com/download/usersguide/MplusUserGuideVer_8.pdf&#x00023;:&#x0007E;:text=Following%20is%20the%20correct%20citation%20for%20this%20document%3A,Muth%C3%A9n%20%26%20Muth%C3%A9n%20Version%208%20April%202017%20</ext-link></citation>
</ref>
<ref id="B43">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Pakarinen</surname> <given-names>E.</given-names></name> <name><surname>Lerkkanen</surname> <given-names>M.-K.</given-names></name> <name><surname>von Suchodoletz</surname> <given-names>A.</given-names></name></person-group> (<year>2020</year>). <article-title>Teacher emotional support in relation to social competence in preschool classrooms</article-title>. <source>Int. J. Res. Method Educ.</source> <volume>43</volume>, <fpage>444</fpage>&#x02013;<lpage>460</lpage>. <pub-id pub-id-type="doi">10.1080/1743727X.2020.1791815</pub-id></citation>
</ref>
<ref id="B44">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Perez Alonso</surname> <given-names>A. F.</given-names></name> <name><surname>Rosseel</surname> <given-names>Y.</given-names></name> <name><surname>Vermunt</surname> <given-names>J. K.</given-names></name> <name><surname>De Roover</surname> <given-names>K.</given-names></name></person-group> (<year>2024</year>). <article-title>Mixture multigroup structural equation modeling: a novel method for comparing structural relations across many groups</article-title>. <source>Psychol. Methods</source>. <pub-id pub-id-type="doi">10.1037/met0000667</pub-id> [Epub ahead of print].<pub-id pub-id-type="pmid">39264645</pub-id></citation></ref>
<ref id="B45">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Perez Alonso</surname> <given-names>A. F.</given-names></name> <name><surname>Vermunt</surname> <given-names>J. K.</given-names></name> <name><surname>Rosseel</surname> <given-names>Y.</given-names></name> <name><surname>Roover</surname> <given-names>K. D.</given-names></name></person-group> (<year>2025</year>). <article-title>Selecting the number of clusters in mixture multigroup structural equation modeling</article-title>. <source>Methodology</source> <volume>21</volume>, <fpage>1</fpage>&#x02013;<lpage>26</lpage>. <pub-id pub-id-type="doi">10.5964/meth.14931</pub-id></citation>
</ref>
<ref id="B46">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Pokropek</surname> <given-names>A.</given-names></name> <name><surname>Davidov</surname> <given-names>E.</given-names></name> <name><surname>Schmidt</surname> <given-names>P. A.</given-names></name></person-group> (<year>2019</year>). <article-title>Monte Carlo simulation study to assess the appropriateness of traditional and newer approaches to test for measurement invariance</article-title>. <source>Struct. Equ. Model. Multidiscipl. J.</source> <volume>26</volume>, <fpage>724</fpage>&#x02013;<lpage>744</lpage>. <pub-id pub-id-type="doi">10.1080/10705511.2018.1561293</pub-id></citation>
</ref>
<ref id="B47">
<citation citation-type="web"><person-group person-group-type="author"><collab>R Core Team</collab></person-group> (<year>2022</year>). <source>R: A Language and Environment for Statistical Computing</source>. Available online at: <ext-link ext-link-type="uri" xlink:href="https://www.R-project.org/">https://www.R-project.org/</ext-link></citation>
</ref>
<ref id="B48">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rosseel</surname> <given-names>Y.</given-names></name></person-group> (<year>2012</year>). <article-title>lavaan: an R package for structural equation modeling</article-title>. <source>J. Stat. Softw.</source> <volume>48</volume>, <fpage>1</fpage>&#x02013;<lpage>36</lpage>. <pub-id pub-id-type="doi">10.18637/jss.v048.i02</pub-id></citation>
</ref>
<ref id="B49">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rosseel</surname> <given-names>Y.</given-names></name> <name><surname>Loh</surname> <given-names>W. W.</given-names></name></person-group> (<year>2024</year>). <article-title>A structural after measurement approach to structural equation modeling</article-title>. <source>Psychol. Methods</source> <volume>29</volume>, <fpage>561</fpage>&#x02013;<lpage>588</lpage>. <pub-id pub-id-type="doi">10.1037/met0000503</pub-id><pub-id pub-id-type="pmid">36355708</pub-id></citation></ref>
<ref id="B50">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rutkowski</surname> <given-names>L.</given-names></name> <name><surname>Svetina</surname> <given-names>D.</given-names></name></person-group> (<year>2014</year>). <article-title>Assessing the hypothesis of measurement invariance in the context of large-scale international surveys</article-title>. <source>Educ. Psychol. Measure.</source> <volume>74</volume>, <fpage>31</fpage>&#x02013;<lpage>57</lpage>. <pub-id pub-id-type="doi">10.1177/0013164413498257</pub-id></citation>
</ref>
<ref id="B51">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Schwarz</surname> <given-names>G.</given-names></name></person-group> (<year>1978</year>). <article-title>Estimating the dimension of a model</article-title>. <source>Ann. Stat.</source> <volume>6</volume>, <fpage>461</fpage>&#x02013;<lpage>464</lpage>. <pub-id pub-id-type="doi">10.1214/aos/1176344136</pub-id></citation>
</ref>
<ref id="B52">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>S&#x000F6;rbom</surname> <given-names>D.</given-names></name></person-group> (<year>1974</year>). <article-title>A general method for studying differences in factor means and factor structure between groups</article-title>. <source>Br. J. Math. Stat. Psychol.</source> <volume>27</volume>, <fpage>229</fpage>&#x02013;<lpage>239</lpage>. <pub-id pub-id-type="doi">10.1111/j.2044-8317.1974.tb00543.x</pub-id></citation>
</ref>
<ref id="B53">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Spiegelhalter</surname> <given-names>D. J.</given-names></name> <name><surname>Best</surname> <given-names>N. G.</given-names></name> <name><surname>Carlin</surname> <given-names>B. P.</given-names></name> <name><surname>van der Linde</surname> <given-names>A.</given-names></name></person-group> (<year>2002</year>). <article-title>Bayesian measures of model complexity and fit</article-title>. <source>J. R. Stat. Soc. Ser. B. Stat. Methodol.</source> <volume>64</volume>, <fpage>583</fpage>&#x02013;<lpage>639</lpage>. <pub-id pub-id-type="doi">10.1111/1467-9868.00353</pub-id></citation>
</ref>
<ref id="B54">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Steinley</surname> <given-names>D.</given-names></name></person-group> (<year>2004</year>). <article-title>Properties of the Hubert-Arable adjusted Rand index</article-title>. <source>Psychol. Methods</source> <volume>9</volume>, <fpage>386</fpage>&#x02013;<lpage>396</lpage>. <pub-id pub-id-type="doi">10.1037/1082-989X.9.3.386</pub-id><pub-id pub-id-type="pmid">15355155</pub-id></citation></ref>
<ref id="B55">
<citation citation-type="web"><person-group person-group-type="author"><name><surname>Venables</surname> <given-names>W.</given-names></name> <name><surname>Ripley</surname> <given-names>B.</given-names></name></person-group> (<year>2002</year>). <source>Modern Applied Statistics with S, 4th Edn</source>. <ext-link ext-link-type="uri" xlink:href="https://www.stats.ox.ac.uk/pub/MASS4/">https://www.stats.ox.ac.uk/pub/MASS4/</ext-link> (accessed June 3, 2025). <pub-id pub-id-type="doi">10.1007/978-0-387-21706-2</pub-id></citation>
</ref>
<ref id="B56">
<citation citation-type="web"><person-group person-group-type="author"><name><surname>Vermunt</surname> <given-names>J. K.</given-names></name></person-group> (<year>2024</year>). <source>Stepwise Latent Variable Modeling: An Overview of Approaches</source>. Available online at: <ext-link ext-link-type="uri" xlink:href="https://jeroenvermunt.nl/stepwiseLVM2024.pdf">https://jeroenvermunt.nl/stepwiseLVM2024.pdf</ext-link></citation>
</ref>
<ref id="B57">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wilderjans</surname> <given-names>T. F.</given-names></name> <name><surname>Ceulemans</surname> <given-names>E.</given-names></name> <name><surname>Meers</surname> <given-names>K.</given-names></name></person-group> (<year>2013</year>). <article-title>CHull: a generic convex-hull-based model selection method</article-title>. <source>Behav. Res.</source> <volume>45</volume>, <fpage>1</fpage>&#x02013;<lpage>15</lpage>. <pub-id pub-id-type="doi">10.3758/s13428-012-0238-5</pub-id><pub-id pub-id-type="pmid">23055156</pub-id></citation></ref>
</ref-list>
</back>
</article>