<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" article-type="systematic-review" dtd-version="1.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Psychol.</journal-id>
<journal-title-group>
<journal-title>Frontiers in Psychology</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Psychol.</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">1664-1078</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fpsyg.2026.1758104</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Systematic Review</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Working memory in technology-enhanced language learning: a systematic review from interactive to AI-mediated contexts</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Deng</surname>
<given-names>Xin</given-names>
</name>
<xref ref-type="aff" rid="aff1"/>
<xref ref-type="corresp" rid="c001"><sup>&#x002A;</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/3300392"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
</contrib>
</contrib-group>
<aff id="aff1"><institution>Department of Public Foreign Language Teaching and Research, Jilin University of Finance and Economics</institution>, <city>Changchun</city>, <country country="cn">China</country></aff>
<author-notes>
<corresp id="c001"><label>&#x002A;</label>Correspondence: Xin Deng, <email xlink:href="mailto:dengxin051009@163.com">dengxin051009@163.com</email></corresp>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-02-18">
<day>18</day>
<month>02</month>
<year>2026</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2026</year>
</pub-date>
<volume>17</volume>
<elocation-id>1758104</elocation-id>
<history>
<date date-type="received">
<day>01</day>
<month>12</month>
<year>2025</year>
</date>
<date date-type="rev-recd">
<day>20</day>
<month>01</month>
<year>2026</year>
</date>
<date date-type="accepted">
<day>26</day>
<month>01</month>
<year>2026</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x00A9; 2026 Deng.</copyright-statement>
<copyright-year>2026</copyright-year>
<copyright-holder>Deng</copyright-holder>
<license>
<ali:license_ref start_date="2026-02-18">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<sec id="sec1001">
<title>Introduction</title>
<p>Working memory (WM) is a central cognitive constraint in second and foreign language learning, particularly in technology-enhanced instructional environments. While pre-AI computer-assisted language learning (CALL) research has examined how interactive technologies interact with individual differences in WM capacity, the rapid emergence of AI-mediated language learning tools raises new questions about how WM demands are managed, redistributed, or compensated. This review examines how WM has been conceptualized and empirically addressed across two historical eras of language learning technology.</p>
</sec>
<sec id="sec1002">
<title>Methods</title>
<p>This systematic review adopts a PRISMA 2020&#x2013;compliant historical&#x2013;comparative design and synthesizes 31 primary empirical studies, including 27 studies from the Interactive Era (2010&#x2013;2024) and 4 studies from the AI-Mediated Era (2024&#x2013;2025), supplemented by recent systematic reviews and theoretical work. Studies were analyzed within two analytically distinct corpora, focusing on instructional design features, WM-related outcomes, cognitive load management, and measurement approaches, followed by cross-era comparison guided by three research questions.</p>
</sec>
<sec id="sec1003">
<title>Results</title>
<p>Interactive Era studies show that CALL, multimedia, and online platforms provide multimodal input, adaptive feedback, collaboration, and flexible pacing, but frequently induce cognitive overload and unequal learning outcomes associated with individual differences in WM capacity, which is typically treated as a fixed learner constraint. In contrast, AI-mediated studies reveal a qualitative shift. AI-assisted writing reduces lower-level encoding demands while increasing central-executive demands for evaluation and integration; biometric-adaptive reading systems preemptively regulate cognitive load and improve comprehension; and AI-orchestrated VR&#x2013;AR vocabulary instruction yields large gains only within empirically bounded multimodal channel limits. AI-mediated data-driven learning further offloads corpus search, reallocating WM resources toward noticing and internalization.</p>
</sec>
<sec id="sec1004">
<title>Discussion</title>
<p>Despite these advances, direct assessment of WM is largely absent from AI-mediated intervention studies, which rely on cognitive load proxies. This measurement gap limits causal inference regarding whether AI primarily reduces task demands, improves functional WM utilization, or supports WM capacity development. The review calls for future research to incorporate validated WM measures, adopt aptitude&#x2013;treatment interaction designs, and establish evidence-based boundaries for AI-mediated multimodal adaptivity across diverse EFL and ESL contexts.</p>
</sec>
</abstract>
<kwd-group>
<kwd>adaptive learning</kwd>
<kwd>AI-assisted language learning</kwd>
<kwd>aptitude&#x2013;treatment interaction</kwd>
<kwd>cognitive load</kwd>
<kwd>cognitive load redistribution</kwd>
<kwd>computer-assisted language learning</kwd>
<kwd>multimodal instruction</kwd>
<kwd>working memory</kwd>
</kwd-group>
<funding-group>
<funding-statement>The author(s) declared that financial support was not received for this work and/or its publication.</funding-statement>
</funding-group>
<counts>
<fig-count count="1"/>
<table-count count="7"/>
<equation-count count="0"/>
<ref-count count="67"/>
<page-count count="30"/>
<word-count count="20763"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Educational Psychology</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec id="sec1">
<title>Highlights</title>
<list list-type="bullet">
<list-item>
<p>Historical&#x2013;comparative synthesis of 31 empirical studies spanning the Interactive Era (2010&#x2013;2024; <italic>n</italic> = 27) and the emerging AI-Mediated Era (2024&#x2013;2025; <italic>n</italic> = 4).</p>
</list-item>
<list-item>
<p>Interactive technologies support WM through multimodality, adaptive feedback, and flexible pacing, yet frequently induce cognitive overload and produce unequal outcomes linked to individual WM capacity.</p>
</list-item>
<list-item>
<p>AI-mediated tools redistribute&#x2014;rather than merely reduce&#x2014;cognitive load: generative AI offloads lower-level encoding while elevating central-executive demands for evaluation, prompt management, and integrative synthesis.</p>
</list-item>
<list-item>
<p>Biometric-adaptive AI enables preemptive cognitive regulation; AI-orchestrated multimodal instruction shows optimal effects within an empirically bounded 3&#x2013;4 channel limit.</p>
</list-item>
<list-item>
<p>Critical measurement gap identified: direct WM assessment is virtually absent from AI intervention studies, precluding causal inference about whether AI reduces task demands, enhances WM utilization, or supports WM plasticity.</p>
</list-item>
</list>
</sec>
<sec sec-type="intro" id="sec2">
<label>1</label>
<title>Introduction</title>
<p>The relationship between language learning technology and human cognition has long been framed around the limitations of working memory&#x2014;the cognitive system responsible for the temporary storage and manipulation of information that underpins complex comprehension and production (<xref ref-type="bibr" rid="ref3">Baddeley, 2000</xref>). In second language acquisition, working memory is widely recognized as a core bottleneck: tasks that overtax working memory tend to reduce accuracy, depth of processing, and long-term retention, particularly for learners with lower working memory capacity (<xref ref-type="bibr" rid="ref15">Gathercole and Alloway, 2008</xref>). Consequently, the design of technology-enhanced language learning environments has historically been guided by the imperative to manage, or at least not exceed, this limited cognitive resource.</p>
<p>This review argues that the field is now in the midst of a transition between two technological eras with fundamentally different implications for working memory. The first is a pre-AI &#x201C;Interactive Era,&#x201D; in which technologies such as computer-assisted language learning software, hypermedia environments, captioned video, and online platforms provided valuable interactivity but were fundamentally static and pre-scripted (<xref ref-type="bibr" rid="ref34">Lobin and R&#x00F6;sler, 2012</xref>). The second is an emerging &#x201C;AI-Mediated Era,&#x201D; in which generative AI chatbots, intelligent tutoring systems, biometric-adaptive platforms, and AI-orchestrated virtual and augmented reality environments are dynamic and dialogic, capable in principle of tailoring support to real-time learner states.</p>
<p>In the Interactive Era, interactivity was essentially reactive and bounded by pre-authored content, fixed branching, and rule-based feedback. Computer-assisted language learning software and online language courses grew in popularity, offering learners a range of interactive and engaging activities including multimedia presentations, language games, and virtual conversations (<xref ref-type="bibr" rid="ref25">Jiang et al., 2017</xref>; <xref ref-type="bibr" rid="ref27">Jones et al., 2017</xref>). However, these environments also presented challenges related to working memory: learners needed to manage cognitive load, cope with distractions, and navigate individual differences in working memory capacity to effectively learn new vocabulary, grammatical structures, and communication skills.</p>
<p>Within this paradigm, working memory was conceptualized primarily as a fixed individual-difference variable&#x2014;a learner property that moderated the extent to which individuals could benefit from complex multimedia and self-directed tasks. Research in this era, especially under the influence of Cognitive Load Theory (<xref ref-type="bibr" rid="ref50">Sweller et al., 2019</xref>), focused on mitigating the risk of overload by managing information density, controlling redundancy, sequencing tasks appropriately, and coordinating modalities to support dual-channel processing (<xref ref-type="bibr" rid="ref38">Mutlu-Bayraktar et al., 2019</xref>). The underlying assumption was that good instructional design should stay within working memory limits; working memory itself was not something the system could sense, measure, or adapt to in real time.</p>
<p>Before proceeding, it is essential to distinguish among three theoretically and empirically distinct WM constructs that are often conflated in the literature (<xref ref-type="bibr" rid="ref55">Unsworth and Engle, 2007</xref>; <xref ref-type="bibr" rid="ref15">Gathercole and Alloway, 2008</xref>). WM capacity refers to the relatively stable, trait-like limit on the amount of information an individual can simultaneously maintain and manipulate, typically assessed through span tasks (e.g., digit span, reading span, operation span). WM utilization (or efficiency) refers to how effectively available WM resources are deployed under varying task conditions, including strategic allocation, attentional control, and resistance to interference&#x2014;a process-level construct that can vary within individuals across contexts. WM training (or plasticity) refers to the potential for systematic practice to enhance WM capacity or efficiency, typically examined through extended training paradigms and transfer assessments. These three constructs carry distinct implications for instructional design: capacity sets an upper bound that instruction must respect, utilization determines how close to that bound learners can perform under given conditions, and plasticity determines whether instructional interventions can expand the bound itself. Critically, these distinctions have direct relevance for AI-mediated instruction, as AI systems might (a) respect capacity limits by dynamically adjusting task complexity, (b) improve utilization by optimizing scaffolding and reducing extraneous load, or (c) support plasticity through extended adaptive training regimens.</p>
<p>Empirical work in interactive environments repeatedly underscored the constraint imposed by limited working memory capacity. <xref ref-type="bibr" rid="ref57">Varol and Er&#x00E7;etin (2021)</xref> showed that gloss type and position affected comprehension, with higher working memory capacity associated with better outcomes in hypermedia reading. <xref ref-type="bibr" rid="ref20">Hong et al. (2021)</xref> reported that intrinsic cognitive load negatively affected flow experience, while gameplay interest positively affected flow in game-based environments. <xref ref-type="bibr" rid="ref54">T&#x00FC;rk and Er&#x00E7;etin (2014)</xref> found that interactive glosses were more effective than simultaneous glosses in promoting reading comprehension and incidental vocabulary learning, suggesting that better alignment with working memory limitations improves learning. <xref ref-type="bibr" rid="ref2">Aryadoust (2020)</xref> reported that higher working memory capacity was associated with better performance on computerized while-listening tests. Across such studies, working memory consistently emerged as a predictor of success rather than a target for real-time intervention.</p>
<p>The arrival of powerful AI systems in language education is reshaping this landscape fundamentally. AI-mediated environments now include generative AI chatbots such as ChatGPT, Microsoft Copilot, and DeepSeek that can participate in open-ended dialogue, generate exemplars, and provide context-sensitive feedback. They also include adaptive learning platforms that adjust content difficulty, pacing, and support based on learner performance and, in advanced implementations, biometric indicators such as eye-tracking and physiological monitoring. Virtual and augmented reality environments combine immersive technologies with AI-driven content generation for multimodal, embodied language experiences (<xref ref-type="bibr" rid="ref46">Squires, 2017</xref>). Intelligent personal assistants such as Amazon Alexa, Google Assistant, and Apple Siri enable voice-activated spoken interaction for listening and speaking practice.</p>
<p>These technologies are not limited to presenting pre-authored content; they can analyze learner input and behavior, detect patterns, and respond with adaptive scaffolding (<xref ref-type="bibr" rid="ref8">Chen et al., 2020</xref>; <xref ref-type="bibr" rid="ref60">Xu, 2025</xref>). This capability invites a reconceptualization of working memory from a static constraint to a dynamic target for adaptive compensation and regulation. Working memory becomes not only &#x201C;something to design around&#x201D; but potentially &#x201C;something to design to&#x201D;&#x2014;a parameter that AI systems might measure, infer, and respond to. This evolution reintroduces the principle of Aptitude-Treatment Interaction, suggesting that AI systems could personalize instructional support based on a learner&#x2019;s cognitive profile in ways previously impossible (<xref ref-type="bibr" rid="ref42">Sana and Fenesi, 2025</xref>).</p>
<p>Initial empirical evidence points to both the promise and complexity of this technological shift. At a macro level, meta-analytic findings suggest that AI-assisted learning produces large overall effects (d&#x202F;=&#x202F;1.17) relative to non-AI conditions, with particularly strong effects for vocabulary (d&#x202F;=&#x202F;2.21) and receptive skills (d&#x202F;=&#x202F;2.01). Notably, K&#x2013;12 learners&#x2014;whose working memory is still developing&#x2014;show descriptively larger gains (d&#x202F;=&#x202F;1.445) than college students (d&#x202F;=&#x202F;0.988), a pattern consistent with a compensatory function of AI&#x2019;s adaptive pacing for learners with less mature working memory capacity (<xref ref-type="bibr" rid="ref62">Xu et al., 2025</xref>).</p>
<p>At a finer-grained level, recent AI-mediated interventions demonstrate that AI does not simply reduce cognitive load, as traditional multimedia design sought to do; rather, it can redistribute load across working memory subsystems. Generative AI tools appear to offload lower-level encoding demands that traditionally tax the phonological loop while substantially increasing central executive demands for critical evaluation, prompt management, and integration of AI-generated content (<xref ref-type="bibr" rid="ref14">Fan and Yao, 2025</xref>). Biometric-adaptive reading systems can preempt overload by adjusting difficulty in real time before errors accumulate (<xref ref-type="bibr" rid="ref65">Yuan, 2025</xref>), while AI-driven multimodality enhances learning up to an empirically defined channel boundary of three to four concurrent sources, beyond which sensory overload undermines retention (<xref ref-type="bibr" rid="ref64">Yu, 2025</xref>). AI-mediated data-driven learning can offload the search burden that traditionally taxed working memory in concordancing tasks, theoretically freeing resources for deeper noticing and internalization (<xref ref-type="bibr" rid="ref13">Esfandiari and Allaf-Akbary, 2024</xref>).</p>
<p>Parallel evidence from neurocognitive research suggests that working memory itself may be more malleable than earlier static trait models implied. Computer-based multisensory working memory training using language materials has been shown to enhance both working memory capacity and second language ability, with transfer effects to novel auditory tasks and increased dorsolateral prefrontal cortex efficiency (<xref ref-type="bibr" rid="ref16">Gkintoni et al., 2025</xref>). Additionally, evidence suggests that verbal, rather than spatial, working memory is particularly critical for second language achievement: high-achieving second language Chinese learners significantly outperformed low achievers on forward and backward digit span tasks but showed no differences in spatial working memory, with achievement correlating strongly with backward span (<xref ref-type="bibr" rid="ref59">Xiao et al., 2025</xref>). These findings have direct implications for which aspects of working memory AI systems should prioritize when providing scaffolding (<xref ref-type="bibr" rid="ref47">Stakanova, 2023</xref>).</p>
<p>Despite these advances, a critical empirical gap tempers the promise of AI-mediated working memory support. Systematic reviews consistently document a measurement paradox: as AI in language education invokes working memory and cognitive load more explicitly, it measures working memory less directly. <xref ref-type="bibr" rid="ref6">Chalmers et al. (2021)</xref>, analyzing 111 second language aptitude studies spanning six decades, found working memory among the most frequently examined cognitive predictors (8.1% of independent variables) yet identified no AI-mediated interventions explicitly targeting or measuring working memory. <xref ref-type="bibr" rid="ref66">Zhang and Aubrey (2024)</xref> found only one study examining working memory as an individual-difference factor across all second language pragmatics research, with no AI-mediated pragmatics studies including working memory measures. <xref ref-type="bibr" rid="ref17">Goh and Aryadoust (2025)</xref> reported that while intelligent personal assistants and generative AI are increasingly used for second language listening and speaking, none of the reviewed AI studies included working memory tasks or aptitude-treatment interaction analyses. <xref ref-type="bibr" rid="ref60">Xu (2025)</xref> noted that intelligent personal assistants are widely theorized to engage working memory through real-time communication practice, but direct empirical evidence on AI-mediated working memory outcomes remains absent.</p>
<p>Even the AI-mediated primary studies with the strongest cognitive focus rely on cognitive load scales and process indicators as proxies rather than direct working memory assessment. This creates a causal gap: without measuring working memory directly, researchers cannot determine whether AI reduces task demands (a design property), enhances functional working memory utilization (a processing property), or improves working memory capacity itself (a training property). Evidence that working memory may influence learning indirectly through proficiency (<xref ref-type="bibr" rid="ref53">Teng, 2024</xref>) further complicates the picture, suggesting that working memory&#x2013;AI relationships may involve mediation pathways that require sophisticated longitudinal and aptitude-treatment interaction designs to untangle.</p>
<p>Against this backdrop, the present systematic review is explicitly structured as a historical-comparative analysis of working memory in technology-enhanced language learning. To address the conceptual and empirical gaps identified above, the review is organized around three research questions:</p>
<disp-quote>
<p>RQ1 (Design Guidelines): What instructional design features of interactive language learning environments (2010&#x2013;2024) support working memory efficiency, and which features challenge or overload working memory capacity?</p>
</disp-quote>
<disp-quote>
<p>RQ2 (WM&#x202F;&#x00D7;&#x202F;AI Affordances Interactions): How do AI-mediated language learning affordances&#x2014;including generative chatbots, biometric-adaptive systems, and multimodal VR&#x2013;AR platforms&#x2014;interact with working memory processes, and do these interactions differ qualitatively from those observed in traditional interactive environments?</p>
</disp-quote>
<disp-quote>
<p>RQ3 (Boundary Conditions and Unintended Consequences): What are the empirical boundary conditions of AI-mediated working memory support, and what unintended consequences&#x2014;including cognitive load redistribution, measurement gaps, and potential over-scaffolding (i.e., overprotection) with downstream implications for metacognitive monitoring and executive-skill development&#x2014;emerge from AI integration in language learning?</p>
</disp-quote>
<p>Corresponding to these research questions, the review pursues three objectives. The first objective (aligned with RQ1) is to synthesize findings from a corpus of 27 empirical studies on the challenges and affordances of working memory in traditional, non-AI interactive language learning environments from 2010 to 2024, including computer-assisted language learning software, hypermedia, online platforms, and multimedia, thereby deriving evidence-based design guidelines for WM-sensitive instruction. The second objective (aligned with RQ2) is to juxtapose these findings with evidence from four recent AI-mediated primary empirical studies (<xref ref-type="bibr" rid="ref13">Esfandiari and Allaf-Akbary, 2024</xref>; <xref ref-type="bibr" rid="ref14">Fan and Yao, 2025</xref>; <xref ref-type="bibr" rid="ref64">Yu, 2025</xref>; <xref ref-type="bibr" rid="ref65">Yuan, 2025</xref>) that directly examine cognitive load and working memory-relevant processes in AI-assisted writing, adaptive reading, multimodal vocabulary learning, and AI-mediated data-driven learning, analyzing how AI affordances interact with WM subsystems in ways that differ from pre-AI technologies. The third objective (aligned with RQ3) is to identify the empirical boundary conditions under which AI-mediated support optimizes versus undermines WM functioning, and to document unintended consequences&#x2014;particularly the cognitive load redistribution from encoding to evaluation, the measurement paradox whereby WM is increasingly invoked but decreasingly measured, and potential over-scaffolding (i.e., overprotection) with downstream implications for metacognitive monitoring and executive-skill development&#x2014;that carry implications for future research and practice.</p>
<p>To systematize the relationship between WM constructs and AI affordances, <xref ref-type="table" rid="tab1">Table 1</xref> presents a conceptual framework mapping the three WM constructs (capacity, utilization, and training/plasticity) to four key AI affordances (adaptivity, multimodality, generative support, and feedback timing). For each pairing, the framework specifies the theorized mechanism and articulates a testable prediction suitable for future empirical investigation. For example, AI-driven adaptivity is hypothesized to respect capacity limits by dynamically adjusting task complexity, leading to the testable prediction that learners with lower WM capacity will show larger performance gains under adaptive versus fixed-difficulty conditions. Similarly, multimodal AI environments may improve WM utilization by distributing load across subsystems (phonological loop, visuospatial sketchpad), predicting that optimal retention will occur within a bounded channel range (e.g., 3&#x2013;4 concurrent sources) with diminishing returns beyond this threshold. Generative AI support may offload lower-level encoding processes while increasing central-executive demands, predicting construct-specific load redistribution measurable via differentiated cognitive load instruments. Finally, preemptive feedback timing enabled by biometric AI may prevent utilization failures before they accumulate, predicting superior outcomes compared to reactive (post-error) adaptation. This framework enables researchers to formulate and test construct-specific hypotheses rather than treating WM as an undifferentiated variable, thereby advancing theoretical precision in the study of AI-mediated language learning.</p>
<table-wrap position="float" id="tab1">
<label>Table 1</label>
<caption>
<p>Corpus structure summary.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">Corpus layer</th>
<th align="center" valign="top">Number of studies</th>
<th align="center" valign="top">Time period</th>
<th align="left" valign="top">Technology type</th>
<th align="left" valign="top">Working memory conceptualization</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="middle">Interactive corpus</td>
<td align="center" valign="middle">27</td>
<td align="char" valign="middle" char="&#x2013;">2010&#x2013;2024</td>
<td align="left" valign="middle">Traditional CALL, multimedia, online platforms, hypermedia, captioned video</td>
<td align="left" valign="middle">Working memory as a constraint to design around; occasionally measured directly via span tasks</td>
</tr>
<tr>
<td align="left" valign="middle">AI-mediated cluster</td>
<td align="center" valign="middle">4</td>
<td align="char" valign="middle" char="&#x2013;">2024&#x2013;2025</td>
<td align="left" valign="middle">Generative AI chatbots, biometric-adaptive platforms, VR-AR with LLM integration, AI-mediated DDL</td>
<td align="left" valign="middle">Working memory as a target for compensation and regulation; primarily assessed via cognitive load proxies and process data</td>
</tr>
<tr>
<td align="left" valign="middle">Total primary empirical</td>
<td align="center" valign="middle">31</td>
<td align="char" valign="middle" char="&#x2013;">2010&#x2013;2025</td>
<td align="left" valign="middle">&#x2013;</td>
<td align="left" valign="middle">&#x2013;</td>
</tr>
<tr>
<td align="left" valign="middle">Contextual literature</td>
<td align="center" valign="middle">~10</td>
<td align="char" valign="middle" char="&#x2013;">2021&#x2013;2025</td>
<td align="left" valign="middle">Meta-analyses, systematic reviews, theoretical and neurocognitive papers</td>
<td align="left" valign="middle">Interpretive context; documents the working memory measurement gap and motivates ATI-oriented research</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<p>CALL, computer-assisted language learning; VR-AR, virtual reality and augmented reality; LLM, large language model; DDL, data-driven learning; ATI, aptitude-treatment interaction.</p>
</table-wrap-foot>
</table-wrap>
<p>The broader AI-related literature&#x2014;including meta-analyses such as <xref ref-type="bibr" rid="ref62">Xu et al. (2025)</xref>, systematic reviews (<xref ref-type="bibr" rid="ref6">Chalmers et al., 2021</xref>; <xref ref-type="bibr" rid="ref16">Gkintoni et al., 2025</xref>; <xref ref-type="bibr" rid="ref17">Goh and Aryadoust, 2025</xref>; <xref ref-type="bibr" rid="ref66">Zhang and Aubrey, 2024</xref>), and theoretical frameworks (<xref ref-type="bibr" rid="ref42">Sana and Fenesi, 2025</xref>; <xref ref-type="bibr" rid="ref60">Xu, 2025</xref>)&#x2014;is used to contextualize these 31 core empirical studies but is not counted in the primary empirical corpus. Additional empirical work examining working memory-relevant constructs in non-AI or AI-adjacent contexts, such as <xref ref-type="bibr" rid="ref53">Teng (2024)</xref> and <xref ref-type="bibr" rid="ref59">Xiao et al. (2025)</xref>, further informs the interpretive framework.</p>
<p>By comparing the 27-study Interactive Era corpus with the 4-study AI-Mediated cluster and situating both within this broader context, the review seeks to identify continuities (what remains true across eras), transformations (what has fundamentally changed), and persistent measurement gaps (what researchers still cannot determine) in how working memory is conceptualized, engaged, and empirically assessed as language education transitions into the AI era.</p>
</sec>
<sec sec-type="methods" id="sec3">
<label>2</label>
<title>Method</title>
<p>This systematic review is reported in accordance with the Preferred Reporting Items for Systematic Reviews and Meta-Analyses (PRISMA) 2020 Statement (<xref ref-type="bibr" rid="ref39">Page et al., 2021</xref>). <xref ref-type="fig" rid="fig1">Figure 1</xref> provides the PRISMA 2020 flow diagram documenting identification, screening, eligibility, and inclusion across both Phase 1 (Interactive Corpus) and Phase 2 (AI-Mediated Cluster), including categorized reasons for full-text exclusion with exact counts. The completed PRISMA 2020 checklist with page/section cross-references is provided in <xref ref-type="supplementary-material" rid="SM1">Appendix A</xref> (PRISMA 2020 Checklist). Because the review is explicitly historical-comparative and constructs two analytically distinct corpora (Interactive vs. AI-mediated) rather than estimating a single pooled effect, synthesis is conducted within each corpus prior to cross-era comparison, and contextual (non-primary) literature used for interpretation is reported separately from the primary empirical study count.</p>
<fig position="float" id="fig1">
<label>Figure 1</label>
<caption>
<p>PRISMA 2020 flow diagram of study identification, screening, eligibility, and inclusion across phase 1 (interactive corpus) and phase 2 (AI-mediated cluster), including categorized reasons for full-text exclusions with exact counts.</p>
</caption>
<graphic xlink:href="fpsyg-17-1758104-g001.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">PRISMA 2020 flow diagram showing the identification, screening, and inclusion process for systematic review studies. Out of 528 records identified from both databases and other methods, duplicates and exclusions were removed through each step, resulting in 31 studies included, with 27 in the Interactive Corpus and 4 in the AI-Mediated Cluster.</alt-text>
</graphic>
</fig>
<p>This study adopts a two-phase systematic review design with an explicit historical-comparative structure. The goal is not to collapse all studies into a single pooled estimate but to construct two analytically distinct yet comparable corpora, each analyzed in relation to the three guiding research questions (RQ1: design guidelines; RQ2: WM&#x202F;&#x00D7;&#x202F;AI affordances interactions; RQ3: boundary conditions and unintended consequences): an Interactive Corpus of 27 empirical studies examining working memory in traditional, non-AI technology-enhanced language learning environments from 2010 to 2024, and an AI-Mediated Cluster of four empirical studies from 2024 to 2025 providing detailed evidence on cognitive load and working memory-relevant processes in AI-mediated English as a foreign language and second language learning. These 31 studies constitute the primary empirical dataset. Additional meta-analytic, systematic review, theoretical, and AI-adjacent empirical work is drawn upon to interpret patterns and highlight gaps but is not included in the numerical count of the primary empirical corpus.</p>
<p>Phase 1 focused on the pre-AI Interactive Era. A systematic search was conducted for studies published between 2010 and 2024 that investigated working memory in interactive, technology-enhanced language learning contexts. Searches were conducted in major databases including Web of Science, Scopus, and ERIC. The core search string combined working memory with generic interactive technology terms: &#x201C;working memory&#x201D; AND (&#x201C;computer-assisted language learning&#x201D; OR &#x201C;interactive language learning&#x201D; OR &#x201C;online language learning&#x201D;). This string was adapted to each database&#x2019;s syntax while preserving its conceptual structure. Limits were set to peer-reviewed journal articles, English language, and second or foreign language learning contexts. Reference lists of eligible papers and key theoretical and review articles were screened through backward citation tracking to identify additional studies that met inclusion criteria but might not have been captured due to terminology variation.</p>
<p>In total, 372 records were initially identified through database searching and other sources (<xref ref-type="fig" rid="fig1">Figure 1</xref>). After duplicate records were removed, titles and abstracts were screened for relevance, leading to the exclusion of 162 records that clearly did not concern working memory or technology-enhanced language learning. The remaining 196 full-text articles were assessed for eligibility against predefined inclusion and exclusion criteria. After this full-text screening, 169 articles were excluded (categorized reasons and counts are reported in <xref ref-type="fig" rid="fig1">Figure 1</xref>).</p>
<p>Studies were included in the Interactive Corpus if they reported empirical data (experimental, quasi-experimental, correlational, or mixed-methods) on language learning or performance in interactive, technology-enhanced environments such as computer-assisted language learning software, multimedia platforms, hypermedia, non-AI captioned video, or online courses. Studies were also required to explicitly discuss working memory, either conceptually by framing results in terms of working memory demands or capacity, or empirically by using working memory measures as predictors, moderators, or covariates in relation to language learning outcomes. Additionally, studies needed to provide sufficient methodological detail to allow assessment of sample characteristics, instructional context, technological environment, and outcome measures. Studies were excluded if they focused exclusively on non-interactive or purely traditional face-to-face instruction without technology, did not mention working memory or working memory-related constructs, were purely theoretical or descriptive without empirical data, or implemented AI-mediated instruction, as these were reserved for Phase 2 screening.</p>
<p>Applying these criteria yielded 27 empirical studies constituting the Interactive Corpus. These studies all predate the widespread deployment of generative AI and biometrically adaptive systems in language education and do not involve AI-mediated instruction. Technologies examined include conventional multimedia computer-assisted language learning, hypermedia environments, captioned video without AI, online learning platforms, language games, and traditional data-driven learning using concordancers.</p>
<p>Phase 2 targeted the emerging AI-Mediated Era, focusing on identifying primary empirical interventions that both implemented AI-mediated language learning and provided rich cognitive load or working memory-relevant data suitable for comparison with the Interactive Corpus. To capture the rapidly emerging AI landscape, the search strategy was expanded to include AI-specific terminology alongside working memory-related constructs. Searches were conducted through 2025 in the same core databases, using a representative string such as: (&#x201C;working memory&#x201D; OR &#x201C;cognitive load&#x201D;) AND (&#x201C;artificial intelligence&#x201D; OR &#x201C;AI-assisted&#x201D; OR &#x201C;generative AI&#x201D; OR &#x201C;chatbot&#x201D; OR &#x201C;intelligent tutoring&#x201D; OR &#x201C;adaptive learning&#x201D; OR &#x201C;AI-mediated&#x201D; OR &#x201C;virtual reality language learning&#x201D;). Additional targeted searches used combinations such as &#x201C;cognitive load&#x201D; AND &#x201C;AI-assisted writing,&#x201D; &#x201C;biometric feedback&#x201D; AND &#x201C;language learning,&#x201D; &#x201C;VR&#x201D; OR &#x201C;AR&#x201D; AND &#x201C;AI&#x201D; AND &#x201C;vocabulary learning,&#x201D; &#x201C;LLM&#x201D; AND &#x201C;second language acquisition,&#x201D; and &#x201C;working memory training&#x201D; AND &#x201C;L2.&#x201D; These searches were supplemented by forward and backward citation tracking from key AI-in-education and AI-in-second language acquisition publications, as well as manual scanning of recent issues of high-relevance journals including Computer Assisted Language Learning, Language Learning &#x0026; Technology, ReCALL, and Frontiers in Psychology. Identification, screening, eligibility, and inclusion counts for Phase 2 are reported in <xref ref-type="fig" rid="fig1">Figure 1</xref> to document the full two-phase selection process in PRISMA 2020 format.</p>
<p>To retain a tight, analytically coherent cluster for detailed comparative analysis, Phase 2 applied more restrictive criteria. Studies were included in the AI-Mediated Cluster if they implemented AI-mediated language learning interventions such as generative AI-assisted writing, AI-orchestrated virtual and augmented reality instruction, biometric-adaptive AI reading platforms, or AI-mediated data-driven learning. Studies were also required to report quantitative data on cognitive load and/or working memory-relevant processing, including validated cognitive load scales, detailed subscale analyses, or process indicators such as eye-tracking interpreted in terms of working memory demands. Additionally, studies needed to involve English as a foreign language or second language learners in classroom-like or ecologically valid learning settings, and to provide sufficient statistical detail including means, standard deviations, effect sizes, F-statistics, partial eta squared values, and beta coefficients to support comparative analysis. Studies were excluded if they were systematic reviews, meta-analyses, or theoretical papers without primary empirical data, as these were used for contextual interpretation only. Studies were also excluded if they did not include cognitive load or working memory-relevant measures or examined AI in non-language-learning contexts. Based on these criteria, four primary empirical studies were retained as the AI-Mediated Cluster (<xref ref-type="fig" rid="fig1">Figure 1</xref>).</p>
<p><xref ref-type="bibr" rid="ref64">Yu (2025)</xref> conducted a pretest-posttest randomized controlled trial with 383 Chinese English as a foreign language learners examining AI-driven multimodal vocabulary instruction using virtual and augmented reality technologies including Unity, Oculus Quest 2, and Google MediaPipe combined with a generative AI chatbot based on ChatGPT-4. The study provides quantitative data on sensory overload boundaries and establishes an inverted-U relationship between channel complexity and retention. <xref ref-type="bibr" rid="ref65">Yuan (2025)</xref> conducted a 12-week randomized controlled trial with 300 Chinese English as a foreign language learners investigating an AI-enhanced biometric-adaptive reading platform called Smart Sparrow that integrates eye-tracking using Tobii Pro X3-120 and physiological indicators to dynamically adjust text difficulty. The study demonstrates preemptive cognitive regulation and provides detailed cognitive load data from the Differentiated Cognitive Load Questionnaire subscales alongside eye-tracking indicators. <xref ref-type="bibr" rid="ref14">Fan and Yao (2025)</xref> conducted a scale development and validation study with 546 participants across exploratory and confirmatory factor analysis samples for the Cognitive Load Scale for AI-Assisted L2 Writing, examining Chinese English as a foreign language learners using a generative AI chatbot called DeepSeek V3.1 for argumentative writing. The study provides a detailed four-factor profile of cognitive load redistribution across Authorial Core Processing, Critical Evaluation, Prompt Management, and Integrative Synthesis. 
<xref ref-type="bibr" rid="ref13">Esfandiari and Allaf-Akbary (2024)</xref> conducted a three-arm randomized controlled trial with 93 Iranian advanced English as a foreign language learners comparing AI-mediated intelligent data-driven learning using Microsoft Copilot based on GPT-4, traditional corpus-based data-driven learning using AntConc, and conventional instruction for developing interactional metadiscourse markers. The study demonstrates very large effects for AI-mediated data-driven learning and theorizes working memory support via Schmidt&#x2019;s noticing hypothesis.</p>
<p>To provide interpretive context for the 31-study empirical corpus, additional sources were systematically reviewed but not included in the numerical study count. These include meta-analytic work such as <xref ref-type="bibr" rid="ref62">Xu et al. (2025)</xref>, who synthesized 15 AI-assisted second language learning studies with 2,156 participants and reported large overall effects and potential developmental differences consistent with working memory compensation. Systematic and narrative reviews documenting the working memory measurement gap and AI&#x2019;s cognitive implications were also consulted, including <xref ref-type="bibr" rid="ref6">Chalmers et al. (2021)</xref>, <xref ref-type="bibr" rid="ref66">Zhang and Aubrey (2024)</xref>, <xref ref-type="bibr" rid="ref17">Goh and Aryadoust (2025)</xref>, and <xref ref-type="bibr" rid="ref16">Gkintoni et al. (2025)</xref>. Theoretical and conceptual papers elaborating aptitude-treatment interaction frameworks and AI-related aptitude perspectives were reviewed, including <xref ref-type="bibr" rid="ref42">Sana and Fenesi (2025)</xref> and <xref ref-type="bibr" rid="ref60">Xu (2025)</xref>. Empirical AI-adjacent studies with working memory-relevant data that inform but are not central to the two main corpora were also considered, including <xref ref-type="bibr" rid="ref53">Teng (2024)</xref> and <xref ref-type="bibr" rid="ref59">Xiao et al. (2025)</xref>. These sources inform the Discussion and Future Directions sections, particularly the articulation of the measurement paradox and the argument for aptitude-treatment interaction-based AI research, but they do not alter the primary empirical synthesis count of 31 studies.</p>
<p>For each of the 31 empirical studies (27 Interactive and 4 AI-Mediated), a standardized data-extraction template was used to capture bibliographic information, including authors, year, journal, and country or region. Participant characteristics were recorded, including sample size, age and educational level, first language, target language, and proficiency indicators. Instructional context was documented, including course type, skills targeted, and duration and intensity of the intervention. Technology characteristics were captured, including type of platform or tool, whether non-AI or AI-mediated, and presence of multimodality, adaptivity, or biometric integration. Working memory-related constructs and measures were recorded, including direct working memory tests (such as span tasks and n-back); cognitive load scales (such as NASA-TLX, the Differentiated Cognitive Load Questionnaire, and the Cognitive Load Scale for AI-Assisted L2 Writing); and process indicators (such as eye-tracking and physiological measures) interpreted in terms of working memory demands. Study design was documented as experimental, quasi-experimental, correlational, scale development and validation, or mixed methods. Outcome measures were recorded, including language performance (such as vocabulary, reading comprehension, and writing quality), process indicators, and affective outcomes. Key findings were extracted, specifically those related to working memory challenges (such as overload and inequitable effects by working memory level) and working memory affordances (such as strategic use of working memory, dynamic regulation, and compensation).</p>
<p>Analysis proceeded in two stages. In the within-era synthesis stage, for the Interactive Corpus, themes were identified concerning how working memory was conceptualized as constraint, predictor, or moderator, how it was measured directly versus inferred, and how interactive design features affected working memory-related outcomes in terms of affordances versus overload. For the AI-Mediated Cluster, the focus was on how AI systems redistributed cognitive load across working memory subsystems, how they regulated working memory demands through reactive versus preemptive mechanisms, and what specific boundary conditions emerged such as channel limits in multimodal virtual and augmented reality instruction.</p>
<p>In the historical-comparative integration stage, findings from the two corpora were compared along dimensions defined by the conceptual framework, including working memory as constraint versus working memory as target for compensation and regulation, load reduction versus load redistribution, reactive adaptation versus preemptive regulation via biometrics, direct working memory measurement versus reliance on cognitive load proxies and process indicators, and general notions of overload risk versus empirically specified boundary conditions such as three to four concurrent channels in virtual and augmented reality instruction. This comparative structure underpins the Results, Discussion, and Implications sections, where the evolution of English as a foreign language education from static interactive environments to AI-mediated systems is analyzed in terms of continuities, transformations, and persistent empirical gaps regarding working memory.</p>
<p>The quality of the selected studies was assessed using the Mixed Methods Appraisal Tool (<xref ref-type="bibr" rid="ref41">Pluye et al., 2011</xref>), which is designed to evaluate qualitative, quantitative, and mixed-methods research. The Mixed Methods Appraisal Tool was used to appraise study design, data collection, analysis, and reporting. For randomized controlled trials within both corpora, additional evaluation employed the Cochrane Risk of Bias tool (<xref ref-type="bibr" rid="ref19">Higgins et al., 2021</xref>). For non-randomized quantitative studies, the Risk of Bias in Non-randomized Studies of Interventions tool (<xref ref-type="bibr" rid="ref48">Sterne et al., 2016</xref>) guided assessment of confounding, selection, measurement, and reporting biases.</p>
<p>Notable quality considerations, especially pertinent to the AI-Mediated Cluster and contextual meta-analytic work, include evidence of publication bias favoring positive AI effects, as indicated by funnel plot asymmetry in the meta-analysis by <xref ref-type="bibr" rid="ref62">Xu et al. (2025)</xref>. The concentration of AI-mediated primary studies in EFL contexts, specifically Chinese and Iranian learners, limits generalizability to other languages, regions, and educational systems. The persistent absence of direct working memory measurement in AI-mediated intervention studies, despite heavy reliance on working memory-related theorizing, represents a significant methodological limitation. Quality assessment was conducted independently by two reviewers, with discrepancies resolved through discussion and consensus (<xref ref-type="table" rid="tab2">Table 2</xref>).</p>
<table-wrap position="float" id="tab2">
<label>Table 2</label>
<caption>
<p>Corpus structure summary.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">Corpus layer</th>
<th align="center" valign="top">Number of studies</th>
<th align="center" valign="top">Time period</th>
<th align="left" valign="top">Technology type</th>
<th align="left" valign="top">Working memory conceptualization</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="middle">Interactive corpus</td>
<td align="center" valign="middle">27</td>
<td align="char" valign="middle" char="&#x2013;">2010&#x2013;2024</td>
<td align="left" valign="middle">Traditional CALL, multimedia, online platforms, hypermedia, captioned video</td>
<td align="left" valign="middle">Working memory as a constraint to design around; occasionally measured directly via span tasks</td>
</tr>
<tr>
<td align="left" valign="middle">AI-mediated cluster</td>
<td align="center" valign="middle">4</td>
<td align="char" valign="middle" char="&#x2013;">2024&#x2013;2025</td>
<td align="left" valign="middle">Generative AI chatbots, biometric-adaptive platforms, VR-AR with LLM integration, AI-mediated DDL</td>
<td align="left" valign="middle">Working memory as a target for compensation and regulation; primarily assessed via cognitive load proxies and process data</td>
</tr>
<tr>
<td align="left" valign="middle">Total primary empirical</td>
<td align="center" valign="middle">31</td>
<td align="char" valign="middle" char="&#x2013;">2010&#x2013;2025</td>
<td align="left" valign="middle">&#x2014;</td>
<td align="left" valign="middle">&#x2014;</td>
</tr>
<tr>
<td align="left" valign="middle">Contextual literature</td>
<td align="center" valign="middle">~10</td>
<td align="char" valign="middle" char="&#x2013;">2021&#x2013;2025</td>
<td align="left" valign="middle">Meta-analyses, systematic reviews, theoretical and neurocognitive papers</td>
<td align="left" valign="middle">Interpretive context; documents the working memory measurement gap and motivates ATI-oriented research</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<p>CALL, computer-assisted language learning; VR-AR, virtual reality and augmented reality; LLM, large language model; DDL, data-driven learning; ATI, aptitude-treatment interaction.</p>
</table-wrap-foot>
</table-wrap>
<p>These methodological foundations support the subsequent comparative analysis of how working memory is challenged, supported, and, in AI contexts, dynamically reconfigured across the two technological eras.</p>
</sec>
<sec sec-type="results" id="sec4">
<label>3</label>
<title>Results</title>
<p>The review synthesized 31 primary empirical studies examining the challenges and affordances of working memory (WM) in technology-enhanced language learning. Twenty-seven studies formed the Interactive Corpus, representing traditional computer-assisted language learning (CALL) software, multimedia platforms, hypermedia environments, captioned video, and online learning systems published between 2010 and 2024. Four studies formed the AI-Mediated Cluster, representing generative AI chatbots, biometric-adaptive reading platforms, and VR&#x2013;AR systems with large language model (LLM) integration published between 2024 and 2025. Across the corpus, research designs included randomized controlled trials, quasi-experimental studies, correlational investigations, scale development and validation work, and mixed-methods designs, conducted in classrooms, online platforms, language laboratories, and immersive virtual environments with learners of varied ages, proficiency levels, and first-language backgrounds.</p>
<p>The results are organized to foreground both the established patterns from traditional interactive environments and the emerging patterns from AI-mediated contexts. For each theme, findings from the 27 Interactive Corpus studies are presented first, followed by examination of how these patterns are transformed, extended, or problematized in the AI-Mediated Cluster and related recent reviews. <xref ref-type="table" rid="tab3">Table 3</xref> provides a study-level comparative evidence map of all included primary empirical studies (<italic>n</italic>&#x202F;=&#x202F;31) and serves as the traceable basis for the cross-era synthesis reported in this section.</p>
<table-wrap position="float" id="tab3">
<label>Table 3</label>
<caption>
<p>Comparative analysis of the 31 included studies (<italic>n</italic>&#x202F;=&#x202F;31).</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">Study (Author, year)</th>
<th align="left" valign="top">Technology type</th>
<th align="left" valign="top">Era</th>
<th align="left" valign="top">WM conceptualization</th>
<th align="left" valign="top">WM measurement approach</th>
<th align="left" valign="top">Outcome measures</th>
<th align="left" valign="top">Key findings</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="top">
<xref ref-type="bibr" rid="ref2">Aryadoust (2020)</xref>
</td>
<td align="left" valign="top">Computerized while-listening test + eye-tracking</td>
<td align="left" valign="top">Interactive</td>
<td align="left" valign="top">WM capacity as predictor of computerized listening performance</td>
<td align="left" valign="top">Indirect: eye-tracking/behavioral indicators; WM invoked as individual difference</td>
<td align="left" valign="top">Listening performance; gaze metrics; answer changes</td>
<td align="left" valign="top">Higher WM capacity aligned with more efficient processing and stronger performance.</td>
</tr>
<tr>
<td align="left" valign="top">
<xref ref-type="bibr" rid="ref4">Baralt (2015)</xref>
</td>
<td align="left" valign="top">Online language teaching/CMC with recasts</td>
<td align="left" valign="top">Interactive</td>
<td align="left" valign="top">WM capacity as moderator of feedback uptake under complexity</td>
<td align="left" valign="top">Direct: WM capacity measure (span/complex WM)</td>
<td align="left" valign="top">L2 development after recasts; interactional performance</td>
<td align="left" valign="top">WM capacity and cognitive complexity jointly shaped responsiveness to recasts.</td>
</tr>
<tr>
<td align="left" valign="top">
<xref ref-type="bibr" rid="ref5">Cevik and Altun (2016)</xref>
</td>
<td align="left" valign="top">Computer-assisted complex task with instructional strategy manipulation</td>
<td align="left" valign="top">Interactive</td>
<td align="left" valign="top">WM as resource underpinning complex task performance; strategy&#x2013;WM fit</td>
<td align="left" valign="top">Direct: WM performance measure(s)</td>
<td align="left" valign="top">Task performance (accuracy/efficiency)</td>
<td align="left" valign="top">Task outcomes varied by WM performance and instructional strategy.</td>
</tr>
<tr>
<td align="left" valign="top">
<xref ref-type="bibr" rid="ref11">Denhovska et al. (2016)</xref>
</td>
<td align="left" valign="top">Incidental L2 grammar learning under controlled input conditions</td>
<td align="left" valign="top">Interactive</td>
<td align="left" valign="top">WM as individual-difference constraint on incidental acquisition</td>
<td align="left" valign="top">Direct: WM measure used as predictor/moderator</td>
<td align="left" valign="top">Grammar learning indices</td>
<td align="left" valign="top">Higher WM supported incidental grammar acquisition under frequency exposure.</td>
</tr>
<tr>
<td align="left" valign="top">
<xref ref-type="bibr" rid="ref20">Hong et al. (2021)</xref>
</td>
<td align="left" valign="top">Game-based environment (remote association game)</td>
<td align="left" valign="top">Interactive</td>
<td align="left" valign="top">Cognitive load as WM-demand factor shaping engagement (flow)</td>
<td align="left" valign="top">Indirect: intrinsic cognitive load self-report</td>
<td align="left" valign="top">Flow; performance progress during gameplay</td>
<td align="left" valign="top">Higher intrinsic load reduced flow; gameplay interest increased flow.</td>
</tr>
<tr>
<td align="left" valign="top">
<xref ref-type="bibr" rid="ref21">Huffman and Hahn (2017)</xref>
</td>
<td align="left" valign="top">Memory-enhancement procedures in foreign language learning tasks</td>
<td align="left" valign="top">Interactive</td>
<td align="left" valign="top">WM bottlenecks addressed via encoding/memory supports</td>
<td align="left" valign="top">Not direct WM: WM discussed conceptually</td>
<td align="left" valign="top">Vocabulary/learning performance; retention</td>
<td align="left" valign="top">Optimized memory procedures improved learning outcomes consistent with reduced WM bottlenecks.</td>
</tr>
<tr>
<td align="left" valign="top">
<xref ref-type="bibr" rid="ref22">Hwang et al. (2012)</xref>
</td>
<td align="left" valign="top">Personalized educational computer game (adaptive to learning styles)</td>
<td align="left" valign="top">Interactive</td>
<td align="left" valign="top">Design intended to manage cognitive load within WM limits</td>
<td align="left" valign="top">Indirect/None: no direct WM task reported</td>
<td align="left" valign="top">Learning achievement; game-based performance</td>
<td align="left" valign="top">Personalized game design improved outcomes consistent with better cognitive fit.</td>
</tr>
<tr>
<td align="left" valign="top">
<xref ref-type="bibr" rid="ref23">Hwang et al. (2013)</xref>
</td>
<td align="left" valign="top">Inquiry-based mobile learning model</td>
<td align="left" valign="top">Interactive</td>
<td align="left" valign="top">Cognitive load as proxy for WM demand in mobile inquiry</td>
<td align="left" valign="top">Indirect: cognitive load scale</td>
<td align="left" valign="top">Learning achievement; cognitive load</td>
<td align="left" valign="top">Inquiry-based mobile model influenced achievement and cognitive load profiles.</td>
</tr>
<tr>
<td align="left" valign="top">
<xref ref-type="bibr" rid="ref25">Jiang et al. (2017)</xref>
</td>
<td align="left" valign="top">ELT multimedia courseware (multimedia learning environment)</td>
<td align="left" valign="top">Interactive</td>
<td align="left" valign="top">Multimedia design as determinant of WM load (modality/segmentation)</td>
<td align="left" valign="top">Indirect: learner-evaluated multimedia design/cognitive load indicators</td>
<td align="left" valign="top">Courseware evaluation; learning/performance indices</td>
<td align="left" valign="top">CTML-aligned courseware features were associated with better learning usability and reduced overload.</td>
</tr>
<tr>
<td align="left" valign="top">
<xref ref-type="bibr" rid="ref27">Jones et al. (2017)</xref>
</td>
<td align="left" valign="top">Smartphone-supported migrant language learning (field trial)</td>
<td align="left" valign="top">Interactive</td>
<td align="left" valign="top">Mobile micro-learning to reduce WM burden and support sustained engagement</td>
<td align="left" valign="top">Not direct WM: WM discussed conceptually</td>
<td align="left" valign="top">Engagement/use patterns; language learning outcomes (field)</td>
<td align="left" valign="top">Smartphone design supported participation; task structure mitigated cognitive burden <italic>in situ</italic>.</td>
</tr>
<tr>
<td align="left" valign="top">
<xref ref-type="bibr" rid="ref29">Kozan et al. (2015)</xref>
</td>
<td align="left" valign="top">Multimedia L2 text comprehension (input modality manipulation)</td>
<td align="left" valign="top">Interactive</td>
<td align="left" valign="top">WM capacity as moderator of modality effects in multimedia reading</td>
<td align="left" valign="top">Direct: WM capacity measure (span-based)</td>
<td align="left" valign="top">L2 text comprehension</td>
<td align="left" valign="top">Input modality effects on comprehension depended on WM capacity; optimal modality reduced overload.</td>
</tr>
<tr>
<td align="left" valign="top">
<xref ref-type="bibr" rid="ref30">Kukulska-Hulme (2019)</xref>
</td>
<td align="left" valign="top">Mobile language learning in migrant contexts</td>
<td align="left" valign="top">Interactive</td>
<td align="left" valign="top">Mobile design to manage attentional/WM demands in real-world learning</td>
<td align="left" valign="top">Not direct WM: conceptual framing</td>
<td align="left" valign="top">Design/implementation implications for mobile L2 learning</td>
<td align="left" valign="top">Highlights mobile design principles that reduce cognitive burden in authentic contexts.</td>
</tr>
<tr>
<td align="left" valign="top">
<xref ref-type="bibr" rid="ref31">Lee (2021)</xref>
</td>
<td align="left" valign="top">Multimodal multiple-document reading + automated reflection report</td>
<td align="left" valign="top">Interactive</td>
<td align="left" valign="top">Scaffolding to support WM during multimodal integration</td>
<td align="left" valign="top">Indirect: performance + process prompts; WM inferred</td>
<td align="left" valign="top">Reading/integration performance; epistemic cognition indicators</td>
<td align="left" valign="top">Epistemic prompting and automated reflection supported multimodal integration and performance.</td>
</tr>
<tr>
<td align="left" valign="top">
<xref ref-type="bibr" rid="ref33">Lin et al. (2022)</xref>
</td>
<td align="left" valign="top">Augmented-reality ubiquitous writing application (EFL)</td>
<td align="left" valign="top">Interactive</td>
<td align="left" valign="top">AR scaffolding to support WM during writing processes</td>
<td align="left" valign="top">Indirect: performance indicators; WM invoked via cognitive load rationale</td>
<td align="left" valign="top">Writing quality/performance</td>
<td align="left" valign="top">AR-supported writing improved outcomes consistent with scaffolded processing.</td>
</tr>
<tr>
<td align="left" valign="top">
<xref ref-type="bibr" rid="ref36">Makransky et al. (2016)</xref>
</td>
<td align="left" valign="top">Virtual simulation environment (immersive learning)</td>
<td align="left" valign="top">Interactive</td>
<td align="left" valign="top">Immersive simulation as source of cognitive load with potential skill gains</td>
<td align="left" valign="top">Indirect/None: WM inferred from learning performance</td>
<td align="left" valign="top">Skill learning and performance outcomes</td>
<td align="left" valign="top">Simulation-based preparation improved skill performance while highlighting cognitive demands of immersion.</td>
</tr>
<tr>
<td align="left" valign="top">
<xref ref-type="bibr" rid="ref43">Scharinger et al. (2023)</xref>
</td>
<td align="left" valign="top">Gamified n-back WM task (EEG&#x202F;+&#x202F;eye-tracking)</td>
<td align="left" valign="top">Interactive</td>
<td align="left" valign="top">WM training/performance under gamification; attentional control</td>
<td align="left" valign="top">Direct: n-back performance; physiological/eye-tracking indices</td>
<td align="left" valign="top">n-back accuracy/RT; EEG; eye-tracking</td>
<td align="left" valign="top">Gamification altered engagement and neurocognitive markers with mixed implications for WM training efficiency.</td>
</tr>
<tr>
<td align="left" valign="top">
<xref ref-type="bibr" rid="ref44">Schumacher and Ifenthaler (2018)</xref>
</td>
<td align="left" valign="top">Learning analytics-informed design (higher education)</td>
<td align="left" valign="top">Interactive</td>
<td align="left" valign="top">Motivational dispositions as design inputs to optimize cognitive processing</td>
<td align="left" valign="top">Not direct WM: conceptual/empirical analytics framing</td>
<td align="left" valign="top">Learner dispositions; analytics-informed design implications</td>
<td align="left" valign="top">Motivational profiles inform analytics-driven personalization to support effective cognitive allocation.</td>
</tr>
<tr>
<td align="left" valign="top">
<xref ref-type="bibr" rid="ref45">Silva Barbosa et al. (2023)</xref>
</td>
<td align="left" valign="top">Gamified logic tutoring system (stereotyped gamification)</td>
<td align="left" valign="top">Interactive</td>
<td align="left" valign="top">Design features as determinants of cognitive-affective load</td>
<td align="left" valign="top">Indirect: affective/behavioral outcomes</td>
<td align="left" valign="top">Negative thinking; learning/performance in tutoring</td>
<td align="left" valign="top">Stereotyped gamification increased negative thinking, implying added non-instructional load.</td>
</tr>
<tr>
<td align="left" valign="top">
<xref ref-type="bibr" rid="ref51">Teng (2019)</xref>
</td>
<td align="left" valign="top">Captioned videos for ESL comprehension</td>
<td align="left" valign="top">Interactive</td>
<td align="left" valign="top">WM capacity as moderator of caption benefits</td>
<td align="left" valign="top">Direct: WM measure (span task)</td>
<td align="left" valign="top">Video comprehension</td>
<td align="left" valign="top">Captions supported comprehension with stronger benefits under adequate WM resources.</td>
</tr>
<tr>
<td align="left" valign="top">
<xref ref-type="bibr" rid="ref52">Teng (2023)</xref>
</td>
<td align="left" valign="top">Captioned video for incidental vocabulary learning</td>
<td align="left" valign="top">Interactive</td>
<td align="left" valign="top">WM capacity as predictor/moderator of incidental vocabulary gains</td>
<td align="left" valign="top">Direct: WM measure (span task)</td>
<td align="left" valign="top">Incidental vocabulary learning/retention</td>
<td align="left" valign="top">WM contributed to vocabulary learning and retention from captioned input.</td>
</tr>
<tr>
<td align="left" valign="top">
<xref ref-type="bibr" rid="ref53">Teng (2024)</xref>
</td>
<td align="left" valign="top">Captioned video genres (incidental vocabulary)</td>
<td align="left" valign="top">Interactive</td>
<td align="left" valign="top">WM influences learning directly and indirectly via proficiency</td>
<td align="left" valign="top">Direct: Reading Span Task; proficiency as mediator</td>
<td align="left" valign="top">Vocabulary gains; proficiency</td>
<td align="left" valign="top">WM predicted proficiency, which in turn predicted vocabulary gains across video genres.</td>
</tr>
<tr>
<td align="left" valign="top">
<xref ref-type="bibr" rid="ref54">T&#x00FC;rk and Er&#x00E7;etin (2014)</xref>
</td>
<td align="left" valign="top">Multimedia glosses (interactive vs. simultaneous) in L2 reading</td>
<td align="left" valign="top">Interactive</td>
<td align="left" valign="top">Presentation format to align demands with WM limits</td>
<td align="left" valign="top">Indirect/Direct: WM capacity considered in interpretation; gloss design reduces split attention</td>
<td align="left" valign="top">Reading comprehension; incidental vocabulary learning</td>
<td align="left" valign="top">Interactive gloss display improved comprehension and vocabulary relative to simultaneous display.</td>
</tr>
<tr>
<td align="left" valign="top">
<xref ref-type="bibr" rid="ref57">Varol and Er&#x00E7;etin (2021)</xref>
</td>
<td align="left" valign="top">Electronic reading with gloss type/position manipulation</td>
<td align="left" valign="top">Interactive</td>
<td align="left" valign="top">WM capacity moderates effectiveness of gloss designs</td>
<td align="left" valign="top">Direct: WM capacity measure</td>
<td align="left" valign="top">L2 reading comprehension</td>
<td align="left" valign="top">Gloss type and position interacted with WM capacity to influence comprehension.</td>
</tr>
<tr>
<td align="left" valign="top">
<xref ref-type="bibr" rid="ref58">Weissheimer et al. (2019)</xref>
</td>
<td align="left" valign="top">Gamified L2 vocabulary learning (Vocabox)</td>
<td align="left" valign="top">Interactive</td>
<td align="left" valign="top">Engagement and cognitive demands in gamified vocabulary learning</td>
<td align="left" valign="top">Not direct WM: WM invoked conceptually</td>
<td align="left" valign="top">Vocabulary learning outcomes</td>
<td align="left" valign="top">Gamification supported vocabulary learning; design choices implicated cognitive load management.</td>
</tr>
<tr>
<td align="left" valign="top">
<xref ref-type="bibr" rid="ref61">Xu and Xia (2021)</xref>
</td>
<td align="left" valign="top">Computer keystroke logging + process graphs in L2 writing</td>
<td align="left" valign="top">Interactive</td>
<td align="left" valign="top">Process scaffolding to reduce WM burden in writing development</td>
<td align="left" valign="top">Indirect: keystroke/process indicators</td>
<td align="left" valign="top">Writing process metrics; writing development outcomes</td>
<td align="left" valign="top">Process scaffolding supported writing development and clarified WM-intensive subprocesses.</td>
</tr>
<tr>
<td align="left" valign="top">
<xref ref-type="bibr" rid="ref63">Yang et al. (2019)</xref>
</td>
<td align="left" valign="top">ANN-based computational modeling of cognitive abilities for English acquisition</td>
<td align="left" valign="top">Interactive</td>
<td align="left" valign="top">WM-related cognitive abilities as predictors in individualized modeling</td>
<td align="left" valign="top">Indirect: modeled cognitive abilities (including WM-related inputs)</td>
<td align="left" valign="top">Predicted acquisition/performance indices</td>
<td align="left" valign="top">Modeling captured individual cognitive profiles relevant to acquisition, supporting personalization logic.</td>
</tr>
<tr>
<td align="left" valign="top">
<xref ref-type="bibr" rid="ref67">Zhonggen et al. (2019)</xref>
</td>
<td align="left" valign="top">Mobile learning platform</td>
<td align="left" valign="top">Interactive</td>
<td align="left" valign="top">Cognitive load as proxy for WM demand in mobile learning</td>
<td align="left" valign="top">Indirect: cognitive load scale</td>
<td align="left" valign="top">Satisfaction; learning outcomes; cognitive load</td>
<td align="left" valign="top">Platform design influenced satisfaction and learning with measurable cognitive load differences.</td>
</tr>
<tr>
<td align="left" valign="top">
<xref ref-type="bibr" rid="ref13">Esfandiari and Allaf-Akbary (2024)</xref>
</td>
<td align="left" valign="top">AI-mediated intelligent data-driven learning (Microsoft Copilot) vs. AntConc vs. control</td>
<td align="left" valign="top">AI-mediated</td>
<td align="left" valign="top">AI offloads search burden; frees WM for noticing/internalization</td>
<td align="left" valign="top">Indirect: performance outcomes; WM inferred (no direct WM task)</td>
<td align="left" valign="top">EFL writing (interactional metadiscourse markers)</td>
<td align="left" valign="top">AI-mediated DDL produced very large writing gains relative to traditional DDL and control.</td>
</tr>
<tr>
<td align="left" valign="top">
<xref ref-type="bibr" rid="ref14">Fan and Yao (2025)</xref>
</td>
<td align="left" valign="top">Generative AI-assisted L2 writing (DeepSeek)&#x202F;+&#x202F;scale development</td>
<td align="left" valign="top">AI-mediated</td>
<td align="left" valign="top">Load redistribution from encoding to evaluation/management (central executive)</td>
<td align="left" valign="top">Indirect: validated cognitive load scale (CL-AI-L2W)</td>
<td align="left" valign="top">Cognitive load subscales; writing-related process demands</td>
<td align="left" valign="top">Critical evaluation, prompt management, and synthesis loads exceeded encoding load; robust factor structure.</td>
</tr>
<tr>
<td align="left" valign="top">
<xref ref-type="bibr" rid="ref64">Yu (2025)</xref>
</td>
<td align="left" valign="top">AI-driven VR-AR multimodal vocabulary instruction (ChatGPT-4 integration)</td>
<td align="left" valign="top">AI-mediated</td>
<td align="left" valign="top">AI enhances learning within channel-capacity limits; overload boundary conditions</td>
<td align="left" valign="top">Indirect: cognitive load/overload surveys; process-performance linkage</td>
<td align="left" valign="top">Vocabulary posttest/delayed posttest; overload indices</td>
<td align="left" valign="top">Large vocabulary gains with inverted-U channel effect; overload increased beyond 3&#x2013;4 channels.</td>
</tr>
<tr>
<td align="left" valign="top">
<xref ref-type="bibr" rid="ref65">Yuan (2025)</xref>
</td>
<td align="left" valign="top">Biometric-adaptive AI reading platform (Smart Sparrow + eye-tracking)</td>
<td align="left" valign="top">AI-mediated</td>
<td align="left" valign="top">Preemptive regulation of WM demands via biometric-based adaptation</td>
<td align="left" valign="top">Indirect: Differentiated Cognitive Load Questionnaire + eye-tracking indicators</td>
<td align="left" valign="top">Reading comprehension; cognitive load; eye-tracking metrics</td>
<td align="left" valign="top">Adaptive biometric regulation reduced reported load and improved comprehension relative to non-adaptive control.</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<p>This evidence-mapping table codes each primary empirical study by technology type, era (Interactive vs. AI-mediated), working-memory (WM) conceptualization, WM measurement approach (Direct&#x202F;=&#x202F;psychometric WM task; Indirect&#x202F;=&#x202F;cognitive-load (CL) scale/process proxy; None&#x202F;=&#x202F;WM invoked without an assessment), outcome measures, and WM-relevant findings. AI, artificial intelligence; ANN, artificial neural network; AR, augmented reality; CALL, computer-assisted language learning; CL, cognitive load; CL-AI-L2W, Cognitive Load Scale for AI-Assisted L2 Writing; CMC, computer-mediated communication; CTML, cognitive theory of multimedia learning; DDL, data-driven learning; EEG, electroencephalography; EFL, English as a foreign language; ESL, English as a second language; L2, second language; VR, virtual reality; VR-AR, virtual/augmented reality; WM, working memory.</p>
</table-wrap-foot>
</table-wrap>
<p>This table evidence-maps all included primary empirical studies using consistent coding rules to support transparent cross-era comparison. &#x201C;WM measurement approach&#x201D; is coded as Direct (psychometric WM task), Indirect (proxy indicators such as cognitive load scales and/or process measures interpreted as WM demand), or None (WM invoked conceptually without an empirical WM/CL operationalization). WM&#x202F;=&#x202F;working memory; AI&#x202F;=&#x202F;artificial intelligence.</p>
<sec id="sec5">
<label>3.1</label>
<title>Challenges</title>
<p>Across the corpus, WM-related challenges manifested in both traditional and AI-mediated environments, but with distinct profiles in each technological era.</p>
<p>In the Interactive Corpus, the most frequently documented challenge was cognitive load and information processing. Interactive environments routinely presented multiple streams of information&#x2014;text, audio, images, animations, hyperlinks&#x2014;often simultaneously (<xref ref-type="bibr" rid="ref35">Lusk et al., 2009</xref>). When sequencing was suboptimal or scaffolding insufficient, these conditions exceeded learners&#x2019; limited WM capacity, resulting in split attention, disorientation, and reduced learning outcomes (<xref ref-type="bibr" rid="ref29">Kozan et al., 2015</xref>). Learners with lower WM capacity were particularly disadvantaged under dense multimedia conditions, confirming core predictions of Cognitive Load Theory that learning deteriorates when intrinsic and extraneous load together exceed available resources.</p>
<p>Distractions and interruptions constituted a second pervasive challenge. Online and multimedia platforms made task-switching effortless, inviting off-task browsing, rapid window-shifting, and multitasking (<xref ref-type="bibr" rid="ref27">Jones et al., 2017</xref>; <xref ref-type="bibr" rid="ref30">Kukulska-Hulme, 2019</xref>). From an attentional control perspective, these environments diluted the sustained focus required for WM-dependent processing: learners who frequently shifted attention exhibited weaker retention and less accurate performance on comprehension and production tasks.</p>
<p>A third pattern concerned individual differences in WM capacity. Studies repeatedly showed that learners varied markedly in WM due to age, cognitive abilities, and prior language experience (<xref ref-type="bibr" rid="ref11">Denhovska et al., 2016</xref>). In line with the working memory model and related empirical work (<xref ref-type="bibr" rid="ref5">Cevik and Altun, 2016</xref>; <xref ref-type="bibr" rid="ref55">Unsworth and Engle, 2007</xref>; <xref ref-type="bibr" rid="ref57">Varol and Er&#x00E7;etin, 2021</xref>; <xref ref-type="bibr" rid="ref58">Weissheimer et al., 2019</xref>), individuals with higher WM capacity were more likely to benefit from complex hypermedia tasks, interactive glosses, and self-paced online activities, while lower-WM learners showed steeper performance decrements as task complexity and information density increased.</p>
<p>Language proficiency and task complexity interacted systematically with WM. When linguistic materials involved dense vocabulary, complex morphosyntax, or rapid input, lower-proficiency learners&#x2019; WM capacity was quickly saturated, particularly in multimedia and hyperlinked environments (<xref ref-type="bibr" rid="ref36">Makransky et al., 2016</xref>). Intrinsic cognitive load rose with linguistic complexity; in the absence of graded scaffolding, this produced floor effects for lower-proficiency learners and widened performance gaps between higher- and lower-WM profiles.</p>
<p>The corpus also documented technical difficulties and glitches as nontrivial WM stressors. Slow connections, system crashes, and unintuitive interfaces introduced extraneous cognitive load, diverting WM resources away from linguistic processing and toward troubleshooting (<xref ref-type="bibr" rid="ref18">Golonka et al., 2014</xref>; <xref ref-type="bibr" rid="ref23">Hwang et al., 2013</xref>; <xref ref-type="bibr" rid="ref50">Sweller et al., 2019</xref>). Learners repeatedly reported frustration and demotivation when technical issues co-occurred with demanding tasks.</p>
<p>Finally, anxiety and stress were shown to modulate WM functioning in technology-rich tasks. High-stakes computer-based tests, unfamiliar platforms, and visible performance metrics sometimes increased anxiety, which in turn impaired WM and task performance (<xref ref-type="bibr" rid="ref12">D&#x00F6;rnyei and Ushioda, 2021</xref>). Consistent with the Yerkes&#x2013;Dodson law, moderate arousal occasionally enhanced engagement, but sustained high stress undermined WM-dependent comprehension, production, and problem solving (<xref ref-type="bibr" rid="ref24">Ionescu and Vasc, 2014</xref>).</p>
<p>The AI-Mediated Cluster preserves many of these challenges but adds new layers of complexity. One important continuity&#x2014;with added precision&#x2014;is cognitive overload in multimodal environments. Whereas traditional studies treated overload as a diffuse risk in information-rich contexts, AI-mediated research begins to specify its quantitative boundary conditions. In a randomized controlled trial with 383 Chinese EFL learners, <xref ref-type="bibr" rid="ref64">Yu (2025)</xref> compared AI-driven VR&#x2013;AR multimodal vocabulary instruction (Unity, Oculus Quest 2, Google MediaPipe plus ChatGPT-4) to non-AI conditions. The AI multimodal group obtained much higher vocabulary scores than the control group (posttest M&#x202F;=&#x202F;137.00 vs. 76.05; Hedges&#x2019; g&#x202F;=&#x202F;1.24) and maintained substantial gains at delayed posttest (M&#x202F;=&#x202F;129.00). However, 61% of learners in the AI multimodal condition reported sensory overload, and overload scores were inversely correlated with retention (<italic>&#x03B2;</italic>&#x202F;=&#x202F;&#x2212;0.53, <italic>p</italic>&#x202F;=&#x202F;0.003). An inverted-U relationship emerged, with optimal performance at approximately three to four concurrent information channels (R<sup>2</sup>&#x202F;=&#x202F;0.41), a pattern consistent with capacity estimates in short-term memory research (<xref ref-type="bibr" rid="ref10">Cowan, 2001</xref>); beyond this threshold, WM appeared overwhelmed despite AI&#x2019;s adaptive capabilities. This pattern empirically confirms channel limits that earlier CALL research had mainly theorized.</p>
<p>More fundamentally, AI-mediated environments alter not only the amount but also the distribution of cognitive load. Whereas traditional designs sought to reduce total load by optimizing sequencing and modality, AI systems often redistribute load across WM subsystems. <xref ref-type="bibr" rid="ref14">Fan and Yao (2025)</xref> developed and validated the Cognitive Load Scale for AI-Assisted L2 Writing (CL-AI-L2W; <italic>&#x03B1;</italic>&#x202F;=&#x202F;0.94) with 546 Chinese university EFL learners using the generative AI chatbot DeepSeek V3.1 for argumentative writing. Their four-factor model showed that Authorial Core Processing&#x2014;lexical retrieval and grammatical encoding&#x2014;had the lowest mean load (M&#x202F;=&#x202F;3.48 on a 7-point scale), whereas Critical Evaluation of AI-generated content had the highest (M&#x202F;=&#x202F;4.81), followed by Prompt Management (M&#x202F;=&#x202F;4.55) and Integrative Synthesis (M&#x202F;=&#x202F;4.40), with excellent fit indices (CFI&#x202F;=&#x202F;0.97, RMSEA&#x202F;=&#x202F;0.059). In contrast to the Interactive Corpus, where WM was primarily taxed at the level of input processing (phonological loop and visuospatial sketchpad), AI-assisted writing tasks shifted the main burden to central-executive functions&#x2014;monitoring, verification, and integrative reasoning. Learners lacking metacognitive or evaluative strategies may thus experience AI support as cognitively more demanding, even when encoding load is reduced (<xref ref-type="bibr" rid="ref32">Li, 2023</xref>).</p>
<p>A cross-cutting challenge that became particularly salient in the AI-Mediated Cluster is the empirical measurement gap. Despite six decades of L2 aptitude research establishing WM as a key predictor, AI studies rarely measure WM directly. <xref ref-type="bibr" rid="ref6">Chalmers et al. (2021)</xref> reported that WM was among the most frequently examined cognitive predictors (8.1% of independent variables) in 111 aptitude studies, yet they identified no AI-mediated interventions explicitly targeting or measuring WM. <xref ref-type="bibr" rid="ref66">Zhang and Aubrey (2024)</xref> found only one study including WM as an individual-difference factor across the entire L2 pragmatics literature, with no AI-mediated pragmatics studies assessing WM. <xref ref-type="bibr" rid="ref17">Goh and Aryadoust (2025)</xref> showed that research on intelligent personal assistants and generative AI for L2 listening and speaking almost never incorporates WM tasks or aptitude&#x2013;treatment interaction analyses. <xref ref-type="bibr" rid="ref60">Xu (2025)</xref> similarly noted that intelligent personal assistants are widely theorized to engage WM through real-time conversational practice, but direct empirical evidence of AI-mediated WM outcomes is lacking. Even in the AI-mediated primary studies with the strongest cognitive focus&#x2014;<xref ref-type="bibr" rid="ref65">Yuan (2025)</xref>, <xref ref-type="bibr" rid="ref14">Fan and Yao (2025)</xref>, and <xref ref-type="bibr" rid="ref64">Yu (2025)</xref>&#x2014;WM remains an inferred construct; cognitive load scales (Differentiated Cognitive Load Questionnaire, CL-AI-L2W, sensory overload surveys) and process indicators (eye-tracking) are used as proxies for WM engagement. By contrast, several Interactive Corpus studies employed direct WM assessments (e.g., digit spans, reading span tasks). 
As a result, just as AI research invokes WM and cognitive load more explicitly, it paradoxically measures WM less directly, creating a causal gap: it remains unclear whether AI reduces task demands, enhances functional use of existing WM, or changes WM capacity itself.</p>
<p>Domain-specific evidence further sharpens this picture. <xref ref-type="bibr" rid="ref59">Xiao et al. (2025)</xref> compared high- and low-achieving L2 Chinese learners (<italic>N</italic>&#x202F;=&#x202F;64) in an intensive immersion program and found significant group differences in forward digit span (M&#x202F;=&#x202F;5.94 vs. 4.80, <italic>p</italic>&#x202F;&#x003C;&#x202F;0.05) and backward digit span (M&#x202F;=&#x202F;5.26 vs. 4.03, <italic>p</italic>&#x202F;&#x003C;&#x202F;0.05), with backward span showing the strongest correlation with achievement (r&#x202F;=&#x202F;0.44, <italic>p</italic>&#x202F;&#x003C;&#x202F;0.01). Spatial WM showed no group differences. These findings underscore that verbal rather than spatial WM is most critical for L2 success, a nuance particularly relevant for AI designs that might otherwise target generic &#x201C;cognitive load reduction&#x201D; rather than specific verbal and phonological processes.</p>
</sec>
<sec id="sec6">
<label>3.2</label>
<title>Affordances</title>
<p>Alongside these challenges, the corpus highlighted a rich set of WM-related affordances that interactive and AI-mediated environments can offer when appropriately designed.</p>
<p>In the Interactive Corpus, well-designed interactive and multimodal features supported richer encoding by providing complementary visual and auditory cues, thereby reducing reliance on a single channel and enabling more robust mental representations. Multimedia glosses, captioned video, and synchronized audio-text presentations were particularly effective when they minimized split attention and coordinated information presentation (<xref ref-type="bibr" rid="ref51">Teng, 2019</xref>, <xref ref-type="bibr" rid="ref52">2023</xref>).</p>
<p>Adaptive and personalized feedback emerged as another important affordance. Even relatively simple pre-AI adaptive mechanisms&#x2014;such as choice of difficulty level, branching based on performance, or targeted hints&#x2014;helped learners align task demands with their WM capacity and fostered a sense of competence and autonomy (<xref ref-type="bibr" rid="ref44">Schumacher and Ifenthaler, 2018</xref>).</p>
<p>Interactive environments also provided substantial opportunities for collaborative and social learning, enabling learners to share the cognitive burden of complex tasks through peer explanation, joint problem solving, and collaborative writing or reading (<xref ref-type="bibr" rid="ref1">Apps et al., 2019</xref>; <xref ref-type="bibr" rid="ref26">Johnson et al., 2014</xref>). In these contexts, WM demands could be distributed across group members, mitigating individual capacity limitations.</p>
<p>Multiple studies indicated that thoughtfully designed environments enhanced attention and engagement via gamification and motivational elements, which protected WM by reducing off-task behavior and supporting sustained focus (<xref ref-type="bibr" rid="ref7">Chan et al., 2022</xref>; <xref ref-type="bibr" rid="ref40">Parmaxi and Zaphiris, 2017</xref>). Similarly, tasks that explicitly activated prior knowledge and schema facilitated chunking and integration of new information, thereby improving WM efficiency (<xref ref-type="bibr" rid="ref21">Huffman and Hahn, 2017</xref>; <xref ref-type="bibr" rid="ref33">Lin et al., 2022</xref>).</p>
<p>Finally, flexible and adaptive learning opportunities&#x2014;such as self-paced modules, on-demand replay, and ubiquitous access via mobile devices&#x2014;allowed learners to regulate their own exposure and rehearsal schedules, reducing time pressure and enabling more strategic use of WM resources (<xref ref-type="bibr" rid="ref22">Hwang et al., 2012</xref>; <xref ref-type="bibr" rid="ref67">Zhonggen et al., 2019</xref>).</p>
<p>The AI-Mediated Cluster builds on these affordances and introduces qualitatively new possibilities. One of the most significant innovations is dynamic biometric-based adaptation. Traditional systems adjusted difficulty primarily after learners&#x2019; correctness or response time indicated struggle. In contrast, <xref ref-type="bibr" rid="ref65">Yuan (2025)</xref> implemented an AI-enhanced adaptive reading platform (Smart Sparrow) that used eye-tracking (Tobii Pro X3-120) and physiological monitoring to adjust text difficulty for 300 Chinese EFL learners over 12&#x202F;weeks. The AI-adaptive group outperformed a non-adaptive online reading control on PET reading comprehension (posttest M&#x202F;=&#x202F;82.6 vs. 70.2; <italic>F</italic>(1, 298)&#x202F;=&#x202F;118.34, <italic>p</italic>&#x202F;&#x003C;&#x202F;0.001, &#x03B7;<sup>2</sup>&#x202F;=&#x202F;0.28) and reported significantly lower overall cognitive load on the Differentiated Cognitive Load Questionnaire (M&#x202F;=&#x202F;3.1 vs. 4.0; <italic>F</italic>(1, 298)&#x202F;=&#x202F;61.45, <italic>p</italic>&#x202F;&#x003C;&#x202F;0.001, &#x03B7;<sup>2</sup>&#x202F;=&#x202F;0.17). Eye-tracking indices&#x2014;shorter fixation durations (250&#x202F;ms vs. 285&#x202F;ms), fewer regressions per 100 words (4.5 vs. 6.5), and longer saccades (8.2 vs. 7.1 characters)&#x2014;indicated more efficient processing. In effect, the platform operated as an external regulator of WM demands, adjusting difficulty during processing rather than waiting until errors accumulated.</p>
<p>Meta-analytic evidence suggests a further affordance in the form of compensation for developmental WM limitations. Synthesizing 15 AI-assisted L2 learning studies (N&#x202F;=&#x202F;2,156), <xref ref-type="bibr" rid="ref62">Xu et al. (2025)</xref> reported a large overall effect of AI-assisted instruction (g&#x202F;=&#x202F;1.167), with especially strong effects for vocabulary (d&#x202F;=&#x202F;2.210) and receptive skills (d&#x202F;=&#x202F;2.011). Notably, K&#x2013;12 learners showed descriptively larger gains (d&#x202F;=&#x202F;1.445, 95% CI [1.377, 1.512]) than college students (d&#x202F;=&#x202F;0.988, 95% CI [0.934, 1.042]), a pattern consistent with the idea that AI&#x2019;s adaptive pacing and scaffolding can compensate for still-developing WM. Although this difference did not reach conventional significance (<italic>p</italic>&#x202F;=&#x202F;0.066), it suggests that AI may reduce performance gaps associated with lower WM, aligning with theoretical calls for AI-enabled Aptitude&#x2013;Treatment Interaction designs (<xref ref-type="bibr" rid="ref42">Sana and Fenesi, 2025</xref>).</p>
<p>AI systems also enable scaffolded noticing through AI-mediated data-driven learning (DDL). Traditional DDL requires learners to manually query corpora, scan concordance lines, and infer patterns, all of which impose heavy WM demands. <xref ref-type="bibr" rid="ref13">Esfandiari and Allaf-Akbary (2024)</xref> compared three conditions for 93 advanced Iranian EFL learners: AI-mediated intelligent DDL using Microsoft Copilot (GPT-4-based), traditional corpus-based DDL using AntConc, and conventional instruction. The AI-mediated group dramatically outperformed both comparison groups on the realization of interactional metadiscourse markers in writing (posttest M&#x202F;=&#x202F;16.16 vs. 12.00 vs. 11.38). ANCOVA revealed a very large group effect (<italic>F</italic>(2, 92)&#x202F;=&#x202F;168.04, <italic>p</italic>&#x202F;&#x003C;&#x202F;0.001, partial &#x03B7;<sup>2</sup>&#x202F;&#x2248;&#x202F;0.79), with Hedges&#x2019; g&#x202F;&#x2248;&#x202F;3.39 for AI versus control. While WM was not directly measured, the authors argued&#x2014;drawing on Schmidt&#x2019;s noticing hypothesis&#x2014;that Copilot&#x2019;s ability to locate, cluster, and explain usage patterns reduced the WM-intensive search burden inherent in manual corpus exploration, freeing WM resources for higher-level noticing and abstraction. This type of AI-facilitated pattern highlighting was not available in the traditional CALL tools represented in the Interactive Corpus.</p>
<p>AI-driven multimodality also appears to refine the affordances of multimedia input (<xref ref-type="bibr" rid="ref9">Cosentino and Giannakos, 2023</xref>). <xref ref-type="bibr" rid="ref64">Yu&#x2019;s (2025)</xref> VR&#x2013;AR study suggested that when channel complexity is held within the empirically defined range of three to four concurrent sources, dual-channel processing is optimized and retention improves by roughly one quarter relative to less complex conditions. At the same time, individual differences in sensory processing moderated the benefits of multimodal AI environments (<italic>&#x03B2;</italic>&#x202F;=&#x202F;0.31, <italic>p</italic>&#x202F;=&#x202F;0.007), indicating that optimal multimodal orchestration must be tuned not only to general WM limits but also to learner-specific processing profiles.</p>
<p>Findings from AI-adjacent multimedia research clarify indirect WM pathways. <xref ref-type="bibr" rid="ref53">Teng (2024)</xref>, in a captioned video study without AI, found that WM (Reading Span Task) did not directly predict incidental vocabulary learning but significantly predicted English proficiency (<italic>&#x03B3;</italic>&#x202F;=&#x202F;0.372, <italic>p</italic>&#x202F;=&#x202F;0.002), which in turn predicted vocabulary gains for comedy and educational video genres (<italic>&#x03B2;</italic>&#x202F;&#x2248;&#x202F;0.544&#x2013;0.551). This mediation pattern suggests that WM may support technology-mediated learning indirectly by supporting proficiency development, which then facilitates efficient processing of rich input. For AI design, this implies that systems that accelerate proficiency may indirectly enhance the functional contribution of WM, even if WM is not targeted as an outcome.</p>
<p>Taken together, the corpus portrays a transition from environments that mainly test the limits of WM (Interactive Corpus) to systems that can regulate, redistribute, and sometimes compensate for WM demands (AI-Mediated Cluster), albeit with important unresolved questions about how WM itself is affected (<xref ref-type="table" rid="tab4">Table 4</xref>).</p>
<table-wrap position="float" id="tab4">
<label>Table 4</label>
<caption>
<p>Affordances identified across technological eras.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">Affordance</th>
<th align="left" valign="top">Operational definition/description</th>
<th align="left" valign="top">Evidence in interactive corpus (Study IDs)</th>
<th align="left" valign="top">Evidence in AI-mediated cluster (Study IDs)</th>
<th align="left" valign="top">Representative studies</th>
<th align="left" valign="top">Notes on mechanisms relevant to WM/cognitive load</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="middle">Multimodal support</td>
<td align="left" valign="middle">Provision of complementary visual and auditory cues that enable dual-channel encoding, reducing reliance on a single WM subsystem</td>
<td align="left" valign="middle"><xref ref-type="bibr" rid="ref51">Teng (2019</xref>, <xref ref-type="bibr" rid="ref52">2023)</xref>; <xref ref-type="bibr" rid="ref29">Kozan et al. (2015)</xref>; <xref ref-type="bibr" rid="ref35">Lusk et al. (2009)</xref>; <xref ref-type="bibr" rid="ref54">T&#x00FC;rk and Er&#x00E7;etin (2014)</xref>; <xref ref-type="bibr" rid="ref57">Varol and Er&#x00E7;etin (2021)</xref></td>
<td align="left" valign="middle"><xref ref-type="bibr" rid="ref64">Yu (2025)</xref>&#x2014;AI-orchestrated VR&#x2013;AR with empirically bounded channel limits (3&#x2013;4 sources optimal)</td>
<td align="left" valign="middle"><xref ref-type="bibr" rid="ref52">Teng (2023)</xref>; <xref ref-type="bibr" rid="ref64">Yu (2025)</xref></td>
<td align="left" valign="middle">Engages phonological loop and visuospatial sketchpad in parallel; reduces split attention when modalities are coordinated; AI enables dynamic channel orchestration within empirically defined limits</td>
</tr>
<tr>
<td align="left" valign="middle">Adaptive feedback</td>
<td align="left" valign="middle">System-provided feedback that adjusts difficulty, pacing, or scaffolding based on learner performance or state</td>
<td align="left" valign="middle"><xref ref-type="bibr" rid="ref44">Schumacher and Ifenthaler (2018)</xref>; <xref ref-type="bibr" rid="ref22">Hwang et al. (2012)</xref>; <xref ref-type="bibr" rid="ref20">Hong et al. (2021)</xref></td>
<td align="left" valign="middle"><xref ref-type="bibr" rid="ref65">Yuan (2025)</xref>&#x2014;biometric-adaptive platform using eye-tracking and physiological monitoring for preemptive regulation</td>
<td align="left" valign="middle"><xref ref-type="bibr" rid="ref65">Yuan (2025)</xref>; <xref ref-type="bibr" rid="ref44">Schumacher and Ifenthaler (2018)</xref></td>
<td align="left" valign="middle">Aligns task demands with available WM capacity; AI extends from reactive (post-error) to preemptive (during-processing) regulation; reduces extraneous load by preventing overload before errors accumulate</td>
</tr>
<tr>
<td align="left" valign="middle">Collaborative/social learning</td>
<td align="left" valign="middle">Opportunities for peer interaction that distribute cognitive burden across group members</td>
<td align="left" valign="middle"><xref ref-type="bibr" rid="ref1">Apps et al. (2019)</xref>; <xref ref-type="bibr" rid="ref26">Johnson et al. (2014)</xref></td>
<td align="left" valign="middle"><italic>Evidence gap</italic>: No AI-mediated studies in the cluster examined AI-facilitated collaboration with WM/cognitive load measures</td>
<td align="left" valign="middle">
<xref ref-type="bibr" rid="ref1">Apps et al. (2019)</xref>
</td>
<td align="left" valign="middle">Distributes WM demands across participants; enables joint problem-solving and shared explanation; AI potential for intelligent grouping and collaborative scaffolding remains untested</td>
</tr>
<tr>
<td align="left" valign="middle">Gamification/engagement</td>
<td align="left" valign="middle">Game-like elements (points, levels, challenges) that sustain attention and reduce off-task behavior</td>
<td align="left" valign="middle"><xref ref-type="bibr" rid="ref7">Chan et al. (2022)</xref>; <xref ref-type="bibr" rid="ref40">Parmaxi and Zaphiris (2017)</xref>; <xref ref-type="bibr" rid="ref58">Weissheimer et al. (2019)</xref>; <xref ref-type="bibr" rid="ref20">Hong et al. (2021)</xref></td>
<td align="left" valign="middle"><italic>Evidence gap</italic>: AI-mediated cluster did not include gamified AI interventions with WM/cognitive load assessment</td>
<td align="left" valign="middle"><xref ref-type="bibr" rid="ref7">Chan et al. (2022)</xref>; <xref ref-type="bibr" rid="ref20">Hong et al. (2021)</xref></td>
<td align="left" valign="middle">Protects WM by reducing distraction and supporting sustained focus; moderate arousal enhances engagement without exceeding WM capacity; AI-adaptive gamification with WM measures is an untested design space</td>
</tr>
<tr>
<td align="left" valign="middle">Schema/prior knowledge activation</td>
<td align="left" valign="middle">Instructional features that explicitly connect new information to existing knowledge structures</td>
<td align="left" valign="middle"><xref ref-type="bibr" rid="ref21">Huffman and Hahn (2017)</xref>; <xref ref-type="bibr" rid="ref33">Lin et al. (2022)</xref></td>
<td align="left" valign="middle"><italic>Evidence gap</italic>: AI-mediated studies did not explicitly examine schema activation mechanisms</td>
<td align="left" valign="middle">
<xref ref-type="bibr" rid="ref21">Huffman and Hahn (2017)</xref>
</td>
<td align="left" valign="middle">Facilitates chunking and integration, improving WM efficiency; reduces intrinsic load by leveraging long-term memory; AI potential for personalized schema priming based on learner knowledge profiles is unexplored</td>
</tr>
<tr>
<td align="left" valign="middle">Flexible/self-paced learning</td>
<td align="left" valign="middle">Learner control over pacing, replay, and access timing that enables strategic WM resource management</td>
<td align="left" valign="middle"><xref ref-type="bibr" rid="ref22">Hwang et al. (2012)</xref>; <xref ref-type="bibr" rid="ref67">Zhonggen et al. (2019)</xref>; <xref ref-type="bibr" rid="ref30">Kukulska-Hulme (2019)</xref></td>
<td align="left" valign="middle"><xref ref-type="bibr" rid="ref65">Yuan (2025)</xref>&#x2014;AI dynamically adjusts pacing based on real-time biometric indicators</td>
<td align="left" valign="middle"><xref ref-type="bibr" rid="ref65">Yuan (2025)</xref>; <xref ref-type="bibr" rid="ref67">Zhonggen et al. (2019)</xref></td>
<td align="left" valign="middle">Reduces time pressure; allows rehearsal and consolidation; AI transforms flexibility from learner-controlled to system-optimized based on cognitive state indicators</td>
</tr>
<tr>
<td align="left" valign="middle">Biometric-based adaptation</td>
<td align="left" valign="middle">Real-time adjustment of task parameters based on physiological indicators (eye-tracking, arousal)</td>
<td align="left" valign="middle"><italic>Not present</italic>: Pre-AI systems lacked biometric integration for WM-relevant adaptation</td>
<td align="left" valign="middle"><xref ref-type="bibr" rid="ref65">Yuan (2025)</xref>&#x2014;eye-tracking indices (fixation duration, regressions, saccade length) used to regulate text difficulty</td>
<td align="left" valign="middle">
<xref ref-type="bibr" rid="ref65">Yuan (2025)</xref>
</td>
<td align="left" valign="middle">Enables preemptive load regulation during processing; shorter fixations and fewer regressions indicate more efficient WM utilization; represents qualitative shift from reactive to anticipatory support</td>
</tr>
<tr>
<td align="left" valign="middle">Cognitive load redistribution</td>
<td align="left" valign="middle">Shifting cognitive demands from lower-level encoding to higher-order evaluation and integration</td>
<td align="left" valign="middle"><italic>Not applicable</italic>: Interactive tools reduced load but did not systematically redistribute across WM subsystems</td>
<td align="left" valign="middle"><xref ref-type="bibr" rid="ref14">Fan and Yao (2025)</xref>&#x2014;CL-AI-L2W scale demonstrates redistribution from Authorial Core Processing (M&#x202F;=&#x202F;3.48) to Critical Evaluation (M&#x202F;=&#x202F;4.81), Prompt Management (M&#x202F;=&#x202F;4.55), and Integrative Synthesis (M&#x202F;=&#x202F;4.40)</td>
<td align="left" valign="middle">
<xref ref-type="bibr" rid="ref14">Fan and Yao (2025)</xref>
</td>
<td align="left" valign="middle">Offloads phonological loop demands (lexical retrieval, grammatical encoding); increases central-executive demands (monitoring, verification, integration); learners lacking metacognitive strategies may experience net increase in perceived difficulty</td>
</tr>
<tr>
<td align="left" valign="middle">WM compensation for developmental differences</td>
<td align="left" valign="middle">AI support that disproportionately benefits learners with lower or still-developing WM capacity</td>
<td align="left" valign="middle"><italic>Limited evidence</italic>: Interactive studies showed WM as predictor but not compensation target</td>
<td align="left" valign="middle"><xref ref-type="bibr" rid="ref62">Xu et al. (2025)</xref> meta-analysis&#x2014;K&#x2013;12 learners (d&#x202F;=&#x202F;1.445) showed descriptively larger gains than college students (d&#x202F;=&#x202F;0.988), consistent with compensatory function</td>
<td align="left" valign="middle">
<xref ref-type="bibr" rid="ref62">Xu et al. (2025)</xref>
</td>
<td align="left" valign="middle">AI adaptive pacing may reduce performance gaps associated with lower WM; aligns with ATI framework predictions; direct WM measurement needed to confirm compensation vs. demand reduction mechanisms</td>
</tr>
<tr>
<td align="left" valign="middle">Scaffolded noticing via AI-DDL</td>
<td align="left" valign="middle">AI automation of corpus search and pattern highlighting that frees WM for higher-order abstraction</td>
<td align="left" valign="middle"><italic>Partial precedent</italic>: Traditional DDL (e.g., AntConc) required WM-intensive manual search</td>
<td align="left" valign="middle"><xref ref-type="bibr" rid="ref13">Esfandiari and Allaf-Akbary (2024)</xref>&#x2014;AI-mediated DDL (Microsoft Copilot) dramatically outperformed traditional DDL (Hedges&#x2019; g&#x202F;&#x2248;&#x202F;3.39 vs. control)</td>
<td align="left" valign="middle">
<xref ref-type="bibr" rid="ref13">Esfandiari and Allaf-Akbary (2024)</xref>
</td>
<td align="left" valign="middle">Reduces WM burden of concordancing and pattern identification; theorized to free resources for noticing (Schmidt&#x2019;s hypothesis) and internalization; AI locates, clusters, and explains usage patterns automatically</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<p>WM, working memory; DDL, data-driven learning; ATI, aptitude&#x2013;treatment interaction; CL-AI-L2W, Cognitive Load Scale for AI-Assisted L2 Writing. Evidence gaps are explicitly marked where affordances lack empirical examination with WM or cognitive load measures in the respective corpus. Effect sizes and descriptive statistics are reported where available from primary studies. Mechanisms column indicates specific WM subsystems engaged (phonological loop, visuospatial sketchpad, central executive) and load types affected (intrinsic, extraneous, germane; <xref ref-type="table" rid="tab5">Table 5</xref>).</p>
</table-wrap-foot>
</table-wrap>
<table-wrap position="float" id="tab5">
<label>Table 5</label>
<caption>
<p>Design principles to minimize cognitive overload.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">Principle (actionable)</th>
<th align="left" valign="top">Theoretical rationale (WM/cognitive load linkage)</th>
<th align="left" valign="top">Supporting evidence</th>
<th align="left" valign="top">Application in interactive environments</th>
<th align="left" valign="top">Application in AI-mediated environments</th>
<th align="left" valign="top">Evidence strength/validation status</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="middle">Limit concurrent information channels to 3&#x2013;4 sources</td>
<td align="left" valign="middle">Exceeding channel capacity overwhelms the central executive and depletes phonological loop and visuospatial sketchpad resources simultaneously, consistent with <xref ref-type="bibr" rid="ref10">Cowan&#x2019;s (2001)</xref> magical number 4 limit for short-term memory chunks</td>
<td align="left" valign="middle"><xref ref-type="bibr" rid="ref64">Yu (2025)</xref>: Inverted-U relationship with optimal retention at 3&#x2013;4 channels (R<sup>2</sup>&#x202F;=&#x202F;0.41); 61% reported overload beyond threshold; <italic>&#x03B2;</italic>&#x202F;=&#x202F;&#x2212;0.53 for overload&#x2013;retention relationship</td>
<td align="left" valign="middle">Coordinate text, audio, and images to avoid split attention; minimize simultaneous animations and hyperlinks (<xref ref-type="bibr" rid="ref29">Kozan et al., 2015</xref>; <xref ref-type="bibr" rid="ref35">Lusk et al., 2009</xref>)</td>
<td align="left" valign="middle">AI systems should dynamically orchestrate multimodal input within empirically bounded limits; VR&#x2013;AR platforms must monitor and cap concurrent sensory streams</td>
<td align="left" valign="middle">Supported by an RCT (<xref ref-type="bibr" rid="ref64">Yu, 2025</xref>) and foundational WM theory (<xref ref-type="bibr" rid="ref10">Cowan, 2001</xref>); replicated across multimedia studies</td>
</tr>
<tr>
<td align="left" valign="middle">Implement preemptive rather than reactive difficulty adjustment</td>
<td align="left" valign="middle">Reactive adaptation (post-error) allows cognitive overload to accumulate before intervention; preemptive regulation maintains WM load within capacity limits during processing, preventing error cascades</td>
<td align="left" valign="middle"><xref ref-type="bibr" rid="ref65">Yuan (2025)</xref>: Biometric-adaptive reading reduced cognitive load (M&#x202F;=&#x202F;3.1 vs. 4.0; &#x03B7;<sup>2</sup>&#x202F;=&#x202F;0.17) and improved comprehension (&#x03B7;<sup>2</sup>&#x202F;=&#x202F;0.28) with eye-tracking-based preemptive adjustment</td>
<td align="left" valign="middle">Limited to post-performance branching; difficulty adjusted after errors or slow response times accumulate (<xref ref-type="bibr" rid="ref44">Schumacher and Ifenthaler, 2018</xref>)</td>
<td align="left" valign="middle">AI platforms can use real-time biometric indicators (eye-tracking, physiological monitoring) to adjust text complexity before overload manifests</td>
<td align="left" valign="middle">Supported by RCT with biometric validation (<xref ref-type="bibr" rid="ref65">Yuan, 2025</xref>); theoretical alignment with CLT&#x2014;needs replication across modalities</td>
</tr>
<tr>
<td align="left" valign="middle">Offload lower-level encoding tasks to free resources for higher-order processing</td>
<td align="left" valign="middle">Automating routine phonological loop demands (lexical retrieval, syntactic assembly) frees central-executive resources for comprehension, integration, and evaluation (<xref ref-type="bibr" rid="ref50">Sweller et al., 2019</xref>)</td>
<td align="left" valign="middle"><xref ref-type="bibr" rid="ref13">Esfandiari and Allaf-Akbary (2024)</xref>: AI-mediated DDL outperformed traditional DDL (Hedges&#x2019; g&#x202F;&#x2248;&#x202F;3.39) by offloading search burden; <xref ref-type="bibr" rid="ref14">Fan and Yao (2025)</xref>: Authorial Core Processing showed lowest load (M&#x202F;=&#x202F;3.48)</td>
<td align="left" valign="middle">Interactive glosses reduce lookup burden (<xref ref-type="bibr" rid="ref54">T&#x00FC;rk and Er&#x00E7;etin, 2014</xref>); captions offload auditory decoding (<xref ref-type="bibr" rid="ref51">Teng, 2019</xref>, <xref ref-type="bibr" rid="ref52">2023</xref>)</td>
<td align="left" valign="middle">Generative AI chatbots automate pattern search and lexical retrieval; AI-assisted writing reduces encoding demands while freeing WM for synthesis</td>
<td align="left" valign="middle">Supported by multiple RCTs and scale validation; very large effect sizes for AI-DDL; consistent with noticing hypothesis</td>
</tr>
<tr>
<td align="left" valign="middle">Coordinate modalities to support dual-channel processing without redundancy</td>
<td align="left" valign="middle">Presenting complementary (not identical) information across visual and auditory channels leverages both WM subsystems; redundant presentation wastes capacity (<xref ref-type="bibr" rid="ref37">Mayer, 2014</xref>; <xref ref-type="bibr" rid="ref38">Mutlu-Bayraktar et al., 2019</xref>)</td>
<td align="left" valign="middle"><xref ref-type="bibr" rid="ref51">Teng (2019</xref>, <xref ref-type="bibr" rid="ref52">2023)</xref>: Captioned video enhanced comprehension when captions complemented rather than duplicated audio; <xref ref-type="bibr" rid="ref57">Varol and Er&#x00E7;etin (2021)</xref>: Gloss coordination improved outcomes</td>
<td align="left" valign="middle">Synchronize audio-text presentations; avoid simultaneous identical captions and narration; use glosses that add rather than repeat information</td>
<td align="left" valign="middle">AI can personalize channel combinations based on learner processing profiles; dynamic modality switching based on task phase and learner state</td>
<td align="left" valign="middle">Supported by multiple quasi-experimental and correlational studies; consistent with multimedia learning principles; AI personalization is proposed&#x2014;needs direct WM validation</td>
</tr>
<tr>
<td align="left" valign="middle">Scaffold metacognitive evaluation skills when AI redistributes load</td>
<td align="left" valign="middle">AI offloads encoding but increases central-executive demands for critical evaluation and integration; learners lacking metacognitive strategies may experience net cognitive burden increase (<xref ref-type="bibr" rid="ref14">Fan and Yao, 2025</xref>; <xref ref-type="bibr" rid="ref32">Li, 2023</xref>)</td>
<td align="left" valign="middle"><xref ref-type="bibr" rid="ref14">Fan and Yao (2025)</xref>: Critical Evaluation (M&#x202F;=&#x202F;4.81), Prompt Management (M&#x202F;=&#x202F;4.55), and Integrative Synthesis (M&#x202F;=&#x202F;4.40) showed highest load in AI-assisted writing; CL-AI-L2W <italic>&#x03B1;</italic>&#x202F;=&#x202F;0.94</td>
<td align="left" valign="middle">Not directly applicable; pre-AI tools did not systematically redistribute load toward evaluation</td>
<td align="left" valign="middle">Explicitly teach prompt formulation, output verification, and AI&#x2013;human integration skills before deploying AI writing/DDL tools; provide metacognitive scaffolding</td>
<td align="left" valign="middle">Supported by validated scale development (<xref ref-type="bibr" rid="ref14">Fan and Yao, 2025</xref>); theoretical support from CLT redistribution literature; pedagogical intervention studies needed</td>
</tr>
<tr>
<td align="left" valign="middle">Grade task complexity to match proficiency and WM capacity</td>
<td align="left" valign="middle">Intrinsic cognitive load rises with linguistic complexity; when unscaffolded, this saturates WM and produces floor effects for lower-proficiency learners (<xref ref-type="bibr" rid="ref50">Sweller et al., 2019</xref>)</td>
<td align="left" valign="middle"><xref ref-type="bibr" rid="ref11">Denhovska et al. (2016)</xref>; <xref ref-type="bibr" rid="ref36">Makransky et al. (2016)</xref>: Lower-proficiency learners showed steeper WM-related performance decrements; <xref ref-type="bibr" rid="ref59">Xiao et al. (2025)</xref>: Verbal WM (backward span) strongly predicted L2 achievement (r&#x202F;=&#x202F;0.44)</td>
<td align="left" valign="middle">Provide difficulty selection options; sequence tasks from simple to complex; offer optional scaffolding for complex morphosyntax</td>
<td align="left" valign="middle">AI systems can continuously adjust linguistic complexity based on real-time performance and, potentially, WM indicators; K&#x2013;12 learners may benefit disproportionately (<xref ref-type="bibr" rid="ref62">Xu et al., 2025</xref>: d&#x202F;=&#x202F;1.445 vs. 0.988)</td>
<td align="left" valign="middle">Supported by multiple correlational and quasi-experimental studies; meta-analytic evidence for developmental differences; ATI validation with direct WM measures needed</td>
</tr>
<tr>
<td align="left" valign="middle">Minimize extraneous technical and interface demands</td>
<td align="left" valign="middle">Technical difficulties (slow connections, unintuitive interfaces, system crashes) impose extraneous cognitive load, diverting WM from linguistic processing (<xref ref-type="bibr" rid="ref18">Golonka et al., 2014</xref>; <xref ref-type="bibr" rid="ref23">Hwang et al., 2013</xref>)</td>
<td align="left" valign="middle"><xref ref-type="bibr" rid="ref18">Golonka et al. (2014)</xref>; <xref ref-type="bibr" rid="ref23">Hwang et al. (2013)</xref>; <xref ref-type="bibr" rid="ref50">Sweller et al. (2019)</xref>: Technical issues correlated with reduced learning outcomes and increased frustration</td>
<td align="left" valign="middle">Design intuitive interfaces; ensure reliable connectivity; minimize navigation complexity; provide clear instructions</td>
<td align="left" valign="middle">AI platforms must maintain seamless performance; avoid latency-induced attention shifts; ensure AI response consistency to prevent troubleshooting demands</td>
<td align="left" valign="middle">Supported by multiple observational and correlational studies; consistent with extraneous load reduction principles; direct WM impact studies limited</td>
</tr>
<tr>
<td align="left" valign="middle">Support distributed cognition through collaborative structures</td>
<td align="left" valign="middle">Distributing task demands across group members reduces individual WM burden; peer explanation and joint problem-solving leverage collective cognitive resources (<xref ref-type="bibr" rid="ref1">Apps et al., 2019</xref>; <xref ref-type="bibr" rid="ref26">Johnson et al., 2014</xref>)</td>
<td align="left" valign="middle"><xref ref-type="bibr" rid="ref1">Apps et al. (2019)</xref>; <xref ref-type="bibr" rid="ref26">Johnson et al. (2014)</xref>: Collaborative structures improved outcomes in WM-demanding online tasks through shared explanation and distributed processing</td>
<td align="left" valign="middle">Incorporate peer discussion, collaborative writing, and joint problem-solving activities; structure group roles to distribute cognitive load</td>
<td align="left" valign="middle">AI could facilitate intelligent grouping based on complementary WM profiles or provide AI-mediated collaborative scaffolding; this design space remains untested</td>
<td align="left" valign="middle">Supported by quasi-experimental studies in interactive contexts; theoretical alignment with distributed cognition; AI-facilitated collaboration with WM measures is an evidence gap</td>
</tr>
<tr>
<td align="left" valign="middle">Activate prior knowledge and schema before introducing new material</td>
<td align="left" valign="middle">Schema activation facilitates chunking, reducing intrinsic load by allowing new information to integrate with existing long-term memory structures (<xref ref-type="bibr" rid="ref21">Huffman and Hahn, 2017</xref>; <xref ref-type="bibr" rid="ref50">Sweller et al., 2019</xref>)</td>
<td align="left" valign="middle"><xref ref-type="bibr" rid="ref21">Huffman and Hahn (2017)</xref>; <xref ref-type="bibr" rid="ref33">Lin et al. (2022)</xref>: Prior knowledge activation improved WM efficiency and enhanced retention in technology-mediated tasks</td>
<td align="left" valign="middle">Use advance organizers; preview key vocabulary; connect new content to familiar concepts before complex multimedia exposure</td>
<td align="left" valign="middle">AI systems could personalize schema priming based on learner knowledge profiles inferred from interaction history; this potential is currently unexplored</td>
<td align="left" valign="middle">Supported by experimental and quasi-experimental studies; theoretically grounded in schema theory and CLT; AI-personalized activation is proposed&#x2014;needs empirical testing</td>
</tr>
<tr>
<td align="left" valign="middle">Incorporate direct WM assessment for adaptive calibration</td>
<td align="left" valign="middle">Without direct WM measurement, systems cannot distinguish whether interventions reduce task demands, improve resource utilization, or enhance capacity itself; proxies conflate distinct mechanisms</td>
<td align="left" valign="middle"><xref ref-type="bibr" rid="ref6">Chalmers et al. (2021)</xref>; <xref ref-type="bibr" rid="ref17">Goh and Aryadoust (2025)</xref>; <xref ref-type="bibr" rid="ref66">Zhang and Aubrey (2024)</xref>: Systematic reviews document measurement gap; <xref ref-type="bibr" rid="ref53">Teng (2024)</xref>: WM&#x2013;learning relationships may be mediated by proficiency</td>
<td align="left" valign="middle">Occasional inclusion of span tasks for research purposes; individual differences typically unmeasured in routine instruction</td>
<td align="left" valign="middle">AI systems should integrate validated WM assessments (e.g., automated span tasks) for real-time adaptive calibration and ATI-based personalization</td>
<td align="left" valign="middle">Proposed based on measurement gap analysis; theoretically critical for causal inference; no current AI implementations include validated WM assessment</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<p>WM, working memory; CLT, Cognitive Load Theory; DDL, data-driven learning; ATI, aptitude&#x2013;treatment interaction; RCT, randomized controlled trial; CL-AI-L2W, Cognitive Load Scale for AI-Assisted L2 Writing. Evidence strength is categorized as: &#x201C;Supported by multiple experimental studies&#x201D; (convergent evidence from &#x2265;2 RCTs or quasi-experiments with consistent findings), &#x201C;Supported by experimental/quasi-experimental study&#x201D; (single well-designed study), &#x201C;Supported by correlational/observational studies&#x201D; (non-experimental evidence), &#x201C;Proposed&#x2014;needs direct WM validation&#x201D; (theoretically grounded but lacking empirical WM assessment), or &#x201C;Evidence gap&#x201D; (no studies examining the principle with WM/cognitive load measures in the specified environment). Effect sizes and statistical parameters are reported where available from primary studies.</p>
</table-wrap-foot>
</table-wrap>
</sec>
<sec id="sec7">
<label>3.3</label>
<title>Adopted instructional tools and comparative overview</title>
<p>The Interactive Corpus employed a range of tools to create interactive language learning environments. Online platforms were the most common, appearing in approximately 48% of the 27 studies. CALL software was used in about 24% of studies, mobile applications in 16%, and early non-AI virtual reality environments in 12% (<xref ref-type="bibr" rid="ref36">Makransky et al., 2016</xref>). All interactive tools integrated some combination of multimedia features&#x2014;videos, images, animations&#x2014;to support WM by providing redundant cues and multiple input channels. Approximately three quarters of the studies incorporated gamification elements or adaptive feedback mechanisms to maintain attention and engagement, and more than one third provided structured opportunities for peer collaboration, while roughly two fifths used individualized feedback based on learners&#x2019; performance trajectories.</p>
<p>The AI-Mediated Cluster introduced a qualitatively different family of tools. These included generative AI chatbots such as DeepSeek V3.1 for AI-assisted writing (<xref ref-type="bibr" rid="ref14">Fan and Yao, 2025</xref>) and Microsoft Copilot for intelligent DDL (<xref ref-type="bibr" rid="ref13">Esfandiari and Allaf-Akbary, 2024</xref>); biometric-adaptive reading platforms such as Smart Sparrow augmented with eye-tracking and physiological monitoring (<xref ref-type="bibr" rid="ref65">Yuan, 2025</xref>); and AI-orchestrated VR&#x2013;AR systems combining Unity, Oculus Quest 2, and Google MediaPipe with ChatGPT-4 for multimodal vocabulary learning (<xref ref-type="bibr" rid="ref64">Yu, 2025</xref>). Complementary reviews documented emerging use of intelligent personal assistants and spoken dialogue systems for listening and speaking practice (<xref ref-type="bibr" rid="ref17">Goh and Aryadoust, 2025</xref>; <xref ref-type="bibr" rid="ref60">Xu, 2025</xref>).</p>
<p>Conceptually, this shift can be summarized as a movement from pre-scripted, reactive tools to dynamic, dialogic cognitive partners. Feedback systems move from pre-programmed corrective responses to open-ended generative explanations; adaptation mechanisms move from performance-based difficulty adjustment to biometric-driven, real-time regulation; multimodal delivery moves from static multimedia to embodied VR&#x2013;AR experiences orchestrated by LLMs; and personalization moves from predetermined learning paths to dynamic prompt&#x2013;response dialogue.</p>
<p>At the same time, a methodological divergence is evident. Several Interactive Corpus studies incorporated direct WM measures (e.g., digit span and reading span tasks), enabling explicit WM&#x2013;performance analyses. None of the four AI-mediated primary studies, nor the broader AI-in-practice literature summarized in recent reviews, included validated WM instruments. Instead, AI studies relied on self-report cognitive load scales and process indicators as proxies. This divergence complicates direct comparison of WM effects across eras and contributes to the measurement paradox that emerges from the broader synthesis.</p>
<p>In summary, the comparative analysis reveals both continuities and transformations. Cognitive overload remains a central concern, but AI research now specifies empirical boundaries such as <xref ref-type="bibr" rid="ref64">Yu&#x2019;s (2025)</xref> three- to four-channel optimum. Individual differences in WM continue to shape technology effectiveness, yet AI-mediated environments show potential to compensate for lower or immature WM through adaptive pacing and targeted support (<xref ref-type="bibr" rid="ref62">Xu et al., 2025</xref>). Multimodal presentation remains beneficial, though AI allows dynamic, learner-sensitive orchestration rather than static design. Perhaps most critically, the locus of cognitive challenge has shifted: from managing dense input and technical distractions in the Interactive Corpus to managing higher-order evaluative and integrative demands in the AI-Mediated Cluster. At the same time, direct measurement of WM has declined just as AI research relies more heavily on WM-based explanations, leaving open key questions about whether AI primarily changes task demands, functional WM use, or WM capacity itself&#x2014;questions that the Discussion and Future Directions sections take up.</p>
</sec>
</sec>
<sec sec-type="discussion" id="sec8">
<label>4</label>
<title>Discussion</title>
<p>Returning to the three research questions, this comparative synthesis illuminates a fundamental shift in the architecture of technology-enhanced language learning&#x2014;from a hermeneutics of accommodation to a hermeneutics of co-regulation. Regarding RQ1 (design guidelines), the Interactive Corpus demonstrates that multimodal coordination, adaptive pacing, and schema activation support WM efficiency, whereas information density, technical disruptions, and unscaffolded complexity overload capacity. Regarding RQ2 (WM&#x202F;&#x00D7;&#x202F;AI affordances interactions), AI-mediated environments exhibit qualitatively distinct interaction patterns: rather than simply reducing load, they redistribute cognitive demands from encoding (phonological loop) to evaluation and integration (central executive), as evidenced by the CL-AI-L2W factor structure and biometric-adaptive process data. Regarding RQ3 (boundary conditions and unintended consequences), the review identifies empirical channel limits (3&#x2013;4 concurrent sources in VR&#x2013;AR), the measurement paradox (increasing theoretical invocation but decreasing direct assessment of WM), and the risk that load redistribution may disadvantage learners lacking metacognitive strategies, as well as potential over-scaffolding (i.e., overprotection) with downstream effects on metacognitive monitoring and executive-skill development.</p>
<p>In the pre-AI corpus, working memory was treated as a finite, static buffer: an invariant biological constraint around which instructional design had to be carefully engineered. Technologies were expected to adapt; cognition was assumed to be fixed. The design mandate was therefore to manage information density, sequencing, and modality so that task demands remained within known working-memory limits, consistent with the core tenets of Cognitive Load Theory and multimedia learning frameworks (<xref ref-type="bibr" rid="ref37">Mayer, 2014</xref>; <xref ref-type="bibr" rid="ref49">Sweller, 2020</xref>).</p>
<p>The emergence of AI-mediated environments, particularly those integrating generative AI and biometric sensing, destabilizes this arrangement. The unit of analysis is no longer the solitary learner but a dynamically coupled human&#x2013;AI system. In this regulatory model, working-memory demands are not simply &#x201C;respected&#x201D; but actively monitored, redistributed, and, at least in principle, shaped over time. Biometric-adaptive systems that adjust text complexity in real time based on physiological markers of arousal and effort exemplify this shift from reactive accommodation to preemptive regulation (<xref ref-type="bibr" rid="ref65">Yuan, 2025</xref>). Meta-analytic evidence that K&#x2013;12 learners, whose working memory is still developing, can achieve gains comparable to or larger than those of adults when supported by AI pacing and feedback suggests that AI may function as a compensatory prosthesis rather than merely a gentler delivery channel (<xref ref-type="bibr" rid="ref62">Xu et al., 2025</xref>). In this sense, AI is not simply a scaffold resting on a fixed cognitive foundation; it is an active participant in the regulation of task demands and cognitive resources.</p>
<p>A central theme emerging from this comparison is the move from load reduction to load redistribution. Traditional interactive environments, informed by Cognitive Load Theory, sought to lower overall cognitive load by minimizing extraneous demands and managing intrinsic complexity. AI-mediated environments, by contrast, alter the structure of the load rather than its sheer volume. Generative AI in L2 writing offloads lower-level encoding demands&#x2014;such as lexical retrieval and syntactic assembly&#x2014;that classically tax the phonological loop, while substantially increasing demands on central-executive processes: prompt formulation, output verification, and integrative synthesis of human and machine contributions (<xref ref-type="bibr" rid="ref14">Fan and Yao, 2025</xref>). The factor structure and mean profiles of the CL-AI-L2W scale, with relatively lower load on authorial core processing and higher load on evaluation and management, empirically capture this redistribution. Similarly, AI-mediated data-driven learning automates manual pattern search and concordancing, freeing cognitive resources for higher-order noticing and internalization, as reflected in the substantial performance advantages of AI-supported DDL over traditional corpus-based tasks (<xref ref-type="bibr" rid="ref13">Esfandiari and Allaf-Akbary, 2024</xref>).</p>
<p>This redistribution is double-edged. For learners with robust executive control and metacognitive strategies, AI may indeed create a cognitive surplus&#x2014;that is, freed working memory capacity available for higher-order processing when routine demands are offloaded (<xref ref-type="bibr" rid="ref50">Sweller et al., 2019</xref>)&#x2014;by removing routine encoding burdens. For learners with fragile central-executive resources or limited experience in critical evaluation, the same tools may intensify perceived difficulty: the effort saved in generating language is immediately reallocated to monitoring, checking, and reconciling AI output with task requirements and personal goals. The benefits of AI are therefore conditional, not automatic, and hinge on learners&#x2019; preparedness for the new forms of cognitive work that AI-mediated tasks demand.</p>
<p>The review also exposes a deeper epistemic problem: the measurement paradox. As the field&#x2019;s theoretical discourse on AI and cognition grows more sophisticated&#x2014;foregrounding working memory, cognitive load, and aptitude&#x2013;treatment interactions&#x2014;the direct measurement of working memory has receded. Many of the pre-AI studies included validated span tasks or closely related measures, enabling explicit tests of how working-memory capacity constrained or enabled learning. In contrast, the AI-mediated intervention studies, despite their claims about compensation and optimization, rely almost exclusively on subjective cognitive load ratings, proficiency scores, or process indicators as proxies for working memory. Across multiple systematic reviews, working memory is repeatedly invoked as central to aptitude and AI-mediated learning, yet rarely measured with psychometric rigor (<xref ref-type="bibr" rid="ref6">Chalmers et al., 2021</xref>; <xref ref-type="bibr" rid="ref17">Goh and Aryadoust, 2025</xref>; <xref ref-type="bibr" rid="ref42">Sana and Fenesi, 2025</xref>; <xref ref-type="bibr" rid="ref66">Zhang and Aubrey, 2024</xref>; <xref ref-type="bibr" rid="ref60">Xu, 2025</xref>). <xref ref-type="bibr" rid="ref53">Teng&#x2019;s (2024)</xref> finding that working memory influences vocabulary learning indirectly via proficiency illustrates the complexity of these relationships and underscores that nuanced, multi-step pathways cannot be inferred from load ratings alone.</p>
<p>This reliance on proxies creates a causal black box. When an AI intervention is associated with lower self-reported load and better outcomes, we cannot determine whether the AI simplified the task environment, improved learners&#x2019; allocation of existing working-memory resources, or contributed to durable changes in capacity. These mechanisms are conceptually distinct and carry very different implications for design, equity, and long-term development, yet they are empirically conflated when working memory is left unmeasured.</p>
<p>Domain-specific and neurocognitive evidence in the corpus further complicates the picture but also opens new possibilities. Findings that verbal working-memory measures, particularly backward digit span, differentiate high- and low-achieving L2 learners more strongly than spatial measures suggest that specific subsystems of working memory are especially critical for language learning (<xref ref-type="bibr" rid="ref59">Xiao et al., 2025</xref>). Reviews of multisensory working-memory training using linguistically rich materials indicate that aspects of working memory may be more plastic than trait models assumed, with gains in capacity and neural efficiency accompanied by transfer to new auditory tasks (<xref ref-type="bibr" rid="ref16">Gkintoni et al., 2025</xref>). Although these studies are not themselves AI-mediated, they point toward an underexplored design space in which AI could be used not only to compensate for working-memory constraints but also to deliberately train key verbal working-memory processes in ecologically valid language tasks.</p>
<p>Taken together, the present review suggests that the field stands at a crossroads. On one path lies a sophisticated narrative of AI as a co-regulator of cognition that compensates for developmental and individual differences in working memory. On the other lies a persistent methodological reticence to measure working memory directly, leaving fundamental causal questions unanswered. At present, the evidence base is insufficient to adjudicate longer-run developmental trade-offs; a balanced account therefore must specify where AI-mediated support may backfire through over-scaffolding (i.e., overprotection), observable as dependency on AI support, reduced independent strategy use, and reduced transfer under reduced-support conditions, with potential downstream costs for metacognitive monitoring and executive-skill development. These boundary conditions can be tested empirically by incorporating scaffold-fading schedules, explicit reduced-support phases, delayed transfer tasks, and metacognitive/executive outcome measures (e.g., monitoring accuracy, strategy-use indices, and executive-control measures) alongside immediate performance outcomes. Bridging this gap is essential if AI is to be understood&#x2014;and responsibly deployed&#x2014;not merely as a convenience layer on existing pedagogies, but as a transformative partner in the orchestration of cognitive effort.</p>
<sec id="sec9">
<label>4.1</label>
<title>Synthesis by research question: evidence, confidence, and remaining gaps</title>
<disp-quote>
<p>RQ1 (Design Guidelines): What instructional design features of interactive language learning environments (2010&#x2013;2024) support working memory efficiency, and which features challenge or overload working memory capacity?</p>
</disp-quote>
<p><italic>Interactive Corpus Evidence.</italic> The 27 pre-AI studies consistently demonstrate that multimodal coordination (synchronized audio-visual presentation), adaptive pacing, schema activation, and collaborative task structures support WM efficiency by distributing demands across subsystems and enabling chunking. Conversely, high information density, unscaffolded complexity, technical disruptions, and unregulated multitasking overtax WM capacity, particularly for lower-capacity learners.</p>
<p><italic>AI-Mediated Cluster Evidence.</italic> The four AI-mediated studies do not directly test traditional design features but confirm that the principles identified in the Interactive Corpus remain operative: <xref ref-type="bibr" rid="ref64">Yu (2025)</xref> demonstrates that even AI-orchestrated multimodality produces overload when channel complexity exceeds three to four concurrent sources, and <xref ref-type="bibr" rid="ref65">Yuan (2025)</xref> shows that adaptive pacing&#x2014;when enhanced with biometric sensing&#x2014;substantially reduces cognitive load and improves outcomes.</p>
<p><italic>Confidence and Limits.</italic> Confidence in the design guidelines is moderate to high for the Interactive Corpus, supported by multiple methodologically diverse studies with some direct WM measurement. Confidence for AI contexts is lower because AI-mediated studies did not isolate traditional design features experimentally.</p>
<p><italic>What Remains Unknown.</italic> Whether specific design features (e.g., schema activation, collaborative scaffolding) interact differently with AI affordances than with pre-AI tools remains untested. Highest-priority empirical test: A factorial experiment comparing schema-priming and collaborative-support manipulations in matched AI-mediated versus non-AI conditions, with direct WM assessment at baseline and post-intervention.</p>
<disp-quote>
<p>RQ2 (WM&#x202F;&#x00D7;&#x202F;AI Affordances Interactions): How do AI-mediated language learning affordances&#x2014;including generative chatbots, biometric-adaptive systems, and multimodal VR&#x2013;AR platforms&#x2014;interact with working memory processes, and do these interactions differ qualitatively from those observed in traditional interactive environments?</p>
</disp-quote>
<p><italic>Interactive Corpus Evidence.</italic> In pre-AI environments, WM was primarily engaged at the input-processing level (phonological loop, visuospatial sketchpad), and individual WM capacity functioned as a moderator of learning outcomes. Higher-capacity learners benefited more from complex multimedia and self-directed tasks; adaptation was reactive and performance-based.</p>
<p><italic>AI-Mediated Cluster Evidence.</italic> AI affordances produce qualitatively distinct interaction patterns. <xref ref-type="bibr" rid="ref14">Fan and Yao&#x2019;s (2025)</xref> CL-AI-L2W scale reveals that generative AI shifts the locus of cognitive demand from lower-level encoding (Authorial Core Processing: M&#x202F;=&#x202F;3.48) to central-executive functions (Critical Evaluation: M&#x202F;=&#x202F;4.81; Prompt Management: M&#x202F;=&#x202F;4.55; Integrative Synthesis: M&#x202F;=&#x202F;4.40). <xref ref-type="bibr" rid="ref65">Yuan (2025)</xref> demonstrates that biometric-adaptive systems enable preemptive rather than reactive regulation, adjusting difficulty during processing before errors accumulate. <xref ref-type="bibr" rid="ref13">Esfandiari and Allaf-Akbary (2024)</xref> show that AI-mediated DDL offloads search demands, theoretically freeing WM for higher-order noticing. Meta-analytic evidence (<xref ref-type="bibr" rid="ref62">Xu et al., 2025</xref>) suggests that AI may compensate for developmental WM limitations, with K&#x2013;12 learners showing descriptively larger gains than adults.</p>
<p><italic>Confidence and Limits.</italic> Confidence that AI redistributes load is moderate, supported by validated scale data and effect sizes. However, confidence in the compensation hypothesis is limited because no AI-mediated study directly measured WM capacity; inferences rely on cognitive load proxies and age-based comparisons.</p>
<p><italic>What Remains Unknown.</italic> Whether AI-mediated load redistribution benefits or disadvantages learners with different WM profiles (high vs. low capacity, verbal vs. spatial strengths) cannot be determined without direct WM assessment. Highest-priority empirical test: An aptitude&#x2013;treatment interaction study crossing AI condition (generative AI-assisted writing vs. control) with baseline WM capacity (measured via validated span tasks), examining whether AI selectively benefits lower-WM learners or intensifies central-executive demands for all.</p>
<disp-quote>
<p>RQ3 (Boundary Conditions and Unintended Consequences): What are the empirical boundary conditions of AI-mediated working memory support, and what unintended consequences&#x2014;including cognitive load redistribution and measurement gaps&#x2014;emerge from AI integration in language learning?</p>
</disp-quote>
<p><italic>Interactive Corpus Evidence.</italic> Pre-AI studies identified general overload thresholds tied to information density and task complexity but did not quantify precise channel limits. Unintended consequences included performance gaps favoring higher-WM learners and anxiety-induced WM impairment under high-stakes or unfamiliar conditions.</p>
<p><italic>AI-Mediated Cluster Evidence.</italic> <xref ref-type="bibr" rid="ref64">Yu (2025)</xref> provides the first empirically specified boundary condition: an inverted-U relationship between multimodal channel complexity and retention, with optimal performance at three to four concurrent sources (<italic>R</italic><sup>2</sup>&#x202F;=&#x202F;0.41) and significant overload beyond this threshold (<italic>&#x03B2;</italic>&#x202F;=&#x202F;&#x2212;0.53). The redistribution of load from encoding to evaluation (<xref ref-type="bibr" rid="ref14">Fan and Yao, 2025</xref>) constitutes an unintended consequence: learners lacking metacognitive strategies may experience AI support as cognitively more demanding despite reduced encoding burden. The measurement paradox&#x2014;whereby WM is increasingly invoked theoretically but decreasingly measured empirically&#x2014;emerges as a cross-cutting methodological consequence, documented across multiple systematic reviews (<xref ref-type="bibr" rid="ref6">Chalmers et al., 2021</xref>; <xref ref-type="bibr" rid="ref17">Goh and Aryadoust, 2025</xref>; <xref ref-type="bibr" rid="ref66">Zhang and Aubrey, 2024</xref>).</p>
<p><italic>Confidence and Limits.</italic> Confidence in the three-to-four channel boundary is moderate, based on a single large-scale RCT; replication across populations and modality combinations is needed. Confidence in the load redistribution pattern is moderate to high, given the robust factor structure and fit indices of the CL-AI-L2W scale. Confidence regarding the measurement paradox is high, as it is documented across multiple independent reviews.</p>
<p><italic>What Remains Unknown.</italic> Whether the channel boundary generalizes beyond VR&#x2013;AR vocabulary learning to other skills and AI configurations remains untested. Whether load redistribution produces differential effects for learners with varying central-executive capacity is unknown. Highest-priority empirical test: A replication of <xref ref-type="bibr" rid="ref64">Yu (2025)</xref> with direct WM measurement (especially executive-function tasks) to determine whether individual differences in central-executive capacity moderate the channel-limit threshold and overload effects.</p>
</sec>
<sec id="sec10">
<label>4.2</label>
<title>Objective attainment statement</title>
<p>The three research objectives were substantially but incompletely met. Objective 1 (aligned with RQ1) was largely achieved: the review synthesized consistent evidence-based design guidelines for WM-sensitive interactive instruction from the 27-study Interactive Corpus. Objective 2 (aligned with RQ2) was partially achieved: the review identified qualitatively distinct WM&#x202F;&#x00D7;&#x202F;AI interaction patterns (load redistribution, preemptive regulation, potential compensation), but the absence of direct WM measurement in AI studies limits causal conclusions. Objective 3 (aligned with RQ3) was substantially achieved: the review documented specific boundary conditions (channel limits), unintended consequences (load redistribution disadvantaging metacognitively unprepared learners), and the measurement paradox, though the empirical base for AI-specific boundaries remains narrow.</p>
</sec>
<sec id="sec11">
<label>4.3</label>
<title>Prioritized gap list</title>
<p>Based on the synthesis, the following empirical gaps are prioritized for future research:</p>
<list list-type="order">
<list-item>
<p>Direct WM measurement in AI intervention studies (highest priority). No AI-mediated intervention in the corpus included validated WM tasks. Without such measurement, it is impossible to distinguish whether AI reduces task demands, improves functional WM utilization, or changes WM capacity itself.</p>
</list-item>
<list-item>
<p>Aptitude&#x2013;treatment interaction designs. Studies are needed that cross AI affordances with baseline WM profiles to test compensation hypotheses and identify which learners benefit most (or least) from specific AI configurations.</p>
</list-item>
<list-item>
<p>Replication of multimodal channel boundaries. <xref ref-type="bibr" rid="ref64">Yu&#x2019;s (2025)</xref> three-to-four channel optimum requires replication across diverse learner populations, language skills, and AI-multimodal configurations.</p>
</list-item>
<list-item>
<p>Central-executive load and metacognitive preparedness. Research should examine whether explicit metacognitive training mitigates the increased evaluative demands associated with generative AI use.</p>
</list-item>
<list-item>
<p>Longitudinal tracking of WM dynamics. Studies are needed to determine whether sustained AI-mediated instruction affects WM capacity over time or primarily operates through demand reduction.</p>
</list-item>
<list-item>
<p>Geographical and linguistic diversification. The AI-mediated evidence base is concentrated in East Asian EFL contexts; replication in other linguistic, cultural, and institutional settings is essential for generalizability.</p>
</list-item>
</list>
<p>The paradigm shift from static accommodation to dynamic co-regulation carries far-reaching implications for instructional design, pedagogy, and research.</p>
<p>For design, the central task moves from minimizing load in the abstract to optimizing the alignment between task demands and learners&#x2019; evolving cognitive profiles. AI-mediated systems should be conceived as adaptive regulators that can detect signs of overload or underload and adjust pacing, complexity, and modality in real time. Such systems need to distinguish between supportive compensation and overprotection, providing scaffolding that is gradually faded rather than indefinitely maintained. Design choices that automate lower-level processes must be weighed against the risk of eroding opportunities for productive struggle and desirable difficulty. To translate these implications into implementable steps, an evidence-informed checklist for designers and instructors is provided below.</p>
</sec>
<sec id="sec12">
<label>4.4</label>
<title>Practical guidelines for designers and instructors</title>
<list list-type="order">
<list-item>
<p>Manage information density and enforce channel limits by keeping concurrent information streams to three to four at most and using progressive disclosure and chunking. Rationale: Limiting extraneous load prevents working memory saturation and aligns with observed multimedia overload and empirically bounded multimodal thresholds (<xref ref-type="bibr" rid="ref35">Lusk et al., 2009</xref>; <xref ref-type="bibr" rid="ref29">Kozan et al., 2015</xref>; <xref ref-type="bibr" rid="ref64">Yu, 2025</xref>; <xref ref-type="bibr" rid="ref10">Cowan, 2001</xref>).</p>
</list-item>
<list-item>
<p>Sequence and coordinate modalities to prevent split attention (present core input first, then add complementary cues; avoid redundant text-audio duplication). Rationale: Coordinated multimodal timing supports dual-channel processing and improves comprehension under working memory constraints (<xref ref-type="bibr" rid="ref37">Mayer, 2014</xref>; <xref ref-type="bibr" rid="ref54">T&#x00FC;rk and Er&#x00E7;etin, 2014</xref>; <xref ref-type="bibr" rid="ref29">Kozan et al., 2015</xref>).</p>
</list-item>
<list-item>
<p>Scaffold strategically, then fade supports based on performance signals (start with guided prompts/worked examples; progressively withdraw assistance as accuracy stabilizes). Rationale (research-informed recommendation): Scaffolding can externalize interim processing to reduce working memory demands, while fading preserves desirable difficulty and independent executive control (<xref ref-type="bibr" rid="ref44">Schumacher and Ifenthaler, 2018</xref>; <xref ref-type="bibr" rid="ref42">Sana and Fenesi, 2025</xref>).</p>
</list-item>
<list-item>
<p>Implement adaptive pacing and just-in-time feedback (enable pause/replay/stepwise reveal; deliver concise feedback at the point of error or hesitation). Rationale: Adaptive pacing and timely feedback regulate cognitive load and have been associated with improved comprehension alongside reduced load in adaptive learning implementations (<xref ref-type="bibr" rid="ref22">Hwang et al., 2012</xref>; <xref ref-type="bibr" rid="ref65">Yuan, 2025</xref>).</p>
</list-item>
<list-item>
<p>Use biometric or high-quality behavioral proxies to tune cognitive load where feasible and ethically governed (e.g., eye-tracking indices, response latency, revision traces) with opt-in consent and data minimization. Rationale (research-informed recommendation): Real-time proxy signals can enable preemptive regulation of processing demands, but current language-learning evidence is limited to a small number of implementations and requires privacy and fairness safeguards (<xref ref-type="bibr" rid="ref65">Yuan, 2025</xref>; <xref ref-type="bibr" rid="ref9">Cosentino and Giannakos, 2023</xref>; <xref ref-type="bibr" rid="ref45">Silva Barbosa et al., 2023</xref>).</p>
</list-item>
<list-item>
<p>Teach metacognitive self-regulation for AI-supported tasks (plan prompts, verify AI outputs, and justify revisions using checklists and reflection routines). Rationale: AI assistance can shift load toward central-executive evaluation and integration, so explicit metacognitive instruction supports effective working memory allocation and reduces uncritical reliance (<xref ref-type="bibr" rid="ref14">Fan and Yao, 2025</xref>; <xref ref-type="bibr" rid="ref31">Lee, 2021</xref>; <xref ref-type="bibr" rid="ref17">Goh and Aryadoust, 2025</xref>).</p>
</list-item>
</list>
<p>Pedagogically, AI integration demands a new emphasis on metacognitive acculturation. If AI shifts the burden from encoding to evaluation, learners must be explicitly taught how to manage this new cognitive landscape. Instruction needs to foreground skills such as structuring effective prompts, monitoring AI outputs for accuracy and relevance, and integrating external suggestions into coherent personal representations. Classroom assessment should, in turn, move beyond evaluating products generated in isolation to examining how learners orchestrate human&#x2013;AI interaction over time&#x2014;for example, by analyzing prompt sequences, revision histories, and justification of decisions in AI-supported tasks.</p>
<p>Methodologically, the field must recalibrate its evidentiary standards. Subjective cognitive load measures are insufficient on their own to sustain claims about compensation, optimization, or training effects. A more rigorous paradigm requires the routine inclusion of direct working-memory assessments, the integration of behavioral and process-tracing data, and the systematic testing of aptitude&#x2013;treatment interactions. Without such shifts, the discourse on AI and working memory will remain largely speculative, even as the technologies themselves become more pervasive.</p>
</sec>
<sec id="sec13">
<label>4.5</label>
<title>Integrating direct working memory assessment in AI-mediated research</title>
<sec id="sec14">
<label>4.5.1</label>
<title>Instruments and implementation guidelines</title>
<p>To address the persistent absence of direct WM measurement documented across the AI-mediated literature, this section provides concrete guidance for researchers seeking to incorporate validated WM instruments into AI-assisted language learning studies. The recommendations distinguish between direct psychometric WM assessment and behavioral proxy indicators, and offer practical implementation strategies for embedding WM measurement within AI-mediated research designs.</p>
</sec>
<sec id="sec15">
<label>4.5.2</label>
<title>Direct WM assessment versus behavioral proxies</title>
<p>A fundamental distinction must be drawn between direct WM assessment&#x2014;validated psychometric tasks that isolate WM constructs through controlled stimuli and standardized administration&#x2014;and behavioral proxy indicators derived from learner interaction logs, response latencies, and eye-tracking metrics. Direct assessment provides construct-valid measures of WM capacity, updating efficiency, or subsystem-specific functioning, enabling causal inference about whether AI interventions affect WM resources, utilization, or capacity itself. Behavioral proxies, by contrast, reflect processing patterns that may be influenced by WM but also by motivation, familiarity, strategy use, and task design. While proxies offer ecological validity and continuous monitoring capability, they cannot substitute for direct measurement when the research question concerns WM mechanisms. Robust AI-mediated research designs should therefore combine direct WM assessment (for construct validity and individual-difference analysis) with behavioral proxies (for process-level insight and real-time monitoring), triangulating findings across measurement modalities.</p>
</sec>
<sec id="sec16">
<label>4.5.3</label>
<title>Curated working memory tasks for computer and mobile delivery</title>
<p><xref ref-type="table" rid="tab6">Table 6</xref> presents a curated set of validated WM tasks suitable for computerized or mobile administration in AI-mediated language learning research. For each task, the table specifies the construct targeted (capacity, updating, or subsystem-specific functioning), administration mode, scoring procedures, feasibility for embedding in AI platforms, minimum psychometric and reporting requirements, and practical implementation guidance including timing burden and practice-effect mitigation strategies.</p>
<table-wrap position="float" id="tab6">
<label>Table 6</label>
<caption>
<p>Validated working memory tasks for AI-mediated language learning research.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">Task</th>
<th align="left" valign="top">Construct targeted</th>
<th align="left" valign="top">Administration mode</th>
<th align="left" valign="top">Scoring</th>
<th align="left" valign="top">Feasibility for AI platform integration</th>
<th align="left" valign="top">Psychometric requirements</th>
<th align="left" valign="top">Implementation guidance</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="middle">Operation span (OSPAN)</td>
<td align="left" valign="middle">Central executive capacity; complex span</td>
<td align="left" valign="middle">Computer/tablet; 15&#x2013;20&#x202F;min; requires keyboard or touch response</td>
<td align="left" valign="middle">Partial-credit unit scoring (sum of correctly recalled items in correct serial position); absolute scoring also reported</td>
<td align="left" valign="middle">High: Can be administered as pre/post module; automated scoring feasible; validated computerized versions available (<xref ref-type="bibr" rid="ref56">Unsworth et al., 2005</xref>)</td>
<td align="left" valign="middle">Report internal consistency (&#x03B1;&#x202F;&#x2265;&#x202F;0.70); test&#x2013;retest reliability if repeated; cite normative data; report language of stimuli</td>
<td align="left" valign="middle">Use parallel forms or alternate stimulus sets for pre/post to mitigate practice effects; allow one practice trial block; administer in quiet conditions; timing: ~20&#x202F;min total</td>
</tr>
<tr>
<td align="left" valign="middle">Reading span task (RST)</td>
<td align="left" valign="middle">Verbal WM capacity; phonological loop + central executive</td>
<td align="left" valign="middle">Computer/tablet; 15&#x2013;25&#x202F;min; requires sentence verification and letter/word recall</td>
<td align="left" valign="middle">Partial-credit scoring; processing accuracy reported separately</td>
<td align="left" valign="middle">High: Well-suited for L2 research; sentence stimuli can be adapted to target language proficiency; automated administration validated</td>
<td align="left" valign="middle">&#x03B1;&#x202F;&#x2265;&#x202F;0.70; report both storage and processing accuracy; language-appropriate sentence norming required</td>
<td align="left" valign="middle">Critical for L2 contexts: use proficiency-appropriate sentences to avoid floor/ceiling effects; provide parallel forms; ~20&#x2013;25&#x202F;min total including instructions</td>
</tr>
<tr>
<td align="left" valign="middle">Backward digit span</td>
<td align="left" valign="middle">Verbal WM capacity; phonological loop + manipulation</td>
<td align="left" valign="middle">Computer/mobile; 5&#x2013;10&#x202F;min; audio or visual digit presentation with typed/spoken response</td>
<td align="left" valign="middle">Longest sequence correctly recalled; total correct trials</td>
<td align="left" valign="middle">Very High: Brief; minimal technical requirements; easily embedded as &#x201C;micro-assessment&#x201D; between AI tasks</td>
<td align="left" valign="middle">Report span score and total correct; internal consistency via split-half; cite WAIS-IV or equivalent norms</td>
<td align="left" valign="middle">Suitable for intermittent administration (every 2&#x2013;3 sessions); use parallel digit sequences; ~5&#x2013;7&#x202F;min; low practice effects with adequate intervals</td>
</tr>
<tr>
<td align="left" valign="middle">N-back (verbal)</td>
<td align="left" valign="middle">Updating; central executive</td>
<td align="left" valign="middle">Computer/tablet; 10&#x2013;15&#x202F;min; continuous stimulus presentation with match/non-match response</td>
<td align="left" valign="middle">d&#x2032; (sensitivity); accuracy; reaction time</td>
<td align="left" valign="middle">Moderate-High: Requires precise timing control; gamified versions available (<xref ref-type="bibr" rid="ref43">Scharinger et al., 2023</xref>); suitable for separate assessment module</td>
<td align="left" valign="middle">Report d&#x2032;, accuracy, and RT; specify n-level (typically 2-back for adequate difficulty); internal consistency via split-half</td>
<td align="left" valign="middle">Higher cognitive demand may induce fatigue; administer at session start; 2-back recommended for most populations; ~12&#x2013;15&#x202F;min; parallel stimulus sets available</td>
</tr>
<tr>
<td align="left" valign="middle">Corsi block-tapping (backward)</td>
<td align="left" valign="middle">Visuospatial WM capacity</td>
<td align="left" valign="middle">Tablet (touch-based); 5&#x2013;10&#x202F;min; sequential block highlighting with reverse-order response</td>
<td align="left" valign="middle">Longest sequence correctly recalled; total correct</td>
<td align="left" valign="middle">High: Touch-screen administration well-validated; suitable for cross-linguistic samples (non-verbal)</td>
<td align="left" valign="middle">Report span and total correct; cite normative references (e.g., <xref ref-type="bibr" rid="ref28">Kessels et al., 2000</xref>)</td>
<td align="left" valign="middle">Useful when verbal WM confounded by L2 proficiency; ~5&#x2013;8&#x202F;min; low language demands; parallel spatial configurations for repeated testing</td>
</tr>
<tr>
<td align="left" valign="middle">Symmetry span</td>
<td align="left" valign="middle">Visuospatial WM capacity; complex span</td>
<td align="left" valign="middle">Computer/tablet; 15&#x2013;20&#x202F;min; symmetry judgment + spatial location recall</td>
<td align="left" valign="middle">Partial-credit unit scoring for spatial recall; symmetry accuracy reported separately</td>
<td align="left" valign="middle">Moderate: Requires graphical display capability; validated computerized versions exist</td>
<td align="left" valign="middle">&#x03B1;&#x202F;&#x2265;&#x202F;0.70; report both storage and processing scores; cite automated version validation</td>
<td align="left" valign="middle">Alternative to RST when verbal confounds are a concern; ~18&#x2013;22&#x202F;min; parallel forms available; administer in sessions without heavy visuospatial AI tasks</td>
</tr>
<tr>
<td align="left" valign="middle">Running span</td>
<td align="left" valign="middle">Updating efficiency; WM capacity under continuous input</td>
<td align="left" valign="middle">Computer; 10&#x2013;15&#x202F;min; variable-length lists with recall of final n items</td>
<td align="left" valign="middle">Proportion correct at each list length; updating efficiency index</td>
<td align="left" valign="middle">Moderate: Requires variable list programming; less commonly implemented but theoretically important for AI streaming contexts</td>
<td align="left" valign="middle">Report proportion correct by list length; internal consistency; less established norms&#x2014;pilot validation recommended</td>
<td align="left" valign="middle">Particularly relevant for AI contexts involving continuous information streams (e.g., real-time AI feedback); ~12&#x202F;min; develop parallel item sets</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="sec17">
<label>4.5.4</label>
<title>Integration strategies for AI-mediated research designs</title>
<p>Three primary integration strategies are recommended, depending on research questions, practical constraints, and the temporal granularity required:</p>
<list list-type="order">
<list-item>
<p>Pre-Post Module Design. Administer a comprehensive WM battery (e.g., OSPAN + Backward Digit Span + one visuospatial task) at baseline and post-intervention as a separate assessment module. This approach supports aptitude&#x2013;treatment interaction analyses and enables detection of WM-related change over time. Recommended timing: 35&#x2013;45&#x202F;min per assessment occasion; schedule assessments on separate days from intensive AI learning sessions to avoid fatigue confounds.</p>
</list-item>
<list-item>
<p>Intermittent Micro-Assessment Design. Embed brief WM tasks (e.g., Backward Digit Span, abbreviated n-back) at strategic points within the AI learning sequence&#x2014;for example, at the beginning of every third or fourth session. This approach enables tracking of WM fluctuations and state-dependent performance while minimizing learner burden. Recommended timing: 5&#x2013;10&#x202F;min per micro-assessment; maintain consistent timing relative to AI task engagement.</p>
</list-item>
<list-item>
<p>Triangulated Multi-Method Design. Combine pre-post direct WM assessment with continuous behavioral proxy collection (interaction logs, response latencies, eye-tracking indices) throughout the AI intervention. Analyze convergence and divergence between direct and proxy measures to distinguish task-demand effects from WM-capacity effects. This approach provides the strongest inferential base for understanding whether AI reduces demands, improves utilization, or affects capacity.</p>
</list-item>
</list>
</sec>
<sec id="sec18">
<label>4.5.5</label>
<title>Minimum psychometric and reporting standards</title>
<p>To ensure interpretability and cross-study comparability, AI-mediated WM research should adhere to the following minimum reporting standards:</p>
<list list-type="bullet">
<list-item>
<p>Internal consistency: Report Cronbach&#x2019;s <italic>&#x03B1;</italic> or split-half reliability for all WM tasks (minimum acceptable: <italic>&#x03B1;</italic>&#x202F;&#x2265;&#x202F;0.70).</p>
</list-item>
<list-item>
<p>Validity evidence: Cite validation studies for computerized versions; report correlations with established WM measures if using adapted instruments.</p>
</list-item>
<list-item>
<p>Language considerations: For verbal WM tasks, specify stimulus language, proficiency requirements for sentence/word stimuli, and any adaptations for L2 populations; pilot with target population to ensure appropriate difficulty.</p>
</list-item>
<list-item>
<p>Sample descriptives: Report WM score distributions (means, SDs, ranges) to enable meta-analytic integration and comparison with normative data.</p>
</list-item>
<list-item>
<p>Effect size reporting: Report standardized effect sizes (Cohen&#x2019;s <italic>d</italic>, partial <italic>&#x03B7;</italic><sup>2</sup>) for WM-related main effects and interactions.</p>
</list-item>
</list>
</sec>
<sec id="sec19">
<label>4.5.6</label>
<title>Practical implementation considerations</title>
<p>Timing burden represents a significant practical constraint. Researchers should budget 20&#x2013;45&#x202F;min for comprehensive pre-post assessment or 5&#x2013;10&#x202F;min for intermittent micro-assessments. Practice effects can be mitigated through parallel forms (available for OSPAN, RST, digit span) or adequate inter-assessment intervals (minimum 2&#x2013;3&#x202F;weeks for complex span tasks). For studies involving repeated WM assessment, counterbalancing of parallel forms across participants is essential. Mobile delivery is feasible for most tasks but requires validation of touch-screen response accuracy and careful attention to ambient noise for auditory stimuli. Finally, researchers should consider participant fatigue: avoid scheduling WM assessment immediately after cognitively demanding AI tasks, and monitor for floor or ceiling effects that may indicate inappropriate task difficulty for the sample.</p>
</sec>
</sec>
<sec id="sec20">
<label>4.6</label>
<title>Ethical and risk considerations</title>
<p>The transition to AI-mediated language learning introduces ethical and risk dimensions that extend beyond the equity considerations previously noted and warrant systematic attention from researchers and practitioners. This subsection distinguishes between empirically documented concerns and normative recommendations that, while theoretically grounded, await direct empirical validation in AI-mediated language learning contexts.</p>
<sec id="sec21">
<label>4.6.1</label>
<title>Algorithmic bias and fairness risks</title>
<p>Adaptive and generative AI systems inherit potential biases from their training data and algorithmic design, raising fairness concerns for language learners from diverse linguistic, cultural, and socioeconomic backgrounds. Evidence-supported concern: The concentration of AI-mediated research in Asian EFL contexts (Chinese and Iranian learners) documented in this review indicates that AI systems may be disproportionately optimized for these populations, potentially disadvantaging learners whose L1 backgrounds, learning styles, or cultural contexts differ from those represented in system development and validation samples. Normative recommendation: Researchers should conduct subgroup performance analyses across demographic and linguistic variables (L1 background, proficiency level, gender, socioeconomic status) to detect differential AI effectiveness and report disaggregated outcomes even when overall effects are positive. Until such analyses are routinely conducted, claims about AI&#x2019;s compensatory benefits for &#x201C;lower-WM learners&#x201D; should be interpreted cautiously, as compensation effects may not generalize across all learner subgroups.</p>
</sec>
<sec id="sec22">
<label>4.6.2</label>
<title>Privacy and data governance</title>
<p>AI-mediated language learning systems collect extensive learner data, including interaction logs, response patterns, performance trajectories, and&#x2014;in biometric-adaptive implementations&#x2014;physiological and eye-tracking data (<xref ref-type="bibr" rid="ref65">Yuan, 2025</xref>). Evidence-supported concern: None of the AI-mediated studies in the corpus reported detailed data governance protocols, creating uncertainty about consent procedures, data minimization practices, retention periods, access controls, and policies governing secondary use of learner data. Normative recommendation: Future research should adhere to established data governance principles: (a) informed consent that specifies what data are collected, for what purposes, for how long, and who will have access; (b) data minimization, collecting only data necessary for the stated research or pedagogical purpose; (c) defined retention periods with secure deletion protocols; (d) access controls restricting data to authorized personnel; and (e) explicit policies prohibiting or governing secondary use (e.g., commercial applications, algorithm training beyond the original study). These safeguards are particularly critical when research involves minors or occurs in educational institutions where power asymmetries may compromise voluntary consent.</p>
</sec>
<sec id="sec23">
<label>4.6.3</label>
<title>Transparency and explainability of AI-driven pedagogical decisions</title>
<p>When AI systems make consequential decisions about task difficulty, feedback content, or learning pathways, learners, educators, and researchers have legitimate interests in understanding how and why those decisions are made. Evidence-supported concern: The AI-mediated studies reviewed (biometric-adaptive reading, generative AI writing assistance, AI-orchestrated VR&#x2013;AR instruction) employed proprietary or opaque algorithms whose decision logic was not fully specified, limiting the ability of researchers to replicate findings or of educators to calibrate pedagogical expectations. Normative recommendation: Research reports should document the decision rules or model architectures governing AI adaptations to the extent permitted by proprietary constraints, explain what learner inputs trigger what system responses, and acknowledge transparency limitations explicitly. For classroom implementation, educators should be provided with interpretable summaries of how the AI system is adjusting instruction and why, enabling informed pedagogical oversight rather than blind reliance on algorithmic recommendations.</p>
</sec>
<sec id="sec24">
<label>4.6.4</label>
<title>Biometric data: heightened sensitivity and safeguards</title>
<p>Biometric-adaptive systems such as those employing eye-tracking and physiological monitoring (<xref ref-type="bibr" rid="ref65">Yuan, 2025</xref>) collect uniquely sensitive data that can reveal cognitive states, emotional responses, and attentional patterns beyond what learners may intend to disclose. Evidence-supported concern: While <xref ref-type="bibr" rid="ref65">Yuan (2025)</xref> demonstrated pedagogical benefits of biometric adaptation, the study did not report protocols for biometric data handling, creating uncertainty about whether proportionality (collecting only biometric data necessary for the pedagogical function), opt-out mechanisms (allowing learners to decline biometric monitoring while still accessing instruction), and enhanced security safeguards (encryption, anonymization, restricted access) were implemented. Normative recommendation: Research involving biometric data should apply heightened ethical scrutiny: (a) proportionality assessments demonstrating that biometric collection is necessary and that less intrusive alternatives are insufficient; (b) robust opt-out options that do not penalize learners who decline biometric monitoring; (c) enhanced security protocols including encryption at rest and in transit, anonymization where feasible, and audit trails for data access; and (d) explicit institutional review board (IRB) or ethics committee approval addressing biometric-specific risks. These safeguards are essential for maintaining learner trust and ensuring that the pedagogical benefits of biometric adaptation are not outweighed by privacy intrusions.</p>
</sec>
<sec id="sec25">
<label>4.6.5</label>
<title>Equity and dependency risks</title>
<p>Framing AI as a compensatory tool for learners with lower working-memory capacity invites careful scrutiny of where compensation ends and dependency begins (<xref ref-type="bibr" rid="ref45">Silva Barbosa et al., 2023</xref>). Evidence-supported concern: If key cognitive operations are consistently offloaded to AI, there is a risk that learners with fewer prior advantages become locked into permanently scaffolded trajectories, while their more advantaged peers learn to leverage AI as an amplifier of already strong executive and metacognitive skills. Normative recommendation: Instructional designs should incorporate planned fading of AI support and explicit metacognitive training to prevent dependency, with longitudinal monitoring of whether AI-supported learners develop autonomous competencies comparable to those achieved through less scaffolded pathways.</p>
</sec>
<sec id="sec26">
<label>4.6.6</label>
<title>Reporting standards for ethical transparency</title>
<p>To enable ethical evaluation and cross-study comparison, AI-mediated language learning research should report the following minimum information: (a) data types collected (interaction logs, performance data, biometric signals, demographic variables); (b) stated purpose for each data type; (c) retention period and deletion protocols; (d) access controls specifying who can access identifiable or sensitive data; (e) subgroup performance checks disaggregated by key demographic and linguistic variables; (f) consent procedures, including provisions for minors and institutional contexts; and (g) IRB/ethics committee approval status with any biometric-specific conditions. These reporting standards are normative recommendations derived from established research ethics principles; their routine adoption would substantially improve the field&#x2019;s ability to assess the ethical dimensions of AI-mediated instruction.</p>
</sec>
</sec>
<sec id="sec27">
<label>4.7</label>
<title>Limitations</title>
<p>The comparative analysis juxtaposes two bodies of literature that differ not only in technology but also in time, geography, and methodology. The pre-AI corpus spans more than a decade and includes varied populations, settings, and designs; the AI-mediated studies are clustered within a narrow temporal window, geographically concentrated in only two national contexts (China and Iran) and uniformly situated in EFL learning settings (<xref ref-type="table" rid="tab7">Table 7</xref>). Observed differences may therefore reflect broader shifts in educational practice, demographics, or research norms rather than technology alone, and claims about cross-context generalizability should be treated as provisional until tested via explicit moderator and replication designs.</p>
<table-wrap position="float" id="tab7">
<label>Table 7</label>
<caption>
<p>Geographic distribution, L1 background, educational setting, and AI tool types in the AI-mediated cluster (<italic>k</italic>&#x202F;=&#x202F;4; total <italic>N</italic>&#x202F;=&#x202F;1,322).</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">Dimension</th>
<th align="left" valign="top">Category</th>
<th align="center" valign="top">k (studies)</th>
<th align="center" valign="top">Total N</th>
<th align="left" valign="top">Notes/examples (from included primary studies)</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="middle" rowspan="2">Geographic context</td>
<td align="left" valign="middle">China</td>
<td align="center" valign="middle">3</td>
<td align="center" valign="middle">1,229</td>
<td align="left" valign="middle"><xref ref-type="bibr" rid="ref64">Yu (2025)</xref>; <xref ref-type="bibr" rid="ref65">Yuan (2025)</xref>; <xref ref-type="bibr" rid="ref14">Fan and Yao (2025)</xref></td>
</tr>
<tr>
<td align="left" valign="middle">Iran</td>
<td align="center" valign="middle">1</td>
<td align="center" valign="middle">93</td>
<td align="left" valign="middle">
<xref ref-type="bibr" rid="ref13">Esfandiari and Allaf-Akbary (2024)</xref>
</td>
</tr>
<tr>
<td align="left" valign="middle" rowspan="2">Learner L1 background</td>
<td align="left" valign="middle">Chinese L1 (variety not reported; inferred from study context)</td>
<td align="center" valign="middle">3</td>
<td align="center" valign="middle">1,229</td>
<td align="left" valign="middle">All three China-based EFL samples</td>
</tr>
<tr>
<td align="left" valign="middle">Iran-based EFL sample (L1 not explicitly reported; context implies Persian/Farsi)</td>
<td align="center" valign="middle">1</td>
<td align="center" valign="middle">93</td>
<td align="left" valign="middle">Iranian advanced EFL sample</td>
</tr>
<tr>
<td align="left" valign="middle" rowspan="3">Educational setting</td>
<td align="left" valign="middle">EFL (all studies)</td>
<td align="center" valign="middle">4</td>
<td align="center" valign="middle">1,322</td>
<td align="left" valign="middle">No primary AI-mediated studies in ESL settings in this corpus</td>
</tr>
<tr>
<td align="left" valign="middle">University/tertiary explicitly reported</td>
<td align="center" valign="middle">1</td>
<td align="center" valign="middle">546</td>
<td align="left" valign="middle">
<xref ref-type="bibr" rid="ref14">Fan and Yao (2025)</xref>
</td>
</tr>
<tr>
<td align="left" valign="middle">Institutional EFL; level not clearly specified</td>
<td align="center" valign="middle">3</td>
<td align="center" valign="middle">776</td>
<td align="left" valign="middle"><xref ref-type="bibr" rid="ref64">Yu (2025)</xref>; <xref ref-type="bibr" rid="ref65">Yuan (2025)</xref>; <xref ref-type="bibr" rid="ref13">Esfandiari and Allaf-Akbary (2024)</xref></td>
</tr>
<tr>
<td align="left" valign="middle" rowspan="4">AI tool types <italic>(non-mutually exclusive)</italic></td>
<td align="left" valign="middle">Generative LLM/chatbot-mediated learning</td>
<td align="center" valign="middle">3</td>
<td align="center" valign="middle">1,022</td>
<td align="left" valign="middle">ChatGPT-4; DeepSeek V3.1; Microsoft Copilot (GPT-4-based)</td>
</tr>
<tr>
<td align="left" valign="middle">Biometric-adaptive reading platform</td>
<td align="center" valign="middle">1</td>
<td align="center" valign="middle">300</td>
<td align="left" valign="middle">Smart Sparrow + eye-tracking/physiological indicators</td>
</tr>
<tr>
<td align="left" valign="middle">VR&#x2013;AR multimodal instruction with AI orchestration</td>
<td align="center" valign="middle">1</td>
<td align="center" valign="middle">383</td>
<td align="left" valign="middle">Unity/Oculus/MediaPipe + ChatGPT-4</td>
</tr>
<tr>
<td align="left" valign="middle">AI-mediated intelligent DDL</td>
<td align="center" valign="middle">1</td>
<td align="center" valign="middle">93</td>
<td align="left" valign="middle">Copilot-based DDL vs. AntConc-based DDL</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<p>Categories are coded from the four included primary AI-mediated studies in this review; in several cases, L1 and educational level are not explicitly reported and are therefore indicated as inferred/unspecified to make reporting gaps transparent. Tool-type categories can overlap within the same study.</p>
</table-wrap-foot>
</table-wrap>
<p><xref ref-type="table" rid="tab7">Table 7</xref> clarifies that the AI-mediated evidence base in the present corpus is geographically and culturally narrow, with the large majority of primary evidence drawn from Chinese EFL learners and the remainder from an Iranian EFL sample. This concentration matters because AI&#x2013;WM effects are plausibly moderated by (a) educational culture and assessment norms (e.g., exam orientation, academic-integrity policy, and the evaluative stakes attached to AI-assisted writing), (b) differential technology access and institutional constraints on AI availability (device access, connectivity reliability, platform restrictions), (c) L1 typology and orthographic depth (which shape baseline decoding/encoding demands and therefore the relative burden on phonological-loop vs. visuospatial resources), and (d) classroom ecology (teacher mediation, class size, peer-collaboration norms, and the extent to which AI use is coached vs. left implicit). Accordingly, the present synthesis should be interpreted as strongest for comparable EFL contexts and should not assume transportability to ESL environments or underrepresented linguistic communities without explicit tests of these moderators.</p>
</sec>
<sec id="sec28">
<label>4.8</label>
<title>Future directions</title>
<p>Several lines of inquiry emerge as priorities for advancing understanding of working memory in AI-mediated language learning; however, progress now depends less on additional conceptual claims and more on a reproducible methodological program that can adjudicate among competing mechanisms (task-demand reduction, functional WM utilization, and durable WM change). Accordingly, future work should replace general calls with a methodological blueprint specifying (a) recommended designs, (b) minimum measurement sets, (c) analysis expectations, and (d) reporting standards that collectively enable cumulative, comparable evidence across AI tools, skills, and learning ecologies.</p>
<p>Recommended designs. Future studies should prioritize randomized controlled trials (RCTs) that incorporate baseline WM stratification and aptitude&#x2013;treatment interaction (ATI) logic. At minimum, trials should (i) administer validated WM measures at baseline and use blocked/stratified randomization to balance WM distributions across conditions (or prespecify WM&#x202F;&#x00D7;&#x202F;condition moderation as the primary ATI test), (ii) include an active comparator rather than &#x201C;no-treatment&#x201D; controls (e.g., non-AI digital support or teacher-guided support matched for time-on-task), and (iii) implement longitudinal measurement with pretest, immediate posttest, and delayed follow-up (e.g., 4&#x2013;8&#x202F;weeks) to distinguish short-term performance effects from retention and transfer. Where interventions are delivered in intact classes or schools, cluster RCTs or classroom-embedded individual randomization should be used with analytic correction for nesting. Where the research question concerns immediate interface-level regulation (e.g., adaptive vs. non-adaptive reading), crossover/within-subject designs are appropriate, provided that order effects and carryover are mitigated through counterbalancing and washout periods.</p>
<p>Minimum measurement set. To close the measurement paradox identified in the AI literature, AI-mediated studies should adopt a minimum measurement bundle that includes (1) direct WM tasks, (2) language outcomes aligned to the targeted skill, and (3) process data that capture how learners allocate effort in human&#x2013;AI interaction. For WM, studies should include at least one validated verbal complex-span index and one brief manipulation/updating index (with parallel forms where feasible), administered at baseline and, when claims involve WM change, repeated post-intervention and at delayed follow-up (see <xref ref-type="table" rid="tab6">Table 6</xref> for validated computerized options). For outcomes, studies should include skill-appropriate performance measures (e.g., writing quality/accuracy/complexity indices for AI-assisted writing; comprehension and inference measures for adaptive reading; vocabulary form&#x2013;meaning mapping and delayed retention for VR&#x2013;AR multimodal instruction), with immediate and delayed tests to quantify retention. For process data, studies should minimally capture interaction traces (e.g., prompt sequences, revision histories/keystroke logs, time-on-task, response latencies), supplemented where feasible by validated cognitive-load instruments and/or physiological proxies (e.g., eye-tracking indices in adaptive reading) to distinguish load reduction from load redistribution.</p>
<p>Analysis expectations. Analyses should be planned explicitly to test ATI/moderation and ecological nesting. WM should be treated as a continuous moderator when possible (to avoid information loss), with prespecified WM&#x202F;&#x00D7;&#x202F;condition interaction terms as primary tests of instructional fit. In classroom/ecological implementations, multilevel models should be used to account for nesting (learners within classes/teachers/schools) and repeated measures (pre/post/follow-up). Where AI is hypothesized to operate via load redistribution, mediation models should be prespecified to test whether process indicators (e.g., prompt-management burden, revision depth, fixation/regression patterns) explain AI effects on language outcomes, while separating these pathways from baseline proficiency and motivation. Missing data and attrition should be handled transparently (e.g., mixed-effects models with maximum likelihood, sensitivity analyses, and/or multiple imputation), and primary analyses should follow intention-to-treat principles to avoid inflation of AI effects through selective compliance.</p>
<p>Reporting standards for reproducibility and risk-of-bias control. Reports should document randomization procedures (unit of randomization, allocation method, and concealment where applicable), comparator conditions (content, time-on-task, and instructor involvement), attrition and adherence (with reasons and condition-wise flow), and prespecified outcomes/moderators (with preregistration when feasible). Because AI systems change rapidly, intervention fidelity must include AI-specific transparency: the tool name, model/version (or release date), access mode (web/app/API), prompting constraints (templates, guardrails), and the exact scaffolds given to learners (e.g., evaluation checklists, prompt training). When human ratings are used (e.g., writing quality), rater blinding to condition and reliability indices should be reported. Finally, risk-of-bias safeguards should be explicit (baseline equivalence checks, contamination controls between conditions, and protocol deviations), enabling credible cross-study synthesis.</p>
<p>Exemplar protocol 1 (ATI-stratified classroom RCT: generative AI-assisted writing). A next-step study could recruit multiple intact EFL classes across at least two institutions to ensure ecological validity. Learners would complete a baseline WM battery (verbal complex span plus a brief manipulation/updating task) and a baseline writing assessment. Within each class, learners would be stratified by baseline WM (e.g., tertiles or continuous blocking) and then randomized to (a) generative AI-assisted writing with an explicit metacognitive evaluation routine (prompt-planning + output-verification checklist) or (b) a matched non-AI digital writing support condition (e.g., teacher-provided exemplars and feedback cycles matched for time-on-task). Instruction would run for 6&#x2013;8&#x202F;weeks with standardized task prompts and equivalent writing opportunities. Outcomes would include immediate posttest writing performance and a delayed follow-up writing task, while process data would include prompt sequences, revision histories/keystroke logs, and a validated cognitive-load profile during writing. The primary analysis would be a multilevel longitudinal model (learners nested within classes) testing time&#x202F;&#x00D7;&#x202F;condition effects and the prespecified WM&#x202F;&#x00D7;&#x202F;condition ATI interaction; secondary analyses would test whether process measures (e.g., revision depth, prompt-management burden) mediate condition effects on writing outcomes, clarifying whether AI benefits operate through demand reduction or load redistribution.</p>
<p>Exemplar protocol 2 (randomized crossover: biometric-adaptive vs. non-adaptive AI reading). To isolate preemptive regulation effects while controlling stable individual differences, a crossover design could assign learners to two counterbalanced sequences: (A) biometric-adaptive AI reading for 2&#x2013;3&#x202F;weeks followed by non-adaptive AI reading for 2&#x2013;3&#x202F;weeks, or (B) the reverse order, with a short washout period and matched text difficulty bands. Baseline WM would be measured prior to the first phase, and reading comprehension plus delayed retention would be assessed at the end of each phase. Process measures would include eye-tracking/reading-time indicators and validated cognitive-load subscales captured repeatedly during reading sessions. Analysis would use mixed-effects models including condition, period, and order effects, with WM as a prespecified moderator to test whether preemptive adaptation disproportionately benefits lower-WM learners (a compensatory hypothesis) or instead shifts burden toward executive monitoring uniformly. This design would directly operationalize the causal question that current AI studies leave unresolved: whether improved performance reflects reduced task demands, improved WM utilization, or WM-linked differential responsiveness to adaptive regulation.</p>
</sec>
</sec>
<sec sec-type="conclusions" id="sec29">
<label>5</label>
<title>Conclusion</title>
<p>This review has traced the evolution of working memory as a central construct in technology-enhanced language learning across two technological epochs. In the pre-AI era, working memory functioned as a hard constraint: an internal bottleneck around which well-designed systems attempted to navigate. In the emerging AI-mediated era, it becomes part of a distributed, dynamically regulated system in which cognitive demands can be sensed, reallocated, and, potentially, reshaped through continuous interaction with intelligent tools.</p>
<p>The analysis suggests that AI&#x2019;s most consequential contribution is not simply its capacity to deliver content more efficiently, but its ability to reorganize the division of cognitive labor between human and machine. By offloading some processes and intensifying others, AI forces a reconceptualization of what it means to design, teach, and learn in ways that are sensitive to working-memory limits and possibilities. At the same time, the methodological apparatus of the field has not yet caught up with these conceptual advances. Without systematic, direct measurement of working memory and rigorous tests of how it interacts with AI affordances, claims about compensation, optimization, and training will remain aspirational.</p>
<p>The challenge, then, is twofold. Conceptually, the field must embrace a view of learning in which working memory is neither a fixed liability to be accommodated nor a simple trait to be correlated, but a dynamic resource that co-evolves with the tools through which learners engage with language. Methodologically, it must commit to an empirical program capable of illuminating the hidden dynamics of this human&#x2013;AI collaboration. Only by holding these two commitments together can AI-mediated language learning move from speculative promise to evidence-based practice&#x2014;and, in doing so, transform working memory from a limiting boundary into a central site of educational design and innovation.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="sec30">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/<xref ref-type="supplementary-material" rid="SM1">Supplementary material</xref>; further inquiries can be directed to the corresponding author.</p>
</sec>
<sec sec-type="author-contributions" id="sec31">
<title>Author contributions</title>
<p>XD: Writing &#x2013; review &#x0026; editing, Methodology, Formal analysis, Validation, Project administration, Data curation, Writing &#x2013; original draft, Resources, Investigation, Conceptualization.</p>
</sec>
<sec sec-type="COI-statement" id="sec32">
<title>Conflict of interest</title>
<p>The author(s) declared that this work was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="ai-statement" id="sec33">
<title>Generative AI statement</title>
<p>The author(s) declared that Generative AI was not used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p>
</sec>
<sec sec-type="disclaimer" id="sec34">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec sec-type="supplementary-material" id="sec35">
<title>Supplementary material</title>
<p>The Supplementary material for this article can be found online at: <ext-link xlink:href="https://www.frontiersin.org/articles/10.3389/fpsyg.2026.1758104/full#supplementary-material" ext-link-type="uri">https://www.frontiersin.org/articles/10.3389/fpsyg.2026.1758104/full#supplementary-material</ext-link></p>
<supplementary-material xlink:href="Supplementary_file_1.docx" id="SM1" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document"/>
</sec>
<ref-list>
<title>References</title>
<ref id="ref1"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Apps</surname><given-names>T.</given-names></name> <name><surname>Beckman</surname><given-names>K.</given-names></name> <name><surname>Bennett</surname><given-names>S.</given-names></name> <name><surname>Dalgarno</surname><given-names>B.</given-names></name> <name><surname>Kennedy</surname><given-names>G.</given-names></name> <name><surname>Lockyer</surname><given-names>L.</given-names></name></person-group> (<year>2019</year>). <article-title>The role of social cues in supporting students to overcome challenges in online multi-stage assignments</article-title>. <source>Internet High. Educ.</source> <volume>42</volume>, <fpage>25</fpage>&#x2013;<lpage>33</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.iheduc.2019.03.004</pub-id></mixed-citation></ref>
<ref id="ref2"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Aryadoust</surname><given-names>V.</given-names></name></person-group> (<year>2020</year>). <article-title>Dynamics of item reading and answer changing in two hearings in a computerized while-listening performance test: an eye-tracking study</article-title>. <source>Comput. Assist. Lang. Learn.</source> <volume>33</volume>, <fpage>510</fpage>&#x2013;<lpage>537</lpage>. doi: <pub-id pub-id-type="doi">10.1080/09588221.2019.1574267</pub-id></mixed-citation></ref>
<ref id="ref3"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Baddeley</surname><given-names>A.</given-names></name></person-group> (<year>2000</year>). <article-title>The episodic buffer: a new component of working memory?</article-title> <source>Trends Cogn. Sci.</source> <volume>4</volume>, <fpage>417</fpage>&#x2013;<lpage>423</lpage>. doi: <pub-id pub-id-type="doi">10.1016/S1364-6613(00)01538-2</pub-id>, <pub-id pub-id-type="pmid">11058819</pub-id></mixed-citation></ref>
<ref id="ref4"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Baralt</surname><given-names>M.</given-names></name></person-group> (<year>2015</year>). &#x201C;<article-title>Working memory capacity, cognitive complexity, and L2 recasts in online language teaching</article-title>&#x201D; in <source>Working memory in second language acquisition and processing</source>. eds. <person-group person-group-type="editor"><name><surname>Wen</surname><given-names>Z.</given-names></name> <name><surname>Mota</surname><given-names>M.</given-names></name> <name><surname>McNeill</surname><given-names>A.</given-names></name></person-group> (<publisher-loc>Bristol</publisher-loc>: <publisher-name>Multilingual Matters</publisher-name>), <fpage>248</fpage>&#x2013;<lpage>269</lpage>. doi: <pub-id pub-id-type="doi">10.21832/9781783093595-018</pub-id></mixed-citation></ref>
<ref id="ref5"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Cevik</surname><given-names>V.</given-names></name> <name><surname>Altun</surname><given-names>A.</given-names></name></person-group> (<year>2016</year>). <article-title>Roles of working memory performance and instructional strategy in complex cognitive task performance</article-title>. <source>J. Comput. Assist. Learn.</source> <volume>32</volume>, <fpage>594</fpage>&#x2013;<lpage>606</lpage>. doi: <pub-id pub-id-type="doi">10.1111/jcal.12156</pub-id></mixed-citation></ref>
<ref id="ref6"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Chalmers</surname><given-names>J.</given-names></name> <name><surname>Eisenchlas</surname><given-names>S. A.</given-names></name> <name><surname>Munro</surname><given-names>A.</given-names></name> <name><surname>Schalley</surname><given-names>A. C.</given-names></name></person-group> (<year>2021</year>). <article-title>Sixty years of second language aptitude research: a systematic quantitative literature review</article-title>. <source>Lang. Linguist. Compass</source> <volume>15</volume>:<fpage>e12440</fpage>. doi: <pub-id pub-id-type="doi">10.1111/lnc3.12440</pub-id></mixed-citation></ref>
<ref id="ref7"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Chan</surname><given-names>G. L.</given-names></name> <name><surname>Santally</surname><given-names>M. I.</given-names></name> <name><surname>Whitehead</surname><given-names>J.</given-names></name></person-group> (<year>2022</year>). <article-title>Gamification as technology enabler in SEN and DHH education</article-title>. <source>Educ. Inf. Technol.</source> <volume>27</volume>, <fpage>9031</fpage>&#x2013;<lpage>9064</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s10639-022-10984-y</pub-id>, <pub-id pub-id-type="pmid">35345601</pub-id></mixed-citation></ref>
<ref id="ref8"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Chen</surname><given-names>L.</given-names></name> <name><surname>Chen</surname><given-names>P.</given-names></name> <name><surname>Lin</surname><given-names>Z.</given-names></name></person-group> (<year>2020</year>). <article-title>Artificial intelligence in education: a review</article-title>. <source>IEEE Access</source> <volume>8</volume>, <fpage>75264</fpage>&#x2013;<lpage>75278</lpage>. doi: <pub-id pub-id-type="doi">10.1109/ACCESS.2020.2988510</pub-id></mixed-citation></ref>
<ref id="ref9"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Cosentino</surname><given-names>G.</given-names></name> <name><surname>Giannakos</surname><given-names>M.</given-names></name></person-group> (<year>2023</year>). <article-title>Multisensory interaction and analytics to enhance smart learning environments: a systematic literature review</article-title>. <source>IEEE Trans. Learn. Technol.</source> <volume>16</volume>, <fpage>414</fpage>&#x2013;<lpage>430</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TLT.2023.3243210</pub-id></mixed-citation></ref>
<ref id="ref10"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Cowan</surname><given-names>N.</given-names></name></person-group> (<year>2001</year>). <article-title>The magical number 4 in short-term memory: a reconsideration of mental storage capacity</article-title>. <source>Behav. Brain Sci.</source> <volume>24</volume>, <fpage>87</fpage>&#x2013;<lpage>114</lpage>. doi: <pub-id pub-id-type="doi">10.1017/S0140525X01003922</pub-id>, <pub-id pub-id-type="pmid">11515286</pub-id></mixed-citation></ref>
<ref id="ref11"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Denhovska</surname><given-names>N.</given-names></name> <name><surname>Serratrice</surname><given-names>L.</given-names></name> <name><surname>Payne</surname><given-names>J.</given-names></name></person-group> (<year>2016</year>). <article-title>Acquisition of second language grammar under incidental learning conditions: the role of frequency and working memory</article-title>. <source>Lang. Learn.</source> <volume>66</volume>, <fpage>159</fpage>&#x2013;<lpage>190</lpage>. doi: <pub-id pub-id-type="doi">10.1111/lang.12142</pub-id></mixed-citation></ref>
<ref id="ref12"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>D&#x00F6;rnyei</surname><given-names>Z.</given-names></name> <name><surname>Ushioda</surname><given-names>E.</given-names></name></person-group> (<year>2021</year>). <source>Teaching and researching motivation</source> <publisher-loc>New York</publisher-loc>: <publisher-name>Routledge</publisher-name>.</mixed-citation></ref>
<ref id="ref13"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Esfandiari</surname><given-names>R.</given-names></name> <name><surname>Allaf-Akbary</surname><given-names>O.</given-names></name></person-group> (<year>2024</year>). <article-title>Assessing interactional metadiscourse in EFL writing through intelligent data-driven learning: the Microsoft copilot in the spotlight</article-title>. <source>Lang. Test. Asia</source> <volume>14</volume>:<fpage>51</fpage>. doi: <pub-id pub-id-type="doi">10.1186/s40468-024-00326-9</pub-id></mixed-citation></ref>
<ref id="ref14"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Fan</surname><given-names>L.</given-names></name> <name><surname>Yao</surname><given-names>G.</given-names></name></person-group> (<year>2025</year>). <article-title>Cognitive load scale for AI-assisted L2 writing: scale development and validation</article-title>. <source>Front. Psychol.</source> <volume>16</volume>:<fpage>1666974</fpage>. doi: <pub-id pub-id-type="doi">10.3389/fpsyg.2025.1666974</pub-id>, <pub-id pub-id-type="pmid">41245310</pub-id></mixed-citation></ref>
<ref id="ref15"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Gathercole</surname><given-names>S.</given-names></name> <name><surname>Alloway</surname><given-names>T. P.</given-names></name></person-group> (<year>2008</year>). <source>Working memory and learning: a practical guide for teachers</source> <publisher-loc>London</publisher-loc>: <publisher-name>Sage</publisher-name>.</mixed-citation></ref>
<ref id="ref16"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Gkintoni</surname><given-names>E.</given-names></name> <name><surname>Vassilopoulos</surname><given-names>S. P.</given-names></name> <name><surname>Nikolaou</surname><given-names>G.</given-names></name></person-group> (<year>2025</year>). <article-title>Brain-inspired multisensory learning: a systematic review of neuroplasticity and cognitive outcomes in adult multicultural and second language acquisition</article-title>. <source>Biomimetics</source> <volume>10</volume>:<fpage>397</fpage>. doi: <pub-id pub-id-type="doi">10.3390/biomimetics10060397</pub-id>, <pub-id pub-id-type="pmid">40558367</pub-id></mixed-citation></ref>
<ref id="ref17"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Goh</surname><given-names>C. C. M.</given-names></name> <name><surname>Aryadoust</surname><given-names>V.</given-names></name></person-group> (<year>2025</year>). <article-title>Developing and assessing second language listening and speaking: does AI make it better?</article-title> <source>Annu. Rev. Appl. Linguist.</source> <volume>45</volume>, <fpage>179</fpage>&#x2013;<lpage>199</lpage>. doi: <pub-id pub-id-type="doi">10.1017/S0267190525100111</pub-id></mixed-citation></ref>
<ref id="ref18"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Golonka</surname><given-names>E. M.</given-names></name> <name><surname>Bowles</surname><given-names>A. R.</given-names></name> <name><surname>Frank</surname><given-names>V. M.</given-names></name> <name><surname>Richardson</surname><given-names>D. L.</given-names></name> <name><surname>Freynik</surname><given-names>S.</given-names></name></person-group> (<year>2014</year>). <article-title>Technologies for foreign language learning: a review of technology types and their effectiveness</article-title>. <source>Comput. Assist. Lang. Learn.</source> <volume>27</volume>, <fpage>70</fpage>&#x2013;<lpage>105</lpage>. doi: <pub-id pub-id-type="doi">10.1080/09588221.2012.700315</pub-id></mixed-citation></ref>
<ref id="ref19"><mixed-citation publication-type="other"><person-group person-group-type="author"><name><surname>Higgins</surname><given-names>J. P. T.</given-names></name> <name><surname>Savovi&#x0107;</surname><given-names>J.</given-names></name> <name><surname>Page</surname><given-names>M. J.</given-names></name> <name><surname>Elbers</surname><given-names>R. G.</given-names></name> <name><surname>Sterne</surname><given-names>J. A. C.</given-names></name></person-group> (<year>2021</year>). <article-title>Assessing risk of bias in a randomized trial</article-title>. In <person-group person-group-type="editor"><name><surname>Higgins</surname><given-names>J. P. T.</given-names></name> <name><surname>Thomas</surname><given-names>J.</given-names></name> <name><surname>Chandler</surname><given-names>J.</given-names></name> <name><surname>Cumpston</surname><given-names>M.</given-names></name> <name><surname>Li</surname><given-names>T.</given-names></name> <name><surname>Page</surname><given-names>M. J.</given-names></name> <etal/></person-group>. (Eds.), <source>Cochrane handbook for systematic reviews of interventions (version 6.2). Cochrane</source>. Available online at: <ext-link xlink:href="https://training.cochrane.org/handbook" ext-link-type="uri">https://training.cochrane.org/handbook</ext-link> (Accessed June 15, 2024)</mixed-citation></ref>
<ref id="ref20"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hong</surname><given-names>J. C.</given-names></name> <name><surname>Hwang</surname><given-names>M. Y.</given-names></name> <name><surname>Tai</surname><given-names>K. H.</given-names></name> <name><surname>Lin</surname><given-names>P. H.</given-names></name></person-group> (<year>2021</year>). <article-title>The effects of intrinsic cognitive load and gameplay interest on flow experience reflecting performance progress in a Chinese remote association game</article-title>. <source>Comput. Assist. Lang. Learn.</source> <volume>34</volume>, <fpage>358</fpage>&#x2013;<lpage>378</lpage>. doi: <pub-id pub-id-type="doi">10.1080/09588221.2019.1614068</pub-id></mixed-citation></ref>
<ref id="ref21"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Huffman</surname><given-names>W. B.</given-names></name> <name><surname>Hahn</surname><given-names>S.</given-names></name></person-group> (<year>2017</year>). <article-title>Investigating optimal memory enhancement procedures in foreign language learning</article-title>. <source>Appl. Cogn. Psychol.</source> <volume>31</volume>, <fpage>539</fpage>&#x2013;<lpage>545</lpage>. doi: <pub-id pub-id-type="doi">10.1002/acp.3351</pub-id></mixed-citation></ref>
<ref id="ref22"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hwang</surname><given-names>G. J.</given-names></name> <name><surname>Sung</surname><given-names>H. Y.</given-names></name> <name><surname>Hung</surname><given-names>C. M.</given-names></name> <name><surname>Huang</surname><given-names>I.</given-names></name> <name><surname>Tsai</surname><given-names>C. C.</given-names></name></person-group> (<year>2012</year>). <article-title>Development of a personalized educational computer game based on students&#x2019; learning styles</article-title>. <source>Educ. Technol. Res. Dev.</source> <volume>60</volume>, <fpage>623</fpage>&#x2013;<lpage>638</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s11423-012-9241-x</pub-id></mixed-citation></ref>
<ref id="ref23"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hwang</surname><given-names>G. J.</given-names></name> <name><surname>Wu</surname><given-names>P. H.</given-names></name> <name><surname>Zhuang</surname><given-names>Y. Y.</given-names></name> <name><surname>Huang</surname><given-names>Y. M.</given-names></name></person-group> (<year>2013</year>). <article-title>Effects of the inquiry-based mobile learning model on the cognitive load and learning achievement of students</article-title>. <source>Interact. Learn. Environ.</source> <volume>21</volume>, <fpage>338</fpage>&#x2013;<lpage>354</lpage>. doi: <pub-id pub-id-type="doi">10.1080/10494820.2011.575789</pub-id></mixed-citation></ref>
<ref id="ref24"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ionescu</surname><given-names>T.</given-names></name> <name><surname>Vasc</surname><given-names>D.</given-names></name></person-group> (<year>2014</year>). <article-title>Embodied cognition: challenges for psychology and education</article-title>. <source>Procedia. Soc. Behav. Sci.</source> <volume>128</volume>, <fpage>275</fpage>&#x2013;<lpage>280</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.sbspro.2014.03.156</pub-id></mixed-citation></ref>
<ref id="ref25"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Jiang</surname><given-names>D.</given-names></name> <name><surname>Renandya</surname><given-names>W. A.</given-names></name> <name><surname>Zhang</surname><given-names>L. J.</given-names></name></person-group> (<year>2017</year>). <article-title>Evaluating ELT multimedia courseware from the perspective of cognitive theory of multimedia learning</article-title>. <source>Comput. Assist. Lang. Learn.</source> <volume>30</volume>, <fpage>726</fpage>&#x2013;<lpage>744</lpage>. doi: <pub-id pub-id-type="doi">10.1080/09588221.2017.1359187</pub-id></mixed-citation></ref>
<ref id="ref26"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Johnson</surname><given-names>D. W.</given-names></name> <name><surname>Johnson</surname><given-names>R. T.</given-names></name> <name><surname>Smith</surname><given-names>K. A.</given-names></name></person-group> (<year>2014</year>). <article-title>Cooperative learning: improving university instruction by basing practice on validated theory</article-title>. <source>J. Excell. Univ. Teach.</source> <volume>25</volume>, <fpage>1</fpage>&#x2013;<lpage>26</lpage>.</mixed-citation></ref>
<ref id="ref27"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Jones</surname><given-names>A.</given-names></name> <name><surname>Kukulska-Hulme</surname><given-names>A.</given-names></name> <name><surname>Norris</surname><given-names>L.</given-names></name> <name><surname>Gaved</surname><given-names>M.</given-names></name> <name><surname>Scanlon</surname><given-names>E.</given-names></name> <name><surname>Jones</surname><given-names>J.</given-names></name> <etal/></person-group>. (<year>2017</year>). <article-title>Supporting immigrant language learning on smartphones: a field trial</article-title>. <source>Stud. Educ. Adults</source> <volume>49</volume>, <fpage>228</fpage>&#x2013;<lpage>252</lpage>. doi: <pub-id pub-id-type="doi">10.1080/02660830.2018.1463655</pub-id></mixed-citation></ref>
<ref id="ref28"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kessels</surname><given-names>R. P. C.</given-names></name> <name><surname>van Zandvoort</surname><given-names>M. J. E.</given-names></name> <name><surname>Postma</surname><given-names>A.</given-names></name> <name><surname>Kappelle</surname><given-names>L. J.</given-names></name> <name><surname>de Haan</surname><given-names>E. H. F.</given-names></name></person-group> (<year>2000</year>). <article-title>The Corsi block-tapping task: standardization and normative data</article-title>. <source>Appl. Neuropsychol.</source> <volume>7</volume>, <fpage>252</fpage>&#x2013;<lpage>258</lpage>. doi: <pub-id pub-id-type="doi">10.1207/S15324826AN0704_8</pub-id>, <pub-id pub-id-type="pmid">11296689</pub-id></mixed-citation></ref>
<ref id="ref29"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kozan</surname><given-names>K.</given-names></name> <name><surname>Er&#x00E7;etin</surname><given-names>G.</given-names></name> <name><surname>Richardson</surname><given-names>J. C.</given-names></name></person-group> (<year>2015</year>). <article-title>Input modality and working memory: effects on second language text comprehension in a multimedia learning environment</article-title>. <source>System</source> <volume>55</volume>, <fpage>63</fpage>&#x2013;<lpage>73</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.system.2015.09.001</pub-id></mixed-citation></ref>
<ref id="ref30"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kukulska-Hulme</surname><given-names>A.</given-names></name></person-group> (<year>2019</year>). <article-title>Mobile language learning innovation inspired by migrants</article-title>. <source>J. Learn. Dev.</source> <volume>6</volume>, <fpage>1</fpage>&#x2013;<lpage>15</lpage>. doi: <pub-id pub-id-type="doi">10.56059/jl4d.v6i2.349</pub-id></mixed-citation></ref>
<ref id="ref31"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Lee</surname><given-names>Y. H.</given-names></name></person-group> (<year>2021</year>). <article-title>Scaffolding university students&#x2019; epistemic cognition during multimodal multiple-document reading: the effects of the epistemic prompting and the automated reflection report</article-title>. <source>Internet High. Educ.</source> <volume>49</volume>:<fpage>100777</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.iheduc.2020.100777</pub-id></mixed-citation></ref>
<ref id="ref32"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Li</surname><given-names>S.</given-names></name></person-group> (<year>2023</year>). <article-title>Working memory and second language writing: a systematic review</article-title>. <source>Stud. Second. Lang. Acquis.</source> <volume>45</volume>, <fpage>647</fpage>&#x2013;<lpage>679</lpage>. doi: <pub-id pub-id-type="doi">10.1017/S0272263123000189</pub-id></mixed-citation></ref>
<ref id="ref33"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Lin</surname><given-names>V.</given-names></name> <name><surname>Liu</surname><given-names>G. Z.</given-names></name> <name><surname>Chen</surname><given-names>N. S.</given-names></name></person-group> (<year>2022</year>). <article-title>The effects of an augmented-reality ubiquitous writing application: a comparative pilot project for enhancing EFL writing instruction</article-title>. <source>Comput. Assist. Lang. Learn.</source> <volume>35</volume>, <fpage>989</fpage>&#x2013;<lpage>1030</lpage>. doi: <pub-id pub-id-type="doi">10.1080/09588221.2020.1770291</pub-id></mixed-citation></ref>
<ref id="ref34"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Lobin</surname><given-names>H.</given-names></name> <name><surname>R&#x00F6;sler</surname><given-names>D.</given-names></name></person-group> (<year>2012</year>). &#x201C;<article-title>Tutoring systems and computer-assisted language learning (CALL)</article-title>&#x201D; in <source>Handbook of technical communication</source> (<publisher-loc>Berlin</publisher-loc>: <publisher-name>Walter de Gruyter</publisher-name>), <fpage>571</fpage>&#x2013;<lpage>589</lpage>.</mixed-citation></ref>
<ref id="ref35"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Lusk</surname><given-names>D. L.</given-names></name> <name><surname>Evans</surname><given-names>A. D.</given-names></name> <name><surname>Jeffrey</surname><given-names>T. R.</given-names></name> <name><surname>Palmer</surname><given-names>K. R.</given-names></name> <name><surname>Wikstrom</surname><given-names>C. S.</given-names></name> <name><surname>Doolittle</surname><given-names>P. E.</given-names></name></person-group> (<year>2009</year>). <article-title>Multimedia learning and individual differences: mediating the effects of working memory capacity with segmentation</article-title>. <source>Br. J. Educ. Technol.</source> <volume>40</volume>, <fpage>636</fpage>&#x2013;<lpage>651</lpage>. doi: <pub-id pub-id-type="doi">10.1111/j.1467-8535.2008.00848.x</pub-id></mixed-citation></ref>
<ref id="ref36"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Makransky</surname><given-names>G.</given-names></name> <name><surname>Thisgaard</surname><given-names>M. W.</given-names></name> <name><surname>Gadegaard</surname><given-names>H.</given-names></name></person-group> (<year>2016</year>). <article-title>Virtual simulations as preparation for lab exercises: assessing learning of key laboratory skills in microbiology and improvement of essential non-cognitive skills</article-title>. <source>PLoS One</source> <volume>11</volume>:<fpage>e0155895</fpage>. doi: <pub-id pub-id-type="doi">10.1371/journal.pone.0155895</pub-id>, <pub-id pub-id-type="pmid">27253395</pub-id></mixed-citation></ref>
<ref id="ref37"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Mayer</surname><given-names>R. E.</given-names></name></person-group> (<year>2014</year>). &#x201C;<article-title>Introduction to multimedia learning</article-title>&#x201D; in <source>The Cambridge handbook of multimedia learning</source>. ed. <person-group person-group-type="editor"><name><surname>Mayer</surname><given-names>R. E.</given-names></name></person-group> (<publisher-loc>Cambridge</publisher-loc>: <publisher-name>Cambridge University Press</publisher-name>), <fpage>1</fpage>&#x2013;<lpage>24</lpage>.</mixed-citation></ref>
<ref id="ref38"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Mutlu-Bayraktar</surname><given-names>D.</given-names></name> <name><surname>Cosgun</surname><given-names>V.</given-names></name> <name><surname>Altan</surname><given-names>T.</given-names></name></person-group> (<year>2019</year>). <article-title>Cognitive load in multimedia learning environments: a systematic review</article-title>. <source>Comput. Educ.</source> <volume>141</volume>:<fpage>103618</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.compedu.2019.103618</pub-id></mixed-citation></ref>
<ref id="ref39"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Page</surname><given-names>M. J.</given-names></name> <name><surname>McKenzie</surname><given-names>J. E.</given-names></name> <name><surname>Bossuyt</surname><given-names>P. M.</given-names></name> <name><surname>Boutron</surname><given-names>I.</given-names></name> <name><surname>Hoffmann</surname><given-names>T. C.</given-names></name> <name><surname>Mulrow</surname><given-names>C. D.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>The PRISMA 2020 statement: an updated guideline for reporting systematic reviews</article-title>. <source>BMJ</source> <volume>372</volume>:<fpage>n71</fpage>. doi: <pub-id pub-id-type="doi">10.1136/bmj.n71</pub-id></mixed-citation></ref>
<ref id="ref40"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Parmaxi</surname><given-names>A.</given-names></name> <name><surname>Zaphiris</surname><given-names>P.</given-names></name></person-group> (<year>2017</year>). <article-title>Web 2.0 in computer-assisted language learning: a research synthesis and implications for instructional design and educational practice</article-title>. <source>Interact. Learn. Environ.</source> <volume>25</volume>, <fpage>704</fpage>&#x2013;<lpage>716</lpage>. doi: <pub-id pub-id-type="doi">10.1080/10494820.2016.1172243</pub-id></mixed-citation></ref>
<ref id="ref41"><mixed-citation publication-type="other"><person-group person-group-type="author"><name><surname>Pluye</surname><given-names>P.</given-names></name> <name><surname>Robert</surname><given-names>E.</given-names></name> <name><surname>Cargo</surname><given-names>M.</given-names></name> <name><surname>Bartlett</surname><given-names>G.</given-names></name> <name><surname>O&#x2019;Cathain</surname><given-names>A.</given-names></name> <name><surname>Griffiths</surname><given-names>F.</given-names></name> <etal/></person-group>. (<year>2011</year>). <source>Mixed methods appraisal tool (MMAT), version 2011. Montreal: User guide Canadian Intellectual Property Office, Industry Canada</source>.</mixed-citation></ref>
<ref id="ref42"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Sana</surname><given-names>F.</given-names></name> <name><surname>Fenesi</surname><given-names>B.</given-names></name></person-group> (<year>2025</year>). <article-title>Working memory and instructional fit: reintroducing aptitude&#x2013;treatment interaction in education research</article-title>. <source>Behav. Sci.</source> <volume>15</volume>:<fpage>765</fpage>. doi: <pub-id pub-id-type="doi">10.3390/bs15060765</pub-id>, <pub-id pub-id-type="pmid">40564547</pub-id></mixed-citation></ref>
<ref id="ref43"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Scharinger</surname><given-names>C.</given-names></name> <name><surname>Prislan</surname><given-names>L.</given-names></name> <name><surname>Bernecker</surname><given-names>K.</given-names></name> <name><surname>Ninaus</surname><given-names>M.</given-names></name></person-group> (<year>2023</year>). <article-title>Gamification of an n-back working memory task &#x2013; is it worth the effort? An EEG and eye-tracking study</article-title>. <source>Biol. Psychol.</source> <volume>179</volume>:<fpage>108545</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.biopsycho.2023.108545</pub-id>, <pub-id pub-id-type="pmid">36965785</pub-id></mixed-citation></ref>
<ref id="ref44"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Schumacher</surname><given-names>C.</given-names></name> <name><surname>Ifenthaler</surname><given-names>D.</given-names></name></person-group> (<year>2018</year>). <article-title>The importance of students&#x2019; motivational dispositions for designing learning analytics</article-title>. <source>J. Comput. High. Educ.</source> <volume>30</volume>, <fpage>599</fpage>&#x2013;<lpage>619</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s12528-018-9188-y</pub-id></mixed-citation></ref>
<ref id="ref45"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Silva Barbosa</surname><given-names>J. F.</given-names></name> <name><surname>Chalco Challco</surname><given-names>G.</given-names></name> <name><surname>Bittencourt</surname><given-names>I. I.</given-names></name></person-group> (<year>2023</year>). <article-title>Does gender-stereotyped gamification increase negative thinking? Results from an experimental study with logic tutoring systems</article-title>. <source>Interact. Learn. Environ.</source> <volume>32</volume>, <fpage>5632</fpage>&#x2013;<lpage>5659</lpage>. doi: <pub-id pub-id-type="doi">10.1080/10494820.2023.2229145</pub-id></mixed-citation></ref>
<ref id="ref46"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Squires</surname><given-names>D. R.</given-names></name></person-group> (<year>2017</year>). <article-title>Working memory &#x0026; augmented reality&#x2019;s trajectory: a literature review of AR in education, online learning, workforce training, and working memory research</article-title>. <source>J. Educ. Technol.</source> <volume>14</volume>, <fpage>55</fpage>&#x2013;<lpage>63</lpage>.</mixed-citation></ref>
<ref id="ref47"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Stakanova</surname><given-names>E.</given-names></name></person-group> (<year>2023</year>). &#x201C;<article-title>Exploring bilingualism: tackling working memory in meaning-making instruction</article-title>&#x201D; in <source>Complex Social Systems in Dynamic Environments: Advanced Theories, Innovative Methods, and Interdisciplinary Research Results</source>. ed. <person-group person-group-type="editor"><name><surname>Maximova</surname><given-names>S. G.</given-names></name></person-group> (<publisher-loc>Cham</publisher-loc>: <publisher-name>Springer International Publishing</publisher-name>), <fpage>219</fpage>&#x2013;<lpage>237</lpage>.</mixed-citation></ref>
<ref id="ref48"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Sterne</surname><given-names>J. A. C.</given-names></name> <name><surname>Hern&#x00E1;n</surname><given-names>M. A.</given-names></name> <name><surname>Reeves</surname><given-names>B. C.</given-names></name> <name><surname>Savovi&#x0107;</surname><given-names>J.</given-names></name> <name><surname>Berkman</surname><given-names>N. D.</given-names></name> <name><surname>Viswanathan</surname><given-names>M.</given-names></name> <etal/></person-group>. (<year>2016</year>). <article-title>ROBINS-I: a tool for assessing risk of bias in non-randomised studies of interventions</article-title>. <source>BMJ</source> <volume>355</volume>:<fpage>i4919</fpage>. doi: <pub-id pub-id-type="doi">10.1136/bmj.i4919</pub-id>, <pub-id pub-id-type="pmid">27733354</pub-id></mixed-citation></ref>
<ref id="ref49"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Sweller</surname><given-names>J.</given-names></name></person-group> (<year>2020</year>). <article-title>Cognitive load theory and educational technology</article-title>. <source>Educ. Technol. Res. Dev.</source> <volume>68</volume>, <fpage>1</fpage>&#x2013;<lpage>16</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s11423-019-09701-3</pub-id></mixed-citation></ref>
<ref id="ref50"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Sweller</surname><given-names>J.</given-names></name> <name><surname>van Merri&#x00EB;nboer</surname><given-names>J. J.</given-names></name> <name><surname>Paas</surname><given-names>F.</given-names></name></person-group> (<year>2019</year>). <article-title>Cognitive architecture and instructional design: 20 years later</article-title>. <source>Educ. Psychol. Rev.</source> <volume>31</volume>, <fpage>261</fpage>&#x2013;<lpage>292</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s10648-019-09465-5</pub-id></mixed-citation></ref>
<ref id="ref51"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Teng</surname><given-names>F.</given-names></name></person-group> (<year>2019</year>). <article-title>Maximizing the potential of captions for primary school ESL students&#x2019; comprehension of English-language videos</article-title>. <source>Comput. Assist. Lang. Learn.</source> <volume>32</volume>, <fpage>665</fpage>&#x2013;<lpage>691</lpage>. doi: <pub-id pub-id-type="doi">10.1080/09588221.2018.1532912</pub-id></mixed-citation></ref>
<ref id="ref52"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Teng</surname><given-names>M. F.</given-names></name></person-group> (<year>2023</year>). <article-title>Effectiveness of captioned videos for incidental vocabulary learning and retention: the role of working memory</article-title>. <source>Comput. Assist. Lang. Learn.</source> <volume>2023</volume>, <fpage>1</fpage>&#x2013;<lpage>29</lpage>. doi: <pub-id pub-id-type="doi">10.1080/09588221.2023.2173613</pub-id></mixed-citation></ref>
<ref id="ref53"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Teng</surname><given-names>M. F.</given-names></name></person-group> (<year>2024</year>). <article-title>Incidental vocabulary learning from captioned video genres: proficiency, working memory, and aptitude</article-title>. <source>Comput. Assist. Lang. Learn.</source>, <fpage>1</fpage>&#x2013;<lpage>43</lpage>. doi: <pub-id pub-id-type="doi">10.1080/09588221.2024.2421517</pub-id></mixed-citation></ref>
<ref id="ref54"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>T&#x00FC;rk</surname><given-names>E.</given-names></name> <name><surname>Er&#x00E7;etin</surname><given-names>G.</given-names></name></person-group> (<year>2014</year>). <article-title>Effects of interactive versus simultaneous display of multimedia glosses on L2 reading comprehension and incidental vocabulary learning</article-title>. <source>Comput. Assist. Lang. Learn.</source> <volume>27</volume>, <fpage>1</fpage>&#x2013;<lpage>25</lpage>. doi: <pub-id pub-id-type="doi">10.1080/09588221.2012.692384</pub-id></mixed-citation></ref>
<ref id="ref55"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Unsworth</surname><given-names>N.</given-names></name> <name><surname>Engle</surname><given-names>R. W.</given-names></name></person-group> (<year>2007</year>). <article-title>The nature of individual differences in working memory capacity: active maintenance in primary memory and controlled search from secondary memory</article-title>. <source>Psychol. Rev.</source> <volume>114</volume>, <fpage>104</fpage>&#x2013;<lpage>132</lpage>. doi: <pub-id pub-id-type="doi">10.1037/0033-295X.114.1.104</pub-id>, <pub-id pub-id-type="pmid">17227183</pub-id></mixed-citation></ref>
<ref id="ref56"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Unsworth</surname><given-names>N.</given-names></name> <name><surname>Heitz</surname><given-names>R. P.</given-names></name> <name><surname>Schrock</surname><given-names>J. C.</given-names></name> <name><surname>Engle</surname><given-names>R. W.</given-names></name></person-group> (<year>2005</year>). <article-title>An automated version of the operation span task</article-title>. <source>Behav. Res. Methods</source> <volume>37</volume>, <fpage>498</fpage>&#x2013;<lpage>505</lpage>. doi: <pub-id pub-id-type="doi">10.3758/BF03195718</pub-id>, <pub-id pub-id-type="pmid">16405146</pub-id></mixed-citation></ref>
<ref id="ref57"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Varol</surname><given-names>B.</given-names></name> <name><surname>Er&#x00E7;etin</surname><given-names>G.</given-names></name></person-group> (<year>2021</year>). <article-title>Effects of gloss type, gloss position, and working memory capacity on second language comprehension in electronic reading</article-title>. <source>Comput. Assist. Lang. Learn.</source> <volume>34</volume>, <fpage>820</fpage>&#x2013;<lpage>844</lpage>. doi: <pub-id pub-id-type="doi">10.1080/09588221.2019.1643738</pub-id></mixed-citation></ref>
<ref id="ref58"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Weissheimer</surname><given-names>J.</given-names></name> <name><surname>de Souza</surname><given-names>J. G. M.</given-names></name> <name><surname>Antunes</surname><given-names>J. P. L.</given-names></name> <name><surname>de Souza Filho</surname><given-names>N. S.</given-names></name></person-group> (<year>2019</year>). <article-title>Gamification and L2 vocabulary learning: the Vocabox experience in the languages without borders program</article-title>. <source>Rev. Linguagem Ensino</source> <volume>22</volume>, <fpage>1136</fpage>&#x2013;<lpage>1154</lpage>. doi: <pub-id pub-id-type="doi">10.15210/rle.v22i4.16453</pub-id></mixed-citation></ref>
<ref id="ref59"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Xiao</surname><given-names>Q.</given-names></name> <name><surname>Shi</surname><given-names>Y.</given-names></name> <name><surname>Cheng</surname><given-names>D.</given-names></name></person-group> (<year>2025</year>). <article-title>The domain-general and domain-specific cognitive profiles in high and low-achieving Chinese L2 learners</article-title>. <source>Front. Psychol.</source> <volume>16</volume>:<fpage>1577986</fpage>. doi: <pub-id pub-id-type="doi">10.3389/fpsyg.2025.1577986</pub-id>, <pub-id pub-id-type="pmid">41143012</pub-id></mixed-citation></ref>
<ref id="ref60"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Xu</surname><given-names>W.</given-names></name></person-group> (<year>2025</year>). <article-title>Personality, intelligence, and second language learning success: a systematic review</article-title>. <source>Behav. Sci.</source> <volume>15</volume>:<fpage>428</fpage>. doi: <pub-id pub-id-type="doi">10.3390/bs15040428</pub-id>, <pub-id pub-id-type="pmid">40282050</pub-id></mixed-citation></ref>
<ref id="ref61"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Xu</surname><given-names>C.</given-names></name> <name><surname>Xia</surname><given-names>J.</given-names></name></person-group> (<year>2021</year>). <article-title>Scaffolding process knowledge in L2 writing development: insights from computer keystroke log and process graph</article-title>. <source>Comput. Assist. Lang. Learn.</source> <volume>34</volume>, <fpage>583</fpage>&#x2013;<lpage>608</lpage>. doi: <pub-id pub-id-type="doi">10.1080/09588221.2019.1632901</pub-id></mixed-citation></ref>
<ref id="ref62"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Xu</surname><given-names>G.</given-names></name> <name><surname>Yu</surname><given-names>A.</given-names></name> <name><surname>Liu</surname><given-names>L.</given-names></name></person-group> (<year>2025</year>). <article-title>A meta-analysis examining AI-assisted L2 learning</article-title>. <source>Int. Rev. Appl. Linguist. Lang. Teach.</source> doi: <pub-id pub-id-type="doi">10.1515/iral-2024-0213</pub-id></mixed-citation></ref>
<ref id="ref63"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Yang</surname><given-names>J.</given-names></name> <name><surname>Thomas</surname><given-names>M. S.</given-names></name> <name><surname>Qi</surname><given-names>X.</given-names></name> <name><surname>Liu</surname><given-names>X.</given-names></name></person-group> (<year>2019</year>). <article-title>Using an ANN-based computational model to simulate and evaluate Chinese students&#x2019; individualized cognitive abilities important in their English acquisition</article-title>. <source>Comput. Assist. Lang. Learn.</source> <volume>32</volume>, <fpage>366</fpage>&#x2013;<lpage>397</lpage>. doi: <pub-id pub-id-type="doi">10.1080/09588221.2018.1517125</pub-id></mixed-citation></ref>
<ref id="ref64"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Yu</surname><given-names>M.</given-names></name></person-group> (<year>2025</year>). <article-title>Optimizing EFL vocabulary acquisition: a randomized controlled mixed-methods investigation of artificial intelligence-driven incidental, contextual, and multimodal strategies</article-title>. <source>Educ. Inf. Technol.</source> <volume>30</volume>, <fpage>1</fpage>&#x2013;<lpage>45</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s10639-025-13803-2</pub-id></mixed-citation></ref>
<ref id="ref65"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Yuan</surname><given-names>H.</given-names></name></person-group> (<year>2025</year>). <article-title>Artificial intelligence in language learning: biometric feedback and adaptive reading for improved comprehension and reduced anxiety</article-title>. <source>Humanit. Soc. Sci. Commun.</source> <volume>12</volume>, <fpage>1</fpage>&#x2013;<lpage>16</lpage>. doi: <pub-id pub-id-type="doi">10.1057/s41599-025-04878-w</pub-id></mixed-citation></ref>
<ref id="ref66"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname><given-names>L.</given-names></name> <name><surname>Aubrey</surname><given-names>S.</given-names></name></person-group> (<year>2024</year>). <article-title>The role of individual differences in second language pragmatics: a systematic review</article-title>. <source>Int. J. Appl. Linguist.</source> <volume>34</volume>, <fpage>1316</fpage>&#x2013;<lpage>1334</lpage>. doi: <pub-id pub-id-type="doi">10.1111/ijal.12573</pub-id></mixed-citation></ref>
<ref id="ref67"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zhonggen</surname><given-names>Y.</given-names></name> <name><surname>Ying</surname><given-names>Z.</given-names></name> <name><surname>Zhichun</surname><given-names>Y.</given-names></name> <name><surname>Wentao</surname><given-names>C.</given-names></name></person-group> (<year>2019</year>). <article-title>Student satisfaction, learning outcomes, and cognitive loads with a mobile learning platform</article-title>. <source>Comput. Assist. Lang. Learn.</source> <volume>32</volume>, <fpage>323</fpage>&#x2013;<lpage>341</lpage>. doi: <pub-id pub-id-type="doi">10.1080/09588221.2018.1517093</pub-id></mixed-citation></ref>
</ref-list>
<fn-group>
<fn fn-type="custom" custom-type="edited-by" id="fn0001">
<p>Edited by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/16306/overview">Jesus de la Fuente</ext-link>, University of Navarra, Spain</p>
</fn>
<fn fn-type="custom" custom-type="reviewed-by" id="fn0002">
<p>Reviewed by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3122000/overview">Ahmadreza Mohebbi</ext-link>, The University of Auckland, New Zealand</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3311799/overview">Adel Shaban</ext-link>, Middlesbrough College, United Kingdom</p>
</fn>
</fn-group>
</back>
</article>