<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" dtd-version="1.3" article-type="research-article">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Lang. Sci.</journal-id>
<journal-title-group>
<journal-title>Frontiers in Language Sciences</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Lang. Sci.</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">2813-4605</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/flang.2026.1763160</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Research</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Auditory-perceptual acuity impacts prosodic boundary prediction in a gating task</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name><surname>Hofmann</surname> <given-names>Andrea</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x0002A;</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x00026; editing</role>
<uri xlink:href="https://loop.frontiersin.org/people/1393110"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Tuomainen</surname> <given-names>Outi</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x00026; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<uri xlink:href="https://loop.frontiersin.org/people/169102"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Hanne</surname> <given-names>Sandra</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x00026; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<uri xlink:href="https://loop.frontiersin.org/people/3101903"/>
</contrib>
<contrib contrib-type="author" equal-contrib="yes">
<name><surname>Ver&#x000ED;ssimo</surname> <given-names>Jo&#x000E3;o</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="author-notes" rid="fn001"><sup>&#x02020;</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x00026; editing</role>
<uri xlink:href="https://loop.frontiersin.org/people/1802083"/>
</contrib>
<contrib contrib-type="author" equal-contrib="yes">
<name><surname>Wartenburger</surname> <given-names>Isabell</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="author-notes" rid="fn001"><sup>&#x02020;</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x00026; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<uri xlink:href="https://loop.frontiersin.org/people/2704"/>
</contrib>
</contrib-group>
<aff id="aff1"><label>1</label><institution>Department of Linguistics</institution>, <city>Cognitive Sciences</city>, <city>University of Potsdam</city>, <city>Potsdam</city>, <country country="de">Germany</country></aff>
<aff id="aff2"><label>2</label><institution>Center of Linguistics</institution>, <city>School of Arts and Humanities</city>, <city>University of Lisbon</city>, <city>Lisbon</city>, <country country="pt">Portugal</country></aff>
<author-notes>
<corresp id="c001"><label>&#x0002A;</label>Correspondence: Andrea Hofmann, <email xlink:href="mailto:andhofma@uni-potsdam.de">andhofma@uni-potsdam.de</email></corresp>
<fn fn-type="equal" id="fn001"><label>&#x02020;</label><p>These authors have contributed equally to this work and share senior authorship</p></fn></author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-03-02">
<day>02</day>
<month>03</month>
<year>2026</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2026</year>
</pub-date>
<volume>5</volume>
<elocation-id>1763160</elocation-id>
<history>
<date date-type="received">
<day>08</day>
<month>12</month>
<year>2025</year>
</date>
<date date-type="rev-recd">
<day>17</day>
<month>01</month>
<year>2026</year>
</date>
<date date-type="accepted">
<day>23</day>
<month>01</month>
<year>2026</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x000A9; 2026 Hofmann, Tuomainen, Hanne, Ver&#x000ED;ssimo and Wartenburger.</copyright-statement>
<copyright-year>2026</copyright-year>
<copyright-holder>Hofmann, Tuomainen, Hanne, Ver&#x000ED;ssimo and Wartenburger</copyright-holder>
<license>
<ali:license_ref start_date="2026-03-02">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<p>Processing of prosodic phrasing requires listeners to integrate acoustic cues that unfold incrementally during speech comprehension, yet substantial individual differences exist in how listeners use unfolding prosodic information. This study investigated whether individual differences in auditory-perceptual discrimination abilities for prosodic boundary cues are related to processing of prosodic phrasing, and, more specifically, the ability to use the incremental bottom-up prosodic information for making top-down predictions about the syntactic structure of an unfolding utterance. Sixty German-speaking adults completed adaptive staircase procedures measuring Just-Noticeable-Difference thresholds for auditory-perceptual acuity in pitch, pause, and final lengthening discrimination. In addition, they performed a gating task that provided snippets of coordinate three-name sequences with or without an internal prosodic boundary in a randomized order. Performance in the gating task was analyzed using Bayesian multilevel Signal Detection Theory models to separate discriminability from response bias. Participants with higher auditory-perceptual acuity demonstrated better prediction of the upcoming structure across all gates. When all three auditory-perceptual acuity measures were modeled simultaneously, each individual effect attenuated substantially, indicating shared, rather than independent, predictive variance. These findings suggest that top-down prediction during speech comprehension is related to overall auditory-perceptual acuity rather than independent boundary-cue-specific sensitivities.</p></abstract>
<kwd-group>
<kwd>auditory-perceptual acuity</kwd>
<kwd>final lengthening</kwd>
<kwd>gating paradigm</kwd>
<kwd>just-noticeable difference</kwd>
<kwd>pause</kwd>
<kwd>pitch</kwd>
<kwd>prosodic boundary</kwd>
<kwd>prosodic boundary cue</kwd>
</kwd-group>
<funding-group>
<award-group id="gs1">
<funding-source id="sp1">
<institution-wrap>
<institution>Deutsche Forschungsgemeinschaft</institution>
<institution-id institution-id-type="doi" vocab="open-funder-registry" vocab-identifier="10.13039/open_funder_registry">10.13039/501100001659</institution-id>
</institution-wrap>
</funding-source>
</award-group>
<funding-statement>The author(s) declared that financial support was received for this work and/or its publication. This original research was funded by the Deutsche Forschungsgemeinschaft (DFG, German Research Foundation)-Project-ID 317633480-SFB 1287. Jo&#x000E3;o Ver&#x000ED;ssimo has been funded by the Funda&#x000E7;&#x000E3;o para a Ci&#x000EA;ncia e a Tecnologia (FCT, Foundation for Science and Technology), grant UID/214/2025 to the Center of Linguistics of the University of Lisbon.</funding-statement>
</funding-group>
<counts>
<fig-count count="6"/>
<table-count count="0"/>
<equation-count count="0"/>
<ref-count count="64"/>
<page-count count="17"/>
<word-count count="12325"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Psycholinguistics</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec sec-type="introduction" id="s1">
<title>Introduction</title>
<p>Prosodic phrasing refers to the organization of pitch, timing, rhythm, and intensity that accompanies spoken language. It guides listeners through an utterance by marking syntactic structure, signaling discourse relations, highlighting information structure, and conveying pragmatic and/or emotional meaning (e.g., <xref ref-type="bibr" rid="B13">Cole, 2015</xref>; <xref ref-type="bibr" rid="B20">Frazier et al., 2006</xref>; <xref ref-type="bibr" rid="B61">Wagner and Watson, 2010</xref>). A central function of prosodic phrasing is the segmentation of continuous speech into hierarchically organized units, enabling listeners to map the acoustic stream onto syntactic and semantic structure during comprehension (e.g., <xref ref-type="bibr" rid="B11">Clifton et al., 2002</xref>; <xref ref-type="bibr" rid="B20">Frazier et al., 2006</xref>).</p>
<p>Consider the sentences: &#x0201C;<italic>What&#x00027;s that ahead in the road?&#x0201D;</italic> and &#x0201C;<italic>What&#x00027;s that, a HEAD in the ROAD?&#x0201D;</italic> (<xref ref-type="bibr" rid="B37">Kjelgaard and Speer, 1999</xref>, p. 153), which contain identical segmental content. What distinguishes their meanings is the prosodic structure, specifically, where prosodic phrase boundaries occur and which words receive prominence. In the first version, continuous phrasing groups <italic>ahead in the road</italic> into one constituent, yielding the interpretation of something visible on the road ahead. In the second version, a boundary after <italic>that</italic> with nuclear accents on <italic>head</italic> and <italic>road</italic> signals a different syntactic organization, prompting listeners to interpret <italic>a head in the road</italic> as a noun phrase. This example illustrates how prosodic boundaries resolve structural ambiguities that cannot be disambiguated from segmental information alone (e.g., <xref ref-type="bibr" rid="B20">Frazier et al., 2006</xref>).</p>
<p>Listeners construct prosodic phrasing through real-time integration of incremental acoustic information with higher-level linguistic knowledge. This process involves both bottom-up processing&#x02014;the extraction of acoustic cues from the speech signal&#x02014;and top-down prediction&#x02014;the use of syntactic, semantic, and pragmatic expectations to anticipate upcoming prosodic structure (e.g., <xref ref-type="bibr" rid="B14">Cole et al., 2010</xref>; <xref ref-type="bibr" rid="B19">Ferreira and Karimi, 2015</xref>; <xref ref-type="bibr" rid="B34">Ji et al., 2024</xref>; <xref ref-type="bibr" rid="B61">Wagner and Watson, 2010</xref>). Understanding how listeners balance these two sources of information, and why individuals differ in this ability, remains a central question in prosodic processing research.</p>
<sec>
<title>Acoustic cues to prosodic boundaries</title>
<p>Listeners rely primarily on three acoustic cues to identify prosodic phrase boundaries: Pitch movements, silent pauses, and pre-boundary lengthening (e.g., <xref ref-type="bibr" rid="B13">Cole, 2015</xref>; <xref ref-type="bibr" rid="B36">Kentner and F&#x000E9;ry, 2013</xref>; <xref ref-type="bibr" rid="B47">Petrone et al., 2017</xref>). Pitch movements (e.g., final rises, falls, or pitch reset at phrase onset) provide the main spectral cues, while silent pauses and lengthening of vowels or syllables preceding a boundary (final lengthening) serve as major temporal cues (e.g., <xref ref-type="bibr" rid="B50">Schub&#x000F6; et al., 2023</xref>; <xref ref-type="bibr" rid="B56">Tyler and Cutler, 2009</xref>). These three prosodic boundary cues constitute the most robust and extensively studied boundary markers in German and are known to support reliable boundary perception and structural disambiguation (e.g., <xref ref-type="bibr" rid="B27">Hansen et al., 2023</xref>; <xref ref-type="bibr" rid="B36">Kentner and F&#x000E9;ry, 2013</xref>; <xref ref-type="bibr" rid="B47">Petrone et al., 2017</xref>; <xref ref-type="bibr" rid="B50">Schub&#x000F6; et al., 2023</xref>). Beyond these three prosodic boundary cues, prosodic boundaries can be marked by other acoustic and articulatory cues. These include intensity changes (e.g., <xref ref-type="bibr" rid="B38">Kochanski et al., 2005</xref>), segmental lengthening patterns beyond phrase-final position (e.g., <xref ref-type="bibr" rid="B8">Byrd and Saltzman, 2003</xref>), voice quality modulations (e.g., <xref ref-type="bibr" rid="B25">Gonz&#x000E1;lez et al., 2022</xref>), and domain-initial strengthening of post-boundary segments (e.g., <xref ref-type="bibr" rid="B10">Cho and Keating, 2009</xref>).</p>
<p>Across languages, pitch consistently signals prosodic phrasing, but generally carries less perceptual weight than temporal cues (e.g., <xref ref-type="bibr" rid="B15">Collier et al., 1993</xref>; <xref ref-type="bibr" rid="B22">Ganga et al., 2024</xref>; <xref ref-type="bibr" rid="B42">Lin and Fon, 2010</xref>). This asymmetry reflects the inherently greater salience of abrupt temporal events&#x02014;such as silence or marked segmental slowing&#x02014;compared to more gradually unfolding pitch movements. Although pitch is perceptually less dominant for marking local phrase boundaries, it serves many broader functions; pitch rises, among other things, guide listeners&#x00027; attention and facilitate prosodic chunking or memory processes (<xref ref-type="bibr" rid="B41">Lialiou et al., 2024</xref>). In contrast, silent pauses serve as highly reliable markers of local boundaries and often trigger categorical boundary judgments (e.g., <xref ref-type="bibr" rid="B43">M&#x000E4;nnel and Friederici, 2016</xref>; <xref ref-type="bibr" rid="B47">Petrone et al., 2017</xref>; <xref ref-type="bibr" rid="B63">Yang et al., 2014</xref>). Final lengthening on its own usually lacks sufficient perceptual salience to trigger boundary perception, but functions most effectively when accompanied by pitch and/or pause cues (e.g., <xref ref-type="bibr" rid="B22">Ganga et al., 2024</xref>; <xref ref-type="bibr" rid="B50">Schub&#x000F6; et al., 2023</xref>).</p>
<p>Rather than treating prosodic boundary cues in isolation, listeners integrate them incrementally in a non-additive way: When a pause is present, additional cues add little perceptual benefit, but when a pause is absent, pitch and pre-boundary lengthening become more informative (e.g., <xref ref-type="bibr" rid="B63">Yang et al., 2014</xref>). In German, pitch change and pre-boundary lengthening must co-occur to elicit robust boundary detection (e.g., <xref ref-type="bibr" rid="B31">Holzgrefe-Lang et al., 2016</xref>). Prosodic boundary cues also interact perceptually: Rising pitch increases the perceived duration of a syllable compared to level pitch, even when the objective duration of the syllable is constant. Additionally, the pitch contour of syllables flanking a silent interval modulates the perceived pause duration (also known as the auditory kappa effect; <xref ref-type="bibr" rid="B12">Cohen et al., 1953</xref>). When the distance between pre-pause and post-pause pitch is greater (e.g., pre-boundary rise followed by low post-boundary pitch), the intervening silence is perceived as longer (<xref ref-type="bibr" rid="B5">Brugos and Barnes, 2014</xref>). Such interactions demonstrate that spectral and temporal cues jointly shape prosodic boundary perception through dynamic spectrotemporal integration.</p></sec>
<sec>
<title>Boundary perception in coordinate structures</title>
<p>In the present study, we investigate how listeners use prosodic boundary cues in coordinate name sequences, where prosodic information determines syntactic grouping. Consider the question &#x0201C;Who is arriving at the station?&#x0201D; The answer could be:</p>
<list list-type="simple">
<list-item><p>(1) <italic>[Moni und Lilli] &#x00023; und Manu</italic> (<italic>grouped</italic> condition with internal grouping; und = and)</p></list-item>
<list-item><p>or</p></list-item>
<list-item><p>(2) <italic>Moni und Lilli und Manu</italic> (<italic>ungrouped</italic> condition without internal grouping; und = and).</p></list-item>
</list>
<p>In (1), a prosodic boundary after the second name (marked here with &#x00023;, see also <xref ref-type="bibr" rid="B27">Hansen et al., 2023</xref>) creates an internal grouping, indicating that <italic>Moni</italic> and <italic>Lilli</italic> arrive together while <italic>Manu</italic> arrives separately. In (2), the absence of such a boundary implies that all three arrive together. Crucially, only the presence or absence of a prosodic boundary disambiguates the intended grouping while the segmental content is identical in both cases.</p>
<p>Coordinate name sequences offer an ideal testing ground for studying prosodic phrasing because they exhibit systematic acoustic variation tied to syntactic grouping but contain identical segmental content, that is, the syntactic structure is solely conveyed by means of prosody. <xref ref-type="bibr" rid="B36">Kentner and F&#x000E9;ry&#x00027;s (2013)</xref> Proximity/Similarity model formalizes how prosodic boundaries are distributed in such structures. The model predicts that in <italic>grouped</italic> sequences like <italic>[Name1 und Name2] &#x00023; und Name3</italic>, the Proximity principle weakens internal boundaries within the syntactic group (at or after <italic>Name1</italic>), while the Anti-Proximity principle strengthens boundaries at group edges (after <italic>Name2</italic>). Consequently, the <italic>grouped</italic> structure should exhibit a stronger prosodic boundary after <italic>Name2</italic> compared to the <italic>ungrouped</italic> structure (2) <italic>Name1 und Name2 und Name3</italic>, which lacks internal grouping. Empirical studies confirm that speakers mark grouping through systematic modulation of pitch, pause, and final lengthening, and that listeners reliably recover grouping from these prosodic boundary cues (e.g., <xref ref-type="bibr" rid="B22">Ganga et al., 2024</xref>; <xref ref-type="bibr" rid="B32">Huttenlauch et al., 2021</xref>; <xref ref-type="bibr" rid="B50">Schub&#x000F6; et al., 2023</xref>).</p>
<p>The properties mentioned above make coordinate structures valuable for examining the interplay between bottom-up and top-down processing in prosodic boundary perception. The question is not merely which boundary cues listeners use, but how much bottom-up prosodic information listeners need before they can generate reliable top-down predictions about the upcoming syntactic structure, specifically, whether the utterance belongs to a <italic>grouped</italic> (1) or <italic>ungrouped</italic> structure (2).</p>
<p><xref ref-type="bibr" rid="B27">Hansen et al. (2023)</xref> addressed this question and examined how much prosodic information is necessary to predict the upcoming syntactic structure using a gating paradigm (see <xref ref-type="bibr" rid="B26">Grosjean, 1980</xref>). In this paradigm, listeners hear segments of an utterance which get progressively longer (&#x0201C;gates&#x0201D;), and each successive &#x0201C;gate&#x0201D; reveals more acoustic information. <xref ref-type="bibr" rid="B27">Hansen et al. (2023)</xref> presented syllable snippets of German coordinate name sequences, starting with <italic>Mo</italic> (Gate 1), progressing to <italic>Moni</italic> (Gate 2), then <italic>Moni und</italic> (Gate 3), <italic>Moni und Li</italic> (Gate 4)&#x02026;, through to the complete sequence <italic>Moni und Lilli und Manu</italic> (Gate 7). After each gate, listeners decided whether they predicted a <italic>grouped</italic> structure &#x0201C;<italic>[Moni und Lilli] und Manu&#x0201D;</italic> (example (1)) or an <italic>ungrouped</italic> structure &#x0201C;<italic>Moni und Lilli und Manu&#x0201D;</italic> (example (2)). The central question was whether listeners could exploit early, subtle bottom-up prosodic boundary cues to make top-down predictions about grouping before encountering the critical boundary after <italic>Name2</italic> itself, as reflected in the accuracy of their grouping decisions at each gate, and whether listeners differed in their predictive abilities.</p>
<p>Across participants, overall accuracy was near ceiling once participants had listened to <italic>Moni und Lilli</italic> (<italic>Name1 and Name2</italic>, Gate 5). However, based on the timing and stability of listeners&#x00027; responses across gates, <xref ref-type="bibr" rid="B27">Hansen et al. (2023)</xref> identified two distinct listener subgroups: Approximately 60% of participants appeared to update their grouping predictions from Gate 2 onward, as more prosodic information became available (&#x0201C;identification group&#x0201D;). In contrast, the remaining participants showed a &#x0201C;waiting&#x0201D; pattern, maintaining consistent responses until later gates (e.g., Gate 5), when clear boundary evidence had accumulated (&#x0201C;waiting group&#x0201D;). Both groups ultimately achieved similarly high accuracy, suggesting that the &#x0201C;waiting group&#x0201D; either had reduced perceptual abilities or employed a different processing strategy. Because <xref ref-type="bibr" rid="B27">Hansen et al. (2023)</xref> presented the gates in a fixed ascending order (always starting from Gate 1 and progressing forward), their design could not determine whether this &#x0201C;waiting&#x0201D; pattern reflected genuine auditory-perceptual limitations or a deliberate strategic choice. Listeners who appeared to &#x0201C;wait&#x0201D; might have been unable to detect subtle early prosodic boundary cues, but they could just as well have been applying a conservative decision strategy, waiting for stronger evidence before committing to a prediction.</p>
<p>The present study therefore sought to disentangle these possibilities by examining whether individual differences in bottom-up auditory-perceptual acuity&#x02014;the ability to detect subtle prosodic differences&#x02014;explain why some listeners can generate reliable top-down predictions earlier than others.</p></sec>
<sec>
<title>The present study: aims and hypotheses</title>
<p>Since prosodic processing relies on the incremental detection of subtle prosodic boundary cues, such as pitch, pause, and final lengthening, differences in auditory-perceptual discrimination abilities (auditory-perceptual acuity) for these cues may be the critical factor driving the individual differences in the timing of correct boundary predictions observed by <xref ref-type="bibr" rid="B27">Hansen et al. (2023)</xref>. Listeners with finer perceptual resolution for the respective acoustic dimensions should be better equipped to exploit early, subtle prosodic boundary cues for predicting upcoming syntactic structure. The present study investigates this hypothesis by directly linking bottom-up perceptual abilities to top-down boundary prediction performance in a gating paradigm.</p>
<p>We measured perceptual thresholds using Just-Noticeable-Difference (JND) tasks for pitch, pause, and final lengthening discrimination, providing an index of listeners&#x00027; perceptual resolution, more specifically, how small a change in each prosodic boundary cue dimension they can reliably detect. To test boundary prediction, we adapted the gating paradigm of <xref ref-type="bibr" rid="B27">Hansen et al. (2023)</xref> to investigate inter-individual differences in using bottom-up prosodic information for top-down predictive processes. Unlike the ascending presentation of successive gates used by <xref ref-type="bibr" rid="B27">Hansen et al. (2023)</xref>, gates were presented in randomized order to reduce strategic effects such as the deliberate withholding of responses. Instead, our experimental paradigm required participants to base each judgment solely on the acoustic evidence available at a given gate.</p>
<p>We asked the following research question: How does auditory-perceptual acuity for prosodic boundary cues relate to the ability to predict syntactic structure (<italic>grouped</italic> vs. <italic>ungrouped</italic>) from partial prosodic information in gated speech?</p>
<p>We hypothesized that participants with higher auditory-perceptual acuity (as indicated by lower JND thresholds) for pitch, final lengthening, and pause would demonstrate enhanced, that is, earlier prediction of the upcoming syntactic structure compared to participants with lower auditory-perceptual acuity (reflected in higher JND thresholds). This auditory-perceptual acuity advantage should be evident across gates, but particularly pronounced at early gates (Gates 2&#x02013;4), where prosodic information is more subtle than at the later Gates 5 and 7. Specifically, effects should be detectable already at Gate 2 (<italic>Name1</italic> only), because listeners who are able to discriminate smaller acoustic differences should be able to use even subtle prosodic information for their predictions.</p>
<p>To address limitations of accuracy measures specified in our preregistration (<ext-link ext-link-type="uri" xlink:href="https://osf.io/dgu7v">https://osf.io/dgu7v</ext-link>), we adopted a Signal Detection Theory framework to separate two distinct processes: Discriminability&#x02014;how well participants detect acoustic differences between <italic>grouped</italic> and <italic>ungrouped</italic> structures, and response bias&#x02014;their general tendency to predict one structure over another, regardless of acoustic evidence (e.g., <xref ref-type="bibr" rid="B28">Hautus et al., 2021</xref>; <xref ref-type="bibr" rid="B64">Zloteanu and Vuorre, 2024</xref>). This distinction is essential for our research question. If better auditory-perceptual acuity enables early exploitation of prosodic boundary cues, it should manifest primarily as enhanced discriminability. In other words, listeners with better auditory-perceptual acuity should be able to detect subtle prosodic differences that signal upcoming structure. However, analyzing performance using only accuracy conflates discriminability with response bias, obscuring whether better performance reflects improved acoustic sensitivity or simply response preferences. By modeling these components separately within a unified regression framework, we can test whether auditory-perceptual acuity specifically enhances sensitivity to prosodic boundary cues at early gates.</p>
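<p>To illustrate these two components, the following minimal sketch computes discriminability (<italic>d</italic>&#x02032;) and response bias (criterion <italic>c</italic>) from hypothetical response counts under an equal-variance Gaussian Signal Detection Theory model; the actual analyses estimated these quantities within Bayesian multilevel models (see the Statistical modeling section), so the code below is purely illustrative.</p>
<code language="python">
# Minimal Signal Detection Theory sketch (equal-variance Gaussian model).
# The response counts below are hypothetical; the reported analyses used
# Bayesian multilevel models rather than these point estimates.
from scipy.stats import norm

def sdt_indices(hits, misses, false_alarms, correct_rejections):
    """Return d-prime (discriminability) and criterion c (response bias)."""
    # Log-linear correction so that rates of 0 or 1 do not yield infinite z-scores.
    hit_rate = (hits + 0.5) / (hits + misses + 1)
    fa_rate = (false_alarms + 0.5) / (false_alarms + correct_rejections + 1)
    z_hit, z_fa = norm.ppf(hit_rate), norm.ppf(fa_rate)
    d_prime = z_hit - z_fa               # separation of grouped vs. ungrouped evidence
    criterion = -0.5 * (z_hit + z_fa)    # positive values = fewer "grouped" responses
    return d_prime, criterion

# Treating "grouped" responses to grouped stimuli as hits and "grouped"
# responses to ungrouped stimuli as false alarms:
print(sdt_indices(hits=20, misses=4, false_alarms=6, correct_rejections=18))
</code>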
<p>We examined the relation between bottom-up auditory-perceptual acuity and top-down prediction of the upcoming syntactic structure through two complementary Signal Detection Theory-based model setups: First, we tested each auditory-perceptual acuity measure separately to establish whether pitch acuity, pause acuity, and final lengthening acuity each predict boundary prediction performance. Second, we modeled all three auditory-perceptual acuities simultaneously. This combined modeling approach allowed us to determine whether any single prosodic boundary cue stands out as particularly important for boundary prediction, thus testing whether one auditory-perceptual acuity provides predictive power beyond what the others explain. If an auditory-perceptual acuity effect still remains after the other auditory-perceptual acuities have been statistically controlled for, this would indicate that the specific perceptual ability in question plays a particularly important role in prosodic phrasing. Conversely, if effects attenuate in the combined model, this would indicate that all three auditory-perceptual acuity measures share predictive variance.</p></sec></sec>
<sec sec-type="materials|methods" id="s2">
<title>Materials and methods</title>
<p>This study was preregistered (<ext-link ext-link-type="uri" xlink:href="https://osf.io/dgu7v">https://osf.io/dgu7v</ext-link>).</p>
<sec>
<title>General procedure</title>
<p>Participants were tested individually in a sound-attenuated booth. The written name sequences and response choice images were presented on a 1080 &#x000D7; 1920 pixel monitor, with keyboard input used to record responses. Auditory stimuli were presented via a Beyerdynamic DT-297 headset (80 Ohm headphones), connected to a Focusrite Scarlett 18i8 audio interface. All experimental procedures were controlled by custom Python 3.8 scripts (PyCharm, Windows 10).</p></sec>
<sec>
<title>Gating task</title>
<sec>
<title>Stimuli</title>
<p>The stimulus set (adapted from <xref ref-type="bibr" rid="B27">Hansen et al., 2023</xref>) comprised six coordinate three-name sequences, each containing three disyllabic German names connected by &#x0201C;und&#x0201D; (&#x0201C;and&#x0201D;). Within each sequence, the first two names consistently ended with an /i/ sound (e.g., Moni, Lilli, Leni, Nelli, Mimmi, Manni), while the third name ended in either /u/ or /a/ (e.g., Manu, Nina, Lola). Each sequence was produced under two prosodic stimulus conditions: (1) A <italic>grouped</italic> stimulus condition featuring a prosodic boundary after the second name (<italic>[Name1 and Name2] and Name3</italic>), and (2) an <italic>ungrouped</italic> stimulus condition without such a boundary (<italic>Name1 and Name2 and Name3</italic>).</p>
<p>The name sequences were derived from audio recordings produced in a prior study by <xref ref-type="bibr" rid="B32">Huttenlauch et al. (2021)</xref>, by four female speakers (mean age = 24 years, SD = 4.24, range: 21&#x02013;30 years). These recordings were selected based on high perceptual congruence between intended and perceived stimulus conditions (&#x02265;98%), as determined through a perception check where na&#x000EF;ve listeners categorized each recording from the complete production corpus as <italic>grouped</italic> or <italic>ungrouped</italic>.</p>
<p>Each name sequence recording was segmented into seven temporal gates of increasing duration, revealing progressively more prosodic information:</p>
<list list-type="bullet">
<list-item><p>Gate 1: <italic>Name1</italic> &#x02013; 1st syllable only (e.g., &#x0201C;<italic>Mo&#x0201D;</italic>)</p></list-item>
<list-item><p>Gate 2: <italic>Name1</italic> complete (e.g., &#x0201C;<italic>Moni&#x0201D;</italic>)</p></list-item>
<list-item><p>Gate 3: <italic>Name1</italic> &#x0002B; conjunction1 (e.g., &#x0201C;<italic>Moni und&#x0201D;</italic>)</p></list-item>
<list-item><p>Gate 4: <italic>Name1</italic> &#x0002B; conjunction1 &#x0002B; <italic>Name2</italic> &#x02013; 1st syllable (e.g., &#x0201C;<italic>Moni und Li&#x0201D;</italic>)</p></list-item>
<list-item><p>Gate 5: <italic>Name1</italic> &#x0002B; conjunction1 &#x0002B; <italic>Name2</italic> complete (e.g., &#x0201C;<italic>Moni und Lilli&#x0201D;</italic>)</p></list-item>
<list-item><p>Gate 6: <italic>Name1</italic> &#x0002B; conjunction1 &#x0002B; <italic>Name2</italic> &#x0002B; conjunction2 (e.g., &#x0201C;<italic>Moni und Lilli und&#x0201D;</italic>)</p></list-item>
<list-item><p>Gate 7: Complete sequence (e.g., &#x0201C;<italic>Moni und Lilli und Manu&#x0201D;</italic>)</p></list-item>
</list>
<p>Our approach deviates from <xref ref-type="bibr" rid="B27">Hansen et al. (2023)</xref>, who employed all seven gates: We excluded Gate 1 because <xref ref-type="bibr" rid="B27">Hansen et al. (2023)</xref> found that performance at this gate was unstable. Some participants who scored above chance at Gate 1 dropped back to chance level at Gate 2, indicating that the single first syllable carries insufficient and unreliable prosodic information. We also excluded Gate 6 because it provided equivalent prosodic boundary cue information to Gate 7 (the full prosodic context), making it redundant for our analyses. Note that prosodic boundary cues in the <italic>grouped</italic> stimulus condition are maximal at/after the second name (see <xref ref-type="bibr" rid="B32">Huttenlauch et al., 2021</xref>).</p>
<p>Our experimental design yielded a total of 240 stimuli (4 speakers &#x000D7; 2 stimulus conditions &#x000D7; 5 gates &#x000D7; 6 name sequences).</p>
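<p>For concreteness, the full design crossing can be enumerated as follows (a sketch with placeholder labels rather than the original stimulus-preparation code):</p>
<code language="python">
# Enumerate the 4 x 2 x 5 x 6 stimulus design with placeholder labels.
from itertools import product

speakers = ["speaker1", "speaker2", "speaker3", "speaker4"]
conditions = ["grouped", "ungrouped"]
gates = [2, 3, 4, 5, 7]                      # Gates 1 and 6 were excluded
sequences = ["seq1", "seq2", "seq3", "seq4", "seq5", "seq6"]

stimuli = list(product(speakers, conditions, gates, sequences))
assert len(stimuli) == 240                   # 4 x 2 x 5 x 6
</code>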
<sec>
<title>Prosodic boundary cue acoustics</title>
<p>Cue measurements and extraction: Following <xref ref-type="bibr" rid="B27">Hansen et al. (2023)</xref>, we extracted prosodic boundary cue measurements from the <xref ref-type="bibr" rid="B32">Huttenlauch et al. (2021)</xref> recordings at two temporal locations: <italic>Name1</italic> (where early boundary-related cues emerge) and at or immediately following <italic>Name2</italic> (where late boundary cues accumulate). Measurements were obtained using Praat (<xref ref-type="bibr" rid="B3">Boersma and Weenink, 1992-2020</xref>) and included: (i) Pitch range, defined as the difference between f0 minimum and maximum across the first and second syllable (in semitones), measured separately for <italic>Name1</italic> and <italic>Name2</italic>; (ii) pause duration, defined as the duration of any silent interval following <italic>Name1</italic> or <italic>Name2</italic>, relative to total utterance duration (in percent); and (iii) final lengthening, defined as the duration of the final vowel relative to total name duration (in percent), also measured separately for <italic>Name1</italic> and <italic>Name2</italic>.</p>
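<p>As a worked illustration of these three measures (all numeric values below are invented; the original measurements were taken in Praat):</p>
<code language="python">
# Illustrative computation of the three prosodic boundary cue measures.
import math

def pitch_range_st(f0_min_hz, f0_max_hz):
    """Pitch range in semitones between the f0 minimum and maximum."""
    return 12 * math.log2(f0_max_hz / f0_min_hz)

def relative_duration_pct(interval_ms, reference_ms):
    """Duration of a pause or final vowel as a percentage of a reference span."""
    return 100 * interval_ms / reference_ms

print(pitch_range_st(180, 240))           # pitch range: about 4.98 st
print(relative_duration_pct(250, 1800))   # pause relative to utterance: about 13.9 %
print(relative_duration_pct(160, 420))    # final vowel relative to name: about 38.1 %
</code>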
<p>In <italic>grouped</italic> productions, prosodic boundary cues on <italic>Name1</italic> are expected to be attenuated due to proximity within the first prosodic group, whereas prosodic boundary cues on <italic>Name2</italic> are expected to be enhanced, marking the major prosodic boundary. Because prosodic boundary cues unfold over time, their alignment with specific gates is necessarily approximate: Pitch range and final lengthening are expressed most clearly at Gates 2 and 5, whereas pause perception requires the onset of subsequent speech material (Gates 3 and 7).</p>
<p><xref ref-type="fig" rid="F1">Figure 1</xref> presents the distribution of prosodic boundary cue strength for <italic>Name1</italic> and <italic>Name2</italic> separately for <italic>grouped</italic> and <italic>ungrouped</italic> stimulus conditions. Pitch range on <italic>Name1</italic> shows acoustic differentiation with modest distributional overlap between stimulus conditions, with <italic>grouped</italic> stimuli showing reduced pitch range relative to <italic>ungrouped</italic> stimuli (&#x00394; = &#x02212;3.32 st, lnBF<sub>10</sub> = 8.30, strong evidence). Final lengthening on <italic>Name1</italic> shows no consistent stimulus condition differences (&#x00394; = &#x02212;2.37 %, lnBF<sub>10</sub> = 0.63, inconclusive evidence, numerically favoring H1), and pauses after <italic>Name1</italic> were virtually absent in both stimulus conditions (&#x00394; = &#x02212;0.39 %, lnBF<sub>10</sub> = &#x02212;0.87, inconclusive evidence, numerically favoring H0). In contrast, prosodic boundary cues were more distinct on <italic>Name2</italic>. Pitch range showed a clear separation between stimulus conditions (&#x00394; = 5.37 st, lnBF<sub>10</sub> = 26.0, strong evidence), and pauses displayed categorical-like distributions: Minimal pausing in the <italic>ungrouped</italic> stimulus condition vs. substantial pauses in the <italic>grouped</italic> stimulus condition (&#x00394; = 16.1 %, lnBF<sub>10</sub> = 22.7, strong evidence). Final lengthening showed moderate stimulus condition differences (&#x00394; = 9.12 %, lnBF<sub>10</sub> = 8.51, strong evidence), though less consistently than pitch and pause cues. Overall, the pattern is consistent with stronger boundary marking at/after <italic>Name2</italic> in line with (<xref ref-type="bibr" rid="B36">Kentner and F&#x000E9;ry 2013</xref>). The stimulus condition differences at the different name positions were evaluated using Bayesian paired <italic>t</italic>-tests (<xref ref-type="bibr" rid="B49">Rouder et al., 2009</xref>), with grouped and ungrouped tokens paired by speaker and name sequence (<italic>n</italic> = 24 pairs per cue). lnBF<sub>10</sub> denotes the natural-logged Bayes Factor quantifying the strength of evidence for stimulus condition differences (values &#x0003E; 1 supporting a difference and values &#x0003C;-1 supporting the null; see the Statistical modeling section for interpretation thresholds), whereas the corresponding &#x00394; values index the magnitude of those differences.</p>
<fig position="float" id="F1">
<label>Figure 1</label>
<caption><p>Distributions show pitch range (semitones), pause duration (%), and final lengthening (%) measured in our gating task stimuli. These stimuli were drawn from the <xref ref-type="bibr" rid="B32">Huttenlauch et al. (2021)</xref> production study and selected for their high perceptual congruence between intended and perceived stimulus conditions across four speakers. Pitch range and final lengthening are measured at Name1 and Name2 positions; pause duration is measured after Name2. Grouped stimulus conditions are shown in yellow, ungrouped stimulus conditions in green. Dashed lines indicate the zero baseline; negative pitch range values reflect pitch falls. Violin plots show probability densities, boxplots show medians and quartiles.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="flang-05-1763160-g0001.tif">
<alt-text content-type="machine-generated">Violin plots showing the acoustic realization of three prosodic boundary cues in the gating task stimuli. Panels display pitch range, pause duration, and final lengthening, measured separately at Name1 and Name2 positions. Each panel compares grouped and ungrouped stimulus conditions. Grouped stimuli show reduced pitch range at Name1 and clearly increased pitch range, pause duration, and final lengthening at Name2, whereas ungrouped stimuli show weaker boundary marking. Pause durations after Name1 are near zero in both stimulus conditions. Violin shapes depict distribution density, boxplots indicate medians and quartiles, and dashed lines mark zero baselines. Colors distinguish grouped and ungrouped conditions.</alt-text>
</graphic>
</fig>
</sec></sec>
<sec>
<title>Procedure</title>
<p>Participants completed a binary decision gating task (originally introduced by <xref ref-type="bibr" rid="B26">Grosjean, 1980</xref>) judging whether each gated stimulus snippet corresponded to the <italic>grouped</italic> or <italic>ungrouped</italic> stimulus condition (see <xref ref-type="bibr" rid="B27">Hansen et al., 2023</xref>). Participants received written instructions on screen explaining that they would hear audio snippets of name sequences (e.g., only &#x0201C;<italic>Mimmi&#x0201D;</italic> or &#x0201C;<italic>Mimmi und Mo&#x0201D;</italic>). After each audio snippet, two pictograms appeared on screen, each symbolizing one of the two possible grouping structures. Participants&#x00027; task was to decide which pictogram best matched the grouping structure conveyed by the snippet they just heard, and to indicate their choice by pressing the corresponding arrow key (left or right) on the keyboard.</p>
<p><bold>Trial structure</bold>: Each trial began with a fixation cross (1 s) at screen center, followed by the presentation of a single gated audio snippet over headphones. After a 500 ms delay, two response pictograms appeared on screen, symbolizing the two possible grouping structures. One pictogram depicted two stick figures positioned close together with a third spatially separated, representing the <italic>grouped</italic> stimulus condition (two arriving together, one arriving separately). The other pictogram showed three equidistant stick figures without spatial separation, representing the <italic>ungrouped</italic> stimulus condition (all three arrive together). Arrows below each pictogram (pointing left/right) indicated the corresponding keyboard response. Participants responded using the designated keyboard keys after the audio presentation. The condition-key-mapping (<italic>grouped</italic>-left / <italic>ungrouped</italic>-right vs. <italic>grouped</italic>-right / <italic>ungrouped</italic>-left) was counterbalanced between participants but fixed within each participant. The visual response pictograms remained on screen until participants responded.</p>
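<p>For illustration, the trial sequence can be sketched as follows; the helper functions are placeholders standing in for the audio-playback and keyboard routines of the custom Python scripts, and the file name is hypothetical.</p>
<code language="python">
# Schematic single-trial flow for the gating task (placeholder I/O helpers).
import time

def show_fixation():
    print("+")                       # placeholder for drawing the fixation cross

def play_snippet(wav_path):
    print("playing", wav_path)       # placeholder for audio playback

def get_keypress():
    return "left"                    # placeholder: would block until left/right arrow

def run_trial(wav_path, grouped_key="left"):
    """One trial: fixation, audio snippet, 500 ms delay, pictograms until keypress."""
    show_fixation()
    time.sleep(1.0)                  # 1 s fixation cross
    play_snippet(wav_path)
    time.sleep(0.5)                  # 500 ms delay before the response pictograms
    response = get_keypress()        # pictograms stay on screen until a key is pressed
    return "grouped" if response == grouped_key else "ungrouped"

# grouped_key reflects the condition-key mapping counterbalanced across participants.
print(run_trial("moni_und_lilli_gate5_grouped.wav"))
</code>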
<p><bold>Experimental procedure</bold>: The task began with a practice block of 10 randomized trials with visually presented accuracy feedback. Practice stimuli were gated snippets from another female speaker not included in the main task.</p>
<p>The main task consisted of four speaker blocks (60 trials per block), each containing recordings from a single speaker. The order of these blocks was randomized across participants. Within each block, all Gate 2&#x02013;5 stimuli for the respective speaker (<italic>N</italic> = 48) were presented in a randomized order first, followed by all Gate 7 stimuli (<italic>N</italic> = 12) also in random order. This design deviates from <xref ref-type="bibr" rid="B27">Hansen et al. (2023)</xref>, who used a fixed ascending gate order from Gate 1 to Gate 7 for each trial. In the present study, we randomized the presentation of Gates 2&#x02013;5 to eliminate strategic waiting behavior while maintaining decision difficulty, with Gate 7 serving as a fully informative baseline. Participants could take a short break between blocks. Each block lasted approximately 4 min.</p></sec></sec>
<sec>
<title>Just-Noticeable-Difference (JND) task</title>
<p>The JND tasks reported here follow the same procedure described in Hofmann et al. (Submitted).</p>
<sec>
<title>Stimuli</title>
<p>We developed three separate stimulus continua to measure participants&#x00027; auditory-perceptual acuity thresholds for the three cues typically used for prosodic boundary marking in German: Pitch rise, pause duration, and final lengthening. Importantly, although these prosodic boundary cues originate from prosodic boundary contexts, this perception task did not assess boundary perception but rather individual auditory-perceptual acuity for the underlying acoustic cue dimensions.</p>
<p>The base stimuli for the continua were derived from original recordings used in a perception experiment by <xref ref-type="bibr" rid="B17">de Beer et al. (2022)</xref>, in which a phonetically trained female speaker produced coordinate three-name sequences with varied prosodic boundary realizations. From these recordings, we selected the tokens exhibiting maximal prosodic boundary cue expression, meaning those tokens where the acoustic realization on <italic>Name2</italic> (for pitch and final lengthening) or immediately following <italic>Name2</italic> (for pause) showed the strongest manifestation of the relevant prosodic boundary cue. Specifically, we selected the instance where (a) the pitch rise between the stressed and unstressed syllable of <italic>Name2</italic> showed the largest excursion, (b) a clear and extended silent interval followed <italic>Name2</italic>, or (c) the final segment of <italic>Name2</italic> was maximally lengthened.</p>
<p>These maximally realized prosodic boundary cue segments were extracted and used as base stimuli for constructing three JND continua using custom Praat scripts (<xref ref-type="bibr" rid="B3">Boersma and Weenink, 1992-2020</xref>). Each continuum spanned from the maximally expressed prosodic boundary cue to a minimally expressed prosodic boundary cue. Thus, the starting point of each continuum represented the original, clearly perceivable prosodic boundary cue (base stimulus), while the end point of each continuum (reference stimulus) represented an acoustically neutral version with minimal or no prosodic boundary cue expression. The intermediate steps formed comparison stimuli with systematically decreasing prosodic boundary cue strength.</p>
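<p>For illustration, the step values of the three continua (using the endpoint and increment values specified in the following paragraphs) can be generated as below; the acoustic resynthesis itself was carried out with the Praat scripts mentioned above.</p>
<code language="python">
# Step values for the three JND continua (acoustic resynthesis not shown).
import numpy as np

pitch_steps = np.arange(13.0, -0.0001, -0.005)        # semitones, from 13 down to 0
pause_steps = np.arange(550, -1, -1)                  # ms of silence, from 550 down to 0
lengthening_steps = np.arange(225.0, 60.9999, -0.3)   # ms final vowel, from 225 down to about 61

print(len(pitch_steps), len(pause_steps), len(lengthening_steps))
</code>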
<p><bold>Pitch rise continuum</bold>: The base stimulus was <italic>Name2</italic> &#x02014; <italic>Nelli</italic>, produced with a pitch rise of 13 semitones. The pitch contour was progressively flattened from 13 to 0 semitones in 0.005-semitone increments, yielding a flat, non-rising contour as the reference stimulus.</p>
<p><bold>Pause duration continuum</bold>: The base stimulus was the coordinate phrase <italic>Name2 und Name3</italic> &#x02014; &#x0201C;<italic>Moni [PAUSE] und Lilli&#x0201D;</italic>, containing a 550 ms silent interval after <italic>Moni</italic>. The silence duration was shortened from 550 ms to 0 ms in 1 ms increments, ending with a reference stimulus without a pause.</p>
<p><bold>Final lengthening continuum</bold>: The base stimulus was <italic>Name2</italic> &#x02014; <italic>Mimmi</italic>, produced with a final vowel duration of 225 ms (approximately half of the total word duration). The final segment was progressively shortened in 0.3 ms increments until reaching 61 ms, resulting in roughly equal syllable durations (no perceivable final lengthening) as the reference stimulus.</p></sec>
<sec>
<title>Procedure</title>
<p>Auditory-perceptual acuity thresholds were measured using an AXB oddball discrimination task with an adaptive staircase procedure (based on <xref ref-type="bibr" rid="B51">Smith et al., 2020</xref>). Each participant completed three separate JND tasks, one per prosodic boundary cue (pitch rise, pause duration, final lengthening), administered in randomized order. Each task began with a short practice block with visual accuracy feedback until participants achieved four consecutive correct responses. No feedback was provided during experimental trials.</p>
<p><bold>Trial structure</bold>: In each trial, three auditory stimuli were presented in an AXB sequence (either AAB or ABB) with 500 ms inter-stimulus intervals. On every trial, participants heard two token types: A reference stimulus with minimal (or no) prosodic boundary cue expression (0 semitones pitch rise, 0 ms pause, 61 ms final lengthening) and a comparison stimulus with a detectable prosodic boundary cue strength (from the respective prosodic boundary cue continua). The acoustic difference between these two stimuli&#x02014;which we term the <italic>cue difference</italic>&#x02014;determined how easy or difficult discrimination was on that trial. Large <italic>cue differences</italic> made discrimination straightforward, while small <italic>cue differences</italic> approached the limits of perceptual discriminability.</p>
<p>The order of reference and comparison tokens was randomized, resulting in trials presenting either two identical reference stimuli and one comparison, or two identical comparison stimuli and one reference. Participants identified the odd-one-out by pressing the left arrow key (for ABB patterns) or right arrow key (for AAB patterns). Visual response prompts remained on screen until participants responded, showing corresponding arrow icons and schematic stimulus patterns (with &#x0201C;A&#x0201D; boxes in green, &#x0201C;B&#x0201D; boxes in black).</p>
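<p>As an illustration of this trial logic, the following sketch assembles a single AXB trial; the file names are placeholders, and audio playback and the 500 ms inter-stimulus intervals are omitted.</p>
<code language="python">
# Assemble one AXB trial: two identical tokens of one type plus one odd-one-out.
import random

def make_axb_trial(reference, comparison):
    doubled, odd = random.choice([(reference, comparison), (comparison, reference)])
    pattern = random.choice(["AAB", "ABB"])
    if pattern == "AAB":
        stimuli = [doubled, doubled, odd]    # odd one out last: right arrow is correct
        correct_key = "right"
    else:
        stimuli = [odd, doubled, doubled]    # odd one out first: left arrow is correct
        correct_key = "left"
    return stimuli, correct_key

print(make_axb_trial("pause_000ms.wav", "pause_120ms.wav"))
</code>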
<p><bold>Adaptive procedure</bold>: The adaptive staircase dynamically adjusted the <italic>cue difference</italic> presented on each trial to converge on each participant&#x00027;s discrimination threshold&#x02014;the smallest acoustic difference they could reliably detect. Each staircase began with the maximum <italic>cue difference</italic> to ensure initial success: 13 semitones for pitch rise, 550 ms for pause duration, and 164 ms for final lengthening. After each trial, the <italic>cue difference</italic> for the subsequent trial was adjusted based on performance: Correct responses decreased the <italic>cue difference</italic> (making the next trial harder by bringing the comparison stimulus closer to the reference stimulus), while incorrect responses increased it (making the next trial easier by moving the comparison farther from the reference).</p>
<p>The magnitude of these adjustments, i.e., the step size, also changed dynamically throughout each staircase. Initial step sizes were large to allow rapid descent from the maximum starting difference toward the participant&#x00027;s approximate threshold region, then progressively decreased to enable precise threshold estimation: From 0.75 to 0.005 semitones for pitch rise, from 30 to 1 ms for pause duration, and from 7.5 to 0.3 ms for final lengthening. This combination of large initial steps (for efficiency) and small later steps (for precision) produced accurate threshold estimates within a reasonable number of trials.</p>
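<p>The following sketch illustrates such an adaptive staircase for the pitch-rise continuum (a simplified illustration with a simulated listener, not the original experiment code); it also anticipates the 1-down-1-up to 2-down-1-up switch and the reversal-based stopping rule described in the next paragraph.</p>
<code language="python">
# Simplified adaptive staircase sketch (pitch-rise values; simulated listener).
import random

def simulated_response(cue_difference, true_threshold=1.0):
    """Toy listener that is correct more often for larger cue differences."""
    p_correct = 0.5 + 0.5 * min(1.0, cue_difference / (2 * true_threshold))
    return p_correct > random.random()

def run_staircase(start=13.0, steps=(0.75, 0.25, 0.05, 0.005),
                  max_trials=120, max_reversals=18):
    cue_difference, reversals, moves = start, [], []
    rule_down, correct_in_row, had_error, step_idx = 1, 0, False, 0
    for _ in range(max_trials):
        if simulated_response(cue_difference):
            correct_in_row += 1
            move = -1 if correct_in_row >= rule_down else 0
            if move:
                correct_in_row = 0
        else:
            correct_in_row, move = 0, 1
            if not had_error:
                had_error, rule_down = True, 2       # first error: switch to 2-down-1-up
        if move:
            if moves and moves[-1] != move:          # direction change = reversal
                reversals.append(cue_difference)
                step_idx = min(step_idx + 1, len(steps) - 1)   # shrink step size
            moves.append(move)
            cue_difference = max(0.0, cue_difference + move * steps[step_idx])
        if len(reversals) >= max_reversals:
            break
    return reversals

print(len(run_staircase()))
</code>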
<p>The staircase initially followed a 1-down-1-up adjustment rule, where each correct response decreased the <italic>cue difference</italic>, allowing rapid initial convergence (an incorrect response would have increased it). After the first error, the procedure switched to a 2-down-1-up rule, requiring two consecutive correct responses to decrease the <italic>cue difference</italic> but only one incorrect response to increase it. This asymmetric rule converges on a performance level of approximately 71% accuracy (<xref ref-type="bibr" rid="B40">Levitt, 1971</xref>), which provides a stable estimate of the smallest reliably detectable <italic>cue difference</italic> while avoiding ceiling or floor effects. A reversal occurred when the adjustment direction changed from decreasing to increasing <italic>cue difference</italic> or vice versa, indicating that the staircase had crossed the participant&#x00027;s threshold. The task stopped after either 120 trials or 18 reversals, whichever came first. JND threshold calculation was based on the <italic>cue differences</italic> at reversal points.</p></sec>
<sec>
<title>Data pre-processing</title>
<p>The JND threshold for each prosodic boundary cue was calculated as the mean of the six most stable consecutive reversal points, defined as the set of six consecutive reversals exhibiting the lowest standard deviation (<xref ref-type="bibr" rid="B6">Brunner et al., 2011</xref>; <xref ref-type="bibr" rid="B45">Oschkinat et al., 2022</xref>). JND thresholds represent the smallest acoustic difference a listener can discriminate; thus, lower thresholds indicate better perceptual ability. However, to facilitate interpretation throughout the analyses and to align with the intuitive meaning of &#x0201C;acuity&#x0201D; (where higher values indicate better ability), JND values were <italic>z</italic>-scored and direction-reversed such that higher auditory-perceptual acuity scores correspond to better perceptual sensitivity. These transformed thresholds are referred to throughout as pitch acuity, pause acuity, and final lengthening acuity, representing each participant&#x00027;s <italic>z</italic>-scored auditory-perceptual acuity for the respective prosodic boundary cue. <xref ref-type="fig" rid="F2">Figure 2</xref> displays the distributions of the <italic>z</italic>-scored JND thresholds for each prosodic boundary cue.</p>
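<p>The threshold computation and the subsequent transformation to acuity scores can be summarized as in the following sketch (all values are invented, and the original analysis scripts may differ in detail):</p>
<code language="python">
# JND threshold = mean of the six consecutive reversals with the lowest SD;
# acuity = z-scored, sign-flipped thresholds (higher = better).
import numpy as np

def jnd_threshold(reversals, window=6):
    r = np.asarray(reversals, dtype=float)
    runs = [r[i:i + window] for i in range(len(r) - window + 1)]
    most_stable = min(runs, key=np.std)      # consecutive run with the lowest SD
    return most_stable.mean()

def acuity_scores(thresholds):
    t = np.asarray(thresholds, dtype=float)
    return -(t - t.mean()) / t.std(ddof=1)   # flip sign: higher score = finer acuity

# One participant's reversal values (semitones), then a set of thresholds
# across participants for the same cue:
print(jnd_threshold([4.2, 1.1, 2.0, 1.4, 1.6, 1.3, 1.5, 1.2, 1.4, 1.6]))
print(acuity_scores([0.8, 1.5, 0.6, 2.3, 1.1]))
</code>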
<fig position="float" id="F2">
<label>Figure 2</label>
<caption><p>Distribution of auditory-perceptual acuity measures for each prosodic boundary cue. Panels show <italic>z</italic>-scored JND thresholds for pitch rise, pause duration, and final lengthening. Only data from analyzed participants are shown (<italic>n</italic> = 57 pitch, <italic>n</italic> = 58 pause, <italic>n</italic> = 60 final lengthening).</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="flang-05-1763160-g0002.tif">
<alt-text content-type="machine-generated">Violin plots showing distributions of auditory-perceptual acuity scores derived from z-scored and direction-reversed just-noticeable-difference thresholds. Separate panels represent pitch rise acuity, pause duration acuity, and final lengthening acuity. Higher values indicate better perceptual sensitivity. Each plot displays individual data points, distribution density, medians, and interquartile ranges. The distributions reveal substantial individual variability across all three auditory-perceptual acuity measures, with partially overlapping ranges but different central tendencies. Only data from participants included in the final analyses are shown, with sample sizes differing slightly across the three acuity measures.</alt-text>
</graphic>
</fig>
<p><xref ref-type="fig" rid="F3">Figure 3</xref> visualizes individual auditory-perceptual acuities for the tested prosodic boundary cue continua. Each column represents one participant (ordered left-to-right by overall mean acuity), and each row represents a prosodic boundary cue (pitch rise, pause duration, final lengthening). The different color gradients reflect standardized auditory-perceptual acuity scores relative to the sample mean (warm = higher than mean; cool = lower than mean). The figure reveals substantial variability in auditory-perceptual profiles, both across participants and across prosodic boundary cues. Some participants show consistently elevated (or reduced) sensitivity across all auditory-perceptual acuity measures (uniform color patterns within a column), whereas others exhibit selective strengths for specific prosodic boundary cues, that is, relatively higher auditory-perceptual acuity for some prosodic boundary cues but not others (mixed colors within a column).</p>
<fig position="float" id="F3">
<label>Figure 3</label>
<caption><p>Individual differences in auditory-perceptual acuity across prosodic boundary cues. Each column corresponds to one participant (ordered by their mean acuity across cues), and each row corresponds to one prosodic boundary cue type (pitch rise, pause duration, final lengthening). Warm colors indicate higher auditory-perceptual acuity relative to the sample mean, cold colors indicate lower auditory-perceptual acuity, and light yellow represents values near the sample mean. Tiles marked with red crosses indicate missing auditory-perceptual acuity data for that participant-cue combination, reflecting task-specific exclusions from the JND analysis (<italic>n</italic> = 3 excluded for pitch rise, <italic>n</italic> = 2 for pause duration; see Participant exclusion criteria).</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="flang-05-1763160-g0003.tif">
<alt-text content-type="machine-generated">Heatmap showing individual auditory-perceptual acuity profiles across three prosodic boundary cues. Columns represent participants ordered from lower to higher mean acuity, and rows represent pitch rise, pause duration, and final lengthening acuity. Color shading reflects z-scored auditory-perceptual acuity values, with cooler colors indicating lower acuity and warmer colors indicating higher acuity relative to the sample mean. Light yellow indicates values near the mean. Red crosses mark missing acuity values resulting from task-specific exclusions. The figure illustrates both shared acuity patterns across cues for some participants and cue-specific strengths or weaknesses for others.</alt-text>
</graphic>
</fig>
<p>As is visible in <xref ref-type="fig" rid="F3">Figure 3</xref>, the heatmap reveals both inter- and intra-individual patterns in auditory-perceptual acuity. Many participants display relatively consistent color patterns across prosodic boundary cues, suggesting shared sensitivity, while others show notable variation, indicating cue-specific strengths and weaknesses. This pattern is reflected in the moderate correlations between auditory-perceptual acuity measures: Pitch and pause acuity correlated at <italic>r</italic> = 0.52 [95% CI [0.30, 0.69], <italic>n</italic> = 56], pitch and final lengthening acuity at <italic>r</italic> = 0.30 [95% CI [0.04, 0.52], <italic>n</italic> = 57], and pause and final lengthening acuity at <italic>r</italic> = 0.36 [95% CI [0.12, 0.59], <italic>n</italic> = 58]. These correlations indicate overlapping but not identical perceptual sensitivities. Correlations were computed using Pearson&#x00027;s product-moment correlation between participants&#x00027; <italic>z</italic>-scored JND thresholds (auditory-perceptual acuity) for each cue, based on pairwise complete observations (<italic>n</italic> = 56&#x02013;58 per cue pair). Participants with higher auditory-perceptual acuity in one prosodic boundary cue tend to show higher auditory-perceptual acuity for the others and vice versa, but substantial individual variation remains, with some participants showing markedly different auditory-perceptual acuity profiles across the three acoustic dimensions.</p>
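<p>A minimal R sketch of these correlations is shown below; the data frame and column names are assumptions.</p>
<preformat preformat-type="code"><![CDATA[
## Pairwise-complete Pearson correlations among the z-scored acuity measures
## (data frame `acuity` with one row per participant; names are assumptions).
round(cor(acuity[, c("pitch", "pause", "lengthening")],
          use = "pairwise.complete.obs", method = "pearson"), 2)

## Confidence interval for a single pair, e.g. pitch vs. pause acuity:
ok <- complete.cases(acuity$pitch, acuity$pause)
cor.test(acuity$pitch[ok], acuity$pause[ok])   # r with 95% CI, n = sum(ok)
]]></preformat>
</sec></sec>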
<sec>
<title>Participants</title>
<p>Sixty native German speakers (48 females) participated in the study, with a mean age of 24.78 years (SD = 6.07, range: 18&#x02013;49). Participants had no reported history of speech or language disorders, hearing impairments, or neurological or psychological conditions. They received either monetary reimbursement or course credit for completing two experimental sessions (approximately 2 h each), scheduled on the same or separate days based on availability. Of the two tasks reported here, the JND task was conducted in the first session, while the second session began with the gating task. The study was conducted in accordance with the Declaration of Helsinki and approved by the University of Potsdam Ethics Committee (approval code: 99/2020). Informed consent was obtained from all participants.</p>
<sec>
<title>Participant exclusion criteria</title>
<p><bold>Gating task</bold>: No participants were excluded. Following <xref ref-type="bibr" rid="B27">Hansen et al. (2023)</xref>, we evaluated participant performance using three criteria: (a) above-chance accuracy at Gate 7 (&#x0003E; 50%), (b) accuracy above the group mean minus 2SD at Gate 7, and (c) no systematic response patterns. All participants met criteria (a) and (c). However, we did not apply criterion (b). <xref ref-type="bibr" rid="B27">Hansen et al. (2023)</xref> used the group mean minus 2SD threshold to identify participants with unusually low performance, potentially indicating poor task compliance. This criterion was not suitable for our randomized-gate design, which was inherently more difficult than their ascending design. Applying it would have removed four participants with high accuracy (85&#x02013;88%) who clearly demonstrated task compliance.</p>
<p><bold>JND task</bold>: No participants were excluded based on the pre-registered criterion for adaptive staircase performance (<xref ref-type="bibr" rid="B45">Oschkinat et al., 2022</xref>), which required JND thresholds to decrease below 70% of the initial <italic>cue difference</italic>. However, following data inspection, some participants exhibited extreme JND values for specific prosodic boundary cues (Hofmann et al., Submitted). We thus applied <italic>post-hoc</italic> exclusion criteria using the interquartile range (IQR) rule, removing participants whose JND scores fell below Q1 &#x02013; 2 &#x000D7; IQR or above Q3 &#x0002B; 2 &#x000D7; IQR for each prosodic boundary cue separately. This led to the exclusion of three participants from the JND pitch rise task, leaving <italic>N</italic> = 57, and two from the JND pause duration task, leaving <italic>N</italic> = 58. No exclusions were made for the JND final lengthening task (<italic>N</italic> = 60).</p>
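<p>A sketch of this exclusion rule in R is given below; the object and column names are assumptions, and the default quantile estimator in R is assumed.</p>
<preformat preformat-type="code"><![CDATA[
## Post-hoc 2 x IQR fences, applied separately to the JND thresholds of each
## cue (object and column names are assumptions).
iqr_keep <- function(x, k = 2) {
  q <- quantile(x, c(0.25, 0.75), na.rm = TRUE)
  fence <- k * (q[2] - q[1])                 # k times the interquartile range
  x >= q[1] - fence & x <= q[2] + fence
}
keep_pitch <- iqr_keep(jnd$pitch)            # FALSE for thresholds outside the fences
jnd_pitch_analyzed <- jnd$pitch[keep_pitch]  # leaving N = 57 for pitch rise, as reported
]]></preformat>
</sec></sec></sec>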
<sec id="s3">
<title>Statistical modeling</title>
<p>We analyzed binary responses (<italic>grouped</italic> vs. <italic>ungrouped</italic>) using Bayesian generalized mixed-effects regression within a Signal Detection Theory framework (<xref ref-type="bibr" rid="B64">Zloteanu and Vuorre, 2024</xref>), fitted with the <italic>brms</italic> package and the Stan programming language (<xref ref-type="bibr" rid="B7">Buerkner, 2018</xref>; <xref ref-type="bibr" rid="B53">Stan Development Team, 2020</xref>). Responses were modeled using a Bernoulli family with a probit link function. The outcome variable (response) was factor-coded with <italic>ungrouped</italic> as the reference level, such that the model estimated the probability of a <italic>grouped</italic> response. The probit link transforms predicted probabilities into <italic>z</italic>-scores on the standard normal distribution. On this scale, a coefficient of 1 represents a shift of 1 standard deviation (SD) in the underlying decision variable, corresponding to a higher or lower probability of responding <italic>grouped</italic>. This transformation allows model coefficients to be interpreted as the extent to which each predictor shifts a participant&#x00027;s internal decision tendency toward or away from responding <italic>grouped</italic>.</p>
<p>To address our primary hypothesis, we fitted four Bayesian Signal Detection Theory probit regression models: Three separate models, each including one auditory-perceptual acuity (pitch, pause, or final lengthening acuity) as a predictor, and one combined model including all three auditory-perceptual acuities simultaneously. The fixed-effects structure for each model comprised gate (levels 2, 3, 4, 5, 7), stimulus condition (<italic>grouped</italic> vs. <italic>ungrouped</italic>), the relevant auditory-perceptual acuity measure(s), and all two-way and three-way interactions among these predictors. The combined model included the same structure but with terms for pitch acuity, pause acuity, and final lengthening acuity (as well as their interactions with gate and stimulus condition), allowing us to estimate the unique contribution of each auditory-perceptual acuity while accounting for shared variance among them (see <xref ref-type="bibr" rid="B58">Ver&#x000ED;ssimo, 2023</xref>; <xref ref-type="bibr" rid="B62">Wurm and Fisicaro, 2014</xref>).</p>
<p>Stimulus condition was sum-coded (<italic>grouped</italic> = &#x0002B;0.5, <italic>ungrouped</italic> = &#x02013;0.5), such that the stimulus condition coefficient directly reflected discriminability (how well participants distinguished between the two stimulus conditions). Interactions involving stimulus condition (e.g., gate &#x000D7; stimulus condition and auditory-perceptual acuity &#x000D7; stimulus condition) represent changes in discrimination as a function of accumulated prosodic information or individual auditory-perceptual acuity. In contrast, model terms that do not involve stimulus condition capture response bias, or more concretely, participants&#x00027; tendency to respond <italic>grouped</italic> or <italic>ungrouped</italic> independent of the actual stimulus condition. Specifically, the intercept reflects overall response bias, while gate and auditory-perceptual acuity main effects represent how this bias changes across gates or with auditory-perceptual acuity level. The factor gate (levels 2, 3, 4, 5, 7) was coded using (centered) sliding-difference contrasts (Gate 3&#x02013;2, Gate 4&#x02013;3, Gate 5&#x02013;4, Gate 7&#x02013;5), quantifying how discrimination and bias change as additional acoustic information becomes available. Since auditory-perceptual acuity measures were <italic>z</italic>-scored (mean = 0, SD = 1), coefficients represent effects associated with a 1SD difference in auditory-perceptual acuity.</p>
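<p>The predictor coding can be sketched in R as follows; the data frame and column names are assumptions (the sliding-difference contrasts are available as <italic>contr.sdif</italic> in the MASS package).</p>
<preformat preformat-type="code"><![CDATA[
## Predictor coding sketch (data frame `dat`; names are assumptions).
library(MASS)                                    # provides contr.sdif()

dat$condition_c <- ifelse(dat$condition == "grouped", 0.5, -0.5)   # sum coding
dat$gate        <- factor(dat$gate, levels = c(2, 3, 4, 5, 7))
contrasts(dat$gate) <- contr.sdif(5)             # sliding differences: 3-2, 4-3, 5-4, 7-5

## Acuity predictors are already z-scored, so each coefficient corresponds to a
## 1 SD difference in acuity; with the +/-0.5 condition coding, the condition
## coefficient equals the grouped-ungrouped difference in probit units.
]]></preformat>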
<p>Following <xref ref-type="bibr" rid="B1">Barr et al. (2013)</xref>, we aimed for maximal random-effects structures but constrained complexity to ensure model identifiability given the available data (<xref ref-type="bibr" rid="B2">Bates et al., 2015</xref>). The final models included random intercepts for subjects, items, and speakers, random slopes for gate and stimulus condition by subject, and random slopes for gate, stimulus condition, and the relevant auditory-perceptual acuity by item, with correlated random effects. Speaker was included only as a random intercept (not as a random-slope term), since variance estimates become unreliable with fewer than five grouping levels and we had only four speakers (<xref ref-type="bibr" rid="B4">Bolker, 2015</xref>).</p>
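<p>A sketch of one of the separate-acuity models (here, pitch acuity) in <italic>brms</italic> is shown below. The variable names, the exact random-slope terms, the prior scales, and the sampler settings are illustrative assumptions; the full specifications are given in the Supplementary material and in the analysis scripts on OSF.</p>
<preformat preformat-type="code"><![CDATA[
## Sketch of a separate-acuity probit SDT model in brms (assumptions noted above).
library(brms)

fit_pitch <- brm(
  ## response: factor with "ungrouped" as reference, so P(grouped) is modeled
  response ~ gate * condition_c * pitch_acuity +
    (1 + gate + condition_c | subject) +
    (1 + gate + condition_c + pitch_acuity | item) +
    (1 | speaker),                                # intercept only: four speakers
  data   = dat,
  family = bernoulli(link = "probit"),            # probit link for the SDT parameterization
  prior  = c(prior(normal(0, 1), class = "Intercept"),
             prior(normal(0, 1), class = "b")),   # weakly-informative (scales assumed)
  sample_prior = "yes",                           # keep prior draws for Savage-Dickey BFs
  chains = 4, cores = 4, iter = 4000, seed = 1
)
]]></preformat>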
<p>Model results reported in the main text are based on weakly-informative priors, with normal distributions centered at zero and standard deviations varying by parameter type (see <xref ref-type="supplementary-material" rid="SM1">Supplementary material</xref> for specific values). These priors were chosen to rule out implausible extremes while allowing large effects in either direction, following current recommendations (see <xref ref-type="bibr" rid="B23">Gelman et al., 2008</xref>; <xref ref-type="bibr" rid="B24">Ghosh et al., 2018</xref>; <xref ref-type="bibr" rid="B44">McElreath, 2020</xref>; <xref ref-type="bibr" rid="B57">Vasishth et al., 2018</xref>). Prior predictive checks confirmed that these choices produced reasonable data-level predictions. The detailed prior specifications for all models are provided in <xref ref-type="supplementary-material" rid="SM1">Supplementary material</xref>.</p>
<p>Hypothesis testing was performed using Bayes Factors, which quantify evidence strength for an effect by comparing the model (i.e., the alternative hypothesis, H1) including the effect to a null model (i.e., the null hypothesis, H0), where the effect is excluded. Since Bayes Factors are sensitive to prior specifications, we conducted a sensitivity analysis with five different prior configurations, ranging from narrower (moderate and strong informative) to wider (moderate wide and wide) settings compared to our default weakly-informative priors. We calculated natural-logged Bayes Factors (lnBF<sub>10</sub>) using the Savage-Dickey method (<xref ref-type="bibr" rid="B18">Dickey and Lientz, 1970</xref>; <xref ref-type="bibr" rid="B60">Wagenmakers et al., 2010</xref>), where values &#x0003E; 1 indicate evidence for H1 (the respective effect), values &#x0003C;&#x02013;1 support H0 (absence of the effect), values between &#x02013;1 and 1 are inconclusive, and values &#x0003E; 3 represent strong evidence for H1 (<xref ref-type="bibr" rid="B33">Jeffreys, 1991</xref>; <xref ref-type="bibr" rid="B35">Kass and Raftery, 1995</xref>; <xref ref-type="bibr" rid="B59">Ver&#x000ED;ssimo, 2025</xref>). Effects are considered reliable when they meet these thresholds (lnBF<sub>10</sub> &#x0003E; 1 or &#x0003E; 3) in the base model and show consistent direction and magnitude across prior-sensitivity analyses.</p>
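<p>Under the assumptions of the model sketch above, the Savage-Dickey computation for a single coefficient can be illustrated with the <italic>hypothesis</italic> function in <italic>brms</italic>; the parameter name below is an assumption.</p>
<preformat preformat-type="code"><![CDATA[
## Savage-Dickey Bayes Factor for one coefficient (requires sample_prior = "yes").
h <- hypothesis(fit_pitch, "condition_c:pitch_acuity = 0")

## For point hypotheses, hypothesis() reports the evidence ratio in favour of
## the point null (BF01), so the log Bayes Factor for the effect is its
## negated natural log:
lnBF10 <- -log(h$hypothesis$Evid.Ratio)
lnBF10
]]></preformat>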
<p>Model convergence was judged by <inline-formula><mml:math id="M1"><mml:mrow><mml:mover accent="true"><mml:mrow><mml:mi>R</mml:mi></mml:mrow><mml:mo>^</mml:mo></mml:mover><mml:mo>&#x02264;</mml:mo><mml:mn>1</mml:mn><mml:mo>.</mml:mo><mml:mn>01</mml:mn></mml:mrow></mml:math></inline-formula>, adequate effective sample sizes, and stable trace plots. Finally, model quality was assessed with posterior predictive checks by comparing model predictions to the observed data distribution.</p></sec>
<sec sec-type="results" id="s4">
<title>Results</title>
<p>Below, we report estimates from the three separate auditory-perceptual acuity models (pitch acuity, pause acuity, final lengthening acuity), unless otherwise noted. The complete results for both the separate models and the combined model can be found in <xref ref-type="supplementary-material" rid="SM1">Supplementary material</xref>. Since the auditory-perceptual acuity measures are uncorrelated with our experimental factors (stimulus condition, gate), estimates for effects not involving auditory-perceptual acuity are nearly identical across specifications (see fixed effects tables in <xref ref-type="supplementary-material" rid="SM1">Supplementary material</xref>). For effects involving auditory-perceptual acuity, we report both separate and combined model estimates to reveal how auditory-perceptual acuity effects change when modeled independently vs. simultaneously (i.e., when pitch acuity, pause acuity, and final lengthening acuity are simultaneously included as predictors in the same statistical model).</p>
<sec>
<title>Overall discriminability</title>
<p>Participants demonstrated strong discriminability between <italic>grouped</italic> and <italic>ungrouped</italic> stimulus conditions. Across all three separate auditory-perceptual acuity models, the main effect of stimulus condition was large and consistent (pitch acuity model: <italic>b</italic> = 1.93 probit units, 95% CI [1.54, 2.28], lnBF<sub>10</sub> = 37.07; pause acuity model: <italic>b</italic> = 1.92, 95% CI [1.54, 2.25], lnBF<sub>10</sub> = 40.45; final lengthening acuity model: <italic>b</italic> = 1.87, 95% CI [1.51, 2.20], lnBF<sub>10</sub> = 42.10). These estimates represent performance averaged across gates and evaluated at mean auditory-perceptual acuity, reflecting the mean-centering of the auditory-perceptual acuity predictors. These results provide robust evidence that participants could reliably distinguish the two prosodic boundary stimulus conditions, confirming that the bottom-up acoustic differences between stimulus conditions were effective in eliciting differential perceptual responses and validating the experimental paradigm.</p></sec>
<sec>
<title>Discriminability changes from gate to gate</title>
<p><xref ref-type="fig" rid="F4">Figure 4</xref> presents the discriminability trajectory across gates, illustrating the gate &#x000D7; stimulus condition interaction. These effects capture how discriminability evolves with increasing gates, reflecting the general temporal pattern of discriminability improvement from one gate to the next, averaged across all participants regardless of individual differences in auditory-perceptual acuity. Since higher gates contain more acoustic information than lower gates, the interaction reveals the points in time at which critical prosodic information becomes available. Our analysis included Gates 2&#x02013;5 and Gate 7, excluding Gate 1 due to unstable performance patterns (<xref ref-type="bibr" rid="B27">Hansen et al., 2023</xref>) and Gate 6 because it provided equivalent prosodic boundary cue information to Gate 7, making it redundant given that prosodic boundary cues in the <italic>grouped</italic> stimulus condition are maximal at/after the second name (<xref ref-type="bibr" rid="B32">Huttenlauch et al., 2021</xref>).</p>
<fig position="float" id="F4">
<label>Figure 4</label>
<caption><p>Discriminability (dprime) across gates, at mean auditory-perceptual acuity levels, showing progressive improvement in discriminating between stimulus conditions as more acoustic material becomes available. dprime quantifies how well listeners distinguish grouped from ungrouped stimuli at each gate, independent of response bias; it is derived from the probit model&#x00027;s estimated group-mean discriminability. Points with error bars show mean dprime and 95% credible intervals at each gate for pitch (blue circles), pause (yellow triangles), and final lengthening (red squares) auditory-perceptual acuity models. Dashed lines indicate trajectories across gates.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="flang-05-1763160-g0004.tif">
<alt-text content-type="machine-generated">Line plot showing discriminability between grouped and ungrouped stimulus conditions across gating positions, evaluated at mean auditory-perceptual acuity. The x-axis represents gates 2 to 7, thus from early to late gates, and the y-axis shows discriminability in probit units. Separate trajectories are shown for pitch, pause, and final lengthening models. Discriminability remains relatively stable at early gates and increases sharply when the full second name becomes available (at gate 5), with further improvement at the final gate 7 when pause information can be perceived. Points represent mean discriminability estimates, error bars indicate 95% credible intervals, and dashed lines connect gate-to-gate trajectories.</alt-text>
</graphic>
</fig>
<p>As can be seen in <xref ref-type="fig" rid="F4">Figure 4</xref>, starting from the initial name at Gate 2 (e.g., &#x0201C;<italic>Moni&#x0201D;</italic>), discriminability (dprime) remained relatively stable throughout the addition of the first conjunction at Gate 3 (e.g., &#x0201C;<italic>Moni und&#x0201D;</italic>) and the first syllable of the second name at Gate 4 (e.g., &#x0201C;<italic>Moni und Li&#x0201D;</italic>). A sharp increase in discriminability occurred when the complete second name became available at Gate 5 (e.g., &#x0201C;<italic>Moni und Lilli&#x0201D;</italic>), providing full access to pitch range and final lengthening cues after <italic>Name2</italic>. Discriminability improved further at Gate 7 (e.g., &#x0201C;<italic>Moni und Lilli und Manu&#x0201D;</italic>), when the full three-name coordinate structure was revealed and the pause after <italic>Name2</italic> became perceivable. This temporal trajectory emerged consistently across all three separate auditory-perceptual acuity models.</p>
<p>The gate-by-gate contrasts confirmed this pattern: The Gate 3 vs. 2 contrast showed a small improvement in discriminability, with inconclusive evidence for the effect: Pitch acuity model: <italic>b</italic> = 0.21, 95% CI [0.02, 0.39], lnBF<sub>10</sub> = 0.18; pause acuity model: <italic>b</italic> = 0.20, 95% CI [0.01, 0.39], lnBF<sub>10</sub> = &#x02013;0.04; final lengthening acuity model: <italic>b</italic> = 0.22, 95% CI [0.03, 0.41], lnBF<sub>10</sub> = 0.58. The improvement from Gate 4 vs. 3 was even smaller, with evidence against an effect: Pitch acuity model: <italic>b</italic> = 0.03, 95% CI [&#x02013;0.17, 0.24], lnBF<sub>10</sub> = &#x02013;2.30; pause acuity model: <italic>b</italic> = 0.05, 95% CI [&#x02013;0.14, 0.25], lnBF<sub>10</sub> = &#x02013;2.22; final lengthening acuity model: <italic>b</italic> = 0.03, 95% CI [&#x02013;0.17, 0.22], lnBF<sub>10</sub> = &#x02013;2.37.</p>
<p>A critical shift, however, occurred when performance at Gate 5 was compared against performance at Gate 4: Pitch acuity model: <italic>b</italic> = 1.85, 95% CI [1.34, 2.30], lnBF<sub>10</sub> = 36.53; pause acuity model: <italic>b</italic> = 1.85, 95% CI [1.33, 2.30], lnBF<sub>10</sub> = 37.84; final lengthening acuity model: <italic>b</italic> = 1.83, 95% CI [1.31, 2.28], lnBF<sub>10</sub> = 36.80. This was followed by a continued improvement at Gate 7: Pitch acuity model: <italic>b</italic> = 1.66, 95% CI [1.14, 2.16], lnBF<sub>10</sub> = 9.32, pause acuity model: <italic>b</italic> = 1.57, 95% CI [1.09, 2.05], lnBF<sub>10</sub> = 8.41, final lengthening acuity model: <italic>b</italic> = 1.48, 95% CI [1.08, 1.88], lnBF<sub>10</sub> = 28.68.</p></sec>
<sec>
<title>Discriminability modulation by auditory-perceptual acuities</title>
<p><xref ref-type="fig" rid="F5">Figure 5</xref> displays the relationship between auditory-perceptual acuities and discriminability across the separate and combined models, illustrating the stimulus condition &#x000D7; auditory-perceptual acuity interactions. These effects captured whether individual differences in auditory-perceptual acuities explain variability in overall discriminability (i.e., across all gates). Specifically, they examined whether participants with higher auditory-perceptual acuity showed an enhanced ability to distinguish <italic>grouped</italic> from <italic>ungrouped</italic> stimuli based on the partial prosodic information available in the gated stimuli compared to participants with lower auditory-perceptual acuity. We examined these relationships using two complementary analytical strategies: Fitting separate models for each auditory-perceptual acuity measure (pitch, pause, final lengthening acuity), and fitting a combined model including all three auditory-perceptual acuities together. The separate models establish whether each auditory-perceptual acuity predicts boundary discrimination when examined in isolation. The combined model additionally reveals whether any auditory-perceptual acuity provides unique predictive power beyond the others, or whether effects attenuate due to shared variance among the auditory-perceptual acuity measures (see <xref ref-type="bibr" rid="B58">Ver&#x000ED;ssimo, 2023</xref>; <xref ref-type="bibr" rid="B62">Wurm and Fisicaro, 2014</xref>).</p>
<fig position="float" id="F5">
<label>Figure 5</label>
<caption><p>Discriminability (dprime) as a function of auditory-perceptual acuity across separate and combined models. Each panel shows how discriminability relates to <bold>(A)</bold> pitch acuity, <bold>(B)</bold> pause acuity, and <bold>(C)</bold> final lengthening acuity, measured in probit units. Colored lines show separate models for each auditory-perceptual acuity type; gray lines show the combined model including all three auditory-perceptual acuities simultaneously. Shaded areas represent 95% credible intervals.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="flang-05-1763160-g0005.tif">
<alt-text content-type="machine-generated">Three line plots showing the relationship between auditory-perceptual acuity and discriminability across separate and combined statistical models. Panels display pitch acuity, pause acuity, and final lengthening acuity on the x-axis, with discriminability on the y-axis. Colored lines represent models in which each auditory-perceptual acuity measure is tested separately, while gray lines represent the combined model including all three acuities simultaneously. Shaded regions indicate 95% credible intervals. Discriminability increases with higher auditory-perceptual acuity in the separate models, but slopes are substantially reduced in the combined model, indicating shared predictive variance.</alt-text>
</graphic>
</fig>
<p>The colored regression lines in <xref ref-type="fig" rid="F5">Figure 5</xref> show the separate models; the gray regression lines show the combined model. When examined separately, all three auditory-perceptual acuities demonstrated clear positive relationships with discriminability, evidenced by the steeper colored slopes. The substantially flatter gray slopes reveal that when all auditory-perceptual acuities are accounted for together in the combined model, the individual effects attenuated. This pattern suggests that all three auditory-perceptual acuity measures share considerable predictive variance.</p>
<p><bold>Separate acuity models</bold>: When tested separately, both pause acuity [<italic>b</italic> = 0.29, 95% CI [0.11, 0.47], lnBF<sub>10</sub> = 2.29] and final lengthening acuity [<italic>b</italic> = 0.25, 95% CI [0.07, 0.44], lnBF<sub>10</sub> = 1.21] showed evidence for effects on boundary discriminability, indicating that participants with better sensitivity to these temporal prosodic boundary cues demonstrated enhanced ability to distinguish <italic>grouped</italic> from <italic>ungrouped</italic> stimulus conditions. Pitch acuity showed a slightly more modest contribution [<italic>b</italic> = 0.22, 95% CI [0.03, 0.42], lnBF<sub>10</sub> = 0.22], with inconclusive evidence, suggesting that pitch sensitivity may be less central to this particular boundary detection task.</p>
<p><bold>Combined acuities model</bold>: When all three auditory-perceptual acuities were modeled simultaneously, each individual effect became substantially attenuated: Pause acuity dropped from <italic>b</italic> = 0.29 to <italic>b</italic> = 0.17 [95% CI [&#x02013;0.07, 0.40], lnBF<sub>10</sub> = &#x02013;1.12], final lengthening acuity dropped from <italic>b</italic> = 0.25 to <italic>b</italic> = 0.11 [95% CI [&#x02013;0.11, 0.33], lnBF<sub>10</sub> = &#x02013;1.69], and pitch acuity similarly dropped from <italic>b</italic> = 0.22 to <italic>b</italic> = 0.11 [95% CI [&#x02013;0.11, 0.33], lnBF<sub>10</sub> = &#x02013;1.74]. Evidence actually favored the null hypothesis for all three auditory-perceptual acuity types, suggesting that none of the three auditory-perceptual acuities provides a unique discriminative advantage when the others are accounted for.</p></sec>
<sec>
<title>Gate-specific discriminability modulation by auditory-perceptual acuities</title>
<p><xref ref-type="fig" rid="F6">Figure 6</xref> visualizes the relationship between auditory-perceptual acuity and discriminability across gates, illustrating the stimulus condition &#x000D7; auditory-perceptual acuity &#x000D7; gate interaction. These effects captured whether the relationship between auditory-perceptual acuity and boundary discriminability differed across gates as acoustic information accumulated. In particular, we examined whether auditory-perceptual acuity advantages were particularly pronounced at early gates where prosodic boundary cues are more subtle. As with the two-way interactions, we examined these relationships using separate models (testing each auditory-perceptual acuity individually) and a combined model (testing all three simultaneously) to determine whether any auditory-perceptual acuity shows gate-specific effects beyond the others.</p>
<fig position="float" id="F6">
<label>Figure 6</label>
<caption><p>Discriminability (dprime) by auditory-perceptual acuity level and gate position. <bold>(A&#x02013;C)</bold> show separate models (each auditory-perceptual acuity modeled independently); <bold>(D&#x02013;F)</bold> show the combined model (all auditory-perceptual acuities modeled simultaneously). Points represent median dprime with 95% credible intervals at three auditory-perceptual acuity levels, with darker colors indicating lower auditory-perceptual acuity (minimum, ca. &#x02013;2SD) and lighter colors indicating higher auditory-perceptual acuity (maximum, ca. &#x0002B;1SD). Dotted lines connect estimates across gates. Blue = pitch acuity, yellow = pause acuity, red = final lengthening acuity.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="flang-05-1763160-g0006.tif">
<alt-text content-type="machine-generated">Six line plots showing discriminability across gates as a function of auditory-perceptual acuity level. The top row displays separate models for pitch, pause, and final lengthening acuity, and the bottom row shows the corresponding combined model results. The x-axis represents gate number and the y-axis shows discriminability in probit units. Lines represent minimum, mean, and maximum auditory-perceptual acuity levels. Discriminability increases with later gates (gates 5 and 7) and higher acuity levels, but the separation between acuity levels remains relatively constant across all gates. Points indicate median estimates with 95% credible intervals, connected by dotted lines.</alt-text>
</graphic>
</fig>
<p>The trajectories in <xref ref-type="fig" rid="F6">Figure 6</xref> show that discriminability improves from early to later gates (consistent with the gate &#x000D7; stimulus condition interactions), but critically, the separation between higher and lower auditory-perceptual acuity levels remains consistent across all gates. Participants with higher auditory-perceptual acuity demonstrated better discrimination at every gate, rather than showing particularly strong advantages at early gates where prosodic boundary cues are subtle.</p>
<p><bold>Separate acuity models</bold>: All three-way interactions showed evidence against modulation across gates, with effect sizes ranging from &#x02013;0.40 to 0.21 probit units and lnBF<sub>10</sub> values from &#x02013;2.40 to &#x02013;1.03. An exception occurred with final lengthening acuity at Gate 7 vs. 5, where evidence was inconclusive [<italic>b</italic> = &#x02013;0.24, 95% CI [&#x02013;0.60, 0.13], lnBF<sub>10</sub> = &#x02013;0.82], representing the largest three-way interaction observed in the separate models.</p>
<p><bold>Combined acuities model</bold>: The pattern was similar to those in the separate auditory-perceptual acuity models, with all three-way interactions showing evidence against gate-specific modulation (lnBF<sub>10</sub> from &#x02013;2.31 to &#x02013;1.11). Again, the final lengthening acuity &#x000D7; Gate 7 vs. 5 &#x000D7; stimulus condition interaction yielded inconclusive evidence [<italic>b</italic> = &#x02013;0.45, 95% CI [&#x02013;0.92, &#x02013;0.01], lnBF<sub>10</sub> = 0.50], consistent with the separate model finding.</p>
<p>Simple effects analysis revealed that auditory-perceptual acuity effects remained present across all gates. This demonstrates that the lack of evidence for three-way interactions reflects uniform auditory-perceptual acuity facilitation rather than absent effects. Thus, better auditory-perceptual acuity enhances discriminability consistently across gates rather than providing differential advantages at specific gates.</p></sec>
<sec>
<title>Response bias</title>
<p>Response bias reflects participants&#x00027; tendency to favor one response option over another, independent of their actual discrimination performance. These analyses thus examine whether participants showed systematic preferences for <italic>grouped</italic> or <italic>ungrouped</italic> responses and whether such biases varied with auditory-perceptual acuity or gate progression.</p>
<p>Participants showed no systematic response preferences overall (intercepts near zero, all lnBF<sub>10</sub> &#x0003C;&#x02013;1). Auditory-perceptual acuity did not influence baseline bias (all lnBF<sub>10</sub> &#x0003C;&#x02013;1.5). A small shift in bias emerged at Gate 3 vs. 2, with participants becoming slightly more likely to respond <italic>grouped</italic> (<italic>b</italic> = 0.22&#x02013;0.24, lnBF<sub>10</sub> &#x0003E; 2.8), while other gate transitions showed inconclusive effects (all lnBF<sub>10</sub> between &#x02013;0.73 and 0.36). Furthermore, auditory-perceptual acuity did not systematically modulate bias changes across gates: Most gate &#x000D7; auditory-perceptual acuity interactions showed evidence against effects (<italic>b</italic> = &#x02013;0.09 to 0.01, lnBF<sub>10</sub> from &#x02013;2.33 to &#x02013;1.14), except for the Gate 7 vs. 5 &#x000D7; auditory-perceptual acuity interaction, which showed inconclusive evidence (<italic>b</italic> = 0.14&#x02013;0.16, lnBF<sub>10</sub> between 0.04 and 0.44).</p></sec></sec>
<sec sec-type="discussion" id="s5">
<title>Discussion</title>
<p>This study investigated how auditory-perceptual acuity for prosodic boundary cues relates to the ability to predict grouping structure (i.e., syntactic grouping) from partial prosodic information in gated speech. We modified the design of <xref ref-type="bibr" rid="B27">Hansen et al. (2023)</xref>, who identified substantial individual differences in boundary prediction using a similar gating paradigm: In their study, approximately 60% of participants updated their predictions incrementally from early gates, while the remaining 40% maintained consistent responses until later gates when clear boundary evidence had accumulated. However, the ascending gate presentation used by <xref ref-type="bibr" rid="B27">Hansen et al. (2023)</xref> made it impossible to distinguish whether &#x0201C;waiting&#x0201D; listeners genuinely lacked perceptual abilities or whether they strategically withheld responses. We addressed this issue by measuring individual differences in auditory-perceptual acuity for pitch, pause, and final lengthening discrimination in a randomized-gate paradigm, testing whether facilitation was uniform across gates or gate-specific as acoustic information unfolded.</p>
<p>Overall, participants successfully predicted <italic>grouped</italic> vs. <italic>ungrouped</italic> structures from prosodic information. The large main effect of stimulus condition confirmed that the acoustic differences between stimulus conditions enabled reliable prediction and that the experimental paradigm worked as intended. Prediction performance improved systematically as more acoustic information became available across gates, with the critical shift occurring at Gate 5 when the complete second name (<italic>Name2</italic>) became available, demonstrating that listeners required sufficient pitch and final lengthening cue information to reliably predict the grouping structure. As visualized in <xref ref-type="fig" rid="F1">Figure 1</xref>, pitch range distributions showed a moderate distinction between <italic>grouped</italic> and <italic>ungrouped</italic> stimulus conditions across both name positions, while final lengthening displayed clear distributional differences only at <italic>Name2</italic>, making this the earliest point at which both prosodic boundary cue types provided reliable boundary information.</p>
<p>With respect to our research question on how auditory-perceptual acuity for prosodic boundary cues relates to predicting syntactic structure from partial prosodic information, we found that participants with higher auditory-perceptual acuity demonstrated better structural prediction. When tested separately, both pause acuity and final lengthening acuity facilitated structural prediction, whereas pitch acuity did not show clear evidence for a facilitatory effect. However, when all three auditory-perceptual acuity measures were modeled simultaneously, the effects attenuated, that is, each individual auditory-perceptual acuity effect was reduced by approximately fifty percent and none of them retained statistical support. This attenuation indicates that the three auditory-perceptual acuity measures share considerable predictive variance rather than contributing independently, consistent with their moderate intercorrelations.</p>
<p>Crucially, this facilitation pattern was observed across all gates rather than being particularly pronounced at the early gates where the acoustic information is weakest. This contradicts our hypothesis that auditory-perceptual acuity would provide its strongest advantages when subtle prosodic boundary cue distinctions require fine-grained perceptual resolution, with these advantages diminishing as acoustic evidence accumulates. Instead, better bottom-up auditory-perceptual acuity provides general processing advantages throughout the accumulation of evidence rather than selectively enhancing prediction when prosodic boundary cues are ambiguous.</p>
<p>Response bias analyses confirmed that participants&#x00027; baseline response preferences were not influenced by auditory-perceptual acuity. Participants with higher auditory-perceptual acuity did not adopt different response strategies nor did they favor one grouping structure over the other. Instead, they predicted the upcoming grouping structure more effectively at all gates based on the available prosodic information. This supports the idea that auditory-perceptual acuities enhance prosodic boundary prediction through perceptual sensitivity rather than decision-level strategies.</p>
<p>Our findings extend prior research on prosodic processing by showing that individual differences in bottom-up auditory-perceptual acuity relate to top-down prediction of syntactic structure during incremental sentence comprehension (e.g., <xref ref-type="bibr" rid="B14">Cole et al., 2010</xref>; <xref ref-type="bibr" rid="B19">Ferreira and Karimi, 2015</xref>; <xref ref-type="bibr" rid="B61">Wagner and Watson, 2010</xref>). While previous studies have emphasized the role of acoustic boundary cues such as pitch movements, silent pauses, and final lengthening in boundary perception (e.g., <xref ref-type="bibr" rid="B9">Cangemi et al., 2015</xref>; <xref ref-type="bibr" rid="B22">Ganga et al., 2024</xref>; <xref ref-type="bibr" rid="B31">Holzgrefe-Lang et al., 2016</xref>; <xref ref-type="bibr" rid="B47">Petrone et al., 2017</xref>; <xref ref-type="bibr" rid="B50">Schub&#x000F6; et al., 2023</xref>), they have largely focused on group-level effects or prosodic boundary cue interactions without accounting for perceptual variability across listeners. Our findings are novel in that they link isolated auditory-perceptual discrimination thresholds for prosodic boundary cues to predictive processing in a gated paradigm. The shared variance among auditory-perceptual acuities indicates integrated rather than isolated cue-specific mechanisms, which contribute to enhanced top-down boundary prediction. This is consistent with current views on prosody processing emphasizing a more integrated, spectrotemporal framework for prosodic boundary perception (e.g., <xref ref-type="bibr" rid="B5">Brugos and Barnes, 2014</xref>; <xref ref-type="bibr" rid="B12">Cohen et al., 1953</xref>), where auditory-perceptual acuity may act as a domain-general facilitator of prosodic phrasing. Theoretically, these results are consistent with predictive coding models of speech perception (e.g., <xref ref-type="bibr" rid="B46">Park et al., 2018</xref>; <xref ref-type="bibr" rid="B48">Preisig and Meyer, 2025</xref>; <xref ref-type="bibr" rid="B52">Sohoglu et al., 2012</xref>), which posit that listeners generate top-down expectations about upcoming structure based on bottom-up sensory input. Our results suggest that finer auditory-perceptual resolution may equip listeners to better exploit unfolding prosodic information, thereby supporting the incremental syntactic analysis of the utterance (e.g., <xref ref-type="bibr" rid="B11">Clifton et al., 2002</xref>; <xref ref-type="bibr" rid="B20">Frazier et al., 2006</xref>), particularly in ambiguous contexts like coordinate structures (e.g., <xref ref-type="bibr" rid="B36">Kentner and F&#x000E9;ry, 2013</xref>).</p>
<p>Our finding that auditory-perceptual acuity effects are not confined exclusively to the earliest gates, but persist across the unfolding signal, has implications for the timing of prosodic-syntactic integration during grouping prediction. Classical immediate-integration accounts (e.g., <xref ref-type="bibr" rid="B55">Tanenhaus et al., 1995</xref>) predict that prosodic information should influence syntactic parsing as soon as it becomes available, whereas delayed-integration accounts (see <xref ref-type="bibr" rid="B16">Cutler et al., 1997</xref>) assume that prosodic cues are initially buffered and only affect syntactic commitments once sufficient evidence has accumulated. Neurophysiological evidence supports rapid sensitivity to prosodic boundary cues: ERP studies show early neural responses to prosodic boundaries, indexed by the Closure Positive Shift (CPS), which can emerge as soon as boundary-related prosodic information becomes available and does not depend on the presence of pauses or later syntactic input (e.g., <xref ref-type="bibr" rid="B31">Holzgrefe-Lang et al., 2016</xref>; <xref ref-type="bibr" rid="B54">Steinhauer et al., 1999</xref>; see also <xref ref-type="bibr" rid="B30">Holzgrefe et al., 2013</xref>). CPS effects have been observed even when boundary perception relies on subtle cue combinations such as pitch change and final lengthening, indicating early neural integration of prosodic structure. Our behavioral findings complement this evidence by showing that auditory-perceptual acuity provides a sustained processing advantage across information accumulation, rather than a sharp early-gate effect. This pattern is most consistent with continuous integration models (e.g., <xref ref-type="bibr" rid="B39">Kuperberg and Jaeger, 2016</xref>), in which prosodic and syntactic information are incrementally integrated and individual differences in perceptual precision modulate processing continuously rather than at discrete decision points, in line with predictive-processing accounts emphasizing continuous precision weighting (e.g., <xref ref-type="bibr" rid="B21">Friston, 2010</xref>).</p>
<p>Some limitations warrant consideration. First, our perceptual JND AXB threshold tasks capture isolated prosodic boundary cue discrimination but may not reflect how listeners integrate multiple acoustic cues in natural speech. Second, the shared variance among auditory-perceptual acuity measures leaves open whether this reflects domain-general auditory sensitivity or correlated but distinct perceptual abilities; larger samples or orthogonal prosodic boundary cue manipulations are needed to clarify this. Future work should involve larger cohorts and combine behavioral measures with neurophysiological methods to identify the neural mechanisms underlying prosodic boundary cue processing. Examining the interaction of bottom-up perceptual abilities and top-down prediction in more naturalistic, multi-cue contexts would additionally help to determine whether the shared behavioral variance reflects common neural resources or coordinated but partly independent processes.</p>
<sec sec-type="conclusions" id="s6">
<title>Conclusion</title>
<p>In conclusion, individual differences in auditory-perceptual acuity affect how well listeners exploit prosodic information for structural prediction, providing consistent advantages regardless of the available acoustic information strength. Importantly, listeners do not process prosodic boundary cues independently but rather integrate them incrementally during processing. Our findings also suggest that the individual differences in exploiting bottom-up prosodic information for top-down syntactic prediction observed by <xref ref-type="bibr" rid="B27">Hansen et al. (2023)</xref> likely reflect underlying differences in auditory-perceptual abilities rather than differences in task strategies or cue-specific processing mechanisms.</p></sec>
</body>
<back>
<sec sec-type="data-availability" id="s7">
<title>Data availability statement</title>
<p>All materials, data, and reproducible analysis code are available through the Open Science Framework: experimental code for the gating task (<ext-link ext-link-type="uri" xlink:href="https://osf.io/ehu8g/">https://osf.io/ehu8g/</ext-link>), experimental code for the JND task (<ext-link ext-link-type="uri" xlink:href="https://osf.io/mqy2p/">https://osf.io/mqy2p/</ext-link>) and analysis scripts with data (<ext-link ext-link-type="uri" xlink:href="https://osf.io/4yb7j/">https://osf.io/4yb7j/</ext-link>).</p>
</sec>
<sec sec-type="ethics-statement" id="s8">
<title>Ethics statement</title>
<p>The studies involving humans were approved by University of Potsdam Ethics Committee (approval code: 99/2020). The studies were conducted in accordance with the local legislation and institutional requirements. The participants provided their written informed consent to participate in this study.</p>
</sec>
<sec sec-type="author-contributions" id="s9">
<title>Author contributions</title>
<p>AH: Conceptualization, Data curation, Formal analysis, Investigation, Methodology, Project administration, Software, Validation, Visualization, Writing &#x02013; original draft, Writing &#x02013; review &#x00026; editing. IW: Funding acquisition, Project administration, Resources, Supervision, Writing &#x02013; review &#x00026; editing, Conceptualization. OT: Funding acquisition, Project administration, Writing &#x02013; review &#x00026; editing, Conceptualization. SH: Funding acquisition, Project administration, Writing &#x02013; review &#x00026; editing, Conceptualization. JV: Formal analysis, Methodology, Supervision, Validation, Writing &#x02013; review &#x00026; editing.</p>
</sec>
<ack><title>Acknowledgments</title><p>We thank our research assistants for their help with the data collection, and all of our participants for their time and effort and willingness to be part of this research project.</p>
</ack>
<sec sec-type="COI-statement" id="conf1">
<title>Conflict of interest</title>
<p>The author(s) declared that this work was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest. The author IW declared that they were an editorial board member of Frontiers, at the time of submission. This had no impact on the peer review process and the final decision.</p>
</sec>
<sec sec-type="ai-statement" id="s11">
<title>Generative AI statement</title>
<p>The author(s) declared that generative AI was used in the creation of this manuscript. Assistance from ChatGPT (OpenAI) was used for language editing and formatting consistency. Assistance from Claude (Anthropic) was used for experimental and analysis code formatting and commenting. All content and data analyses were verified by the author(s).</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p></sec>
<sec sec-type="disclaimer" id="s12">
<title>Publisher&#x00027;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec sec-type="supplementary-material" id="s13">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/flang.2026.1763160/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/flang.2026.1763160/full#supplementary-material</ext-link></p>
<supplementary-material xlink:href="Data_Sheet_1.pdf" id="SM1" mimetype="application/pdf" xmlns:xlink="http://www.w3.org/1999/xlink"/></sec>
<ref-list>
<title>References</title>
<ref id="B1">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Barr</surname> <given-names>D. J.</given-names></name> <name><surname>Levy</surname> <given-names>R.</given-names></name> <name><surname>Scheepers</surname> <given-names>C.</given-names></name> <name><surname>Tily</surname> <given-names>H. J.</given-names></name></person-group> (<year>2013</year>). <article-title>Random effects structure for confirmatory hypothesis testing: keep it maximal</article-title>. <source>J. Mem. Lang.</source> <volume>68</volume>, <fpage>255</fpage>&#x02013;<lpage>278</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.jml.2012.11.001</pub-id><pub-id pub-id-type="pmid">24403724</pub-id></mixed-citation>
</ref>
<ref id="B2">
<mixed-citation publication-type="web"><person-group person-group-type="author"><name><surname>Bates</surname> <given-names>D.</given-names></name> <name><surname>Kliegl</surname> <given-names>R.</given-names></name> <name><surname>Vasishth</surname> <given-names>S.</given-names></name> <name><surname>Baayen</surname> <given-names>H.</given-names></name></person-group> (<year>2015</year>). <source>Parsimonious Mixed Models</source>. Available online at: <ext-link ext-link-type="uri" xlink:href="http://arxiv.org/pdf/1506.04967">http://arxiv.org/pdf/1506.04967</ext-link> (Accessed January 16, 2026).</mixed-citation>
</ref>
<ref id="B3">
<mixed-citation publication-type="web"><person-group person-group-type="author"><name><surname>Boersma</surname> <given-names>P.</given-names></name> <name><surname>Weenink</surname> <given-names>D.</given-names></name></person-group> (<year>1992-2020</year>). <source>Praat: Doing Phonetics by Computer [Computer Program]</source>. Available online at: <ext-link ext-link-type="uri" xlink:href="http://www.praat.org/">http://www.praat.org/</ext-link> (Accessed January 16, 2026).</mixed-citation>
</ref>
<ref id="B4">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Bolker</surname> <given-names>B. M.</given-names></name></person-group> (<year>2015</year>). <article-title>&#x0201C;Linear and generalized linear mixed models,&#x0201D;</article-title> in <source>Ecological Statistics</source>, eds. G. A. Fox, S. Negrete-Yankelevich, and V. J. Sosa (Oxford: Oxford University Press/Oxford Academic), <fpage>309</fpage>&#x02013;<lpage>333</lpage>. doi: <pub-id pub-id-type="doi">10.1093/acprof:oso/9780199672547.003.0014</pub-id></mixed-citation>
</ref>
<ref id="B5">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Brugos</surname> <given-names>A.</given-names></name> <name><surname>Barnes</surname> <given-names>J.</given-names></name></person-group> (<year>2014</year>). <article-title>Effects of dynamic pitch and relative scaling on the perception of duration and prosodic grouping in American English</article-title>. <source>Speech Prosody </source> <volume>2014</volume>, <fpage>388</fpage>&#x02013;<lpage>392</lpage>. doi: <pub-id pub-id-type="doi">10.21437/SpeechProsody.2014-65</pub-id></mixed-citation>
</ref>
<ref id="B6">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Brunner</surname> <given-names>J.</given-names></name> <name><surname>Ghosh</surname> <given-names>S. S.</given-names></name> <name><surname>Hoole</surname> <given-names>P.</given-names></name> <name><surname>Matthies</surname> <given-names>M.</given-names></name> <name><surname>Tiede</surname> <given-names>M.</given-names></name> <name><surname>Perkell</surname> <given-names>J. S.</given-names></name></person-group> (<year>2011</year>). <article-title>The influence of auditory acuity on acoustic variability and the use of motor equivalence during adaptation to a perturbation</article-title>. <source>J. Speech Lang. Hear. Res.</source> <volume>54</volume>, <fpage>727</fpage>&#x02013;<lpage>739</lpage>. doi: <pub-id pub-id-type="doi">10.1044/1092-4388(2010/09-0256)</pub-id><pub-id pub-id-type="pmid">20966388</pub-id></mixed-citation>
</ref>
<ref id="B7">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Buerkner</surname> <given-names>P.-C.</given-names></name></person-group> (<year>2018</year>). <article-title>Advanced Bayesian multilevel modeling with the R package brms</article-title>. <source>R J.</source> <volume>10</volume>, <fpage>395</fpage>&#x02013;<lpage>411</lpage>. doi: <pub-id pub-id-type="doi">10.32614/RJ-2018-017</pub-id></mixed-citation>
</ref>
<ref id="B8">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Byrd</surname> <given-names>D.</given-names></name> <name><surname>Saltzman</surname> <given-names>E.</given-names></name></person-group> (<year>2003</year>). <article-title>The elastic phrase: modeling the dynamics of boundary-adjacent lengthening</article-title>. <source>J. Phonet.</source> <volume>31</volume>, <fpage>149</fpage>&#x02013;<lpage>180</lpage>. doi: <pub-id pub-id-type="doi">10.1016/S0095-4470(02)00085-2</pub-id></mixed-citation>
</ref>
<ref id="B9">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Cangemi</surname> <given-names>F.</given-names></name> <name><surname>Kr&#x000FC;ger</surname> <given-names>M.</given-names></name> <name><surname>Grice</surname> <given-names>M.</given-names></name></person-group> (<year>2015</year>). <article-title>&#x0201C;Listener-specific perception of speaker-specific productions in intonation,&#x0201D;</article-title> in <source>Individual Differences in Speech Production and Perception</source> (<publisher-loc>Frankfurt</publisher-loc>: <publisher-name>Peter Lang</publisher-name>), <fpage>123</fpage>&#x02013;<lpage>145</lpage>.</mixed-citation>
</ref>
<ref id="B10">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Cho</surname> <given-names>T.</given-names></name> <name><surname>Keating</surname> <given-names>P.</given-names></name></person-group> (<year>2009</year>). <article-title>Effects of initial position vs. prominence in English</article-title>. <source>J. Phonet.</source> <volume>37</volume>, <fpage>466</fpage>&#x02013;<lpage>485</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.wocn.2009.08.001</pub-id></mixed-citation>
</ref>
<ref id="B11">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Clifton</surname> <given-names>C.</given-names></name> JR <name><surname>Carlson</surname> <given-names>K.</given-names></name> <name><surname>Frazier</surname> <given-names>L.</given-names></name></person-group> (<year>2002</year>). <article-title>Informative prosodic boundaries</article-title>. <source>Lang. Speech</source> <volume>45</volume>, <fpage>87</fpage>&#x02013;<lpage>114</lpage>. doi: <pub-id pub-id-type="doi">10.1177/00238309020450020101</pub-id><pub-id pub-id-type="pmid">12613557</pub-id></mixed-citation>
</ref>
<ref id="B12">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Cohen</surname> <given-names>J.</given-names></name> <name><surname>Hansel</surname> <given-names>C. E.</given-names></name> <name><surname>Sylvester</surname> <given-names>J. D.</given-names></name></person-group> (<year>1953</year>). <article-title>A new phenomenon in time judgment</article-title>. <source>Nature</source> <volume>172</volume>:<fpage>901</fpage>. doi: <pub-id pub-id-type="doi">10.1038/172901a0</pub-id><pub-id pub-id-type="pmid">13111224</pub-id></mixed-citation>
</ref>
<ref id="B13">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Cole</surname> <given-names>J.</given-names></name></person-group> (<year>2015</year>). <article-title>Prosody in context: a review</article-title>. <source>Lang. Cogn. Neurosci.</source> <volume>30</volume>, <fpage>1</fpage>&#x02013;<lpage>31</lpage>. doi: <pub-id pub-id-type="doi">10.1080/23273798.2014.963130</pub-id></mixed-citation>
</ref>
<ref id="B14">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Cole</surname> <given-names>J.</given-names></name> <name><surname>Mo</surname> <given-names>Y.</given-names></name> <name><surname>Baek</surname> <given-names>S.</given-names></name></person-group> (<year>2010</year>). <article-title>The role of syntactic structure in guiding prosody perception with ordinary listeners and everyday speech</article-title>. <source>Lang. Cogn. Process.</source> <volume>25</volume>, <fpage>1141</fpage>&#x02013;<lpage>1177</lpage>. doi: <pub-id pub-id-type="doi">10.1080/01690960903525507</pub-id></mixed-citation>
</ref>
<ref id="B15">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Collier</surname> <given-names>R. P. G.</given-names></name> <name><surname>de Pijper</surname> <given-names>J. R.</given-names></name> <name><surname>Sanderman</surname> <given-names>A. A.</given-names></name></person-group> (<year>1993</year>). <article-title>&#x0201C;Perceived prosodic boundaries and their phonetic correlates,&#x0201D;</article-title> in <source>Human Language Technology</source>, ed. Morgan Kaufmann Publishers, Inc. (San Francisco, CA: Morgan Kaufmann Publishers, Inc.), <fpage>341</fpage>&#x02013;<lpage>345</lpage>.</mixed-citation>
</ref>
<ref id="B16">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Cutler</surname> <given-names>A.</given-names></name> <name><surname>Dahan</surname> <given-names>D.</given-names></name> <name><surname>van Donselaar</surname> <given-names>W.</given-names></name></person-group> (<year>1997</year>). <article-title>Prosody in the comprehension of spoken language: a literature review</article-title>. <source>Lang. Speech</source> <volume>40</volume>(<issue> Pt 2</issue>), <fpage>141</fpage>&#x02013;<lpage>201</lpage>. doi: <pub-id pub-id-type="doi">10.1177/002383099704000203</pub-id><pub-id pub-id-type="pmid">9509577</pub-id></mixed-citation>
</ref>
<ref id="B17">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>de Beer</surname> <given-names>C.</given-names></name> <name><surname>Hofmann</surname> <given-names>A.</given-names></name> <name><surname>Regenbrecht</surname> <given-names>F.</given-names></name> <name><surname>Huttenlauch</surname> <given-names>C.</given-names></name> <name><surname>Wartenburger</surname> <given-names>I.</given-names></name> <name><surname>Obrig</surname> <given-names>H.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>Production and comprehension of prosodic boundary marking in persons with unilateral brain lesions</article-title>. <source>J. Speech Lang. Hear. Res</source>. <volume>65</volume>, <fpage>4774</fpage>&#x02013;<lpage>4796</lpage>. doi: <pub-id pub-id-type="doi">10.1044/2022_JSLHR-22-00258</pub-id><pub-id pub-id-type="pmid">36455138</pub-id></mixed-citation>
</ref>
<ref id="B18">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Dickey</surname> <given-names>J. M.</given-names></name> <name><surname>Lientz</surname> <given-names>B. P.</given-names></name></person-group> (<year>1970</year>). <article-title>The weighted likelihood ratio, sharp hypotheses about chances, the order of a markov chain</article-title>. <source>Ann. Math. Stat.</source> <volume>41</volume>, <fpage>214</fpage>&#x02013;<lpage>226</lpage>. doi: <pub-id pub-id-type="doi">10.1214/aoms/1177697203</pub-id></mixed-citation>
</ref>
<ref id="B19">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ferreira</surname> <given-names>F.</given-names></name> <name><surname>Karimi</surname> <given-names>H.</given-names></name></person-group> (<year>2015</year>). <article-title>Prosody, performance, and cognitive skill: evidence from individual differences</article-title>. <source>Explicit Implicit Prosody Sent. Process.</source> <volume>46</volume>, <fpage>119</fpage>&#x02013;<lpage>132</lpage>. doi: <pub-id pub-id-type="doi">10.1007/978-3-319-12961-7_7</pub-id></mixed-citation>
</ref>
<ref id="B20">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Frazier</surname> <given-names>L.</given-names></name> <name><surname>Carlson</surname> <given-names>K.</given-names></name> <name><surname>Clifton</surname> <given-names>C. JR.</given-names></name></person-group> (<year>2006</year>). <article-title>Prosodic phrasing is central to language comprehension</article-title>. <source>Trends Cogn. Sci.</source> <volume>10</volume>, <fpage>244</fpage>&#x02013;<lpage>249</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.tics.2006.04.002</pub-id><pub-id pub-id-type="pmid">16651019</pub-id></mixed-citation>
</ref>
<ref id="B21">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Friston</surname> <given-names>K.</given-names></name></person-group> (<year>2010</year>). <article-title>The free-energy principle: a unified brain theory?</article-title> <source>Nat. Rev. Neurosci.</source> <volume>11</volume>, <fpage>127</fpage>&#x02013;<lpage>138</lpage>. doi: <pub-id pub-id-type="doi">10.1038/nrn2787</pub-id><pub-id pub-id-type="pmid">20068583</pub-id></mixed-citation>
</ref>
<ref id="B22">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ganga</surname> <given-names>R.</given-names></name> <name><surname>Geutjes</surname> <given-names>J.</given-names></name> <name><surname>van Niekerk</surname> <given-names>E.</given-names></name> <name><surname>Reshetnikova</surname> <given-names>V.</given-names></name> <name><surname>Chen</surname> <given-names>A.</given-names></name></person-group> (<year>2024</year>). <article-title>Processing prosodic boundaries in Dutch coordinated constructions</article-title>. <source>Speech Prosody </source> <volume>2024</volume>, <fpage>985</fpage>&#x02013;<lpage>989</lpage>. doi: <pub-id pub-id-type="doi">10.21437/SpeechProsody.2024-199</pub-id></mixed-citation>
</ref>
<ref id="B23">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Gelman</surname> <given-names>A.</given-names></name> <name><surname>Jakulin</surname> <given-names>A.</given-names></name> <name><surname>Pittau</surname> <given-names>M. G.</given-names></name> <name><surname>Su</surname> <given-names>Y.-S.</given-names></name></person-group> (<year>2008</year>). <article-title>A weakly informative default prior distribution for logistic and other regression models</article-title>. <source>Ann. Appl. Stat.</source> <volume>2</volume>, <fpage>1360</fpage>&#x02013;<lpage>1383</lpage>. doi: <pub-id pub-id-type="doi">10.1214/08-AOAS191</pub-id></mixed-citation>
</ref>
<ref id="B24">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ghosh</surname> <given-names>J.</given-names></name> <name><surname>Li</surname> <given-names>Y.</given-names></name> <name><surname>Mitra</surname> <given-names>R.</given-names></name></person-group> (<year>2018</year>). <article-title>On the use of cauchy prior distributions for Bayesian Logistic Regression</article-title>. <source>Bayesian Anal.</source> <volume>13</volume>, <fpage>359</fpage>&#x02013;<lpage>383</lpage>. doi: <pub-id pub-id-type="doi">10.1214/17-BA1051</pub-id></mixed-citation>
</ref>
<ref id="B25">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Gonz&#x000E1;lez</surname> <given-names>C.</given-names></name> <name><surname>Weissglass</surname> <given-names>C.</given-names></name> <name><surname>Bates</surname> <given-names>D.</given-names></name></person-group> (<year>2022</year>). <article-title>Creaky voice and prosodic boundaries in Spanish: an acoustic study</article-title>. <source>Stud. Hisp. Lusophone Ling.</source> <volume>15</volume>, <fpage>33</fpage>&#x02013;<lpage>65</lpage>. doi: <pub-id pub-id-type="doi">10.1515/shll-2022-2055</pub-id></mixed-citation>
</ref>
<ref id="B26">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Grosjean</surname> <given-names>F.</given-names></name></person-group> (<year>1980</year>). <article-title>Spoken word recognition processes and the gating paradigm</article-title>. <source>Percept. Psychophys.</source> <volume>28</volume>, <fpage>267</fpage>&#x02013;<lpage>283</lpage>. doi: <pub-id pub-id-type="doi">10.3758/BF03204386</pub-id><pub-id pub-id-type="pmid">7465310</pub-id></mixed-citation>
</ref>
<ref id="B27">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hansen</surname> <given-names>M.</given-names></name> <name><surname>Huttenlauch</surname> <given-names>C.</given-names></name> <name><surname>de Beer</surname> <given-names>C.</given-names></name> <name><surname>Wartenburger</surname> <given-names>I.</given-names></name> <name><surname>Hanne</surname> <given-names>S.</given-names></name></person-group> (<year>2023</year>). <article-title>Individual differences in early disambiguation of prosodic grouping</article-title>. <source>Lang. Speech</source> <volume>66</volume>, <fpage>706</fpage>&#x02013;<lpage>733</lpage>. doi: <pub-id pub-id-type="doi">10.1177/00238309221127374</pub-id><pub-id pub-id-type="pmid">36250333</pub-id></mixed-citation>
</ref>
<ref id="B28">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Hautus</surname> <given-names>M. J.</given-names></name> <name><surname>Macmillan</surname> <given-names>N. A.</given-names></name> <name><surname>Creelman</surname> <given-names>C. D.</given-names></name></person-group> (<year>2021</year>). <source>Detection Theory</source>. <publisher-loc>New York, NY</publisher-loc>: <publisher-name>Routledge</publisher-name>. doi: <pub-id pub-id-type="doi">10.4324/9781003203636</pub-id></mixed-citation>
</ref>
<ref id="B29">
<mixed-citation publication-type="web"><collab>Hofmann A. Tuomainen O. Hanne S. Ver&#x000ED;ssimo J. Wartenburger I. (submitted). The Prosodic Perception-Production Link: Impact of Auditory-Perceptual Acuity on Prosodic Cueproduction Under Cognitive Load: Manuscript Under Review. Available online at: <ext-link ext-link-type="uri" xlink:href="https://osf.io/brw2t">https://osf.io/brw2t</ext-link> (Accessed January 16, 2026).</collab></mixed-citation>
</ref>
<ref id="B30">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Holzgrefe</surname> <given-names>J.</given-names></name> <name><surname>Wellmann</surname> <given-names>C.</given-names></name> <name><surname>Petrone</surname> <given-names>C.</given-names></name> <name><surname>Truckenbrodt</surname> <given-names>H.</given-names></name> <name><surname>H&#x000F6;hle</surname> <given-names>B.</given-names></name> <name><surname>Wartenburger</surname> <given-names>I.</given-names></name></person-group> (<year>2013</year>). <article-title>Brain response to prosodic boundary cues depends on boundary position</article-title>. <source>Front. Psychol.</source> <volume>4</volume>:<fpage>421</fpage>. doi: <pub-id pub-id-type="doi">10.3389/fpsyg.2013.00421</pub-id><pub-id pub-id-type="pmid">23882234</pub-id></mixed-citation>
</ref>
<ref id="B31">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Holzgrefe-Lang</surname> <given-names>J.</given-names></name> <name><surname>Wellmann</surname> <given-names>C.</given-names></name> <name><surname>Petrone</surname> <given-names>C.</given-names></name> <name><surname>R&#x000E4;ling</surname> <given-names>R.</given-names></name> <name><surname>Truckenbrodt</surname> <given-names>H.</given-names></name> <name><surname>H&#x000F6;hle</surname> <given-names>B.</given-names></name> <etal/></person-group>. (<year>2016</year>). <article-title>How pitch change and final lengthening cue boundary perception in german: converging evidence from ERPs and prosodic judgements</article-title>. <source>Lang. Cogn. Neurosci.</source> <volume>31</volume>, <fpage>904</fpage>&#x02013;<lpage>920</lpage>. doi: <pub-id pub-id-type="doi">10.1080/23273798.2016.1157195</pub-id></mixed-citation>
</ref>
<ref id="B32">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Huttenlauch</surname> <given-names>C.</given-names></name> <name><surname>de Beer</surname> <given-names>C.</given-names></name> <name><surname>Hanne</surname> <given-names>S.</given-names></name> <name><surname>Wartenburger</surname> <given-names>I.</given-names></name></person-group> (<year>2021</year>). <article-title>Production of prosodic cues in coordinate name sequences addressing varying interlocutors</article-title>. <source>Lab. Phonol.: J. Assoc. Lab. Phonol.</source> <volume>12</volume>:<fpage>1</fpage>. doi: <pub-id pub-id-type="doi">10.5334/labphon.221</pub-id></mixed-citation>
</ref>
<ref id="B33">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Jeffreys</surname> <given-names>H.</given-names></name></person-group> (<year>1991</year>). <source>Theory of Probability (2nd Edn., repr)</source>. <publisher-loc>Oxford</publisher-loc>: <publisher-name>Oxford University Press</publisher-name>.</mixed-citation>
</ref>
<ref id="B34">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ji</surname> <given-names>J.</given-names></name> <name><surname>Zhao</surname> <given-names>X.</given-names></name> <name><surname>Li</surname> <given-names>Y.</given-names></name> <name><surname>Yang</surname> <given-names>X.</given-names></name></person-group> (<year>2024</year>). <article-title>Age effects on prosodic boundary perception</article-title>. <source>Psychol. Aging</source> <volume>39</volume>, <fpage>262</fpage>&#x02013;<lpage>274</lpage>. doi: <pub-id pub-id-type="doi">10.1037/pag0000811</pub-id><pub-id pub-id-type="pmid">38829339</pub-id></mixed-citation>
</ref>
<ref id="B35">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kass</surname> <given-names>R. E.</given-names></name> <name><surname>Raftery</surname> <given-names>A. E.</given-names></name></person-group> (<year>1995</year>). <article-title>Bayes factors</article-title>. <source>J. Am. Stat. Assoc.</source> <volume>90</volume>, <fpage>773</fpage>&#x02013;<lpage>795</lpage>. doi: <pub-id pub-id-type="doi">10.1080/01621459.1995.10476572</pub-id></mixed-citation>
</ref>
<ref id="B36">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kentner</surname> <given-names>G.</given-names></name> <name><surname>F&#x000E9;ry</surname> <given-names>C.</given-names></name></person-group> (<year>2013</year>). <article-title>A new approach to prosodic grouping</article-title>. <source>Ling. Rev.</source> <volume>30</volume>, <fpage>277</fpage>&#x02013;<lpage>311</lpage>. doi: <pub-id pub-id-type="doi">10.1515/tlr-2013-0009</pub-id></mixed-citation>
</ref>
<ref id="B37">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kjelgaard</surname> <given-names>M. M.</given-names></name> <name><surname>Speer</surname> <given-names>S. R.</given-names></name></person-group> (<year>1999</year>). <article-title>Prosodic facilitation and interference in the resolution of temporary syntactic closure ambiguity</article-title>. <source>J. Mem. Lang.</source> <volume>40</volume>, <fpage>153</fpage>&#x02013;<lpage>194</lpage>. doi: <pub-id pub-id-type="doi">10.1006/jmla.1998.2620</pub-id></mixed-citation>
</ref>
<ref id="B38">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kochanski</surname> <given-names>G.</given-names></name> <name><surname>Grabe</surname> <given-names>E.</given-names></name> <name><surname>Coleman</surname> <given-names>J.</given-names></name> <name><surname>Rosner</surname> <given-names>B.</given-names></name></person-group> (<year>2005</year>). <article-title>Loudness predicts prominence: fundamental frequency lends little</article-title>. <source>J. Acoust. Soc. Am.</source> <volume>118</volume>, <fpage>1038</fpage>&#x02013;<lpage>1054</lpage>. doi: <pub-id pub-id-type="doi">10.1121/1.1923349</pub-id><pub-id pub-id-type="pmid">16158659</pub-id></mixed-citation>
</ref>
<ref id="B39">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kuperberg</surname> <given-names>G. R.</given-names></name> <name><surname>Jaeger</surname> <given-names>T. F.</given-names></name></person-group> (<year>2016</year>). <article-title>What do we mean by prediction in language comprehension?</article-title> <source>Lang. Cogn. Neurosci.</source> <volume>31</volume>, <fpage>32</fpage>&#x02013;<lpage>59</lpage>. doi: <pub-id pub-id-type="doi">10.1080/23273798.2015.1102299</pub-id><pub-id pub-id-type="pmid">27135040</pub-id></mixed-citation>
</ref>
<ref id="B40">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Levitt</surname> <given-names>H.</given-names></name></person-group> (<year>1971</year>). <article-title>Transformed up-down methods in psychoacoustics</article-title>. <source>J. Acoust. Soc. Am.</source> <volume>49</volume>, <fpage>467</fpage>&#x02013;<lpage>477</lpage>. doi: <pub-id pub-id-type="doi">10.1121/1.1912375</pub-id><pub-id pub-id-type="pmid">5541744</pub-id></mixed-citation>
</ref>
<ref id="B41">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Lialiou</surname> <given-names>M.</given-names></name> <name><surname>Grice</surname> <given-names>M.</given-names></name> <name><surname>R&#x000F6;hr</surname> <given-names>C. T.</given-names></name> <name><surname>Schumacher</surname> <given-names>P. B.</given-names></name></person-group> (<year>2024</year>). <article-title>Auditory processing of intonational rises and falls in German: rises are special in attention orienting</article-title>. <source>J. Cogn. Neurosci.</source> <volume>36</volume>, <fpage>1099</fpage>&#x02013;<lpage>1122</lpage>. doi: <pub-id pub-id-type="doi">10.1162/jocn_a_02129</pub-id><pub-id pub-id-type="pmid">38358004</pub-id></mixed-citation>
</ref>
<ref id="B42">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Lin</surname> <given-names>H.-Y.</given-names></name> <name><surname>Fon</surname> <given-names>J.</given-names></name></person-group> (<year>2010</year>). <article-title>Perception on pitch reset at discourse boundaries</article-title>. <source>Interspeech</source> <volume>2010</volume>, <fpage>1225</fpage>&#x02013;<lpage>1228</lpage>. doi: <pub-id pub-id-type="doi">10.21437/Interspeech.2010-388</pub-id></mixed-citation>
</ref>
<ref id="B43">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>M&#x000E4;nnel</surname> <given-names>C.</given-names></name> <name><surname>Friederici</surname> <given-names>A. D.</given-names></name></person-group> (<year>2016</year>). <article-title>Neural correlates of prosodic boundary perception in German preschoolers: if pause is present, pitch can go</article-title>. <source>Brain Res.</source> <volume>1632</volume>, <fpage>27</fpage>&#x02013;<lpage>33</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.brainres.2015.12.009</pub-id><pub-id pub-id-type="pmid">26683081</pub-id></mixed-citation>
</ref>
<ref id="B44">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>McElreath</surname> <given-names>R.</given-names></name></person-group> (<year>2020</year>). <source>Statistical Rethinking</source>. <publisher-loc>New York, NY</publisher-loc>: <publisher-name>Chapman and Hall/CRC</publisher-name>. doi: <pub-id pub-id-type="doi">10.1201/9780429029608</pub-id></mixed-citation>
</ref>
<ref id="B45">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Oschkinat</surname> <given-names>M.</given-names></name> <name><surname>Hoole</surname> <given-names>P.</given-names></name> <name><surname>Falk</surname> <given-names>S.</given-names></name> <name><surname>Dalla Bella</surname> <given-names>S.</given-names></name></person-group> (<year>2022</year>). <article-title>Temporal malleability to auditory feedback perturbation is modulated by rhythmic abilities and auditory acuity</article-title>. <source>Front. Hum. Neurosci.</source> <volume>16</volume>:<fpage>885074</fpage>. doi: <pub-id pub-id-type="doi">10.3389/fnhum.2022.885074</pub-id><pub-id pub-id-type="pmid">36188179</pub-id></mixed-citation>
</ref>
<ref id="B46">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Park</surname> <given-names>H.</given-names></name> <name><surname>Thut</surname> <given-names>G.</given-names></name> <name><surname>Gross</surname> <given-names>J.</given-names></name></person-group> (<year>2018</year>). <article-title>Predictive entrainment of natural speech through two fronto-motor top-down channels</article-title>. <source>Lang. Cogn. Neurosci.</source> <volume>35</volume>, <fpage>739</fpage>&#x02013;<lpage>751</lpage>. doi: <pub-id pub-id-type="doi">10.1080/23273798.2018.1506589</pub-id><pub-id pub-id-type="pmid">32939354</pub-id></mixed-citation>
</ref>
<ref id="B47">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Petrone</surname> <given-names>C.</given-names></name> <name><surname>Truckenbrodt</surname> <given-names>H.</given-names></name> <name><surname>Wellmann</surname> <given-names>C.</given-names></name> <name><surname>Holzgrefe-Lang</surname> <given-names>J.</given-names></name> <name><surname>Wartenburger</surname> <given-names>I.</given-names></name> <name><surname>H&#x000F6;hle</surname> <given-names>B.</given-names></name></person-group> (<year>2017</year>). <article-title>Prosodic boundary cues in German: evidence from the production and perception of bracketed lists</article-title>. <source>J. Phonet.</source> <volume>61</volume>, <fpage>71</fpage>&#x02013;<lpage>92</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.wocn.2017.01.002</pub-id></mixed-citation>
</ref>
<ref id="B48">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Preisig</surname> <given-names>B. C.</given-names></name> <name><surname>Meyer</surname> <given-names>M.</given-names></name></person-group> (<year>2025</year>). <article-title>Predictive coding and dimension-selective attention enhance the lateralization of spoken language processing</article-title>. <source>Neurosci. Biobehav. Rev.</source> <volume>172</volume>:<fpage>106111</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.neubiorev.2025.106111</pub-id><pub-id pub-id-type="pmid">40118260</pub-id></mixed-citation>
</ref>
<ref id="B49">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Rouder</surname> <given-names>J. N.</given-names></name> <name><surname>Speckman</surname> <given-names>P. L.</given-names></name> <name><surname>Sun</surname> <given-names>D.</given-names></name> <name><surname>Morey</surname> <given-names>R. D.</given-names></name> <name><surname>Iverson</surname> <given-names>G.</given-names></name></person-group> (<year>2009</year>). <article-title>Bayesian t tests for accepting and rejecting the null hypothesis</article-title>. <source>Psychonomic Bull. Rev.</source> <volume>16</volume>, <fpage>225</fpage>&#x02013;<lpage>237</lpage>. doi: <pub-id pub-id-type="doi">10.3758/PBR.16.2.225</pub-id><pub-id pub-id-type="pmid">19293088</pub-id></mixed-citation>
</ref>
<ref id="B50">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Schub&#x000F6;</surname> <given-names>F.</given-names></name> <name><surname>Zerbian</surname> <given-names>S.</given-names></name> <name><surname>Hanne</surname> <given-names>S.</given-names></name> <name><surname>Wartenburger</surname> <given-names>I.</given-names></name></person-group> (<year>2023</year>). <source>Prosodic Boundary Phenomena</source>. Berlin: Language Science Press. doi: <pub-id pub-id-type="doi">10.5281/zenodo.7777469</pub-id></mixed-citation>
</ref>
<ref id="B51">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Smith</surname> <given-names>D. J.</given-names></name> <name><surname>Stepp</surname> <given-names>C.</given-names></name> <name><surname>Guenther</surname> <given-names>F. H.</given-names></name> <name><surname>Kearney</surname> <given-names>E.</given-names></name></person-group> (<year>2020</year>). <article-title>Contributions of auditory and somatosensory feedback to vocal motor control</article-title>. <source>J. Speech Lang. Hear. Res.</source> <volume>63</volume>, <fpage>2039</fpage>&#x02013;<lpage>2053</lpage>. doi: <pub-id pub-id-type="doi">10.1044/2020_JSLHR-19-00296</pub-id><pub-id pub-id-type="pmid">32603626</pub-id></mixed-citation>
</ref>
<ref id="B52">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Sohoglu</surname> <given-names>E.</given-names></name> <name><surname>Peelle</surname> <given-names>J. E.</given-names></name> <name><surname>Carlyon</surname> <given-names>R. P.</given-names></name> <name><surname>Davis</surname> <given-names>M. H.</given-names></name></person-group> (<year>2012</year>). <article-title>Predictive top-down integration of prior knowledge during speech perception</article-title>. <source>J. Neurosci.</source> <volume>32</volume>, <fpage>8443</fpage>&#x02013;<lpage>8453</lpage>. doi: <pub-id pub-id-type="doi">10.1523/JNEUROSCI.5069-11.2012</pub-id><pub-id pub-id-type="pmid">22723684</pub-id></mixed-citation>
</ref>
<ref id="B53">
<mixed-citation publication-type="web"><collab>Stan Development Team</collab> (<year>2020</year>). <source>RStan: The R Interface to Stan</source>. Available online at: <ext-link ext-link-type="uri" xlink:href="https://mc-stan.org/">https://mc-stan.org/</ext-link> (Accessed January 16, 2026).</mixed-citation>
</ref>
<ref id="B54">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Steinhauer</surname> <given-names>K.</given-names></name> <name><surname>Alter</surname> <given-names>K.</given-names></name> <name><surname>Friederici</surname> <given-names>A. D.</given-names></name></person-group> (<year>1999</year>). <article-title>Brain potentials indicate immediate use of prosodic cues in natural speech processing</article-title>. <source>Nat. Neurosci.</source> <volume>2</volume>, <fpage>191</fpage>&#x02013;<lpage>196</lpage>. doi: <pub-id pub-id-type="doi">10.1038/5757</pub-id><pub-id pub-id-type="pmid">10195205</pub-id></mixed-citation>
</ref>
<ref id="B55">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Tanenhaus</surname> <given-names>M. K.</given-names></name> <name><surname>Spivey-Knowlton</surname> <given-names>M. J.</given-names></name> <name><surname>Eberhard</surname> <given-names>K. M.</given-names></name> <name><surname>Sedivy</surname> <given-names>J. C.</given-names></name></person-group> (<year>1995</year>). <article-title>Integration of visual and linguistic information in spoken language comprehension</article-title>. <source>Science</source> <volume>268</volume>, <fpage>1632</fpage>&#x02013;<lpage>1634</lpage>. doi: <pub-id pub-id-type="doi">10.1126/science.7777863</pub-id><pub-id pub-id-type="pmid">7777863</pub-id></mixed-citation>
</ref>
<ref id="B56">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Tyler</surname> <given-names>M. D.</given-names></name> <name><surname>Cutler</surname> <given-names>A.</given-names></name></person-group> (<year>2009</year>). <article-title>Cross-language differences in cue use for speech segmentation</article-title>. <source>J. Acoust. Soc. Am.</source> <volume>126</volume>, <fpage>367</fpage>&#x02013;<lpage>376</lpage>. doi: <pub-id pub-id-type="doi">10.1121/1.3129127</pub-id><pub-id pub-id-type="pmid">19603893</pub-id></mixed-citation>
</ref>
<ref id="B57">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Vasishth</surname> <given-names>S.</given-names></name> <name><surname>Nicenboim</surname> <given-names>B.</given-names></name> <name><surname>Beckman</surname> <given-names>M. E.</given-names></name> <name><surname>Li</surname> <given-names>F.</given-names></name> <name><surname>Kong</surname> <given-names>E. J.</given-names></name></person-group> (<year>2018</year>). <article-title>Bayesian data analysis in the phonetic sciences: a tutorial introduction</article-title>. <source>J. Phonet.</source> <volume>71</volume>, <fpage>147</fpage>&#x02013;<lpage>161</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.wocn.2018.07.008</pub-id><pub-id pub-id-type="pmid">30197458</pub-id></mixed-citation>
</ref>
<ref id="B58">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ver&#x000ED;ssimo</surname> <given-names>J.</given-names></name></person-group> (<year>2023</year>). <article-title>When fixed and random effects mismatch: another case of inflation of evidence in non-maximal models</article-title>. <source>Comput. Brain Behav.</source> <volume>6</volume>, <fpage>84</fpage>&#x02013;<lpage>101</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s42113-022-00152-3</pub-id></mixed-citation>
</ref>
<ref id="B59">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ver&#x000ED;ssimo</surname> <given-names>J.</given-names></name></person-group> (<year>2025</year>). <article-title>A gentle introduction to Bayesian statistics, with applications to bilingualism research</article-title>. <source>Linguistic, Approach, Biling.</source> <volume>15</volume>, <fpage>453</fpage>&#x02013;<lpage>486</lpage>. doi: <pub-id pub-id-type="doi">10.1075/lab.24027.ver</pub-id></mixed-citation>
</ref>
<ref id="B60">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wagenmakers</surname> <given-names>E.-J.</given-names></name> <name><surname>Lodewyckx</surname> <given-names>T.</given-names></name> <name><surname>Kuriyal</surname> <given-names>H.</given-names></name> <name><surname>Grasman</surname> <given-names>R.</given-names></name></person-group> (<year>2010</year>). <article-title>Bayesian hypothesis testing for psychologists: a tutorial on the savage-dickey method</article-title>. <source>Cogn. Psychol.</source> <volume>60</volume>, <fpage>158</fpage>&#x02013;<lpage>189</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.cogpsych.2009.12.001</pub-id><pub-id pub-id-type="pmid">20064637</pub-id></mixed-citation>
</ref>
<ref id="B61">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wagner</surname> <given-names>M.</given-names></name> <name><surname>Watson</surname> <given-names>D. G.</given-names></name></person-group> (<year>2010</year>). <article-title>Experimental and theoretical advances in prosody: a review</article-title>. <source>Lang. Cogn. Process.</source> <volume>25</volume>, <fpage>905</fpage>&#x02013;<lpage>945</lpage>. doi: <pub-id pub-id-type="doi">10.1080/01690961003589492</pub-id><pub-id pub-id-type="pmid">22096264</pub-id></mixed-citation>
</ref>
<ref id="B62">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wurm</surname> <given-names>L. H.</given-names></name> <name><surname>Fisicaro</surname> <given-names>S. A.</given-names></name></person-group> (<year>2014</year>). <article-title>What residualizing predictors in regression analyses does (and what it does not do)</article-title>. <source>J. Mem. Lang.</source> <volume>72</volume>, <fpage>37</fpage>&#x02013;<lpage>48</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.jml.2013.12.003</pub-id></mixed-citation>
</ref>
<ref id="B63">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Yang</surname> <given-names>X.</given-names></name> <name><surname>Shen</surname> <given-names>X.</given-names></name> <name><surname>Li</surname> <given-names>W.</given-names></name> <name><surname>Yang</surname> <given-names>Y.</given-names></name></person-group> (<year>2014</year>). <article-title>How listeners weight acoustic cues to intonational phrase boundaries</article-title>. <source>PLoS ONE</source> <volume>9</volume>:<fpage>e102166</fpage>. doi: <pub-id pub-id-type="doi">10.1371/journal.pone.0102166</pub-id><pub-id pub-id-type="pmid">25019156</pub-id></mixed-citation>
</ref>
<ref id="B64">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zloteanu</surname> <given-names>M.</given-names></name> <name><surname>Vuorre</surname> <given-names>M.</given-names></name></person-group> (<year>2024</year>). <article-title>A tutorial for deception detection analysis or: how I learned to stop aggregating veracity judgments and embraced signal detection theory mixed models</article-title>. <source>J. Nonverbal Behav.</source> <volume>48</volume>, <fpage>161</fpage>&#x02013;<lpage>185</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s10919-024-00456-x</pub-id></mixed-citation>
</ref>
</ref-list>
<fn-group>
<fn fn-type="custom" custom-type="edited-by" id="fn0001">
<p>Edited by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/9156/overview">Matthew W. Crocker</ext-link>, Saarland University, Germany</p>
</fn>
<fn fn-type="custom" custom-type="reviewed-by" id="fn0002">
<p>Reviewed by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3317851/overview">Filiz Tezcan</ext-link>, Maastricht University, Netherlands</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3318646/overview">Svetlana Vetchinnikova</ext-link>, University of Helsinki, Finland</p>
</fn>
</fn-group>
</back>
</article>