<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="2.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Psychiatry</journal-id>
<journal-title>Frontiers in Psychiatry</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Psychiatry</abbrev-journal-title>
<issn pub-type="epub">1664-0640</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fpsyt.2024.1347913</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Psychiatry</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Processing of prosodic cues of uncertainty in autistic and non-autistic adults: a study based on articulatory speech synthesis</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Bellinghausen</surname>
<given-names>Charlotte</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="author-notes" rid="fn001">
<sup>*</sup>
</xref>
<xref ref-type="author-notes" rid="fn003">
<sup>&#x2020;</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2279735"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Schr&#xf6;der</surname>
<given-names>Bernhard</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="author-notes" rid="fn003">
<sup>&#x2020;</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2734035"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Rauh</surname>
<given-names>Reinhold</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="author-notes" rid="fn003">
<sup>&#x2020;</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/597445"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Riedel</surname>
<given-names>Andreas</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
<xref ref-type="author-notes" rid="fn003">
<sup>&#x2020;</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/266126"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Dahmen</surname>
<given-names>Paula</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Birkholz</surname>
<given-names>Peter</given-names>
</name>
<xref ref-type="aff" rid="aff5">
<sup>5</sup>
</xref>
<xref ref-type="author-notes" rid="fn003">
<sup>&#x2020;</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Tebartz van Elst</surname>
<given-names>Ludger</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<xref ref-type="author-notes" rid="fn003">
<sup>&#x2020;</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/188331"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Fangmeier</surname>
<given-names>Thomas</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<xref ref-type="author-notes" rid="fn003">
<sup>&#x2020;</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2726393"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>Institute of German Studies, University of Duisburg-Essen</institution>, <addr-line>Duisburg</addr-line>, <country>Germany</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Department of Child and Adolescent Psychiatry, Psychotherapy, and Psychosomatics, Medical Center &#x2013; University of Freiburg, Faculty of Medicine, University of Freiburg</institution>, <addr-line>Freiburg</addr-line>, <country>Germany</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>Department of Psychiatry and Psychotherapy, Medical Center &#x2013; University of Freiburg, Faculty of Medicine, University of Freiburg</institution>, <addr-line>Freiburg</addr-line>, <country>Germany</country>
</aff>
<aff id="aff4">
<sup>4</sup>
<institution>Luzerner Psychiatrie, Ambulante Dienste</institution>, <addr-line>Luzern</addr-line>, <country>Switzerland</country>
</aff>
<aff id="aff5">
<sup>5</sup>
<institution>Institute of Acoustics and Speech Communication, Technische Universit&#xe4;t Dresden</institution>, <addr-line>Dresden</addr-line>, <country>Germany</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>Edited by: Antonio M. Persico, University of Modena and Reggio Emilia, Italy</p>
</fn>
<fn fn-type="edited-by">
<p>Reviewed by: Elisabetta Genovese, University of Modena and Reggio Emilia, Italy</p>
<p>Loredana Schettino, Free University of Bozen-Bolzano, Italy</p>
<p>Simon Betz, Bielefeld University, Germany</p>
</fn>
<fn fn-type="corresp" id="fn001">
<p>*Correspondence: Charlotte Bellinghausen, <email xlink:href="mailto:charlotte.bellinghausen@uni-due.de">charlotte.bellinghausen@uni-due.de</email>
</p>
</fn>
<fn fn-type="other" id="fn003">
<p>&#x2020;ORCID: Charlotte Bellinghausen, <uri xlink:href="https://orcid.org/0000-0001-7831-8754">https://orcid.org/0000-0001-7831-8754</uri>; Bernhard Schr&#xf6;der, <uri xlink:href="https://orcid.org/0000-0002-7847-1393">https://orcid.org/0000-0002-7847-1393</uri>; Reinhold Rauh, <uri xlink:href="https://orcid.org/0000-0003-3053-1163">https://orcid.org/0000-0003-3053-1163</uri>; Andreas Riedel, <uri xlink:href="https://orcid.org/0009-0001-4977-2752">https://orcid.org/0009-0001-4977-2752</uri>; Peter Birkholz, <uri xlink:href="https://orcid.org/0000-0003-0167-8123">https://orcid.org/0000-0003-0167-8123</uri>; Ludger Tebartz van Elst, <uri xlink:href="https://orcid.org/0000-0002-2274-5447">https://orcid.org/0000-0002-2274-5447</uri>; Thomas Fangmeier, <uri xlink:href="https://orcid.org/0009-0007-7966-9215">https://orcid.org/0009-0007-7966-9215</uri>
</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>14</day>
<month>10</month>
<year>2024</year>
</pub-date>
<pub-date pub-type="collection">
<year>2024</year>
</pub-date>
<volume>15</volume>
<elocation-id>1347913</elocation-id>
<history>
<date date-type="received">
<day>01</day>
<month>12</month>
<year>2023</year>
</date>
<date date-type="accepted">
<day>09</day>
<month>09</month>
<year>2024</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2024 Bellinghausen, Schr&#xf6;der, Rauh, Riedel, Dahmen, Birkholz, Tebartz van Elst and Fangmeier</copyright-statement>
<copyright-year>2024</copyright-year>
<copyright-holder>Bellinghausen, Schr&#xf6;der, Rauh, Riedel, Dahmen, Birkholz, Tebartz van Elst and Fangmeier</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<sec>
<title>Introduction</title>
<p>We investigated the prosodic perception of uncertainty cues in adults with Autism Spectrum Disorder (ASD) compared to neurotypical adults (NTC).</p>
</sec>
<sec>
<title>Method</title>
<p>We used articulatory synthetic speech to express uncertainty in a human-machine scenario by varying the three acoustic cues <italic>pause</italic>, <italic>intonation</italic>, and <italic>hesitation</italic>. Twenty-eight adults with ASD and 28 NTC adults rated each answer for uncertainty, naturalness, and comprehensibility.</p>
</sec>
<sec>
<title>Results</title>
<p>Both groups reliably perceived different levels of uncertainty. Stimuli were rated as less uncertain by the ASD group, but not significantly. Only when we pooled the recipients&#x2019; ratings for all three cues, did we find a significant group difference. In terms of reaction time, we observed longer reaction times in the ASD group compared to the neurotypical comparison group for the uncertainty level hesitation &amp; strong intonation, but the differences were not significant after Bonferroni correction. Furthermore, our results showed a significant group difference between the correlation of uncertainty and naturalness, i.e. the correlation in the ASD group is significantly lower than in the NTC group. Obtained effect size estimates can inform sample size calculations in future studies for the reliable identification of group differences.</p>
</sec>
<sec>
<title>Discussion</title>
<p>In future work, we would like to further investigate the interaction of all three cues and uncertainty perception. It would be interesting to further vary the duration of the pause and also to use different types of fillers. From a developmental perspective, uncertainty perception should also be investigated in children and adolescents with ASD.</p>
</sec>
</abstract>
<kwd-group>
<kwd>speech perception</kwd>
<kwd>autism spectrum disorder</kwd>
<kwd>prosody</kwd>
<kwd>uncertainty</kwd>
<kwd>emotion perception</kwd>
<kwd>theory of mind</kwd>
</kwd-group>
<counts>
<fig-count count="6"/>
<table-count count="7"/>
<equation-count count="0"/>
<ref-count count="118"/>
<page-count count="21"/>
<word-count count="14533"/>
</counts>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-in-acceptance</meta-name>
<meta-value>Autism</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec id="s1" sec-type="intro">
<label>1</label>
<title>Introduction</title>
<p>We present an empirical study investigating the perception of uncertainty cues in adults with ASD compared to the NTC group. To generate our material, we used articulatory speech synthesis with varying prosodic uncertainty features. The utterances were presented to the participants and they were asked to rate them. We consider the ascription of (un)certainty as a part of affective ToM and assume that (un)certainty can be expressed prosodically without interaction with syntactic or semantic features of an utterance. Thus, its effect can be studied in isolation. In the following introductory section, we provide the theoretical background of our research goal and outline the state of research on the role of prosody perception in ASD. This includes studies of emotion perception, speech synthesis perception, and uncertainty perception in both human-human and human-machine interaction.</p>
<p>According to DSM-5 (<xref ref-type="bibr" rid="B1">1</xref>) and ICD-11 (<xref ref-type="bibr" rid="B2">2</xref>) Autism Spectrum Disorder (ASD) is classified as a neurodevelopmental disorder with severe impairments in the domains of social communication and restrictive repetitive behaviors/interests. The prevalence is approximately 1% (<xref ref-type="bibr" rid="B3">3</xref>, <xref ref-type="bibr" rid="B4">4</xref>). The male-female ratio in well-ascertained epidemiological samples is about 3:1. However, there are concerns about under-reporting in girls and women [cf. (<xref ref-type="bibr" rid="B1">1</xref>): 64]. The etiology of ASD shows a strong genetic component as well as other causes (<xref ref-type="bibr" rid="B4">4</xref>).</p>
<p>In this article we will focus on Autism Spectrum Disorder without accompanying intellectual impairment (ASD without II). A description of ASD without II can be found, for example, in Riedel (<xref ref-type="bibr" rid="B5">5</xref>) and in Vogeley (<xref ref-type="bibr" rid="B6">6</xref>). In the area of language processing, syntactic and semantic processing are barely affected in ASD without intellectual impairment taking into account the semiotic dimensions according to Morris (<xref ref-type="bibr" rid="B7">7</xref>), but problems in pragmatic interpretation are often found [ (<xref ref-type="bibr" rid="B8">8</xref>): 4f.]. For example, adults with ASD without II often have difficulty understanding non-lexicalized metaphors as assessed by the Freiburg Questionnaire of Linguistic Pragmatics (FQLP) (<xref ref-type="bibr" rid="B9">9</xref>). Although in the literature, often problems in general pragmatic processing in ASD without intellectual impairment are described, it has been shown that the pragmatic abilities for hearers with ASD without II differ between pragmatic domains [cf. (<xref ref-type="bibr" rid="B10">10</xref>): 114, see also (<xref ref-type="bibr" rid="B11">11</xref>)]. In terms of syntactic processing, Durrleman et&#xa0;al. (<xref ref-type="bibr" rid="B12">12</xref>) tested relative clause comprehension in autistic participants with and without reported language delay. They found that the participants with reported language delay had more difficulty with subject relatives than those without language delay. It should be noted here that we assume that syntax is more likely to be impaired in autistic individuals with delayed language development. However, in the case of autism without intellectual impairment pragmatics is the focus of our research interest.</p>
<p>Several empirical studies investigated the prosodic competence of participants with ASD without II. The term prosody is defined as &#x201c;[ &#x2026; ] a set of higher-level organizational structures that account for variations in pitch, loudness, duration, spectral tilt, segment reduction and their associated articulatory parameters&#x201d; [ (<xref ref-type="bibr" rid="B13">13</xref>): 327].</p>
<p>At the interface of syntax and pragmatics, the work of Martzoukou et&#xa0;al. (<xref ref-type="bibr" rid="B14">14</xref>, <xref ref-type="bibr" rid="B15">15</xref>) suggested evidence of problems with the use of prosody in syntactic processing. Similarly, Terzi et&#xa0;al. (<xref ref-type="bibr" rid="B16">16</xref>) reported difficulties at the interface of morpho-syntax with pragmatics and prosody in ASD without intellectual impairment.</p>
<p>However, several studies have focused on the perception and production of prosody and its signaling of pragmatic and emotional features of utterances. In the following, we present selected previous studies that have investigated the role of prosody in both speech production and speech perception in order to place our empirical study in a theoretical context. Since prosody has different linguistic and paralinguistic functions [cf. (<xref ref-type="bibr" rid="B17">17</xref>): 326], we refer first to linguistic functions such as the marking/perception of information status, i.e. structural prosodic functions [for an overview see (<xref ref-type="bibr" rid="B18">18</xref>)]. For example, prosody can be used to indicate the information status of a sentence (<xref ref-type="bibr" rid="B19">19</xref>). Afterwards we will discuss paralinguistic functions of prosody such as emotion expression/perception, i.e. affective prosodic functions [for an overview see (<xref ref-type="bibr" rid="B20">20</xref>)]. An overview of linguistic prosody in ASD is given in Grice et&#xa0;al. (<xref ref-type="bibr" rid="B21">21</xref>). In this work, the various functions of prosody are described in more detail. Depending on the prosodic function, there are differences between the ASD and NTC (neurotypical control) groups.<xref ref-type="fn" rid="fn1">
<sup>1</sup>
</xref>
</p>
<p>In terms of structural prosody skills in ASD, Shriberg et&#xa0;al. (<xref ref-type="bibr" rid="B22">22</xref>) reported for accentuation that speakers with ASD without II aged 10-50 years were less likely to use stress and phrasing appropriately compared to NTC. Similarly, Paul et&#xa0;al. (<xref ref-type="bibr" rid="B23">23</xref>) reported difficulties in stress production and also in speech perception more often in the ASD group without II compared to the NTC in speakers aged 14-21 years. In addition, Kiss et&#xa0;al. (<xref ref-type="bibr" rid="B24">24</xref>) found significant differences in global pitch distribution comparing children aged 4 to 9 years and a NTC group using the CSLU Autism Speech Corpus. Nadig and Shaw (<xref ref-type="bibr" rid="B25">25</xref>) observed a higher pitch range in speakers aged 8-14 years old with ASD without II in contrast to the NTC, but neurotypical students did not rate this as increased pitch variation. Wehrle et&#xa0;al. (<xref ref-type="bibr" rid="B26">26</xref>) also found a tendency for adults with ASD to have a higher pitch range compared to the NTC. For both prosody perception and production, Diehl and Paul (<xref ref-type="bibr" rid="B27">27</xref>) showed that children and adolescents aged 8-16 years with ASD without II required more time to imitate intonation patterns than the NTC.</p>
<p>For adults with ASD without II, the perception study by Grice et&#xa0;al. (<xref ref-type="bibr" rid="B28">28</xref>) suggested evidence that adults with ASD showed a reduced sensitivity to intonation and consequently based their judgments less on the word pronunciation in comparison to neurotypical adult hearers. Instead, word frequency was more important than intonation for decoding of information structure (i.e. the division of sentences into new and known information) in autistic hearers. In contrast, Globerson et&#xa0;al. (<xref ref-type="bibr" rid="B29">29</xref>) found no differences between adult hearers with and without ASD using prosody for pragmatic focus interpretation (i.e. the detection of new information in a sentence)<xref ref-type="fn" rid="fn2">
<sup>2</sup>
</xref>. The groups also did not differ in psychoacoustic tests. In contrast to that, the group with ASD performed less accurately on both the acoustic prosody recognition task and the facial emotion recognition task.</p>
<p>In their systematic review of linguistic prosody in ASD, Grice et&#xa0;al. (<xref ref-type="bibr" rid="B21">21</xref>) examined both production and perception of prosodic functions in grammar and pragmatics, as well as emotion. They categorize prosodic functions on a scale of more &#x201c;formal&#x201d; (rule-based) functions and more &#x201c;intuitive&#x201d; (highly context-dependent functions). Lexical stress, lexical tones and grammatical functions of prosody belong to the most formal functions, marking of intentions and emotions to the most intuitive [cf. (<xref ref-type="bibr" rid="B21">21</xref>): 2-5]. The results for perception suggested that the more intuitive aspects of prosody are more difficult in ASD, i.e. perceiving information status, intention, and emotional state. In contrast, the more formal aspects of prosody such as lexical and syntactic functions appear to be relatively unaffected [cf. (<xref ref-type="bibr" rid="B21">21</xref>): 6-8]. No clear overarching pattern was found for prosody production [cf. (<xref ref-type="bibr" rid="B21">21</xref>): 12]. However, there was a tendency for differences in general prosodic characteristics in speech production [see (<xref ref-type="bibr" rid="B21">21</xref>):13].</p>
<p>In conclusion, the results of the presented studies on structural prosody in ASD are not clear with respect to group differences. This can be explained by the different functions of prosody. Prosodic uncertainty marking is not one of the &#x2018;formal aspects&#x2019; of prosody and we would therefore expect to see stronger differences between groups.</p>
<p>After reviewing previous studies of prosody production and perception in hearers with ASD without II, we turn to affective prosody skills in ASD and refer to previous work on the expression and recognition of emotion in ASD. The reason for this is that emotions and epistemic states can also be expressed through prosody [cf. (<xref ref-type="bibr" rid="B32">32</xref>): 48]. This is relevant to our current experimental study in which we express different degrees of intended uncertainty by means of prosody using articulatory speech synthesis (<xref ref-type="bibr" rid="B33">33</xref>)<xref ref-type="fn" rid="fn3">
<sup>3</sup>
</xref>.</p>
<p>In their Facial Recognition Task, Doi et&#xa0;al. (<xref ref-type="bibr" rid="B34">34</xref>) generated varying degrees of anger, happiness, and sadness, as well as a neutral face. In the Emotional Prosody Recognition Task, a naturally spoken Japanese utterance was presented in an angry, happy, and sad way of speaking at different intensities. Also, a neutral acoustic stimulus was used [cf. (<xref ref-type="bibr" rid="B34">34</xref>): 2102 ff.]. The adults in the ASD group performed worse at recognizing angry and sad faces and voices. There was an effect of emotional intensity on emotion recognition. For facial expression recognition, there was a lower recognition in the ASD group compared to the NTC group for the stimuli of intermediate emotional intensities [cf. (<xref ref-type="bibr" rid="B34">34</xref>): 2109].</p>
<p>Hsu and Xu (<xref ref-type="bibr" rid="B35">35</xref>) used the articulatory speech synthesizer VocalTractLab (<xref ref-type="bibr" rid="B36">36</xref>) to produce modal, breathy, and pressed voices in Mandarin. Hearers with ASD without II and a NTC were asked to judge body size, emotion (happiness, anger, and neutral emotion) and attitude [cf. (<xref ref-type="bibr" rid="B35">35</xref>): 1925]. The results showed that the adolescents with ASD were less sensitive to auditory manipulation than their neurotypical peers [cf. (<xref ref-type="bibr" rid="B35">35</xref>): 1927]. However, to our knowledge, uncertainty perception has not been investigated using articulatory speech synthesis. We will use this type of speech synthesis to model uncertainty and test its influence on uncertainty perception in our empirical study.</p>
<p>In two meta-analyses of facial emotion recognition (<xref ref-type="bibr" rid="B37">37</xref>, <xref ref-type="bibr" rid="B38">38</xref>), participants with ASD showed significantly poorer performance in recognizing basic emotions compared to the NTC group for a subset of basic emotions. However, Scheerer et&#xa0;al. (<xref ref-type="bibr" rid="B39">39</xref>) found that autistic and typically developing children were accurate in matching emotional voice clips to emotion words, but autistic children had difficulty in matching emotional voice clips to emotional faces. Lartseva et&#xa0;al. (<xref ref-type="bibr" rid="B40">40</xref>) likewise document the presence of impairments in emotional language processing in individuals with ASD. These appear to be fairly independent of stimulus complexity, task complexity, and sensory modality as well as the level of language development. Lui et&#xa0;al. (<xref ref-type="bibr" rid="B41">41</xref>) investigated the role of psychoacoustic abilities in affective prosody recognition in autistic adults. Their results indicated that psychoacoustic abilities were used as a compensatory mechanism for deficits in higher-order processing of emotional signals in social interactions.</p>
<p>In our recent study (<xref ref-type="bibr" rid="B42">42</xref>) we presented a systematic analysis of 12 selected studies on emotion perception for the auditory and/or visual modality. The analysis revealed that in most cases basic emotions according to Ekman (<xref ref-type="bibr" rid="B43">43</xref>) were tested exclusively or in combination with complex emotions. The results generally showed a difference in perception between the ASD and NTC groups for the different modalities with only two studies showing no difference in visual emotion perception.</p>
<p>In their systematic review of affective prosody recognition in ASD concerning basic emotions according to Ekman (<xref ref-type="bibr" rid="B43">43</xref>), Zhang et&#xa0;al. (<xref ref-type="bibr" rid="B44">44</xref>) investigated potential factors for differences in study results comparing ASD and NTC groups. Their results showed that the level of difficulty in affective prosody recognition experienced by hearers with ASD varied across basic emotions.</p>
<p>As the aforementioned studies on emotion perception in ASD have shown divergent results regarding differences in emotion processing between autistic and non-autistic hearers, we believe that further research is needed in this area. The studies mentioned above have in common that mainly basic emotions according to Ekman (<xref ref-type="bibr" rid="B43">43</xref>) were investigated. In our work, we focus on uncertainty as a non-prototypical emotion. To our knowledge, there is a research gap regarding the perception of uncertainty in ASD. With our study, we hope to contribute to the understanding of how uncertainty is processed as a non-prototypical emotion by hearers with ASD and thus fill this research gap.</p>
<p>Next, we will further motivate why we consider the perception of uncertainty conveyed by prosodic cues in ASD to be of particular interest. We assume that uncertainty refers to the statement in the utterance of the prosodic information. The speaker&#x2019;s belief state, including the perceived uncertainty, is part of the hearer&#x2019;s ToM [see Theory of Mind; (<xref ref-type="bibr" rid="B45">45</xref>)]. We regard the attribution of uncertainty to another person, i.e. the speaker, as a case of <italic>affective</italic> ToM, but with reference to a <italic>proposition</italic> (a statement or a fact about which the speaker is uncertain), i.e. to a conceptual content.</p>
<p>Uncertainty could therefore be understood as an affective propositional attitude. In philosophy, psychology, linguistics, and cognitive science, propositional attitudes are understood as the mental phenomena expressed by sentences such as <italic>Galileo believes that the earth moves</italic> and <italic>Pia hopes that it will rain</italic> (i.e. the belief about the movement of earth and the hope of rain). Even if propositional attitudes are discussed critically, it is agreed that they are mental phenomena and play a central role in our everyday practice of describing, explaining, and predicting others and ourselves [cf. (<xref ref-type="bibr" rid="B46">46</xref>)]. Even basic emotions according to Ekman (<xref ref-type="bibr" rid="B43">43</xref>) such as fear or surprise can be attitudes towards propositions, e.g. <italic>fearing that one shall be killed in an avalanche</italic> or <italic>being surprised that New York is further south than Rome.</italic> A discussion of the propositional attitude approach to emotions is given in Cudney (<xref ref-type="bibr" rid="B47">47</xref>). According to Giannakidou and Mari (<xref ref-type="bibr" rid="B48">48</xref>), emotion attitudes appear as gradable psychological attitudes, i.e. <italic>be happy</italic>, <italic>be surprised</italic>, <italic>be angry</italic>, and are assumed to be factive.</p>
<p>In the field of prosody research, Hirschberg [ (<xref ref-type="bibr" rid="B49">49</xref>): 532] notes that the variation in prosody influences the interpretation of linguistic phenomena in many languages. Speakers can also use prosody to indicate the propositional attitude they have towards a certain proposition when uttering a sentence expressing that proposition [see also (<xref ref-type="bibr" rid="B49">49</xref>): 532].</p>
<p>As already noted above, uncertainty is a complex phenomenon. When we refer to uncertainty we mean uncertainty in answers in question-answer situations as will be explained below. Thus, the aim of our study is to empirically investigate the perception of uncertainty in autistic hearers in order to get a broader picture of emotion processing in ASD.</p>
<p>Next, we will explain the theoretical background of the communication of uncertainty in face-to-face communication in neurotypical hearers. Then we will further explain the motivation for our empirical investigation in hearers with ASD.</p>
<sec id="s1_1">
<label>1.1</label>
<title>Communication of uncertainty</title>
<p>The expression and perception of uncertainty is essential in communication [cf. (<xref ref-type="bibr" rid="B50">50</xref>): 8]. As remarked in Wollermann [ (<xref ref-type="bibr" rid="B51">51</xref>): 80f.], uncertainty can generally be regarded as a non-prototypical emotion [see also (<xref ref-type="bibr" rid="B52">52</xref>)]. Kuhlthau (<xref ref-type="bibr" rid="B53">53</xref>) categorizes uncertainty in cognitive terms. Furthermore, uncertainty can be considered from an epistemic point of view in communication (<xref ref-type="bibr" rid="B54">54</xref>). A discussion of whether epistemic emotions are metacognitive can be found in Carruthers (<xref ref-type="bibr" rid="B55">55</xref>).</p>
<p>Following Wollermann [ (<xref ref-type="bibr" rid="B51">51</xref>): 80], we assume that speakers and hearers communicate uncertainty in question-answer situations: communication partner A asks communication partner B a question. B is uncertain about the answer and expresses this uncertainty. A uses these uncertainty cues to decode B&#x2019;s utterance and concludes that B is uncertain [cf. (<xref ref-type="bibr" rid="B51">51</xref>): 80]. It should be noted explicitly here that uncertainty is a complex phenomenon that encompasses different dimensions and definitions [see also (<xref ref-type="bibr" rid="B56">56</xref>): 138]. However, as noted above, we focus on uncertainty in responses to questions in communicative situations. We begin by referring to previous studies that have investigated the production and perception of uncertainty. We then discuss ideas of ToM and relate them to ASD in order to provide the theoretical background for our empirical study of uncertainty perception in ASD.</p>
<p>Smith and Clark (<xref ref-type="bibr" rid="B57">57</xref>) used the <italic>Feeling of Knowing</italic> paradigm following Hart (<xref ref-type="bibr" rid="B58">58</xref>) in order to test memory processes in adults in question-answering situations. Empirical results showed that uncertainty was marked, among other cues, lexically by the use of phrases such as &#x201c;I guess&#x201d; and by fillers such as &#x201c;uh&#x201d; and &#x201c;um&#x201d;. On the prosodic level pauses and rising intonation were observed as prosodic indicators of uncertainty [cf. (<xref ref-type="bibr" rid="B57">57</xref>): 32ff., see also (<xref ref-type="bibr" rid="B51">51</xref>): 82f.]. In order to test the perception of another speaker, Brennan and Williams (<xref ref-type="bibr" rid="B59">59</xref>) defined the <italic>Feeling of Another&#x2019;s Knowing</italic> paradigm. They reproduced the study of Smith and Clark (<xref ref-type="bibr" rid="B57">57</xref>). In a further step, they used the audio material for listening evaluation. It was found that lexical hedges, rising intonation and delay contributed to the perception of uncertainty [cf. (<xref ref-type="bibr" rid="B59">59</xref>): 383; see also (<xref ref-type="bibr" rid="B51">51</xref>): 83]. Swerts and Krahmer (<xref ref-type="bibr" rid="B60">60</xref>) investigated the production and perception of uncertainty in the audio, visual, and audiovisual conditions. Uncertainty in answers was recognized in all three conditions, but recognition was easier in the audiovisual condition than in the unimodal conditions.</p>
<p>From a developmental perspective, Krahmer and Swerts (<xref ref-type="bibr" rid="B61">61</xref>) tested 7- to 8-year-old neurotypical children and adults for the perception and production of uncertainty in question-answer situations in audiovisual speech. Uncertain utterances produced by adult speakers were recognized more accurately than children&#x2019;s uncertain utterances by both children and adults as hearers. In addition, adults performed better than children in the recognition of uncertainty.</p>
<p>After referring to studies on uncertainty perception and production, we now provide the relevant background on ToM for our empirical study. Premack and Woodruff [ (<xref ref-type="bibr" rid="B45">45</xref>): 515] define ToM as follows: &#x201c;An individual has a theory of mind if he imputes mental states to himself and others&#x201d;. The concept of ToM also known as &#x201c;mind reading&#x201d; refers to the understanding of one&#x2019;s own thoughts and feelings and those of others, and is central for human social interaction and communication. There is empirical evidence that it develops very early in human ontogeny [cf. (<xref ref-type="bibr" rid="B62">62</xref>): 1357]. An overview of ToM can be found, for example, in Astington and Dack (<xref ref-type="bibr" rid="B63">63</xref>) and in Leslie (<xref ref-type="bibr" rid="B64">64</xref>).</p>
<p>According to Kamp-Becker and B&#xf6;lte [ (<xref ref-type="bibr" rid="B65">65</xref>): 40], children with ASD often have serious problems executing theory of mind tasks. In their seminal work, Baron-Cohen et&#xa0;al. (<xref ref-type="bibr" rid="B66">66</xref>) discussed whether the autistic child has a ToM. Their study and that of Happ&#xe9; (<xref ref-type="bibr" rid="B67">67</xref>) suggested that children with ASD had problems in passing false-belief-tasks.<xref ref-type="fn" rid="fn4">
<sup>4</sup>
</xref> However, it has to be discussed critically whether a general ToM deficit occurs in individuals with ASD. As Chevallier [ (<xref ref-type="bibr" rid="B70">70</xref>): 4825] remarks, there is evidence that there are problems related to ToM in ASD on the basis of standard false belief tasks or other more fine-grained tests. However, the characteristics of these impairments are still debated, i.e. whether they are primary or simply consecutive to more basic deficits [cf. (<xref ref-type="bibr" rid="B70">70</xref>): 4825]. Furthermore, the study by Tager-Flusberg (<xref ref-type="bibr" rid="B71">71</xref>) suggested that autistic participants who had passed a standard test with first-order false belief tasks were even able to solve more complex second-order belief tasks when processing demands were reduced. In addition, the work of Iao and Leekam (<xref ref-type="bibr" rid="B72">72</xref>) showed that difficulties with the false representation tasks in children with ASD could not be explained by executive functions or language impairments. This may provide evidence to support the position that children with ASD may not have a specific theory of mind deficit.</p>
<p>As Gabriel et&#xa0;al. [ (<xref ref-type="bibr" rid="B69">69</xref>): 534] pointed out, ToM is a complex phenomenon that can be divided into cognitive and affective ToM [e.g., (<xref ref-type="bibr" rid="B73">73</xref>)]. On the one hand affective ToM refers to the representation of implications about emotions. On the other hand cognitive ToM is a term that describes implications about knowledge, intentions, and beliefs [cf. (<xref ref-type="bibr" rid="B69">69</xref>): 534]. For early adolescence, there was a correlation between both types of ToM and attention. There was also a correlation between cognitive ToM and language comprehension on the one hand, and a correlation between affective ToM and verbal intelligence, verbal fluency, and verbal flexibility. In middle and late adolescence, both types of ToM were correlated with affective intelligence. On the other hand, there was a correlation between cognitive ToM and working memory, figural intelligence, and language comprehension. Thus, the results for cognitive and affective ToM showed a developmental step in middle adolescence. There were also gender differences in cognitive ToM [cf. (<xref ref-type="bibr" rid="B69">69</xref>): 533].</p>
<p>Raimo et&#xa0;al. (<xref ref-type="bibr" rid="B74">74</xref>) investigated both types of ToM in neurotypical individuals during adulthood. According to Raimo et&#xa0;al. [ (<xref ref-type="bibr" rid="B74">74</xref>): 10], the decline of the affective component of ToM occurs earlier in adulthood (from the age of 60) than the cognitive component (from the age of 70). This decline in the first age group is related to the ability to infer others&#x2019; emotions and to decode emotional expressions in the nonverbal modality, rather than to the ability to infer emotional mental states from social stories in the verbal modality. In the older group, the decline is independent of the verbal or nonverbal modality of the task used [cf. (<xref ref-type="bibr" rid="B74">74</xref>): 10].</p>
<p>It should be noted that these two subtypes of ToM, i.e. affective vs. cognitive ToM, are not always clearly distinguished. The demarcation is not always consistent and is not always sharp. We talk about needs which have rather an emotional component, e.g. when there is a need for getting comfort, or a cognitive character, e.g. when we are curious about something.</p>
<p>We now turn to previous studies of affective and cognitive ToM in ASD. Begeer et&#xa0;al. (<xref ref-type="bibr" rid="B75">75</xref>) investigated affective ToM and tested children&#x2019;s understanding of emotions based on counterfactual reasoning.<xref ref-type="fn" rid="fn5">
<sup>5</sup>
</xref> The autistic children had problems in explaining emotions based on downward counterfactual reasoning (i.e. contentment and relief) compared to the neurotypical children. In contrast, there were no group differences in emotions based on upward counterfactual reasoning (i.e. disappointment and regret). The results also showed a relationship between second-order false-belief reasoning and children&#x2019;s understanding of second-order counterfactual emotions for the neurotypical comparison group. However, children with ASD were more likely to rely on their general intellectual abilities [cf. (<xref ref-type="bibr" rid="B75">75</xref>): 301].</p>
<p>Scheeren et&#xa0;al. (<xref ref-type="bibr" rid="B77">77</xref>) tested comprehension of social stories containing second-order false belief display rules, double bluff, faux pas, and sarcasm. They found that children and adolescents with ASD performed as well as the NTC group. The age effect was consistent with adolescents performing better than children. Success on advanced ToM tasks was also determined by age, verbal abilities, and general reasoning abilities.</p>
<p>Similarly, Kimhi&#x2019;s (<xref ref-type="bibr" rid="B78">78</xref>) review showed that language and verbal abilities, as well as general reasoning, facilitated better ToM comprehension in ASD [cf. (<xref ref-type="bibr" rid="B78">78</xref>): 340]. They also noted that ToM is a critical factor in children&#x2019;s socio-cognitive development [cf. (<xref ref-type="bibr" rid="B78">78</xref>): 339].</p>
<p>There is currently some debate as to whether or not the feeling of uncertainty (and its supposed opposite, the feeling of certainty) belongs specifically to the category of so-called &#x201c;epistemic emotions&#x201d; or can be considered as an emotion at all [see Meylan (<xref ref-type="bibr" rid="B79">79</xref>) for a con position, and Silva (<xref ref-type="bibr" rid="B80">80</xref>) for a pro position]. Whatever its exact nature, there is broad agreement that the feeling of uncertainty is an affective mental state. For example, Morriss et&#xa0;al. [ (<xref ref-type="bibr" rid="B81">81</xref>): 2] emphasize that &#x201c;current theoretical models posit that uncertainty is aversive in and of itself and is consequently more likely to engage the behavioral inhibition system responsible for stress and associated negative emotional states, particularly anxiety and fear&#x201d; [for a more detailed discussion see Morriss et&#xa0;al., (<xref ref-type="bibr" rid="B81">81</xref>)]. Consistent with this, the glossary of mental state terms in the well-known Reading-the-Mind-in-the-Eyes test (<xref ref-type="bibr" rid="B82">82</xref>, <xref ref-type="bibr" rid="B83">83</xref>), which participants are asked to consult when they are unsure of the meaning of a response option, draws on the concepts of feelings of certainty and uncertainty.</p>
<p>Andres-Roqueta and Katsos (<xref ref-type="bibr" rid="B11">11</xref>) investigated pragmatic skills in children with and without ASD. The tasks consisted of a linguistic-pragmatics task requiring competence with structural language and a social-pragmatics task requiring competence with ToM. They reported similar performance on structural pragmatics between the group with ASD and the NTC, but a lower performance on social pragmatics, which the authors explain with difficulties in ToM [cf. (<xref ref-type="bibr" rid="B11">11</xref>): 1494].</p>
<p>At this point, we would also like to address the link between ToM and compensation strategies [e.g. (<xref ref-type="bibr" rid="B84">84</xref>, <xref ref-type="bibr" rid="B85">85</xref>)]. Livingston et&#xa0;al. [(<xref ref-type="bibr" rid="B84">84</xref>): 102] give the following example for compensation strategies: If a difficulty in distinguishing lies from jokes is masked by copying the behavior of others (e.g. laughing), compensation would mean that a conscious rule is developed: if someone makes a nonliteral statement and laughs, it is probably a joke. Otherwise it is probably a lie.</p>
<p>The following observations, which we describe in the next three paragraphs, come from our clinical practice: Socio-cognitive tasks can be solved either intuitively-automatically or cognitively-deliberatively. The following example illustrates this: When a happy face is perceived, the intuitive automatic solution would be &#x201c;the face shows happiness&#x201d;. In the case of the cognitively-deliberative solution, different features are combined for interpretation, such as the cheek-raiser and the lip corner puller. This corresponds to the compensatory strategy used by autistic people which can be used to circumvent problems in the socio-cognitive area. However, it requires a great deal of effort on the part of the autistic person. The disadvantage of most of the experiments is that one can concentrate on the tasks and solve them in a cognitive-deliberative way.</p>
<p>Adults with ASD often learn to read the mental states of their fellow human beings via cognitive compensation when they are consciously thinking about them. Most experimental designs can be solved in this way. This could explain the results showing no significant difference in speech interpretation between the ASD and NTC groups.</p>
<p>For neurotypical people, the construction of a ToM often occurs unconsciously, i.e. when they are not thinking about it. An example would be the perception of mental states of hearers during a speaker&#x2019;s lecture. In our clinical experience, this is not the case for people with ASD, as their focus needs to shift to consciously inferring the mental states of others.</p>
<p>In the research on disfluencies in speech two types of pauses are often discussed: silent pauses and filled pauses [cf. (<xref ref-type="bibr" rid="B86">86</xref>): 49; see also (<xref ref-type="bibr" rid="B87">87</xref>)]. As Rose [ (<xref ref-type="bibr" rid="B86">86</xref>): 49] points out, silent pauses are periods of non-articulation by the speaker, whereas filled pauses are periods of articulation of non-propositional content and also conform to language-specific conventions. Filled pauses are also often referred to as hesitations [for a discussion of the variation in terminology of filled pauses see Belz, (<xref ref-type="bibr" rid="B88">88</xref>): 1].</p>
<p>Silent and filled pauses have in common that they are used for speech planning and self-repair [cf. Rose, (<xref ref-type="bibr" rid="B86">86</xref>): 49]. Silent pauses are used for breathing and for marking syntactic structures, whereas filled pauses are periods of articulation of non-propositional content [cf. (<xref ref-type="bibr" rid="B86">86</xref>): 49] and are relevant for turn holding [see (<xref ref-type="bibr" rid="B89">89</xref>)].<sup>
<xref ref-type="fn" rid="fn6">
<sup>6</sup>
</xref>
</sup>
</p>
<p>According to Belz [ (<xref ref-type="bibr" rid="B91">91</xref>): 41], filled pauses may serve as hesitation markers, repair markers, turn holding markers and others. The work of Wehrle et&#xa0;al. (<xref ref-type="bibr" rid="B92">92</xref>) with adults with ASD without intellectual impairment showed that a higher proportion of filled pause tokens were produced with the canonical level pitch contour by the NTC group compared to the autistic speakers.</p>
<p>The pragmatic difference between silent and filled pauses is less relevant for us because the right to speak does not play a role in our scenario. Nevertheless, we test whether filled and silent pauses differ in terms of the attribution of uncertainty. We use a combination of silent and filled pauses to realize particularly long and conspicuous hesitations.</p>
<p>At the phonetic level, the study by Betz et&#xa0;al. (<xref ref-type="bibr" rid="B93">93</xref>) suggested that the position of the extension in noun phrases such as &#x2018;the green tree&#x2019; influences uncertainty perception. The results showed the following: Firstly, hearers interpreted lengthening in the initial position of a word as uncertainty about the semantic domain represented by the word itself. Secondly, hearers interpreted lengthening in the final position within the word as uncertainty about the semantic domain represented by the following content word [cf. (<xref ref-type="bibr" rid="B93">93</xref>): 3993]. As we used only one-word utterances in our study (un)certainty must be ascribed by the hearer to the information conveyed by this word.</p>
<p>Terms like hesitation and (dis)fluency are used differently in the literature [see (<xref ref-type="bibr" rid="B94">94</xref>)]. In our study, we use the term <italic>hesitation</italic> to refer to particles like &#x201c;uh&#x201d; which we also refer to as <italic>fillers</italic>. <italic>Hesitation</italic> and <italic>pause</italic> are each defined as independent variables for optimal manipulation of the synthetic signal [see (<xref ref-type="table" rid="T1">
<bold>Table&#xa0;1</bold>
</xref>)]. However, we are aware that the hesitation particle and pause often form a unit in spoken utterances.</p>
<table-wrap id="T1" position="float">
<label>Table&#xa0;1</label>
<caption>
<p>Nine different combinations of the three cues pause, hesitation and intonation.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="center">Pause</th>
<th valign="middle" align="center">Hesitation</th>
<th valign="middle" align="center">Intonation</th>
<th valign="middle" align="center">Level</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="center">&#x2013;</td>
<td valign="top" align="center">&#x2013;</td>
<td valign="top" align="center">&#x2013;</td>
<td valign="top" align="center">Certainty (Cer)</td>
</tr>
<tr>
<td valign="top" align="center">&#x2013;</td>
<td valign="top" align="center">+</td>
<td valign="top" align="center">&#x2013;</td>
<td valign="top" align="center">Hesitation (Hes)</td>
</tr>
<tr>
<td valign="top" align="center">+</td>
<td valign="top" align="center">&#x2013;</td>
<td valign="top" align="center">&#x2013;</td>
<td valign="top" align="center">Pause (Pau)</td>
</tr>
<tr>
<td valign="top" align="center">&#x2013;</td>
<td valign="top" align="center">&#x2013;</td>
<td valign="top" align="center">+</td>
<td valign="top" align="center">Intonation 1 (Into1)</td>
</tr>
<tr>
<td valign="top" align="center">&#x2013;</td>
<td valign="top" align="center">&#x2013;</td>
<td valign="top" align="center">+</td>
<td valign="top" align="center">Intonation 2 (Into2)</td>
</tr>
<tr>
<td valign="top" align="center">+</td>
<td valign="top" align="center">+</td>
<td valign="top" align="center">&#x2013;</td>
<td valign="top" align="center">HesPau</td>
</tr>
<tr>
<td valign="top" align="center">&#x2013;</td>
<td valign="top" align="center">+</td>
<td valign="top" align="center">+</td>
<td valign="top" align="center">HesInto2</td>
</tr>
<tr>
<td valign="top" align="center">+</td>
<td valign="top" align="center">&#x2013;</td>
<td valign="top" align="center">+</td>
<td valign="top" align="center">PauInto2</td>
</tr>
<tr>
<td valign="top" align="center">+</td>
<td valign="top" align="center">+</td>
<td valign="top" align="center">+</td>
<td valign="top" align="center">PauInto2Hes</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>In our study the aim was to investigate whether the hearer attributes uncertainty to the speaker solely on the basis of prosodic information. As already mentioned, we regard the attribution of uncertainty to another person, i.e. the speaker, as a case of affective ToM, but with reference to conceptual content (a statement or a fact which the speaker is uncertain about). It is important to note that in our scenario the speech signal is synthetic, as we expressed different degrees of intended uncertainty through prosody using articulatory speech synthesis (<xref ref-type="bibr" rid="B33">33</xref>). The uncertain synthetic utterance served as an answer in the form of a statement to a question in a brief human-machine scenario. We will refer to previous studies in which uncertainty was modeled using a speech synthesizer.</p>
</sec>
<sec id="s1_2">
<label>1.2</label>
<title>Modelling and perception of uncertainty in human-machine-communication</title>
<p>In the context of human-machine interaction, the question arises as to whether speech synthesis should be enriched with emotional expressions [for a recent discussion of the role of emotions in synthetic speech see (<xref ref-type="bibr" rid="B95">95</xref>)]. According to Murray and Arnott (<xref ref-type="bibr" rid="B96">96</xref>), one aspect of the naturalness of the synthetic utterance is that the emotional state of the speaker contributes to the variability of synthetic speech; emotional expressions are regarded as pragmatic variations in speech. Artificial question-answering systems may follow in order to maintain user trust by expressing the degree of uncertainty attached to the provided answers (<xref ref-type="bibr" rid="B97">97</xref>). According to Sz&#xe9;kely et&#xa0;al. [ (<xref ref-type="bibr" rid="B98">98</xref>): 804], the expression and communication of a system&#x2019;s internal uncertainty is a key to successful human-robot interaction.<sup>
<xref ref-type="fn" rid="fn7">
<sup>7</sup>
</xref>
</sup>
</p>
<p>In previous studies, disfluent speech for acoustic speech synthesis has been modeled using filled pauses (<xref ref-type="bibr" rid="B99">99</xref>) and also using filled pauses and lexical fillers (<xref ref-type="bibr" rid="B100">100</xref>) in unit selection speech synthesis.<xref ref-type="fn" rid="fn8">
<sup>8</sup>
</xref> In both studies, the activation of hesitations was not perceived differently with respect to naturalness from deactivation. H&#xf6;nemann and Wagner (<xref ref-type="bibr" rid="B102">102</xref>) modeled uncertainty in speech synthesis as one of four emotional states by using features of prosody and voice quality. Furthermore, in the study of Sz&#xe9;kely et&#xa0;al. (<xref ref-type="bibr" rid="B98">98</xref>) the perception of uncertainty in synthetic speech was tested by using a synthesis method based on a DNN (deep neural network). Decreased vocal effort, filled pauses and prolongation of function words contributed to an increase in the degree of perceived uncertainty. For an overview of the role of hesitations in spoken dialogue systems, see Betz (<xref ref-type="bibr" rid="B103">103</xref>).</p>
<p>In traditional approaches for speech synthesis evaluation [e.g. (<xref ref-type="bibr" rid="B104">104</xref>)], the quality of synthetic speech was assessed, among other measures, by hearers&#x2019; judgments. Typically, hearers were asked to rate the naturalness and comprehensibility of the synthetic speech [cf. (<xref ref-type="bibr" rid="B104">104</xref>): 1012].</p>
<p>In our work, we used the concept of measuring naturalness and comprehensibility to evaluate the synthetic utterances. It should be noted that Wagner et&#xa0;al. (<xref ref-type="bibr" rid="B105">105</xref>) discussed the current state of the art in TTS evaluation and presented a new research program for speech synthesis evaluation in a paper published after we had collected the data for this study. The authors suggested that contextual appropriateness plays a crucial role in speech synthesis evaluation. They argued that the specific application and listening situation needs to be taken into account [cf. (<xref ref-type="bibr" rid="B105">105</xref>): 105].</p>
<p>For our research goal, however, we were interested in testing whether the articulatory synthetic utterances were perceived as natural. Our aim was not to evaluate the synthetic utterances, but to perceptually test whether the utterances were natural and understandable, in order to rule out that these dimensions function as confounding variables. Furthermore, the purpose of the fictive machine application in our experimental scenario remains too vague to assess contextual appropriateness.</p>
<p>In our previous work on uncertainty perception (<xref ref-type="bibr" rid="B106">106</xref>&#x2013;<xref ref-type="bibr" rid="B108">108</xref>), we modeled different degrees of intended uncertainty with articulatory speech synthesis (<xref ref-type="bibr" rid="B33">33</xref>) and tested whether neurotypical adult hearers were able to discriminate between the degrees of uncertainty. The synthetic answers were part of a human-machine scenario in which the question was spoken by a human and the answer was the synthetic utterance. The acoustic cues rising intonation, pause and hesitation particle (&#x201c;uh&#x201d;) were systematically varied in Lasarcyk et&#xa0;al. (<xref ref-type="bibr" rid="B106">106</xref>) and in Wollermann et&#xa0;al. (<xref ref-type="bibr" rid="B107">107</xref>). Students from the University of Duisburg-Essen, as neurotypical hearers, were asked to judge the synthetic answers in terms of uncertainty and naturalness.<xref ref-type="fn" rid="fn9">
<sup>9</sup>
</xref> In both works an additive principle of the uncertainty cues was described, i.e. the combination of two cues led to a higher level of perceived uncertainty than single cues. The study by Lasarcyk et&#xa0;al. (<xref ref-type="bibr" rid="B106">106</xref>) showed no significant difference between judgments when comparing the relative contribution of the single cues <italic>intonation</italic> vs. <italic>filler</italic>. Similarly, in Wollermann et&#xa0;al. (<xref ref-type="bibr" rid="B107">107</xref>), the single cues <italic>pause</italic> vs. <italic>filler</italic> were not rated significantly differently in terms of perceived uncertainty, but <italic>intonation</italic> was rated significantly more strongly regarding uncertainty than <italic>pause</italic>. Both Lasarcyk et&#xa0;al. (<xref ref-type="bibr" rid="B106">106</xref>) and Wollermann et&#xa0;al. (<xref ref-type="bibr" rid="B107">107</xref>) found no correlation between the ratings of uncertainty and the naturalness of the stimuli.</p>
<p>The material used in our pilot study (<xref ref-type="bibr" rid="B109">109</xref>) was based on the material of our previous studies (<xref ref-type="bibr" rid="B106">106</xref>, <xref ref-type="bibr" rid="B107">107</xref>). In the following, when we refer to our pilot study we mean the study described by Bellinghausen et&#xa0;al. (<xref ref-type="bibr" rid="B109">109</xref>). However, we created new articulatory speech utterances with the revised version of VocalTractLab (<xref ref-type="bibr" rid="B33">33</xref>) conveying different degrees of uncertainty. The answer to each question was generated by varying <italic>pause</italic>, <italic>intonation</italic>, and <italic>hesitation</italic> as acoustic cues. In the perception task, 28 neurotypical student hearers rated each answer on a rating scale in terms of uncertainty, naturalness and comprehensibility. The results indicated different contributions of acoustic cues to uncertainty perception. The effect of <italic>intonation</italic> and <italic>hesitation</italic> was more evident than the effect of <italic>pause</italic>. We observed an additive principle of the three cues, i.e. the more cues of intended uncertainty were activated, the higher was the perceived degree of uncertainty. The implications can be summarized as follows: In our study, we were able to model different degrees of intended uncertainty using articulatory speech synthesis by different combinations of pause, hesitation and intonation. Neurotypical adult hearers, i.e. students from the University of Duisburg-Essen, were generally able to discriminate the different levels in perception, although the relative contribution of the acoustic cues varied.</p>
</sec>
</sec>
<sec id="s2">
<label>2</label>
<title>Method</title>
<p>In the current study, we aim to apply our experimental paradigm for measuring prosodic uncertainty in neurotypical hearers in our pilot study (<xref ref-type="bibr" rid="B109">109</xref>) to the investigation of prosody perception in autistic adult hearers. Thus, this study represents a feasibility study. We will present acoustic cues of uncertainty generated by articulatory speech synthesis to autistic adult listeners. To incorporate the developmental perspective, future work could modify the method to test autistic children and adolescents (see the Discussion).</p>
<sec id="s2_1">
<label>2.1</label>
<title>Goal and research question</title>
<p>Our central research question was the following: Is there a group difference in the perception of uncertainty between hearers without and with ASD? We assumed that the prosodic marking of uncertainty in the speech signal has an effect on the perception on the side of the hearer. As mentioned above, we consider the attribution of uncertainty as part of the affective ToM with respect to a propositional content, here the answer given in a short question-answer scenario. Furthermore, we hypothesized that the marking of uncertainty is less dependent on the structure and semantics of the utterance than other prosodic phenomena such as focus [for an empirical investigation of focus theories see (<xref ref-type="bibr" rid="B30">30</xref>); 51]. Therefore, there is less interaction with syntactic and semantic processing and the information conveyed by prosody can hardly be induced by other linguistic information.</p>
<p>With our study we hope to contribute to the understanding of prosodic processing in autistic adult hearers by focusing on uncertainty as an emotional expression.</p>
</sec>
<sec id="s2_2">
<label>2.2</label>
<title>Hypotheses</title>
<p>Our primary hypothesis was as follows: There are significant differences in the perception of uncertainty between the ASD group and the NTC group. A low level of expressed intended uncertainty would be perceived as less uncertain by the ASD group than by the NTC group.</p>
<p>The secondary hypothesis was based on the results of our previous studies (<xref ref-type="bibr" rid="B106">106</xref>, <xref ref-type="bibr" rid="B107">107</xref>) and was as follows: There would be a monotonic direct relationship between the number of prosodic uncertainty cues and participants&#x2019; ratings of uncertainty, regardless of group membership.</p>
<p>We used naturalness and intelligibility as quality measures for speech synthesis to see to what extent differences in naturalness (perception) can act as confounding variables. In our previous studies (<xref ref-type="bibr" rid="B106">106</xref>, <xref ref-type="bibr" rid="B107">107</xref>) we only measured naturalness as a standard method for evaluating uncertain synthetic speech. In the current work, we include both naturalness and intelligibility as possible confounding variables.</p>
<p>The quality of speech synthesis may vary under different conditions. We include these two factors in addition to uncertainty in the listeners&#x2019; evaluation.</p>
<sec id="s2_2_1">
<label>2.2.1</label>
<title>Material</title>
<p>We use the material that we have already tested in our pilot study (<xref ref-type="bibr" rid="B109">109</xref>). To express different intended levels of uncertainty, utterances generated by the articulatory speech synthesizer (<xref ref-type="bibr" rid="B33">33</xref>) were used. This allowed us to manipulate specific prosodic parameters while minimizing the influence of unintended variation compared to natural speech.<xref ref-type="fn" rid="fn10">
<sup>10</sup>
</xref>
</p>
<p>We chose the articulatory speech synthesizer VocalTractLab 2.2 by Birkholz (<xref ref-type="bibr" rid="B33">33</xref>) to generate high quality speech sounds while manipulating the parameters of the time-varying laryngeal and supra-laryngeal actions [cf. (<xref ref-type="bibr" rid="B109">109</xref>): 39]. The synthesizer has several components. To simulate the articulation process, 23 parameters control the geometric 3D model of a male vocal tract (<xref ref-type="bibr" rid="B110">110</xref>). A self-oscillating model of the vocal folds (<xref ref-type="bibr" rid="B36">36</xref>) is controlled by six parameters to specify the following features: subglottal pressure, fundamental frequency, and the rest shape of the glottis. The movements of the models of the vocal tract and the vocal folds are controlled by a gestural score. In this way, it is possible to manually adjust the movements for each word and to use different prosodic features for speech generation [cf. (<xref ref-type="bibr" rid="B109">109</xref>): 39].</p>
<p>In contrast to the articulatory speech synthesizer used here, state-of-the-art unit selection or neural synthesizers usually do not allow the individual manipulation of prosodic parameters such as f0 without causing involuntary changes in other prosodic parameters (e.g. voice quality) or articulation at the same time. This would make the specific assessment of the perceptual effect of individual prosodic parameters unreliable. Another way to manipulate prosodic parameters would have been to use a voice morphing method such as the change gender function in Praat (<xref ref-type="bibr" rid="B111">111</xref>), but this may introduce small acoustic artefacts in the manipulated signal, depending on the properties of the original signal (e.g. the irregularity of the voice).</p>
<p>The synthetic utterances were part of short question-answer pairs embedded in a human-machine interaction scenario designed to motivate the use of synthetic speech. The scenario was presented to the participants as follows: The question in German language was spoken by a natural voice (<italic>Was siehst Du?/What do you see?</italic>) and asked by a research assistant who showed pictures of fruit and vegetable objects to an image recognition robot. The synthetic answer, such as <italic>Bananen/Bananas</italic>, was given by the robot. The robot recognized the items with a certain level of confidence and was able to express uncertainty about the recognition in its answer. The critical stimuli were the following trisyllabic one-word sentences in German: <italic>Bananen/bananas, Limetten/limes, Melonen/melons, Tomaten/tomatoes</italic> [cf. (<xref ref-type="bibr" rid="B109">109</xref>): 40]. We have opted for one-word sentences because they represent the smallest meaningful unit for an answer. In total, there were nine different levels of intended uncertainty, i.e. all possible combinations of the three cues <italic>pause</italic>, <italic>hesitation</italic> and <italic>intonation</italic> [see (<xref ref-type="table" rid="T1">
<bold>Table&#xa0;1</bold>
</xref>)]. In addition, the following one-word phrases were used as distractors (without uncertainty cues) to the synthetic speech signal in order to minimize learning effects when the recipients judged the critical stimuli: <italic>Birnen/pears, Blaubeeren/blueberries, Bohnen/beans, Erdbeeren/strawberries, Gurken/cucumbers, Knoblauch/garlic, Mandarinen/mandarins, Orangen/oranges and Paprika/paprika</italic> [cf. (<xref ref-type="bibr" rid="B109">109</xref>): 40].</p>
<p>Following Bellinghausen et&#xa0;al. [ (<xref ref-type="bibr" rid="B109">109</xref>): 40], we describe below the three cues <italic>pause</italic>, <italic>hesitation</italic> and <italic>intonation</italic> used to generate the experimental stimuli.</p>
<p>Pause: This cue refers to the time between the question and the answer. For each level of intended uncertainty, a default silent pause of 1 s was used between the question and the answer. When the pause was activated (pause[+]), we used either a silent pause of 4 s as strongly marked pause or a filled pause,<xref ref-type="fn" rid="fn11">
<sup>11</sup>
</xref> i.e. the hesitation &#xe4;h/uh with a duration of 0.37 s followed by a silent pause of 3.632 s, giving a total duration of 4 s [cf. (<xref ref-type="bibr" rid="B109">109</xref>): 40].</p>
<p>It has to be noted that the pause can have other functions than expressing uncertainty. In this scenario, it could also be interpreted as the robot&#x2019;s processing time while producing the synthetic utterance. In our previous study (<xref ref-type="bibr" rid="B109">109</xref>) it emerged from the text comments that the robot was obviously considered to be uncertain. However, due to the close relationship between uncertainty and processing time, these two aspects cannot be separated.</p>
<p>Hesitation: The hesitation particle <italic>&#xe4;h/uh</italic> was either present (hes[+]) or absent (hes[-]) [cf. (<xref ref-type="bibr" rid="B109">109</xref>) 40].</p>
<p>Intonation: The intended level of certainty was expressed by a falling contour with a difference of 8 ST (semitones) between the highest pitch on the stressed syllable of the word and the lowest pitch at the end of the utterance. In addition, two intonation contours were used to express intended uncertainty. In the level <italic>Into1</italic>, the pitch of the last syllable rises by 8 ST (semitones) above the lowest pitch in the first syllable for moderate uncertainty, and in <italic>Into2</italic> it rises by 13 ST for intended strong uncertainty [see also (<xref ref-type="bibr" rid="B109">109</xref>) 40]. Different intonation contours for the critical stimulus <italic>Bananen</italic> are shown in <xref ref-type="fig" rid="f1">
<bold>Figures&#xa0;1</bold>
</xref>&#x2013;<xref ref-type="fig" rid="f3">
<bold>3</bold>
</xref>. The pitch contour on the left side is the question uttered by a human speaker. On the right the pitch contour of the synthetic answer is shown.</p>
<fig id="f1" position="float">
<label>Figure&#xa0;1</label>
<caption>
<p>Intonation contour for the question &#x201c;Was siehst Du/What do you see?&#x201d; (left side) and for the answer Bananen/Bananas; level: Certainty (Cer) [see also (<xref ref-type="bibr" rid="B109">109</xref>): 41].</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpsyt-15-1347913-g001.tif"/>
</fig>
<fig id="f2" position="float">
<label>Figure&#xa0;2</label>
<caption>
<p>Intonation contour for the question &#x201c;Was siehst Du/What do you see?&#x201d; (left side) and for the answer Bananen/Bananas; level: Intonation 1 (Into1) [see also (<xref ref-type="bibr" rid="B109">109</xref>): 41].</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpsyt-15-1347913-g002.tif"/>
</fig>
<fig id="f3" position="float">
<label>Figure&#xa0;3</label>
<caption>
<p>Intonation contour for the question &#x201c;Was siehst Du/What do you see?&#x201d; (left side) and for the answer Bananen/Bananas; level: Intonation 2 (Into2) [see also (<xref ref-type="bibr" rid="B109">109</xref>): 41].</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpsyt-15-1347913-g003.tif"/>
</fig>
<p>The number of critical stimuli was 36 (4 one-word utterances x 9 conditions). There were also 9 distractors and one practice trial <italic>Was siehst Du?/What do you see?</italic> The stimulus <italic>Rosinen/Raisins</italic> was presented at the beginning of the experiment. In order to minimize the influence of participants&#x2019; learning effects on their perceptual judgments, we constructed four task sets. Each critical item occurred only once within the four sets. Thus, each task set consisted of the practice trial, nine critical items complemented by nine distractors; the order of presentation of critical trials and distractors was randomized in advance. In this way, each participant had to work on one task set of 19 trials with question-answer pairs. Within each group, the four task sets were counterbalanced across participants (see Appendix for the experimental design). As we wanted to use as many potentially relevant questionnaires and control tests as possible in the feasibility study, we had to limit the number of trials in the prosody test to n=1 per condition, so that the experimental session would not be too long and become too strenuous, especially with regard to our patients.</p>
</sec>
<sec id="s2_2_2">
<label>2.2.2</label>
<title>Participants</title>
<p>56 participants (age range: 18-65, IQ &gt; 80) with German as their first language took part in the study. The ASD group consisted of 28 adults (12 female, 16 male) diagnosed according to ICD-10 criteria (F84.0 Childhood autism, F84.1 Atypical autism, F84.5 Asperger syndrome). Only for the ASD group the ADOS-2, Module 4 was used (scale Communication + Social Interaction Total: <italic>M</italic>=8.04, <italic>SD</italic>=4.46). There were also 28 neurotypical adults (14 female) in the NTC. As shown in <xref ref-type="table" rid="T2">
<bold>Table&#xa0;2</bold>
</xref>, there were no significant group differences in terms of age, gender, and IQ. In terms of autistic symptomatology, the ASD group had significantly higher values on the two self-report measures SRS-2 Adult Self-Report (ASD: <italic>M</italic>=112.50, <italic>SD</italic>=28.50; NTC: <italic>M</italic>=33.89, <italic>SD</italic>=19.07; <italic>t</italic>
<sub>(54)</sub>= 12.13, <italic>p</italic>&#xa0;&lt;.001) and the AQ [ASD: <italic>M</italic>=38.61, <italic>SD</italic>=7.16; NTC: <italic>M</italic>=13.29, <italic>SD</italic>=6.42; <italic>t</italic>
<sub>(54)</sub>= 13.93, <italic>p</italic> &lt;.001].</p>
<table-wrap id="T2" position="float">
<label>Table&#xa0;2</label>
<caption>
<p>Sample characteristics: Age, gender, IQ, and autistic symptomatology for the ASD and the NTC group.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" align="center" rowspan="2"/>
<th valign="top" colspan="3" align="center">ASD</th>
<th valign="top" colspan="3" align="center">NTC</th>
<th valign="top" align="center" rowspan="2">Test statistic</th>
<th valign="top" align="center" rowspan="2">
<italic>p</italic>
</th>
</tr>
<tr>
<th valign="top" align="center">
<italic>n</italic>
</th>
<th valign="top" align="center">
<italic>M</italic>
</th>
<th valign="top" align="center">
<italic>SD</italic>
</th>
<th valign="top" align="center">
<italic>n</italic>
</th>
<th valign="top" align="center">
<italic>M</italic>
</th>
<th valign="top" align="center">
<italic>SD</italic>
</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="left">
<bold>Age</bold>
</td>
<td valign="top" align="center">28</td>
<td valign="top" align="center">44.68</td>
<td valign="top" align="center">11.68</td>
<td valign="top" align="center">28</td>
<td valign="top" align="center">41.61</td>
<td valign="top" align="center">14.04</td>
<td valign="top" align="center">
<italic>t</italic>
<sub>(54)</sub> = 0.89</td>
<td valign="top" align="center">.378</td>
</tr>
<tr>
<th valign="top" colspan="9" align="left">IQ</th>
</tr>
<tr>
<td valign="middle" align="left">CFT 20-R</td>
<td valign="top" align="center">28</td>
<td valign="top" align="center">111.36</td>
<td valign="top" align="center">21.68</td>
<td valign="top" align="center">28</td>
<td valign="top" align="center">108.61</td>
<td valign="top" align="center">12.92</td>
<td valign="top" align="center">
<italic>t</italic>
<sub>(54)</sub> = 0.58</td>
<td valign="top" align="center">.567</td>
</tr>
<tr>
<td valign="middle" align="left">MWT-B</td>
<td valign="top" align="center">28</td>
<td valign="top" align="center">113.79</td>
<td valign="top" align="center">15.65</td>
<td valign="top" align="center">28</td>
<td valign="top" align="center">117.32</td>
<td valign="top" align="center">14.34</td>
<td valign="top" align="center">
<italic>t</italic>
<sub>(54)</sub>= -0.88</td>
<td valign="top" align="center">.382</td>
</tr>
<tr>
<td valign="middle" align="left">
<bold>SRS-2 Adult Self-Report</bold>
</td>
<td valign="top" align="center">28</td>
<td valign="top" align="center">112.50</td>
<td valign="top" align="center">28.50</td>
<td valign="top" align="center">28</td>
<td valign="top" align="center">33.89</td>
<td valign="top" align="center">19.07</td>
<td valign="top" align="center">
<italic>t</italic>
<sub>(54)</sub>= 12.13</td>
<td valign="top" align="center">&lt;.001</td>
</tr>
<tr>
<td valign="middle" align="left">
<bold>AQ</bold>
</td>
<td valign="top" align="center">28</td>
<td valign="top" align="center">38.61</td>
<td valign="top" align="center">7.16</td>
<td valign="top" align="center">28</td>
<td valign="top" align="center">13.29</td>
<td valign="top" align="center">6.42</td>
<td valign="top" align="center">
<italic>t</italic>
<sub>(54)</sub>= 13.93</td>
<td valign="top" align="center">&lt;.001</td>
</tr>
<tr>
<td valign="middle" align="left">
<bold>ADOS-2 Module 4</bold>
</td>
<td valign="middle" align="center">28</td>
<td valign="middle" align="center">8.04</td>
<td valign="middle" align="center">4.46</td>
<td valign="middle" align="center"/>
<td valign="middle" align="center"/>
<td valign="middle" align="left"/>
<td valign="middle" align="left"/>
<td valign="middle" align="left"/>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>ASD, Autism Spectrum Disorder; NTC, Neurotypical Controls.</p>
</fn>
<fn>
<p>Gender ratio (m:f): ASD: 12:16; NTC: 14:14; &#x3c7;<sup>2</sup>(1) = 0.287, <italic>p</italic> = .592.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<p>Participants were screened for eligibility with regard to inclusion and exclusion criteria prior to the study. Exclusion criteria for the study participants were an IQ &lt; 80, non-native speaker of German, as well as an acute depressive episode, psychotic symptoms or suicidal tendencies.</p>
<p>Regarding the language abilities of the autistic participants, it is noted that they completed the CFT 20-R and MWT-B test [see (<xref ref-type="table" rid="T2">
<bold>Table&#xa0;2</bold>
</xref>)]. The Basic Intelligence Test (CFT) is considered culturally fair because it is based on non-verbal and illustrative test tasks. It measures basic mental ability (g-factor) independent of socio-cultural and educational influences. The CFT 20-R consists of two similarly structured test parts with the four subtests: Series Continuation, Classification, Matrices, and Topological Conclusions. The Multiple Choice Vocabulary Intelligence Test (MWT-B) measures general intelligence level on the basis of vocabulary. For each item, the candidate has to find the correct German word among five given words, four of which are nonsense words.</p>
<p>The study took place at the Department of Psychiatry and Psychotherapy of the Medical Center - University of Freiburg, Germany. Participants with ASD were recruited through the outpatient clinic or from inpatient wards or after their discharge, and through the website and notices of the autism outpatient clinic.</p>
</sec>
<sec id="s2_2_3">
<label>2.2.3</label>
<title>Procedure</title>
<p>The following instruments [see (<xref ref-type="table" rid="T3">
<bold>Table&#xa0;3</bold>
</xref>)] were performed as part of the study: both the ASD and the NTC group completed self-report questionnaires on the AQ, EQ, SRS-2 Self-Report, BDI-II, BVAQ and FQLP prior to the examination. Furthermore, the SCL-90-S was administered to the NTC group only. Interviews about the psychotic symptoms and two IQ tests, the CFT 20-R and the MWT-B, were also administered to both groups before the examination. In addition, the diagnosis of the ASD group was confirmed by the ADOS-2.</p>
<table-wrap id="T3" position="float">
<label>Table&#xa0;3</label>
<caption>
<p>Instruments and test procedures used.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" align="center" rowspan="2"/>
<th valign="top" colspan="2" align="center">Groups</th>
</tr>
<tr>
<th valign="top" align="center">ASD</th>
<th valign="top" align="center">NTC</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">IQ (CFT 20-R &amp; MWT-B)</td>
<td valign="top" align="center">x</td>
<td valign="top" align="center">x</td>
</tr>
<tr>
<td valign="top" align="left">Prosody test</td>
<td valign="top" align="center">x</td>
<td valign="top" align="center">x</td>
</tr>
<tr>
<td valign="top" align="left">Audiometric test</td>
<td valign="top" align="center">x</td>
<td valign="top" align="center">x</td>
</tr>
<tr>
<td valign="top" align="left">Minimum Pitch Discrimination test</td>
<td valign="top" align="center">x</td>
<td valign="top" align="center">x</td>
</tr>
<tr>
<td valign="top" align="left">Minimum Pitch Change test</td>
<td valign="top" align="center">x</td>
<td valign="top" align="center">x</td>
</tr>
<tr>
<td valign="top" align="left">Recording of medications</td>
<td valign="top" align="center">x</td>
<td valign="top" align="center">x</td>
</tr>
<tr>
<td valign="top" align="left">ADOS-2</td>
<td valign="top" align="center">x</td>
<td valign="top" align="center">
</td>
</tr>
<tr>
<td valign="top" align="left">Interview on psychotic symptoms</td>
<td valign="top" align="center">x</td>
<td valign="top" align="center">x</td>
</tr>
<tr>
<td valign="top" align="left">AQ (self-report)</td>
<td valign="top" align="center">x</td>
<td valign="top" align="center">x</td>
</tr>
<tr>
<td valign="top" align="left">EQ (self-report)</td>
<td valign="top" align="center">x</td>
<td valign="top" align="center">x</td>
</tr>
<tr>
<td valign="top" align="left">SRS-2 (self-report)</td>
<td valign="top" align="center">x</td>
<td valign="top" align="center">x</td>
</tr>
<tr>
<td valign="top" align="left">BDI-II (self-report)</td>
<td valign="top" align="center">x</td>
<td valign="top" align="center">x</td>
</tr>
<tr>
<td valign="top" align="left">BVAQ (self-report)</td>
<td valign="top" align="center">x</td>
<td valign="top" align="center">x</td>
</tr>
<tr>
<td valign="top" align="left">Freiburg Language Pragmatics Questionnaire (self-report)</td>
<td valign="top" align="center">x</td>
<td valign="top" align="center">x</td>
</tr>
<tr>
<td valign="top" align="left">SCL-90 (self-report)</td>
<td valign="top" align="center">
</td>
<td valign="top" align="center">x</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>ASD, Autism Spectrum Disorder; NTC, Neurotypical Controls.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<p>The AQ, EQ, SRS-2 self-report and FQLP questionnaires were used to characterize autistic symptoms. The BVAQ was administered because of possible alexithymic symptoms, which are more common in ASD. The ADOS was only administered to the ASD group in order to describe the communicative and social-interactive behavior of this group. The SCL-90 was only used in the NTC group to detect signs of psychiatric disorders.</p>
<p>Participants were informed of the aim and the procedure of the study, and a short interview was conducted to exclude possible psychotic symptoms for the participants with ASD. All participants signed an informed consent. The study was approved by the ethics committee (EK-Freiburg: 558/17). It was conducted in accordance with the Declaration of Helsinki. The experimental session included a prosody test, a complementary audiometry test, a pitch discrimination task and a pitch change task assessing sensory pitch perception. Data were collected during a two-hour individual session with the participants.</p>
<sec id="s2_2_3_1">
<label>2.2.3.1</label>
<title>Prosody test</title>
<p>In the prosody test, participants were presented with short question-answer pairs consisting of the natural language question <italic>Was siehst Du?/What do you see?</italic> and the articulatory synthetic utterance serving as an answer, e.g. <italic>Bananen/bananas</italic>. The synthetic response instantiated one of the nine experimental conditions in which the three cues <italic>intonation</italic>, <italic>pause</italic>, and <italic>hesitation</italic> were either present or absent [see (<xref ref-type="table" rid="T1">
<bold>Table&#xa0;1</bold>
</xref>)].</p>
<p>The prosody test was presented to the participants via a computer program (see Appendix for the experimental design). Each participant completed 19 trials (nine levels of intended uncertainty plus nine distractors following an example stimulus). Each question-answer pair was played only once. Participants were asked to rate a) uncertainty b) naturalness, and c) comprehensibility of the synthetic response on a 5-point rating scale (1 = <italic>uncertain/little natural/little comprehensible</italic> and 5 = <italic>certain/very natural/very comprehensible</italic>). In contrast to Bellinghausen et&#xa0;al. (<xref ref-type="bibr" rid="B109">109</xref>), the reaction time was also measured when rating the response.</p>
<p>As discussed in the introduction, we measure not only the perception of certain prosodic features in terms of perceived uncertainty, but also their effect on naturalness and comprehensibility.</p>
</sec>
<sec id="s2_2_3_2">
<label>2.2.3.2</label>
<title>Audiometry</title>
<p>An audiometry test from Electronica-Technologies was used to ensure that the prosodic stimuli used were reliably recognized by the participants, and that they had no significant hearing loss. Each ear was tested separately. Sine tones (250 Hz, 500 Hz, 1000 Hz, 2000 Hz, 3000 Hz, 4000 Hz, 8000 Hz) were presented at increasing loudness via headphones.</p>
</sec>
<sec id="s2_2_3_3">
<label>2.2.3.3</label>
<title>Minimal pitch discrimination and change</title>
<p>Following Globerson et&#xa0;al. (<xref ref-type="bibr" rid="B29">29</xref>), minimal pitch discrimination was used to investigate whether two sine tones of only slightly different frequency could be perceived as different. Thus, the level of the minimal perceived tone difference could have a significant influence on the perception of prosodic intonation [see also (<xref ref-type="bibr" rid="B29">29</xref>)]. The difference between the two tones amounted to 200 Hz at the beginning and was reduced to the minimum pitch difference perceived by the participant. Thus, if hearers can only perceive large differences between the reference and the comparison tone, this could have a significant impact on the perception of prosodic intonation [see also (<xref ref-type="bibr" rid="B29">29</xref>)].</p>
<p>The minimum pitch change detection for each participant was determined by assessing the course of a tone rising or falling in frequency. The test started with tone movements of 12 Hz up or down from the starting tone of 200 Hz. For reduction of the pitch change the same staircase function as for the pitch discrimination task was used according to Globerson et&#xa0;al. (<xref ref-type="bibr" rid="B29">29</xref>).</p>
<p>By testing pitch discrimination and pitch change detection, we wanted to ensure that basic auditory perception is not impaired in hearers with ASD without II and thus could be excluded from influencing prosody perception. Therefore, both pitch tests served as a kind of control condition in order to rule out the possibility that putative group differences could be explained by differences in mere low-level auditory processing.</p>
</sec>
</sec>
<sec id="s2_2_4">
<label>2.2.4</label>
<title>Statistical analysis</title>
<p>For the sample characteristics, group differences in age and IQ were tested using t-tests, and group differences in gender were tested applying the chi-square test. Since significant deviations from normality could be expected for all other variables (minimum pitch discrimination and change, ratings of uncertainty, naturalness and comprehensibility, and their corresponding response time variables), we conducted robust tests as described and recommended by Field and Wilcox (<xref ref-type="bibr" rid="B112">112</xref>), Mair and Wilcox (<xref ref-type="bibr" rid="B113">113</xref>) and Wilcox (<xref ref-type="bibr" rid="B114">114</xref>).</p>
<p>Robust methods address two key properties of a statistical test: the probability of a false positive, also known as a Type I error, and power, the probability of detecting true differences between groups (or a true association between two or more variables). They attempt to overcome serious drawbacks when assumptions of conventional methods such as ANOVA are violated, in order to avoid misleading results and interpretations [see (<xref ref-type="bibr" rid="B115">115</xref>) for more details]. To our knowledge, robust methods do not differ from classical non-parametric techniques (such as the Wilcoxon-Mann-Whitney test) in terms of controlling for item and individual variability.</p>
<p>For the analyses of minimum pitch discrimination and change, we had a one-factorial design with &#x201c;diagnostic group&#x201d; (ASD, NTC) as an independent factor. For the ratings of uncertainty, naturalness and comprehensibility, and their corresponding response time variables, we used a 2 x 8 design with &#x201c;diagnostic group&#x201d; (ASD, NTC) as the between-subjects factor and &#x201c;prosodic condition&#x201d; (Cer, Hes, Pau, Into2, HesPau, HesInto2, PauInto2, PauInto2Hes; see <xref ref-type="table" rid="T1">
<bold>Table&#xa0;1</bold>
</xref> for a description of these conditions) as the within-subjects (repeated-measures) factor. For some analyses, we also considered the distractor trials as an additional prosodic condition and Into1 as a &#x201c;milder&#x201d; condition for an intonation (see above) that was not combined with the other two cues <italic>pause</italic> and <italic>hesitation.</italic> Therefore, only Into1 was statistically tested against Into2.</p>
<p>The 2 x 8 design was analyzed with a two-way mixed design robust test statistic [<italic>bwtrim</italic>, F-like test values, see (<xref ref-type="bibr" rid="B112">112</xref>): 29-30; (<xref ref-type="bibr" rid="B113">113</xref>): 479]. <italic>t1waybt</italic> is a robust one-way alternative with an outcome of F-like values for between-subjects effects and effect sizes [see (<xref ref-type="bibr" rid="B112">112</xref>): 28-29]. <italic>yuend</italic> is used as a robust alternative for a dependent t-test that also outputs the explanatory measure of effect size &#x3be;. Similar to Pearson correlations, &#x3be; = .10, .30, and .50 correspond to small, medium, and large effect sizes, respectively [(<xref ref-type="bibr" rid="B112">112</xref>): 25-26; (<xref ref-type="bibr" rid="B113">113</xref>): 458] [see also (<xref ref-type="bibr" rid="B114">114</xref>): 506-511 for three factor design, 2 x 2 x 8].</p>
<p>All robust tests were performed with the same following parameters (except <italic>bwtrim</italic> without bootstrapping): trimmed mean with 20% trimmed scores (tr = 0.2), the modified one-step estimator (est = &#x201c;mom&#x201d;), and the number of bootstrapping samples of 5000 (nboot = 5000). In order to control the overall probability of a Type I error (false positive) for multiple hypothesis tests, <italic>post-hoc</italic> tests are reported after Bonferroni adjustment.</p>
<p>All statistical analyses were performed with R version 4.1.2 using the R package WRS2 version 1.1-3 with its collection of robust statistical methods. A significance level of &#x3b1; = .05 was used for hypothesis testing.</p>
</sec>
</sec>
</sec>
<sec id="s3" sec-type="results">
<label>3</label>
<title>Results</title>
<sec id="s3_1">
<label>3.1</label>
<title>Audiometry test</title>
<p>All participants in the study had unaffected hearing abilities at the frequencies measured.</p>
</sec>
<sec id="s3_2">
<label>3.2</label>
<title>Minimal pitch discrimination and change</title>
<p>There were no significant differences between the ASD and NTC groups in either pitch discrimination or pitch change perception or reaction time [see (<xref ref-type="table" rid="T4">
<bold>Table&#xa0;4</bold>
</xref>)]. However, the ASD group descriptively achieved lower values for pitch discrimination and change (in Hertz) than the NTC. There was only a minimally longer response time for pitch change detection in the ASD group than in the NTC. No significant differences were observed.</p>
<table-wrap id="T4" position="float">
<label>Table&#xa0;4</label>
<caption>
<p>Test results for pitch discrimination and for pitch change. Minimum pitch discrimination and change in Hertz and reaction times in milliseconds.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" align="center" rowspan="2"/>
<th valign="top" colspan="3" align="center">ASD</th>
<th valign="top" colspan="3" align="center">NTC</th>
<th valign="top" align="center" rowspan="2">
<italic>Test Stat<xref ref-type="table-fn" rid="fnT4_1">
<sup>a</sup>
</xref>
</italic>
</th>
<th valign="top" align="center" rowspan="2">
<italic>p<xref ref-type="table-fn" rid="fnT4_1">
<sup>a</sup>
</xref>
</italic>
</th>
<th valign="top" align="center" rowspan="2">
<italic>ES<xref ref-type="table-fn" rid="fnT4_1">
<sup>a</sup>
</xref>
</italic>
</th>
</tr>
<tr>
<th valign="top" align="center">
<italic>n</italic>
</th>
<th valign="top" align="center">
<italic>M</italic>
</th>
<th valign="top" align="center">
<italic>SD</italic>
</th>
<th valign="top" align="center">
<italic>n</italic>
</th>
<th valign="top" align="center">
<italic>M</italic>
</th>
<th valign="top" align="center">
<italic>SD</italic>
</th>
</tr>
</thead>
<tbody>
<tr>
<th valign="top" colspan="10" align="left">Pitch (in Hz)</th>
</tr>
<tr>
<td valign="top" align="left">Discrimination</td>
<td valign="top" align="center">27</td>
<td valign="top" align="center">43.102</td>
<td valign="top" align="center">57.861</td>
<td valign="top" align="center">28</td>
<td valign="top" align="center">58.321</td>
<td valign="top" align="center">81.071</td>
<td valign="top" align="center">0.193</td>
<td valign="top" align="center">.668</td>
<td valign="top" align="center">0.101</td>
</tr>
<tr>
<td valign="top" align="left">Change</td>
<td valign="top" align="center">26</td>
<td valign="top" align="center">3.092</td>
<td valign="top" align="center">3.830</td>
<td valign="top" align="center">27</td>
<td valign="top" align="center">3.520</td>
<td valign="top" align="center">4.330</td>
<td valign="top" align="center">0.019</td>
<td valign="top" align="center">.900</td>
<td valign="top" align="center">0.026</td>
</tr>
<tr>
<th valign="top" colspan="10" align="left">RT (in ms)</th>
</tr>
<tr>
<td valign="top" align="left">Discrimination</td>
<td valign="top" align="center">27</td>
<td valign="top" align="center">2469</td>
<td valign="top" align="center">1700</td>
<td valign="top" align="center">28</td>
<td valign="top" align="center">2472</td>
<td valign="top" align="center">1069</td>
<td valign="top" align="center">1.210</td>
<td valign="top" align="center">.280</td>
<td valign="top" align="center">0.201</td>
</tr>
<tr>
<td valign="top" align="left">Change</td>
<td valign="top" align="center">26</td>
<td valign="top" align="center">1241</td>
<td valign="top" align="center">842</td>
<td valign="top" align="center">27</td>
<td valign="top" align="center">1201</td>
<td valign="top" align="center">599</td>
<td valign="top" align="center">0.465</td>
<td valign="top" align="center">.510</td>
<td valign="top" align="center">0.140</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>ASD, Autism Spectrum Disorder; NTC, Neurotypical Controls.</p>
</fn>
<fn>
<p>Reasons for missing data: In the ASD group two participants dropped out before the end of the pitch tasks (two for the pitch change task and one for the pitch discrimination task); for one participant of the NTC group saving of the data failed for the pitch change task.</p>
</fn>
<fn id="fnT4_1">
<label>a</label>
<p>Values for Test Stat, p and ES (explanatory measure of effect size) are from robust ANOVA.</p>
</fn>
</table-wrap-foot>
</table-wrap>
</sec>
<sec id="s3_3">
<label>3.3</label>
<title>Prosody test</title>
<sec id="s3_3_1">
<label>3.3.1</label>
<title>Perception of uncertainty</title>
<sec id="s3_3_1_1">
<label>3.3.1.1</label>
<title>Distractor analysis</title>
<p>Before describing the results for the ratings of the critical stimuli&#xa0;in terms of perceived uncertainty, naturalness, and comprehensibility, we report on the ratings of the distractor items. As mentioned above, we used nine distractor items, all of which were exclusively generated in an intended certain way of speaking. As shown in <xref ref-type="table" rid="T5">
<bold>Table&#xa0;5</bold>
</xref>, there was no significant difference between the ratings of uncertainty for the distractor trial condition Dist and the prosodic uncertainty condition Cer (<italic>M</italic> = 4.23, <italic>SD</italic> = 0.65 vs. <italic>M</italic> = 4.18, <italic>SD</italic> = 0.97; robust test statistic = -0.71, <italic>p</italic> = .482) for the whole sample, and also the pattern of the results of these two with all other conditions is remarkably similar (see <xref ref-type="table" rid="T5">
<bold>Table&#xa0;5</bold>
</xref>). This was also true for the ratings of uncertainty within the ASD group (Dist: <italic>M</italic> = 4.31, <italic>SD</italic> = 0.63, Cer: <italic>M</italic> = 4.21, <italic>SD</italic> = 0.92; robust test statistic = -0.18, <italic>p</italic>&#xa0;=&#xa0;.861) as well as in the NTC group (Dist: <italic>M</italic> = 4.15, <italic>SD</italic> = 0.67, Cer: <italic>M</italic>&#xa0;= 4.14, <italic>SD</italic> = 1.04; robust test statistic = -0.91, <italic>p</italic> = .375).</p>
<table-wrap id="T5" position="float">
<label>Table&#xa0;5</label>
<caption>
<p>Pairwise comparisons of ratings of uncertainty between prosodic uncertainty conditions (independent of diagnostic group).</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" align="left">Test stat<xref ref-type="table-fn" rid="fnT5_1">
<sup>a</sup>
</xref>
<break/>
<italic>p</italic>
<xref ref-type="table-fn" rid="fnT5_2">
<sup>b</sup>
</xref>
<break/>ES &#x3be;</th>
<th valign="top" align="center">
<italic>M</italic>
</th>
<th valign="top" align="center">
<italic>SD</italic>
</th>
<th valign="top" align="center">1.<break/>Dist</th>
<th valign="top" align="center">2.<break/>Cer</th>
<th valign="top" align="center">3.<break/>Hes</th>
<th valign="top" align="center">4.<break/>Pau</th>
<th valign="top" align="center">5.<break/>Into1</th>
<th valign="top" align="center">6. Into2</th>
<th valign="top" align="center">7.<break/>HesPau</th>
<th valign="top" align="center">8. HesInto2</th>
<th valign="top" align="center">9. PauInto2</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">1. Dist</td>
<td valign="top" align="center">4.234</td>
<td valign="top" align="center">0.651</td>
<td valign="top" align="center">&#x2013;</td>
<td valign="top" align="center">&#x2013;</td>
<td valign="top" align="center">&#x2013;</td>
<td valign="top" align="center">&#x2013;</td>
<td valign="top" align="center">&#x2013;</td>
<td valign="top" align="center">&#x2013;</td>
<td valign="top" align="center">&#x2013;</td>
<td valign="top" align="center">&#x2013;</td>
<td valign="top" align="center">&#x2013;</td>
</tr>
<tr>
<td valign="top" align="left">2. Cer</td>
<td valign="top" align="center">4.179</td>
<td valign="top" align="center">0.974</td>
<td valign="top" align="center">-0.711<break/>.482<break/>0.074</td>
<td valign="top" align="center">&#x2013;</td>
<td valign="top" align="center">&#x2013;</td>
<td valign="top" align="center">&#x2013;</td>
<td valign="top" align="center">&#x2013;</td>
<td valign="top" align="center">&#x2013;</td>
<td valign="top" align="center">&#x2013;</td>
<td valign="top" align="center">&#x2013;</td>
<td valign="top" align="center">&#x2013;</td>
</tr>
<tr>
<td valign="top" align="left">3. Hes</td>
<td valign="top" align="center">2.482</td>
<td valign="top" align="center">1.236</td>
<td valign="top" align="center">
<bold>10.684</bold>
<break/>
<bold>&lt;.0001</bold>
<break/>
<bold>0.873</bold>
</td>
<td valign="top" align="center">
<bold>9.485</bold>
<break/>
<bold>&lt;.0001</bold>
<break/>
<bold>0.790</bold>
</td>
<td valign="top" align="center">&#x2013;</td>
<td valign="top" align="center">&#x2013;</td>
<td valign="top" align="center">&#x2013;</td>
<td valign="top" align="center">&#x2013;</td>
<td valign="top" align="center">&#x2013;</td>
<td valign="top" align="center">&#x2013;</td>
<td valign="top" align="center">&#x2013;</td>
</tr>
<tr>
<td valign="top" align="left">4. Pau</td>
<td valign="top" align="center">3.214</td>
<td valign="top" align="center">1.261</td>
<td valign="top" align="center">
<bold>6.067</bold>
<break/>
<bold>&lt;.0001</bold>
<break/>
<bold>0.748</bold>
</td>
<td valign="top" align="center">
<bold>5.720</bold>
<break/>
<bold>&lt;.0001</bold>
<break/>
<bold>0.505</bold>
</td>
<td valign="top" align="center">
<bold>-3.708</bold>
<break/>
<bold>.0008</bold>
<break/>
<bold>0.427</bold>
</td>
<td valign="top" align="center">&#x2013;</td>
<td valign="top" align="center">&#x2013;</td>
<td valign="top" align="center">&#x2013;</td>
<td valign="top" align="center">&#x2013;</td>
<td valign="top" align="center">&#x2013;</td>
<td valign="top" align="center">&#x2013;</td>
</tr>
<tr>
<td valign="top" align="left">5. Into1</td>
<td valign="top" align="center">3.339</td>
<td valign="top" align="center">1.225</td>
<td valign="top" align="center">
<bold>4.211</bold>
<break/>
<bold>.0002</bold>
<break/>
<bold>0.607</bold>
</td>
<td valign="top" align="center">
<bold>4.331</bold>
<break/>
<bold>.0001</bold>
<break/>
<bold>0.589</bold>
</td>
<td valign="top" align="center">-3.153<break/>.003<break/>0.528</td>
<td valign="top" align="center">-0.591<break/>.558<break/>0.070</td>
<td valign="top" align="center">&#x2013;</td>
<td valign="top" align="center">&#x2013;</td>
<td valign="top" align="center">&#x2013;</td>
<td valign="top" align="center">&#x2013;</td>
<td valign="top" align="center">&#x2013;</td>
</tr>
<tr>
<td valign="top" align="left">6. Into2</td>
<td valign="top" align="center">2.732</td>
<td valign="top" align="center">1.408</td>
<td valign="top" align="center">
<bold>6.773</bold>
<break/>
<bold>&lt;.0001</bold>
<break/>
<bold>0.715</bold>
</td>
<td valign="top" align="center">
<bold>7.555</bold>
<break/>
<bold>&lt;.0001</bold>
<break/>
<bold>0.674</bold>
</td>
<td valign="top" align="center">-0.951<break/>.348<break/>0.109</td>
<td valign="top" align="center">1.769<break/>.086<break/>0.279</td>
<td valign="top" align="center">2.420<break/>.021<break/>0.284</td>
<td valign="top" align="center">&#x2013;</td>
<td valign="top" align="center">&#x2013;</td>
<td valign="top" align="center">&#x2013;</td>
<td valign="top" align="center">&#x2013;</td>
</tr>
<tr>
<td valign="top" align="left">7. HesPau</td>
<td valign="top" align="center">2.143</td>
<td valign="top" align="center">1.212</td>
<td valign="top" align="center">
<bold>12.112</bold>
<break/>
<bold>&lt;.0001</bold>
<break/>
<bold>0.755</bold>
</td>
<td valign="top" align="center">
<bold>12.232</bold>
<break/>
<bold>&lt;.0001</bold>
<break/>
<bold>0.766</bold>
</td>
<td valign="top" align="center">2.499<break/>.018<break/>0.238</td>
<td valign="top" align="center">
<bold>6.277</bold>
<break/>
<bold>&lt;.0001</bold>
<break/>
<bold>0.498</bold>
</td>
<td valign="top" align="center">
<bold>4.445</bold>
<break/>
<bold>.0001</bold>
<break/>
<bold>0.573</bold>
</td>
<td valign="top" align="center">2.625<break/>.013<break/>0.281</td>
<td valign="top" align="center">&#x2013;</td>
<td valign="top" align="center">&#x2013;</td>
<td valign="top" align="center">&#x2013;</td>
</tr>
<tr>
<td valign="top" align="left">8. HesInto2</td>
<td valign="top" align="center">1.911</td>
<td valign="top" align="center">1.133</td>
<td valign="top" align="center">
<bold>12.660</bold>
<break/>
<bold>&lt;.0001</bold>
<break/>
<bold>0.868</bold>
</td>
<td valign="top" align="center">
<bold>13.767</bold>
<break/>
<bold>&lt;.0001</bold>
<break/>
<bold>0.818</bold>
</td>
<td valign="top" align="center">3.353<break/>.002<break/>0.378</td>
<td valign="top" align="center">
<bold>5.582</bold>
<break/>
<bold>&lt;.0001</bold>
<break/>
<bold>0.598</bold>
</td>
<td valign="top" align="center">
<bold>5.859</bold>
<break/>
<bold>&lt;.0001</bold>
<break/>
<bold>0.671</bold>
</td>
<td valign="top" align="center">
<bold>4.624</bold>
<break/>
<bold>&lt;.0001</bold>
<break/>
<bold>0.522</bold>
</td>
<td valign="top" align="center">1.130<break/>.267<break/>0.143</td>
<td valign="top" align="center">&#x2013;</td>
<td valign="top" align="center">&#x2013;</td>
</tr>
<tr>
<td valign="top" align="left">9. PauInto2</td>
<td valign="top" align="center">2.018</td>
<td valign="top" align="center">1.258</td>
<td valign="top" align="center">
<bold>12.710</bold>
<break/>
<bold>&lt;.0001</bold>
<break/>
<bold>0.855</bold>
</td>
<td valign="top" align="center">
<bold>13.025</bold>
<break/>
<bold>&lt;.0001</bold>
<break/>
<bold>0.805</bold>
</td>
<td valign="top" align="center">3.096<break/>.004<break/>0.364</td>
<td valign="top" align="center">
<bold>6.192</bold>
<break/>
<bold>&lt;.0001</bold>
<break/>
<bold>0.574</bold>
</td>
<td valign="top" align="center">
<bold>6.078</bold>
<break/>
<bold>&lt;.0001</bold>
<break/>
<bold>0.646</bold>
</td>
<td valign="top" align="center">
<bold>4.071</bold>
<break/>
<bold>.0003</bold>
<break/>
<bold>0.369</bold>
</td>
<td valign="top" align="center">1.105<break/>.277<break/>0.128</td>
<td valign="top" align="center">-0.150<break/>.881<break/>0.016</td>
<td valign="top" align="center">&#x2013;</td>
</tr>
<tr>
<td valign="top" align="left">10. PauHesInto2</td>
<td valign="top" align="center">1.589</td>
<td valign="top" align="center">0.910</td>
<td valign="top" align="center">
<bold>20.333</bold>
<break/>
<bold>&lt;.0001</bold>
<break/>
<bold>0.935</bold>
</td>
<td valign="top" align="center">
<bold>19.727</bold>
<break/>
<bold>&lt;.0001</bold>
<break/>
<bold>0.891</bold>
</td>
<td valign="top" align="center">
<bold>5.639</bold>
<break/>
<bold>&lt;.0001</bold>
<break/>
<bold>0.556</bold>
</td>
<td valign="top" align="center">
<bold>7.822</bold>
<break/>
<bold>&lt;.0001</bold>
<break/>
<bold>0.712</bold>
</td>
<td valign="top" align="center">
<bold>7.812</bold>
<break/>
<bold>&lt;.0001</bold>
<break/>
<bold>0.780</bold>
</td>
<td valign="top" align="center">
<bold>6.621</bold>
<break/>
<bold>&lt;.0001</bold>
<break/>
<bold>0.696</bold>
</td>
<td valign="top" align="center">2.995<break/>.005<break/>0.334</td>
<td valign="top" align="center">2.436<break/>.020<break/>0.197</td>
<td valign="top" align="center">2.350<break/>.025<break/>0.214</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Test stat, Test statistic: if sign is positive then row condition has lower ratings of uncertainty than column condition and vice versa; ES &#x3be;, explanatory measure of effect size: Analogous to Pearson correlations, &#x3be; = .10, .30, and .50 correspond to small, medium, and large effect sizes.</p>
</fn>
<fn id="fnT5_1">
<label>a</label>
<p>n = 56 and df = 33 for all pairwise contrasts.</p>
</fn>
<fn id="fnT5_2">
<label>b</label>
<p>The significance cutoff via Bonferroni correction is about p<sub>cutoff</sub> = .05/45 = .00111.</p>
<p>Significant differences are bold.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<p>With respect to response time for the ratings of uncertainty, participants needed more time for the distractor trials than for the utterances in the condition Cer (<italic>M</italic> = 4511, <italic>SD</italic> = 2365 vs. <italic>M</italic> = 4230, <italic>SD</italic> = 4508; robust test statistic = 2.80, <italic>p</italic> = .008, ES = 0.27). This difference was also significant within the ASD group (Dist: <italic>M</italic> = 4966, <italic>SD</italic> = 2689; Cer: <italic>M</italic> = 4134, <italic>SD</italic> = 4866; robust test statistic = 2.95, <italic>p</italic> = .009, ES = 0.45), but not within the NTC group (Dist: <italic>M</italic> = 4056, <italic>SD</italic> = 1933; Cer: <italic>M</italic> = 4325, <italic>SD</italic> = 4208; robust test statistic = 0.95, <italic>p</italic> = .360, ES = 0.14). The distractors differ from the stimuli words in their syllable structure. These phonological discrepancies could explain the differences in reaction times.</p>
</sec>
<sec id="s3_3_1_2">
<label>3.3.1.2</label>
<title>Ratings of uncertainty of the 2 x 8 design</title>
<p>In the statistical analysis of the ratings of uncertainty with robust ANOVA, the main effect of diagnostic group was not significant (robust test statistic <italic>F</italic>(1, 32) = 2.10, <italic>p</italic> = .160), whereas the main effect of prosodic uncertainty conditions was significant (robust test statistic <italic>F</italic>(7, 25) = 43.20, <italic>p</italic> &lt;.0001). However, the interaction between these two factors was far from being significant (robust test statistic <italic>F</italic>(7, 25) = 1.30, <italic>p</italic> = .27). In <xref ref-type="fig" rid="f4">
<bold>Figure&#xa0;4</bold>
</xref>, means of the ratings of uncertainty for all factorial combinations are shown. Due to the non-significant interaction, <italic>post-hoc</italic> comparisons are only reported for the different levels of the significant condition main effect and for the hypothesized group main effect, but not for the non-significant interaction.</p>
<fig id="f4" position="float">
<label>Figure&#xa0;4</label>
<caption>
<p>Means of uncertainty ratings (1 = uncertain, 5 = certain); dashed lines denote the median. Abbreviations for the prosodic uncertainty conditions are explained in <xref ref-type="table" rid="T1">
<bold>Table&#xa0;1</bold>
</xref>.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpsyt-15-1347913-g004.tif"/>
</fig>
<p>In <xref ref-type="table" rid="T5">
<bold>Table&#xa0;5</bold>
</xref>, all pairwise comparisons between the prosodic uncertainty conditions (including the distractor trials) are reported. There are noteworthy differences between the condition Cer (and, as already mentioned above, the distractors Dist) and all the other prosody conditions. Also, most of the 2-cue prosody conditions (HesPau, HesInto2, PauInto2) were judged to be more uncertain than the 1-cue prosody conditions (Hes, Pau, Into1, Into2). The 3-cue prosody condition PauHesInto2, which had descriptively the lowest mean, elicited significantly lower ratings of uncertainty than all the 1-cue prosody conditions, whereas differences to the 2-cue prosody conditions were not significant after Bonferroni correction. All significant differences between conditions had medium up to very large effect sizes (all &#x3be;s &gt;.37).</p>
<p>The top part of <xref ref-type="table" rid="T6">
<bold>Table&#xa0;6</bold>
</xref> shows all the contrasts in ratings of uncertainty between the two groups ASD and NTC for each prosodic uncertainty condition. The corresponding means are shown in <xref ref-type="fig" rid="f4">
<bold>Figure&#xa0;4</bold>
</xref>. As we had nine different conditions and three combined comparisons, the number of comparisons was twelve, and therefore our p-values should be below .05/12 = .004166 in order to be considered significant after Bonferroni correction. Descriptively, the ASD group had higher ratings of uncertainty in all conditions except for the condition Pau. However, the most pronounced between-group difference, for the combined condition &#x201c;All cues&#x201d;, was not significant after Bonferroni correction.</p>
<table-wrap id="T6" position="float">
<label>Table&#xa0;6</label>
<caption>
<p>Differences between ratings of uncertainty for ASD and NTC.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="left" rowspan="2">Ratings of Uncertainty</th>
<th valign="middle" align="center">ASD (<italic>n</italic>=28)</th>
<th valign="middle" align="center">NTC (<italic>n</italic>=28)</th>
<th valign="middle" align="center" rowspan="2">
<italic>Test Stat</italic>
</th>
<th valign="middle" align="center" rowspan="2">
<italic>p<xref ref-type="table-fn" rid="fnT6_1">
<sup>a</sup>
</xref>
</italic>
</th>
<th valign="middle" align="center" rowspan="2">ES &#x3be;</th>
</tr>
<tr>
<th valign="middle" align="center">
<italic>M</italic> (<italic>SD</italic>)</th>
<th valign="middle" align="center">
<italic>M</italic> (<italic>SD</italic>)</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="left">Cer</td>
<td valign="middle" align="center">4.214 (0.917)</td>
<td valign="middle" align="center">4.143 (1.044)</td>
<td valign="middle" align="center">0.068</td>
<td valign="middle" align="center">.777</td>
<td valign="middle" align="center">0.050</td>
</tr>
<tr>
<td valign="top" align="left">Hes</td>
<td valign="top" align="center">2.750 (1.323)</td>
<td valign="top" align="center">2.214 (1.101)</td>
<td valign="top" align="center">2.328</td>
<td valign="top" align="center">.136</td>
<td valign="top" align="center">0.308</td>
</tr>
<tr>
<td valign="top" align="left">Pau</td>
<td valign="top" align="center">3.107 (1.166)</td>
<td valign="top" align="center">3.321 (1.362)</td>
<td valign="top" align="center">0.760</td>
<td valign="top" align="center">.392</td>
<td valign="top" align="center">0.196</td>
</tr>
<tr>
<td valign="top" align="left">Into1</td>
<td valign="top" align="center">3.500 (1.171)</td>
<td valign="top" align="center">3.179 (1.278)</td>
<td valign="top" align="center">0.861</td>
<td valign="top" align="center">.358</td>
<td valign="top" align="center">0.177</td>
</tr>
<tr>
<td valign="top" align="left">Into2</td>
<td valign="top" align="center">3.071 (1.412)</td>
<td valign="top" align="center">2.393 (1.343)</td>
<td valign="top" align="center">2.662</td>
<td valign="top" align="center">.112</td>
<td valign="top" align="center">0.331</td>
</tr>
<tr>
<td valign="top" align="left">HesPau</td>
<td valign="top" align="center">2.321 (1.278)</td>
<td valign="top" align="center">1.964 (1.138)</td>
<td valign="top" align="center">0.836</td>
<td valign="top" align="center">.363</td>
<td valign="top" align="center">0.208</td>
</tr>
<tr>
<td valign="top" align="left">HesInto2</td>
<td valign="top" align="center">2.071 (1.184)</td>
<td valign="top" align="center">1.750 (1.076)</td>
<td valign="top" align="center">1.247</td>
<td valign="top" align="center">.266</td>
<td valign="top" align="center">0.212</td>
</tr>
<tr>
<td valign="top" align="left">PauInto2</td>
<td valign="top" align="center">2.250 (1.295)</td>
<td valign="top" align="center">1.786 (1.197)</td>
<td valign="top" align="center">2.660</td>
<td valign="top" align="center">.117</td>
<td valign="top" align="center">0.336</td>
</tr>
<tr>
<td valign="top" align="left">PauHesInto2</td>
<td valign="top" align="center">1.679 (0.983)</td>
<td valign="top" align="center">1.500 (0.839)</td>
<td valign="top" align="center">0.343</td>
<td valign="top" align="center">.501</td>
<td valign="top" align="center">0.156</td>
</tr>
<tr>
<td valign="top" align="left">MEAN</td>
<td valign="top" align="left"/>
<td valign="top" align="left"/>
<td valign="top" align="left"/>
<td valign="top" align="left"/>
<td valign="top" align="left"/>
</tr>
<tr>
<td valign="top" align="left">Single cues</td>
<td valign="top" align="center">2.976 (0.884)</td>
<td valign="top" align="center">2.643 (0.934)</td>
<td valign="top" align="center">2.343</td>
<td valign="top" align="center">.127</td>
<td valign="top" align="center">0.297</td>
</tr>
<tr>
<td valign="top" align="left">Two combined cues</td>
<td valign="top" align="center">2.214 (1.019)</td>
<td valign="top" align="center">1.833 (0.918)</td>
<td valign="top" align="center">3.392</td>
<td valign="top" align="center">.077</td>
<td valign="top" align="center">0.340</td>
</tr>
<tr>
<td valign="top" align="left">All cues</td>
<td valign="top" align="center">2.464 (0.868)</td>
<td valign="top" align="center">2.133 (0.826)</td>
<td valign="top" align="center">4.133</td>
<td valign="top" align="center">.043</td>
<td valign="top" align="center">0.333</td>
</tr>
<tr>
<th valign="middle" align="left" rowspan="2">Uncertainty reaction time [ms]</th>
<th valign="middle" align="center">ASD (<italic>n</italic>=28)</th>
<th valign="middle" align="center">NTC (<italic>n</italic>=28)</th>
<th valign="middle" align="center" rowspan="2">
<italic>Test Stat</italic>
</th>
<th valign="middle" align="center" rowspan="2">
<italic>p<xref ref-type="table-fn" rid="fnT6_1">
<sup>a</sup>
</xref>
</italic>
</th>
<th valign="middle" align="center" rowspan="2">ES &#x3be;</th>
</tr>
<tr>
<th valign="middle" align="center">
<italic>M</italic> (<italic>SD</italic>)</th>
<th valign="middle" align="center">
<italic>M</italic> (<italic>SD</italic>)</th>
</tr>
<tr>
<td valign="middle" align="left">Cer</td>
<td valign="middle" align="center">4134 (4866)</td>
<td valign="middle" align="center">4325 (4208)</td>
<td valign="middle" align="center">0.603</td>
<td valign="middle" align="center">.441</td>
<td valign="middle" align="center">0.138</td>
</tr>
<tr>
<td valign="top" align="left">Hes</td>
<td valign="top" align="center">5109 (3697)</td>
<td valign="top" align="center">3661 (2901)</td>
<td valign="top" align="center">3.149</td>
<td valign="top" align="center">.102</td>
<td valign="top" align="center">0.473</td>
</tr>
<tr>
<td valign="top" align="left">Pau</td>
<td valign="top" align="center">3504 (3076)</td>
<td valign="top" align="center">3342 (3138)</td>
<td valign="top" align="center">1.242</td>
<td valign="top" align="center">.264</td>
<td valign="top" align="center">0.221</td>
</tr>
<tr>
<td valign="top" align="left">Into1</td>
<td valign="top" align="center">5095 (4604)</td>
<td valign="top" align="center">3537 (1635)</td>
<td valign="top" align="center">0.098</td>
<td valign="top" align="center">.767</td>
<td valign="top" align="center">0.080</td>
</tr>
<tr>
<td valign="top" align="left">Into2</td>
<td valign="top" align="center">3986 (2293)</td>
<td valign="top" align="center">3667 (2384)</td>
<td valign="top" align="center">0.890</td>
<td valign="top" align="center">.341</td>
<td valign="top" align="center">0.200</td>
</tr>
<tr>
<td valign="top" align="left">HesPau</td>
<td valign="top" align="center">4759 (4238)</td>
<td valign="top" align="center">4409 (5932)</td>
<td valign="top" align="center">0.429</td>
<td valign="top" align="center">.520</td>
<td valign="top" align="center">0.137</td>
</tr>
<tr>
<td valign="top" align="left">HesInto2</td>
<td valign="top" align="center">4683 (2537)</td>
<td valign="top" align="center">2629 (1165)</td>
<td valign="top" align="center">9.880</td>
<td valign="top" align="center">.014</td>
<td valign="top" align="center">0.656</td>
</tr>
<tr>
<td valign="top" align="left">PauInto2</td>
<td valign="top" align="center">3469 (1909)</td>
<td valign="top" align="center">2992 (1318)</td>
<td valign="top" align="center">0.582</td>
<td valign="top" align="center">.466</td>
<td valign="top" align="center">0.160</td>
</tr>
<tr>
<td valign="top" align="left">PauHesInto2</td>
<td valign="top" align="center">4891 (5530)</td>
<td valign="top" align="center">2974 (1839)</td>
<td valign="top" align="center">2.781</td>
<td valign="top" align="center">.110</td>
<td valign="top" align="center">0.325</td>
</tr>
<tr>
<td valign="top" align="left">MEAN</td>
<td valign="top" align="center"/>
<td valign="top" align="center"/>
<td valign="top" align="center"/>
<td valign="top" align="center"/>
<td valign="top" align="center"/>
</tr>
<tr>
<td valign="top" align="left">Single cues</td>
<td valign="top" align="center">4200 (2085)</td>
<td valign="top" align="center">3557 (1981)</td>
<td valign="top" align="center">2.900</td>
<td valign="top" align="center">.095</td>
<td valign="top" align="center">0.337</td>
</tr>
<tr>
<td valign="top" align="left">Two combined cues</td>
<td valign="top" align="center">4304 (2220)</td>
<td valign="top" align="center">3344 (2161)</td>
<td valign="top" align="center">4.361</td>
<td valign="top" align="center">.050</td>
<td valign="top" align="center">0.411</td>
</tr>
<tr>
<td valign="top" align="left">All cues</td>
<td valign="top" align="center">4343 (2104)</td>
<td valign="top" align="center">3382 (1698)</td>
<td valign="top" align="center">4.343</td>
<td valign="top" align="center">.044</td>
<td valign="top" align="center">0.398</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>ES, effect size; abbreviations for intended uncertainty levels are explained in <xref ref-type="table" rid="T1">
<bold>Table&#xa0;1</bold>
</xref>. Means&#xa0;are calculated for single cues: Hes, Into2, Pau; for two combined cues: HesPau, HesInto2, PauInto2; for all cues: eight single, combined and triple uncertainty cues together. Reaction times are listed in ms (milliseconds).</p>
</fn>
<fn>
<p>The significance cutoff via Bonferroni correction is approximately p<sub>cutoff</sub> = .05/12 = .004166.</p>
</fn>
<fn id="fnT6_1">
<label>a</label>
<p>Values for Test Stat, p and ES (explanatory measure of effect size) are from robust tests. ES&#xa0;&#x3be;&#xa0;= explanatory measure of effect size: Analogous to Pearson&#x2019;s correlations, &#x3be; = .10, .30, and .50 correspond to small, medium, and large effect sizes.</p>
<p>Mean values for uncertainty perception are shown in the subtable on top, values for reaction times in milliseconds are listed in the subtable below.</p>
</fn>
</table-wrap-foot>
</table-wrap>
</sec>
<sec id="s3_3_1_3">
<label>3.3.1.3</label>
<title>Response times of ratings of uncertainty for the 2 x 8 design</title>
<p>In the statistical analysis of the response times of the ratings of uncertainty with robust ANOVA, the two main effects and the interaction were not significant (main effect &#x201c;diagnostic group&#x201d;: robust test statistic <italic>F</italic>(1, 30) = 3.40, <italic>p</italic> = .074; main effect &#x201c;prosodic uncertainty condition&#x201d;: <italic>F</italic>(7, 22) = 2.30, <italic>p</italic> = .061; interaction: <italic>F</italic>(7, 22) = 1.70, <italic>p</italic> = .163).</p>
<p>Descriptive statistics for all contrasts between the two groups ASD and NTC for each prosodic uncertainty condition are presented in the lower part of <xref ref-type="table" rid="T6">
<bold>Table&#xa0;6</bold>
</xref>. As can be seen, the ASD group needed descriptively more time to reach ratings of uncertainty in almost all prosodic uncertainty conditions (except for the condition &#x201c;Cer&#x201d;). The largest difference can be seen in the condition HesInto2: The ASD group took almost twice as long (4683 ms) as the NTC group (2629 ms). Note that this difference is no longer significant after Bonferroni correction (robust test statistic = 9.90, <italic>p</italic> = .014, ES = 0.66).</p>
<p>When integrating the data on perceptual judgments for single cues, two combined cues, and all the cues, the following was observed: the mean for the ASD group was always higher compared to the NTC, i.e. the ASD group needed more time to rate the different levels of uncertainty than the NTC, but the differences were no longer significant after Bonferroni correction.</p>
</sec>
<sec id="s3_3_1_4">
<label>3.3.1.4</label>
<title>Exploratory analyses: effect of gender, IQ, and severity of autistic symptoms on the processing of uncertainty cues</title>
<p>In order to assess whether or not other variables might influence the processing of uncertainty cues, we also conducted exploratory statistical analyses with the possible impact factors of gender, IQ, and degree of autistic symptom severity.</p>
<p>IQ. Concerning the IQ, we computed Spearman rank-order correlations (<italic>r<sub>s</sub>
</italic>) for both IQ measures (CFT 20-R and MWT-B) with the ratings of uncertainty and also for the response times within each experimental uncertainty cue condition for the total sample and additionally also for the NTC and ASD groups separately. For the IQ measures, we found no significant Spearman rank-order correlations between IQ and ratings of uncertainty for the total sample (CFT 20-R: all <italic>r<sub>s</sub>
</italic> in [-.167; +.160], all <italic>p</italic>s &gt;.219; MWT-B: all <italic>r<sub>s</sub>
</italic> in [-.139; +.092], all <italic>p</italic>s &gt;.309) as well as for both groups (ASD: CFT 20-R: all <italic>r<sub>s</sub>
</italic> in [-.288; +.366], all <italic>p</italic>s &gt;.055; MWT-B: all <italic>r<sub>s</sub>
</italic> in [-.202; +.067], all <italic>p</italic>s &gt;.302; NTC: CFT 20-R: all <italic>r<sub>s</sub>
</italic> in [-.157; +.369], all <italic>p</italic>s &gt;.053; MWT-B: all <italic>r<sub>s</sub>
</italic> in [-.125; +.259], all <italic>p</italic>s &gt;.183).</p>
<p>There were no significant correlations for the response times of the ratings of uncertainty with the IQ measure CFT 20-R (Total: all <italic>r<sub>s</sub>
</italic> in [-.199; +.145], all <italic>p</italic>s &gt;.141; ASD: all <italic>r<sub>s</sub>
</italic> in [-.364; +.100], all <italic>p</italic>s&#xa0;&gt;.057; NTC: all <italic>r<sub>s</sub>
</italic> in [-.325; +.350], all <italic>p</italic>s &gt;.068). Similarly, for the MWT-B, almost all correlations were not significant except for two coefficients in the NTC group (Total: all <italic>r<sub>s</sub>
</italic> in [-.083; +.156], all <italic>p</italic>s&#xa0;&gt;.251; ASD: all <italic>r<sub>s</sub>
</italic> in [-.326; +.166], all <italic>p</italic>s &gt;.091; NTC: all <italic>r<sub>s</sub>
</italic> in [+.060; +.459], <italic>r<sub>s</sub>
</italic> = +.459, <italic>p</italic> = .014 in condition Hes and <italic>r<sub>s</sub>
</italic> = +.459, <italic>p</italic>&#xa0;= .014 in condition PauHesInto2, all other <italic>p</italic>s &gt;.120). It should be noted that all mentioned p-values are uncorrected with respect to multiple testing.</p>
<p>Degree of autistic symptom severity. As the degree of autistic symptom severity is strongly associated with the diagnostic group membership (see <xref ref-type="table" rid="T2">
<bold>Table&#xa0;2</bold>
</xref>), it is useful to check for correlations within diagnostic groups only in order to assess whether or not this variable has an additional influence on the processing characteristics of uncertainty cues. For the ratings of uncertainty with the autistic symptom severity measure SRS-2 Adult Self-Report there were no significant correlations except for the conditions Hes, Pau, and HesPau within the ASD group, and for the condition Cer within the NTC group (ASD: all <italic>r<sub>s</sub>
</italic> in [-.314; +.627], <italic>r<sub>s</sub>
</italic> = +.487, <italic>p</italic> = .009 in condition Hes, <italic>r<sub>s</sub>
</italic> = +.627, <italic>p</italic> &lt;.001 in condition Pau, <italic>r<sub>s</sub>
</italic> = +.498, <italic>p</italic> = .007 in condition HesPau, all other <italic>p</italic>s&#xa0;&gt;.100; NTC: all <italic>r<sub>s</sub>
</italic> in [-.409; +.253], <italic>r<sub>s</sub>
</italic> = -.409, <italic>p</italic> = .031 in condition Cer, all other <italic>p</italic>s &gt;.063). For the autistic symptom severity measure AQ, there were no significant correlations except for the condition HesPau for the ASD group (ASD: all <italic>r<sub>s</sub>
</italic> in [-.237; +.404], <italic>r<sub>s</sub>
</italic>&#xa0;= +.404, <italic>p</italic> = .033 in condition HesPau, all other <italic>p</italic>s &gt;.062; NTC: all <italic>r<sub>s</sub>
</italic> in [-.258; +.261], all <italic>p</italic>s &gt;.180). No significant correlations were found for the ADOS-2 (ASD: all <italic>r<sub>s</sub>
</italic> in [-.280; +.276], all <italic>p</italic>s &gt;.149).</p>
<p>There were no significant correlations for the response times of the ratings of uncertainty with the autistic symptom severity measure SRS-2 Adult Self-Report, except for one condition within the NTC group (ASD: all <italic>r<sub>s</sub>
</italic> in [-.233; +.126], all <italic>p</italic>s &gt;.233; NTC: all <italic>r<sub>s</sub>
</italic> in [-.023; +.417], <italic>r<sub>s</sub>
</italic> = +.417, <italic>p</italic> = .027 in condition Into1, all other <italic>p</italic>s&#xa0;&gt;.167). No significant correlations were found for the autistic symptom severity measure AQ (ASD: all <italic>r<sub>s</sub>
</italic> in [-.035; +.327], all <italic>p</italic>s&#xa0;&gt;.090; NTC: all <italic>r<sub>s</sub>
</italic> in [-.015; +.356], all <italic>p</italic>s &gt;.063). For the ADOS-2, only one significant correlation with response times was found in the condition Cer (ASD: all <italic>r<sub>s</sub>
</italic> in [+.080; +.438], <italic>r<sub>s</sub>
</italic> = +.438, <italic>p</italic> = .021 in condition Cer, all other <italic>p</italic>s &gt;.094).</p>
<p>Gender. In order to assess a potential influence of gender on the processing of uncertainty cues, we added gender as an additional independent factor in the robust ANOVA. There was no significant main effect of gender on ratings of uncertainty (<italic>F</italic>(1, 999) = 1.46, <italic>p</italic> = .228), nor were there any significant interactions of the other factors with gender (gender x diagnostic group: <italic>F</italic>(1, 999) &lt; 1; gender x prosodic uncertainty condition: <italic>F</italic>(9, 999) &lt; 1; gender x diagnostic group x prosodic uncertainty condition: <italic>F</italic>(9, 999) &lt; 1). A similar pattern was found for response times: No significant main effect for gender (<italic>F</italic>(1, 999) = 2.30, <italic>p</italic> = .130) and no significant interactions of the other factors with gender (gender x diagnostic group: <italic>F</italic>(1,&#xa0;999)&#xa0;&lt; 1; gender x prosodic uncertainty condition: <italic>F</italic>(9, 999) &lt; 1; gender x diagnostic group x prosodic uncertainty condition: <italic>F</italic>(9,&#xa0;999) &lt; 1).</p>
<p>In summary, the exploratory analyses revealed no strong evidence that gender or IQ are reliably related to the processing of prosodic uncertainty cues. There was weak evidence that severity of autistic symptoms may play an additional role beyond mere diagnostic group membership.</p>
</sec>
</sec>
<sec id="s3_3_2">
<label>3.3.2</label>
<title>Perception of naturalness and comprehensibility</title>
<p>In section 2.2, the (perceived) quality of the synthetic stimuli was mentioned as a possible confounding variable. In the following two subsections we look at the two quality measures naturalness and comprehensibility, and analyze whether there were differences between the two groups that could have influenced the differences in ratings of uncertainty.</p>
<sec id="s3_3_2_1">
<label>3.3.2.1</label>
<title>Naturalness</title>
<p>The statistical analysis of the naturalness ratings using the robust ANOVA revealed neither significant main effects nor a significant interaction (main effect &#x201c;diagnostic group&#x201d;: robust test statistic <italic>F</italic>(1, 32.872) &lt; 1; main effect &#x201c;prosodic uncertainty condition&#x201d;: <italic>F</italic>(7, 24.970) = 2.24, <italic>p</italic> = .065; interaction: <italic>F</italic>(7, 24.970)&#xa0;&lt; 1). In <xref ref-type="fig" rid="f5">
<bold>Figure&#xa0;5</bold>
</xref>, means of naturalness ratings are depicted for all factorial combinations. Further exploratory analyses for the prosodic uncertainty conditions revealed that the largest difference between conditions was found for the contrast Cer-PauInto2, which had the highest/lowest mean naturalness ratings (Cer: <italic>M</italic> = 3.41, <italic>SD</italic> = 1.30, PauInto2: <italic>M</italic> = 2.95, <italic>SD</italic> = 1.26; robust test statistic = 3.19, <italic>p</italic> = .003, ES = 0.28).</p>
<fig id="f5" position="float">
<label>Figure&#xa0;5</label>
<caption>
<p>Means of naturalness ratings (1=little natural, 5=very natural), dashed lines denote the median.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpsyt-15-1347913-g005.tif"/>
</fig>
<p>The robust analysis of response times for naturalness ratings showed a significant main effect of &#x201c;prosodic uncertainty condition&#x201d; (<italic>F</italic>(7, 24.643) = 3.33, <italic>p</italic> = .012), whereas the main effect &#x201c;diagnostic group&#x201d; and the interaction were far from being significant (robust test statistic <italic>F</italic>(1, 32) &lt; 1 and <italic>F</italic>(7, 25) &lt; 1, respectively). Further exploratory analyses for the prosodic uncertainty conditions revealed that the largest response time difference between conditions was noted for the contrast Cer-PauHesInto2 that had the highest/lowest mean response times (Cer: <italic>M</italic> = 3155, <italic>SD</italic> = 2300, PauHesInto2: <italic>M</italic> = 4383, <italic>SD</italic> = 2922; robust test statistic = -3.70, <italic>p</italic> &lt;.001, ES = 0.44).</p>
</sec>
<sec id="s3_3_2_2">
<label>3.3.2.2</label>
<title>Comprehensibility</title>
<p>Statistical analysis for the ratings of comprehensibility using the robust ANOVA revealed neither significant main effects nor a significant interaction (main effect &#x201c;diagnostic group&#x201d;: robust test statistic <italic>F</italic>(1, 32.647) = 3.21, <italic>p</italic> = .082; main effect &#x201c;prosodic uncertainty condition&#x201d;: <italic>F</italic>(7, 24.887) = 1.22, <italic>p</italic> = .331; interaction: <italic>F</italic>(7, 24.887) &lt; 1). In <xref ref-type="fig" rid="f6">
<bold>Figure&#xa0;6</bold>
</xref>, means of comprehensibility ratings are shown for all factorial combinations. Further exploratory analyses for the prosodic uncertainty conditions revealed that the biggest difference between conditions was noted for the contrast Cer-PauHesInto2, which had the highest/second lowest mean comprehensibility ratings (Cer: <italic>M</italic> = 3.89, <italic>SD</italic> = 1.11, PauInto2: <italic>M</italic> = 3.52, <italic>SD</italic> = 1.11; robust test statistic = 2.54, <italic>p</italic> = .016, ES = 0.25).</p>
<fig id="f6" position="float">
<label>Figure&#xa0;6</label>
<caption>
<p>Means of comprehensibility ratings (1=little comprehensible, 5=very comprehensible), dashed lines denote the median.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpsyt-15-1347913-g006.tif"/>
</fig>
<p>The robust analysis of response times for comprehensibility ratings showed a significant main effect of &#x201c;prosodic uncertainty condition&#x201d; (<italic>F</italic>(7, 24.834) = 6.59, <italic>p</italic> = .0002), whereas the main effect &#x201c;diagnostic group&#x201d; and the interaction were not significant (robust test statistic <italic>F</italic>(1, 29.434) = 1.10, <italic>p</italic> = .303 and <italic>F</italic>(7, 24.834) = 1.858, <italic>p</italic> = .120, respectively). Further exploratory analyses for the prosodic uncertainty conditions revealed that the largest response time difference between conditions was noted for the contrast Cer-PauHesInto2, which had the second highest/lowest mean response times (Cer: <italic>M</italic> = 3013, <italic>SD</italic> = 1975, PauHesInto2: <italic>M</italic> = 4608, <italic>SD</italic> = 3341; robust test statistic = -4.84, <italic>p</italic> &lt;.0001, ES = 0.44).</p>
<p>We summarize the results for the assessment of the perceived naturalness and comprehensibility of the stimuli as follows: Our data show no significant group difference with respect to either dimension.</p>
</sec>
<sec id="s3_3_2_3">
<label>3.3.2.3</label>
<title>Correlation between perceived uncertainty and naturalness or comprehensibility</title>
<p>Fisher&#x2019;s z-transformed correlations were calculated to test the relationship between perceived uncertainty and naturalness as well as perceived uncertainty and comprehensibility [see (<xref ref-type="table" rid="T7">
<bold>Table&#xa0;7</bold>
</xref>)]. A significant group difference was found for the correlation between uncertainty and naturalness, i.e. in the ASD group the correlation is significantly lower than in the NTC group. This means that the processing of naturalness and the processing of uncertainty are more closely linked in the NTC group than in the ASD group, which may indicate a different type of processing in ASD.</p>
<table-wrap id="T7" position="float">
<label>Table&#xa0;7</label>
<caption>
<p>Fisher&#x2019;s z transformed correlations for uncertainty and naturalness and also for uncertainty and comprehensibility.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="center" rowspan="3"/>
<th valign="middle" colspan="8" align="center">Fisher&#x2019;s z transformed correlations</th>
</tr>
<tr>
<th valign="middle" align="center" colspan="3">ASD</th>
<th valign="middle" align="center" colspan="3">NTC</th>
<th valign="middle" rowspan="2" align="center">
<italic>t-test</italic>
</th>
<th valign="middle" rowspan="2" align="center">
<italic>p</italic>
</th>
</tr>
<tr>
<th valign="middle" align="center">
<italic>n</italic>
</th>
<th valign="middle" align="center">
<italic>M</italic>
</th>
<th valign="middle" align="center">
<italic>SD</italic>
</th>
<th valign="middle" align="center">
<italic>n</italic>
</th>
<th valign="middle" align="center">
<italic>M</italic>
</th>
<th valign="middle" align="center">
<italic>SD</italic>
</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="left">Uncertainty - Naturalness</td>
<td valign="middle" align="center">27<bold>
<italic>
<sup>a</sup>
</italic>
</bold>
</td>
<td valign="middle" align="center">0.209</td>
<td valign="middle" align="center">0.470</td>
<td valign="middle" align="center">28</td>
<td valign="middle" align="center">0.474</td>
<td valign="middle" align="center">0.487</td>
<td valign="middle" align="center">-2.055</td>
<td valign="middle" align="center">.045</td>
</tr>
<tr>
<td valign="top" align="left">Uncertainty - Comprehensibility</td>
<td valign="top" align="center">27<bold>
<italic>
<sup>a</sup>
</italic>
</bold>
</td>
<td valign="top" align="center">0.219</td>
<td valign="top" align="center">0.616</td>
<td valign="top" align="center">27<bold>
<italic>
<sup>b</sup>
</italic>
</bold>
</td>
<td valign="top" align="center">0.360</td>
<td valign="top" align="center">0.461</td>
<td valign="top" align="center">-0.949</td>
<td valign="top" align="center">.348</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Note. <bold>
<sup>a</sup>
</bold>One person in the ASD group gave the same score for all ten different levels of certainty. Due to the lack of variability, correlations cannot be calculated for this person. <bold>
<sup>b</sup>
</bold>One person from the NTC group gave the same scores for certainty and comprehensibility, resulting in a correlation of 1.0. Therefore, the correlation for this person could not be transformed to Fisher&#x2019;s z and had to be excluded from the analysis.</p>
</fn>
</table-wrap-foot>
</table-wrap>
</sec>
</sec>
</sec>
</sec>
<sec id="s4" sec-type="discussion">
<label>4</label>
<title>Discussion</title>
<sec id="s4_1">
<label>4.1</label>
<title>Summary</title>
<p>In this study, we experimentally investigated how prosodic cues of uncertainty were perceived by hearers with ASD without II in comparison to an NTC group. The synthetic utterances were generated by an articulatory speech synthesizer (<xref ref-type="bibr" rid="B33">33</xref>). They were embedded in short question-answer pairs using a scenario in which the question was asked by a human in a natural voice. The robot gave a synthetic response in which the cues pause, intonation and hesitation were varied to generate different levels of intended uncertainty. The synthetic responses were rated by participants on rating scales for (i) uncertainty, (ii) naturalness, and (iii) comprehensibility. Reaction time of the rating was also measured. In addition, a complementary audiometric test, a pitch discrimination test and a pitch change test were performed.</p>
<p>The results for the level <italic>Hesitation</italic> combined with <italic>Intonation 2</italic> showed a group difference in reaction times to judge uncertainty perception: the ASD group took longer for the judgment than the NTC group. Note that this difference is no longer significant after Bonferroni correction. All other levels of uncertainty were not reliably different between the two groups (all <italic>p</italic>s &gt;.10). With the exception of pause, all judgments were reported as more certain on average in the ASD group. In addition, the intended levels of uncertainty showed a tendency for longer reaction times in the ASD group.</p>
<p>No significant difference was found between the ASD and NTC groups in the pitch discrimination and pitch change task for baseline discrimination. Although pitch differences were perceived equally well, the prosodic cues tended to be interpreted differently in terms of uncertainty perception: the intended prosodic cues of uncertainty influenced the perception of hearers less in the ASD group than in the NTC group. However, the ASD group showed longer reaction times than the NTC group. A possible explanation could be that a higher cognitive load was required for the hearers with ASD without II. It is assumed that hearers in the NTC group processed the prosodic cues automatically and with less cognitive effort, allowing them to make their ratings of uncertainty more quickly.</p>
<p>The differential correlation effect, i.e. that ASD individuals show a lower correlation between their ratings of naturalness on the one hand and uncertainty on the other, can be taken as evidence that the co-processing of naturalness and uncertainty is not as tightly linked in the ASD group compared to the typical co-processing of uncertainty cues with the naturalness of the utterance. This weaker relationship would be consistent with weak central coherence accounts of autism [e.g. (<xref ref-type="bibr" rid="B10">10</xref>)].</p>
</sec>
<sec id="s4_2">
<label>4.2</label>
<title>Limitations and future directions</title>
<p>Due to the design of the study, there were only few observations per participant, which means that in the current study there were significantly fewer trials per condition in the responses to uncertainty perception than in the pilot study (<xref ref-type="bibr" rid="B109">109</xref>). It is possible that the few observations from participants and the resulting study design had an impact on the results and could explain the non-significant differences in uncertainty judgments between the ASD group and the NTC group in our data. The reason for presenting only a subset of the stimuli to the participants was to minimize learning effects of participants. Previous experimental research on the role of prosody in pragmatic focus interpretation and possible learning effects is described in Fisseni (<xref ref-type="bibr" rid="B30">30</xref>), and Wollermann (<xref ref-type="bibr" rid="B51">51</xref>).</p>
<p>As a consequence of our feasibility study, the number of trials per condition could be increased and the test conditions could be adjusted in order to collect more data and verify the results. We could also reduce the number of psychological and psychiatric tests in order to save time and cognitive capacity. In particular, the tests for minimal pitch discrimination and pitch change could be omitted, as we have not found correlations between baseline auditory abilities and prosody perception in ASD. Instead, we would like to focus on the presentation of the critical stimuli for uncertainty perception by further minimizing learning effects.</p>
<p>It is possible that the order of presentation of the stimuli has an effect on recipients&#x2019; judgments, i.e. the stimulus presented first may be judged differently from stimuli presented later due to possible learning effects. Furthermore, in future research, we could focus on testing hearing abilities by including a group of participants with hearing impairments for comparison with the ASD and NTC groups.</p>
<p>In our approach, we used synthetic speech to generate the different utterances with intended uncertainty. In this way, specific prosodic parameters could be manipulated while the influence of unintended variation was minimized compared to natural speech, so we gave high priority to controllability and selective manipulation. A possible explanation for the non-significant effects of prosody on uncertainty perception could be the use of synthetic speech. It is conceivable that the effects of prosody on uncertainty perception might be more evident when natural speech is used. However, natural speech is less controllable than synthetic speech. In future work, it may be an option to consider neural synthesizers in comparison to our articulatory speech synthesizer for similar experiments. However, our primary goal was to achieve manipulability and controllability. It is an open question to what extent this can be guaranteed by neural synthesis.<xref ref-type="fn" rid="fn12">
<sup>12</sup>
</xref>
</p>
<p>In future experiments we would like to further exploit the advantages of speech synthesis. In particular, we would like to explore the interplay between the three acoustic cues pause, intonation and hesitation to model different degrees of uncertainty in more detail. For example, it would be interesting to test a duration &lt;4 seconds for the pause and also to use other hesitation particles besides uh, such as um. We also think it is important to experimentally investigate the role of lengthening in uncertainty perception, as pointed out by Betz et&#xa0;al. (<xref ref-type="bibr" rid="B93">93</xref>).</p>
<p>Another limitation is the material used in this study. We designed short question-answer situations so that the synthetic stimuli were not presented in isolation, without an embedding context. The answers were one-word sentences. In future work, it would be important to test more complex sentences for ecological validity. However, it should be noted that the dialogues were simulated and did not approximate real-life dialogues, which induce uncertainty. This may have influenced the pattern of the results, i.e., the lack of significant interactions of the different variables with the group.</p>
<p>Next, we tested only adult participants. A wider range of ages, especially children and adolescents, might provide more information about the developmental trajectories of the ability to adequately process prosodic cues of uncertainty. Krahmer and Swerts (<xref ref-type="bibr" rid="B61">61</xref>) tested 7-8 year old neurotypical children and adults on the perception and production of uncertainty in question-answer situations. Uncertain utterances produced by adult speakers were recognized more accurately by both children and adult hearers than uncertain utterances produced by children. In addition, adults performed better than children in the recognition of uncertainty. We therefore plan to conduct further studies with children and adolescents. It should be noted that it would be necessary to modify the methodological approach with regard to cognitive abilities, especially in the case of children.</p>
<p>At this point, we would like to take a critical look at the role of ToM for prosody perception. For prosody processing, it may be important whether the ToM is built up automatically in an incidental or cognitive-compensatory manner, as we have explained above. If we assume that prosody perception supports the construction of ToM, there may also be incidental and compensatory prosody processing. This could explain differences in reaction times.</p>
<p>In addition, we only looked at individuals with ASD without II, so it is not possible to generalize the results to all individuals with a diagnosis of ASD.</p>
<p>In future work, we would like to conduct further perceptual experiments of affective prosody recognition, as the investigation of speech characteristics may represent a promising novel biomarker and may contribute to a better understanding of mental disorders [cf. (<xref ref-type="bibr" rid="B117">117</xref>): 337; see also (<xref ref-type="bibr" rid="B118">118</xref>): 99].</p>
</sec>
</sec>
</body>
<back>
<sec id="s5" sec-type="data-availability">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Material</bold>
</xref>. Further inquiries can be directed to the corresponding author.</p>
</sec>
<sec id="s6" sec-type="author-contributions">
<title>Author contributions</title>
<p>CB: Conceptualization, Funding acquisition, Methodology, Visualization, Writing &#x2013; original draft, Writing &#x2013; review &amp; editing. BS: Conceptualization, Data curation, Formal analysis, Methodology, Writing &#x2013; original draft, Writing &#x2013; review &amp; editing. RR: Formal analysis, Methodology, Software, Visualization, Writing &#x2013; original draft, Writing &#x2013; review &amp; editing. AR: Conceptualization, Methodology, Writing &#x2013; original draft, Writing &#x2013; review &amp; editing. PD: Writing &#x2013; original draft, Writing &#x2013; review &amp; editing. PB: Conceptualization, Methodology, Writing &#x2013; review &amp; editing. LT: Supervision, Writing &#x2013; review &amp; editing. TF: Conceptualization, Formal analysis, Methodology, Software, Writing &#x2013; original draft.</p>
</sec>
<sec id="s7" sec-type="funding-information">
<title>Funding</title>
<p>The author(s) declare that financial support was received for the research, authorship, and/or publication of this article. The study was funded by the &#x201c;Programm zur F&#xf6;rderung des exzellenten wissenschaftlichen Nachwuchses&#x201d; of the University of Duisburg-Essen. We acknowledge support by the Open Access Publication Fund of the University of Duisburg-Essen.</p>
</sec>
<ack>
<title>Acknowledgments</title>
<p>We would like to thank Johanna Keller and Alisa Aschrich for testing and data collection. We used DeepL Translator and Writer to improve the quality of the article.</p>
</ack>
<sec id="s8" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="s9" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec id="s10" sec-type="supplementary-material">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fpsyt.2024.1347913/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fpsyt.2024.1347913/full#supplementary-material</ext-link></p>
<supplementary-material xlink:href="Table1.docx" id="SM1" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document"/>
</sec>
<fn-group>
<fn id="fn1">
<label>1</label>
<p>Our own empirical studies refer only to German. However, we assume that other West Germanic languages work in a very similar way. Studies on other languages are cited for methodological reasons.</p>
</fn>
<fn id="fn2">
<label>2</label>
<p>Fisseni (<xref ref-type="bibr" rid="B30">30</xref>) provides a systematic overview of the term <italic>focus</italic>; for a model of pragmatic focus interpretation see also Wollermann et&#xa0;al. (<xref ref-type="bibr" rid="B31">31</xref>).</p>
</fn>
<fn id="fn3">
<label>3</label>
<p>It should be noted that we are referring to the current version of the VocalTractLab website (<xref ref-type="bibr" rid="B33">33</xref>).</p>
</fn>
<fn id="fn4">
<label>4</label>
<p>A description of the false-belief task can be found in Wimmer and Perner (<xref ref-type="bibr" rid="B68">68</xref>). A brief overview of the different levels of ToM, i.e. first-order ToM (e.g., &#x201c;X thinks or feels&#x2026;&#x201d;), second-order ToM (e.g., &#x201c;X thinks that Y feels&#x2026;&#x201d;), and third-order ToM (e.g., &#x201c;X believes that Y assumes that Z intends &#x2026;&#x201d;) can be found in Gabriel et&#xa0;al. [ (<xref ref-type="bibr" rid="B69">69</xref>): 534-35].</p>
<p>The ToM of a subject S is generally understood as S's beliefs about mental states, such as beliefs, intentions, or emotions, of another subject O. If S's beliefs concern O's beliefs about mental states of subjects O<sub>2</sub> other than O, we speak of second-order ToM. O<sub>2</sub>'s mental states can also be beliefs about mental states of other persons. In this case we speak of third-order beliefs.</p>
</fn>
<fn id="fn5">
<label>5</label>
<p>According to Begeer et&#xa0;al. (<xref ref-type="bibr" rid="B75">75</xref>) counterfactual reasoning describes a phenomenon in which people imagine alternatives to one or more features of a perceived event [see also (<xref ref-type="bibr" rid="B76">76</xref>)]. It can be characterized by switching back and forth between a real situation and an imagined situation, i.e. a so-called counterfactual situation.</p>
</fn>
<fn id="fn6">
<label>6</label>
<p>For a more detailed discussion see Gyarmathy and Horv&#xe1;th [ (<xref ref-type="bibr" rid="B90">90</xref>): 27].</p>
</fn>
<fn id="fn7">
<label>7</label>
<p>For a detailed discussion of the role of uncertainty in human-machine-interaction, see Wollermann [ (<xref ref-type="bibr" rid="B51">51</xref>): 91].</p>
</fn>
<fn id="fn8">
<label>8</label>
<p>Unit selection is a method for acoustic speech synthesis based on large corpora of naturally spoken utterances. Units are selected with respect to a target utterance by means of concatenation. The generated speech is characterized by high comprehensibility [cf. (<xref ref-type="bibr" rid="B101">101</xref>): 279].</p>
</fn>
<fn id="fn9">
<label>9</label>
<p>It should be noted that in our current study we also measure comprehensibility in order to better understand the processing of synthetic utterances.</p>
</fn>
<fn id="fn10">
<label>10</label>
<p>It should be noted that the articulatory speech synthesizer is regularly updated. In the following we refer to the VocalTractLab website (<xref ref-type="bibr" rid="B33">33</xref>), but in our previous studies we have used older versions of the system.</p>
</fn>
<fn id="fn11">
<label>11</label>
<p>A strong pause of 4 s was used because we could not find exact values for pauses in the literature for modeling uncertainty in question-answer situations in human-machine communication [cf. (<xref ref-type="bibr" rid="B109">109</xref>): 40]. It was important for us to use very clear characteristics of uncertainty to test whether there are effects. We have already successfully used this value of 4 s in our pilot study (<xref ref-type="bibr" rid="B109">109</xref>).</p>
</fn>
<fn id="fn12">
<label>12</label>
<p>An overview of neural text-to-speech synthesis is given by Tan (<xref ref-type="bibr" rid="B116">116</xref>).</p>
</fn>
</fn-group>
<ref-list>
<title>References</title>
<ref id="B1">
<label>1</label>
<citation citation-type="book">
<person-group person-group-type="author">
<collab>American Psychiatric Association</collab>
</person-group>. <article-title>Diagnostic and statistical manual of mental disorders</article-title>. In: <source>Text Revision: DSM-5-TR</source>, <edition>Fifth Edition</edition>. <publisher-name>American Psychiatric Association Publishing</publisher-name>, <publisher-loc>Washington, DC</publisher-loc> (<year>2022</year>).</citation>
</ref>
<ref id="B2">
<label>2</label>
<citation citation-type="web">
<person-group person-group-type="author">
<collab>World Health Organization</collab>
</person-group>. <source>International classification of diseases for mortality and morbidity statistics (11th Revision)</source> (<year>2018</year>). Available online at: <uri xlink:href="https://icd.who.int/browse11/l-m/en#/http://id.who.int/icd/entity/437815624">https://icd.who.int/browse11/l-m/en#/http://id.who.int/icd/entity/437815624</uri> (Accessed <access-date>August 28, 2024</access-date>).</citation>
</ref>
<ref id="B3">
<label>3</label>
<citation citation-type="book">
<person-group person-group-type="author">
<collab>NICE</collab>
</person-group>. <article-title>Autism diagnosis in children and young people. Recognition, referral and diagnosis of children and young people on the autism spectrum</article-title>. In: <source>Clinical Guideline no. 128</source>. <publisher-name>National Institute for Health and Clinical Excellence</publisher-name>, <publisher-loc>London</publisher-loc> (<year>2011</year>).</citation>
</ref>
<ref id="B4">
<label>4</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lord</surname> <given-names>C</given-names>
</name>
<name>
<surname>Elsabbagh</surname> <given-names>M</given-names>
</name>
<name>
<surname>Baird</surname> <given-names>G</given-names>
</name>
<name>
<surname>Veenstra-Vanderweele</surname> <given-names>J</given-names>
</name>
</person-group>. <article-title>Autism spectrum disorder</article-title>. <source>Lancet</source>. (<year>2018</year>) <volume>392</volume>:<page-range>508&#x2013;20</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/S0140-6736(18)31129-2</pub-id>
</citation>
</ref>
<ref id="B5">
<label>5</label>
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Riedel</surname> <given-names>A</given-names>
</name>
</person-group>. <source>Hochfunktionaler Autismus im Erwachsenenalter &#x2013; klinische, neuropsychologische und morphometrische Befunde [habilitation thesis]</source>. <publisher-loc>Freiburg</publisher-loc>: <publisher-name>University of Freiburg</publisher-name> (<year>2015</year>).</citation>
</ref>
<ref id="B6">
<label>6</label>
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Vogeley</surname> <given-names>K</given-names>
</name>
</person-group>. <source>Anders sein: Autismus-Spektrum-St&#xf6;rungen im Erwachsenenalter &#x2013; ein Ratgeber</source>. <edition>2nd ed</edition>. <publisher-loc>Weinheim</publisher-loc>: <publisher-name>Beltz</publisher-name> (<year>2016</year>).</citation>
</ref>
<ref id="B7">
<label>7</label>
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Morris</surname> <given-names>CW</given-names>
</name>
</person-group>. <source>Grundlagen der Zeichentheorie</source>. <publisher-loc>Frankfurt am Main</publisher-loc>: <publisher-name>Fischer-Taschenbuch-Verlag</publisher-name> (<year>1988</year>).</citation>
</ref>
<ref id="B8">
<label>8</label>
<citation citation-type="web">
<person-group person-group-type="author">
<name>
<surname>Riedel</surname> <given-names>A</given-names>
</name>
</person-group>. <article-title>Sprachpragmatik bei Autismus-Spektrum-St&#xf6;rungen</article-title>, in: <source>8. Newsletter, Wissenschaftliche Gesellschaft Autismus-Spektrum-St&#xf6;rung</source> (<year>2015</year>). Available online at: <uri xlink:href="https://silo.tips/download/newsletter-sprachpragmatik-bei-autismus-spektrum-strungen-mai-nr-8-dr-med-dr-phi">https://silo.tips/download/newsletter-sprachpragmatik-bei-autismus-spektrum-strungen-mai-nr-8-dr-med-dr-phi</uri> (Accessed <access-date>August 28, 2024</access-date>).</citation>
</ref>
<ref id="B9">
<label>9</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Riedel</surname> <given-names>A</given-names>
</name>
<name>
<surname>Suh</surname> <given-names>H</given-names>
</name>
<name>
<surname>Haser</surname> <given-names>V</given-names>
</name>
<name>
<surname>Hermann</surname> <given-names>I</given-names>
</name>
<name>
<surname>Ebert</surname> <given-names>D</given-names>
</name>
<name>
<surname>Riemann</surname> <given-names>D</given-names>
</name>
<etal/>
</person-group>. <article-title>Freiburg Questionnaire of linguistic pragmatics (FQLP): psychometric properties on a psychiatric sample</article-title>. <source>BMC Psychiatry</source>. (<year>2014</year>) <volume>14</volume>:<elocation-id>374</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s12888-014-0374-9</pub-id>
</citation>
</ref>
<ref id="B10">
<label>10</label>
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Geurts</surname> <given-names>B</given-names>
</name>
<name>
<surname>Kissine</surname> <given-names>M</given-names>
</name>
<name>
<surname>van Tiel</surname> <given-names>B</given-names>
</name>
</person-group>. <article-title>&#x201c;Pragmatic reasoning in autism&#x201d;</article-title>. In: <person-group person-group-type="editor">
<name>
<surname>Morsanyi</surname> <given-names>K</given-names>
</name>
<name>
<surname>Byrne</surname> <given-names>RMJ</given-names>
</name>
</person-group>, editors. <source>Thinking, reasoning and decision making in autism</source>. <publisher-name>Routledge</publisher-name>, <publisher-loc>London</publisher-loc> (<year>2019</year>). p. <page-range>113&#x2013;34</page-range>.</citation>
</ref>
<ref id="B11">
<label>11</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Andr&#xe9;s-Roqueta</surname> <given-names>C</given-names>
</name>
<name>
<surname>Katsos</surname> <given-names>NA</given-names>
</name>
</person-group>. <article-title>A distinction between linguistic- and social-pragmatics helps the precise characterization of pragmatic challenges in children with Autism Spectrum Disorders and Developmental Language Disorder</article-title>. <source>J Speech Lang Hear Res</source>. (<year>2020</year>) <volume>63</volume>:<page-range>1494&#x2013;508</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1044/2020_JSLHR-19-00263</pub-id>
</citation>
</ref>
<ref id="B12">
<label>12</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Durrleman</surname> <given-names>S</given-names>
</name>
<name>
<surname>Hippolyte</surname> <given-names>L</given-names>
</name>
<name>
<surname>Zufferey</surname> <given-names>S</given-names>
</name>
<name>
<surname>Iglesias</surname> <given-names>K</given-names>
</name>
<name>
<surname>Hadjikhani</surname> <given-names>N</given-names>
</name>
</person-group>. <article-title>Complex syntax in autism spectrum disorders: a study of relative clauses</article-title>. <source>Int J Lang Commun Disord</source>. (<year>2015</year>) <volume>50</volume>:<page-range>260&#x2013;7</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/1460-6984.12130</pub-id>
</citation>
</ref>
<ref id="B13">
<label>13</label>
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Clark</surname> <given-names>JE</given-names>
</name>
<name>
<surname>Yallop</surname> <given-names>C</given-names>
</name>
<name>
<surname>Fletcher</surname> <given-names>J</given-names>
</name>
</person-group>. <source>An Introduction to Phonetics and Phonology</source>. <edition>3rd ed</edition>. <publisher-loc>Oxford</publisher-loc>: <publisher-name>Blackwell Publishing</publisher-name> (<year>2007</year>).</citation>
</ref>
<ref id="B14">
<label>14</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Martzoukou</surname> <given-names>M</given-names>
</name>
<name>
<surname>Papadopoulou</surname> <given-names>D</given-names>
</name>
<name>
<surname>Kosmidis</surname> <given-names>MH</given-names>
</name>
</person-group>. <article-title>The comprehension of syntactic and affective prosody by adults with autism spectrum disorder without accompanying cognitive deficits</article-title>. <source>J Psycholinguist Res</source>. (<year>2017</year>) <volume>46</volume>:<page-range>1573&#x2013;95</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s10936-017-9500-4</pub-id>
</citation>
</ref>
<ref id="B15">
<label>15</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Martzoukou</surname> <given-names>M</given-names>
</name>
<name>
<surname>Papadopoulou</surname> <given-names>D</given-names>
</name>
<name>
<surname>Kosmidis</surname> <given-names>MH</given-names>
</name>
</person-group>. <article-title>Syntactic and affective prosody recognition: Schizophrenia vs. Autism spectrum disorders</article-title>. <source>PloS One</source>. (<year>2023</year>) <volume>18</volume>:<elocation-id>e0292325</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1371/journal.pone.0292325</pub-id>
</citation>
</ref>
<ref id="B16">
<label>16</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Terzi</surname> <given-names>A</given-names>
</name>
<name>
<surname>Marinis</surname> <given-names>T</given-names>
</name>
<name>
<surname>Francis</surname> <given-names>K</given-names>
</name>
</person-group>. <article-title>The interface of syntax with pragmatics and prosody in children with autism spectrum disorders</article-title>. <source>J Autism Dev Disord</source>. (<year>2016</year>) <volume>46</volume>:<page-range>2692&#x2013;706</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s10803-016-2811-8</pub-id>
</citation>
</ref>
<ref id="B17">
<label>17</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>McCann</surname> <given-names>J</given-names>
</name>
<name>
<surname>Pepp&#xe9;</surname> <given-names>S</given-names>
</name>
</person-group>. <article-title>Prosody in autism spectrum disorders; a critical review</article-title>. <source>Int J Lang Commun Disord</source>. (<year>2003</year>) <volume>38</volume>:<page-range>325&#x2013;50</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1080/1368282031000154204</pub-id>
</citation>
</ref>
<ref id="B18">
<label>18</label>
<citation citation-type="book">
<person-group person-group-type="editor">
<name>
<surname>Gussenhoven</surname> <given-names>C</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>A</given-names>
</name>
</person-group> eds. <article-title>The Oxford handbook of language prosody</article-title>. In: <source>Oxford Handbooks in Linguistics</source>. <publisher-name>Oxford University Press</publisher-name>, <publisher-loc>Oxford</publisher-loc> (<year>2020</year>).</citation>
</ref>
<ref id="B19">
<label>19</label>
<citation citation-type="web">
<person-group person-group-type="author">
<name>
<surname>Tonhauser</surname> <given-names>J</given-names>
</name>
</person-group>. <article-title>&#x201c;Prosody and meaning&#x201d;</article-title>, in: <source>The Oxford Handbook of Experimental Semantics and Pragmatics</source> (<year>2019</year>). <publisher-loc>online edn</publisher-loc>: <publisher-name>Oxford Academic</publisher-name>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/oxfordhb/9780198791768.013.30</pub-id> (Accessed <access-date>August 28, 2024</access-date>).</citation>
</ref>
<ref id="B20">
<label>20</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Larrouy-Maestri</surname> <given-names>P</given-names>
</name>
<name>
<surname>Poeppel</surname> <given-names>D</given-names>
</name>
<name>
<surname>Pell</surname> <given-names>MD</given-names>
</name>
</person-group>. <article-title>The sound of emotional prosody: nearly 3 decades of research and future directions</article-title>. <source>Perspect Psychol Sci</source>. (<year>2024</year>) <volume>0</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.1177/17456916231217722</pub-id>
</citation>
</ref>
<ref id="B21">
<label>21</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Grice</surname> <given-names>M</given-names>
</name>
<name>
<surname>Wehrle</surname> <given-names>S</given-names>
</name>
<name>
<surname>Kr&#xfc;ger</surname> <given-names>M</given-names>
</name>
<name>
<surname>Spaniol</surname> <given-names>M</given-names>
</name>
<name>
<surname>Cangemi</surname> <given-names>F</given-names>
</name>
<name>
<surname>Vogeley</surname> <given-names>K</given-names>
</name>
</person-group>. <article-title>Linguistic prosody in autism spectrum disorder - An overview</article-title>. <source>Lang Linguistics Compass</source>. (<year>2023</year>) <volume>17</volume>(<issue>5</issue>):<elocation-id>e12498</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/lnc3.12498</pub-id>
</citation>
</ref>
<ref id="B22">
<label>22</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shriberg</surname> <given-names>LD</given-names>
</name>
<name>
<surname>Paul</surname> <given-names>R</given-names>
</name>
<name>
<surname>McSweeney</surname> <given-names>JL</given-names>
</name>
<name>
<surname>Klin</surname> <given-names>AM</given-names>
</name>
<name>
<surname>Cohen</surname> <given-names>DJ</given-names>
</name>
<name>
<surname>Volkmar</surname> <given-names>FR</given-names>
</name>
</person-group>. <article-title>Speech and prosody characteristics of adolescents and adults with high functioning autism and Asperger syndrome</article-title>. <source>J Speech Lang Hear Res</source>. (<year>2001</year>) <volume>44</volume>:<page-range>1097&#x2013;115</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1044/1092-4388(2001/087)</pub-id>
</citation>
</ref>
<ref id="B23">
<label>23</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Paul</surname> <given-names>R</given-names>
</name>
<name>
<surname>Augustyn</surname> <given-names>A</given-names>
</name>
<name>
<surname>Klin</surname> <given-names>A</given-names>
</name>
<name>
<surname>Volkmar</surname> <given-names>FR</given-names>
</name>
</person-group>. <article-title>Perception and production of prosody by speakers with autism spectrum disorders</article-title>. <source>J Autism Dev Disord</source>. (<year>2005</year>) <volume>35</volume>:<page-range>205&#x2013;20</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s10803-004-1999-1</pub-id>
</citation>
</ref>
<ref id="B24">
<label>24</label>
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Kiss</surname> <given-names>G</given-names>
</name>
<name>
<surname>van Santen</surname> <given-names>JPH</given-names>
</name>
<name>
<surname>Prud'hommeaux</surname> <given-names>E</given-names>
</name>
<name>
<surname>Black</surname> <given-names>LM</given-names>
</name>
</person-group>. (<year>2012</year>). <article-title>Quantitative analysis of pitch in speech of children with neurodevelopmental disorders</article-title>, in: <conf-name>Proceedings of Interspeech</conf-name>, <conf-loc>Portland, OR, USA</conf-loc>, <conf-date>2012 Sep 9-13</conf-date>. (<publisher-name>Speech Communication Association</publisher-name>), pp. <page-range>1343&#x2013;46</page-range>.</citation>
</ref>
<ref id="B25">
<label>25</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Nadig</surname> <given-names>A</given-names>
</name>
<name>
<surname>Shaw</surname> <given-names>H</given-names>
</name>
</person-group>. <article-title>Acoustic and perceptual measurement of expressive prosody in high-functioning autism: increased pitch range and what it means to listeners</article-title>. <source>J Autism Dev Disord</source>. (<year>2012</year>) <volume>42</volume>:<fpage>499</fpage>&#x2013;<lpage>511</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s10803-011-1264-3</pub-id>
</citation>
</ref>
<ref id="B26">
<label>26</label>
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Wehrle</surname> <given-names>S</given-names>
</name>
<name>
<surname>Cangemi</surname> <given-names>F</given-names>
</name>
<name>
<surname>Hanekamp</surname> <given-names>H</given-names>
</name>
<name>
<surname>Vogeley</surname> <given-names>K</given-names>
</name>
<name>
<surname>Grice</surname> <given-names>M</given-names>
</name>
</person-group>. (<year>2020</year>). <article-title>Assessing the intonation style of speakers with autism spectrum disorder</article-title>, in: <conf-name>Proceedings of the 10th International Conference on Speech Prosody</conf-name>, <conf-loc>Tokyo, Japan</conf-loc>. Nobuaki Minematsu, Mariko Kondo, Takayuki Arai &amp; Ryoko Hayashi, editors. pp. <page-range>809&#x2013;13</page-range>.</citation>
</ref>
<ref id="B27">
<label>27</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Diehl</surname> <given-names>JJ</given-names>
</name>
<name>
<surname>Paul</surname> <given-names>R</given-names>
</name>
</person-group>. <article-title>Acoustic differences in the imitation of prosodic patterns in children with autism spectrum disorders</article-title>. <source>Res Autism Spectr Disord</source>. (<year>2012</year>) <volume>6</volume>:<page-range>123&#x2013;34</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.rasd.2011.03.012</pub-id>
</citation>
</ref>
<ref id="B28">
<label>28</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Grice</surname> <given-names>M</given-names>
</name>
<name>
<surname>Kr&#xfc;ger</surname> <given-names>M</given-names>
</name>
<name>
<surname>Vogeley</surname> <given-names>K</given-names>
</name>
</person-group>. <article-title>Adults with Asperger syndrome are less sensitive to intonation than control persons when listening to speech</article-title>. <source>Cult Brain</source>. (<year>2016</year>) <volume>4</volume>:<fpage>38</fpage>&#x2013;<lpage>50</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s40167-016-0035-6</pub-id>
</citation>
</ref>
<ref id="B29">
<label>29</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Globerson</surname> <given-names>E</given-names>
</name>
<name>
<surname>Amir</surname> <given-names>N</given-names>
</name>
<name>
<surname>Kishon-Rabin</surname> <given-names>L</given-names>
</name>
<name>
<surname>Golan</surname> <given-names>O</given-names>
</name>
</person-group>. <article-title>Prosody recognition in adults with high-functioning autism spectrum disorders: from psychoacoustics to cognition</article-title>. <source>Autism Res</source>. (<year>2015</year>) <volume>8</volume>:<page-range>153&#x2013;63</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1002/aur.1432</pub-id>
</citation>
</ref>
<ref id="B30">
<label>30</label>
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Fisseni</surname> <given-names>B</given-names>
</name>
</person-group>. <article-title>Focus: interpretation</article-title>? In: <source>Empirical Investigations on Focus Interpretation [dissertation]</source>. <publisher-name>Universit&#xe4;tsverlag Rhein-Ruhr</publisher-name>, <publisher-loc>Duisburg</publisher-loc> (<year>2011</year>).</citation>
</ref>
<ref id="B31">
<label>31</label>
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Wollermann</surname> <given-names>C</given-names>
</name>
<name>
<surname>Schade</surname> <given-names>U</given-names>
</name>
<name>
<surname>Fisseni</surname> <given-names>B</given-names>
</name>
<name>
<surname>Schr&#xf6;der</surname> <given-names>B</given-names>
</name>
</person-group>. (<year>2010</year>). <article-title>Accentuation, Uncertainty and Exhaustivity: towards a model of pragmatic focus interpretation</article-title>, in: <conf-name>Proceedings of the 5th International Conference on Speech Prosody</conf-name>, <conf-loc>Chicago, IL</conf-loc>, <conf-date>2010 May 11-14</conf-date>, Vol. <volume>100063</volume>, pp. <fpage>1</fpage>&#x2013;<lpage>4</lpage>.</citation>
</ref>
<ref id="B32">
<label>32</label>
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Henrichsen</surname> <given-names>PJ</given-names>
</name>
<name>
<surname>Allwood</surname> <given-names>J</given-names>
</name>
</person-group>. (<year>2013</year>). <article-title>Predicting the attitude flow in dialogue based on multi-modal speech cues</article-title>, in: <conf-name>Proceedings of the Fourth Nordic Symposium on Multimodal Communication, NEALT Proceedings Series No. 21</conf-name>, <conf-loc>Gothenburg, Sweden</conf-loc>, <conf-date>2013 Nov 15-16</conf-date>, pp. <fpage>47</fpage>&#x2013;<lpage>53</lpage>.</citation>
</ref>
<ref id="B33">
<label>33</label>
<citation citation-type="web">
<person-group person-group-type="author">
<collab>Vocal Tract Lab</collab>
</person-group>. <source>VocalTractLab</source> (<year>2022</year>). Available online at: <uri xlink:href="https://www.vocaltractlab.de">https://www.vocaltractlab.de</uri> (Accessed <access-date>August 28, 2024</access-date>).</citation>
</ref>
<ref id="B34">
<label>34</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Doi</surname> <given-names>H</given-names>
</name>
<name>
<surname>Fujisawa</surname> <given-names>TX</given-names>
</name>
<name>
<surname>Kanai</surname> <given-names>C</given-names>
</name>
<name>
<surname>Ohta</surname> <given-names>H</given-names>
</name>
<name>
<surname>Yokoi</surname> <given-names>H</given-names>
</name>
<name>
<surname>Iwanami</surname> <given-names>A</given-names>
</name>
<etal/>
</person-group>. <article-title>Recognition of facial expressions and prosodic cues with graded emotional intensities in adults with Asperger syndrome</article-title>. <source>J Autism Dev Disord</source>. (<year>2013</year>) <volume>43</volume>:<page-range>2099&#x2013;113</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s10803-013-1760-8</pub-id>
</citation>
</ref>
<ref id="B35">
<label>35</label>
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Hsu</surname> <given-names>C</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>Y</given-names>
</name>
</person-group>. (<year>2014</year>). <article-title>Can adolescents with autism perceive emotional prosody</article-title>? in: <conf-name>Proceedings of Interspeech 2014</conf-name>, <conf-loc>Singapore</conf-loc>: <publisher-name>Speech Communication Association</publisher-name>, pp. <page-range>1924&#x2013;28</page-range>.</citation>
</ref>
<ref id="B36">
<label>36</label>
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Birkholz</surname> <given-names>P</given-names>
</name>
<name>
<surname>Kr&#xf6;ger</surname> <given-names>BJ</given-names>
</name>
<name>
<surname>Neuschaefer-Rube</surname> <given-names>C</given-names>
</name>
</person-group>. (<year>2011</year>). <article-title>Synthesis of breathy, normal, and pressed phonation using a two-mass model with a triangular glottis</article-title>, in: <conf-name>Proceedings of Interspeech 2011</conf-name>, <conf-loc>Florence, Italy</conf-loc>: <publisher-name>Speech Communication Association</publisher-name>, pp. <page-range>2681&#x2013;84</page-range>.</citation>
</ref>
<ref id="B37">
<label>37</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lozier</surname> <given-names>L</given-names>
</name>
<name>
<surname>Vanmeter</surname> <given-names>J</given-names>
</name>
<name>
<surname>Marsh</surname> <given-names>A</given-names>
</name>
</person-group>. <article-title>Impairments in facial affect recognition associated with autism spectrum disorders: A meta-analysis</article-title>. <source>Dev Psychopathol</source>. (<year>2014</year>) <volume>26</volume>:<page-range>933&#x2013;45</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1017/S0954579414000479</pub-id>
</citation>
</ref>
<ref id="B38">
<label>38</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Uljarevic</surname> <given-names>M</given-names>
</name>
<name>
<surname>Hamilton</surname> <given-names>A</given-names>
</name>
</person-group>. <article-title>Recognition of emotions in autism: A formal meta-analysis</article-title>. <source>J Autism Dev Disord</source>. (<year>2012</year>) <volume>43</volume>:<page-range>1517&#x2013;26</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s10803-012-1695-5</pub-id>
</citation>
</ref>
<ref id="B39">
<label>39</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Scheerer</surname> <given-names>NE</given-names>
</name>
<name>
<surname>Shafai</surname> <given-names>F</given-names>
</name>
<name>
<surname>Stevenson</surname> <given-names>RA</given-names>
</name>
<name>
<surname>Iarocci</surname> <given-names>G</given-names>
</name>
</person-group>. <article-title>Affective prosody perception and the relation to social competence in autistic and typically developing children</article-title>. <source>J Abnorm Child Psychol</source>. (<year>2020</year>) <volume>48</volume>:<page-range>965&#x2013;75</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s10802-020-00644-5</pub-id>
</citation>
</ref>
<ref id="B40">
<label>40</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lartseva</surname> <given-names>A</given-names>
</name>
<name>
<surname>Dijkstra</surname> <given-names>T</given-names>
</name>
<name>
<surname>Buitelaar</surname> <given-names>J</given-names>
</name>
</person-group>. <article-title>Emotional language processing in Autism Spectrum Disorders: A systematic review</article-title>. <source>Front Hum Neurosci</source>. (<year>2014</year>) <volume>8</volume>:<elocation-id>991</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fnhum.2014.00991</pub-id>
</citation>
</ref>
<ref id="B41">
<label>41</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lui</surname> <given-names>M</given-names>
</name>
<name>
<surname>Lau</surname> <given-names>G</given-names>
</name>
<name>
<surname>Han</surname> <given-names>Y</given-names>
</name>
<name>
<surname>Yuen</surname> <given-names>K</given-names>
</name>
<name>
<surname>Sommer</surname> <given-names>W</given-names>
</name>
</person-group>. <article-title>Strong relationship between rapid auditory processing and affective prosody recognition among adults with high autistic traits</article-title>. <source>J Autism Dev Disord</source>. (<year>2023</year>) <volume>53</volume>:<page-range>3180&#x2013;93</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s10803-022-05600-4</pub-id>
</citation>
</ref>
<ref id="B42">
<label>42</label>
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Bellinghausen</surname> <given-names>C</given-names>
</name>
<name>
<surname>Schr&#xf6;der</surname> <given-names>B</given-names>
</name>
<name>
<surname>Rau</surname> <given-names>R</given-names>
</name>
<name>
<surname>Fangmeier</surname> <given-names>T</given-names>
</name>
<name>
<surname>Dahmen</surname> <given-names>P</given-names>
</name>
<name>
<surname>Riedel</surname> <given-names>A</given-names>
</name>
<name>
<surname>Tebartz van Elst</surname> <given-names>L</given-names>
</name>
</person-group>. <article-title>Systematische Analyse von Studien zur Emotionswahrnehmung bei ASS mit Fokus auf der Analyse des sprachlichen Materials</article-title>. In: <source>Abstract book of the Wissenschaftliche Tagung Autismus Spektrum</source>. <publisher-name>Freiburg University</publisher-name>, <publisher-loc>Freiburg</publisher-loc> (<year>2022</year>).</citation>
</ref>
<ref id="B43">
<label>43</label>
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Ekman</surname> <given-names>P</given-names>
</name>
</person-group>. <article-title>Universals and cultural differences in facial expressions of emotion</article-title>. In: <person-group person-group-type="editor">
<name>
<surname>Cole</surname> <given-names>J</given-names>
</name>
</person-group>, editor. <source>Nebraska Symposium on Motivation 1971</source>. <publisher-name>University of Nebraska Press</publisher-name>, <publisher-loc>Lincoln</publisher-loc> (<year>1972</year>). p. <page-range>207&#x2013;82</page-range>.</citation>
</ref>
<ref id="B44">
<label>44</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>M</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>S</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>Y</given-names>
</name>
<name>
<surname>Lin</surname> <given-names>Y</given-names>
</name>
<name>
<surname>Ding</surname> <given-names>H</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>Y</given-names>
</name>
</person-group>. <article-title>Recognition of affective prosody in autism spectrum conditions: A systematic review and meta-analysis</article-title>. <source>Autism</source>. (<year>2022</year>) <volume>26</volume>:<fpage>798</fpage>&#x2013;<lpage>813</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1177/1362361321995725</pub-id>
</citation>
</ref>
<ref id="B45">
<label>45</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Premack</surname> <given-names>D</given-names>
</name>
<name>
<surname>Woodruff</surname> <given-names>G</given-names>
</name>
</person-group>. <article-title>Does the chimpanzee have a theory of mind</article-title>? <source>Behav Brain Sci</source>. (<year>1978</year>) <volume>1</volume>(<issue>4</issue>):<page-range>515&#x2013;26</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1017/S0140525X00076512</pub-id>
</citation>
</ref>
<ref id="B46">
<label>46</label>
<citation citation-type="web">
<person-group person-group-type="author">
<name>
<surname>Lindeman</surname> <given-names>L</given-names>
</name>
</person-group>. <article-title>&#x201c;Propositional attitudes&#x201d;</article-title>, in: <source>The Internet Encyclopedia of Philosophy</source> (<year>2023</year>). Available online at: <uri xlink:href="https://iep.utm.edu/">https://iep.utm.edu/</uri> (Accessed <access-date>August 28, 2024</access-date>).</citation>
</ref>
<ref id="B47">
<label>47</label>
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Cudney</surname> <given-names>PB</given-names>
</name>
</person-group>. <source>A propositional attitude approach to emotions. [dissertation]</source>. <publisher-loc>Washington, DC</publisher-loc>: <publisher-name>Georgetown University</publisher-name> (<year>2018</year>).</citation>
</ref>
<ref id="B48">
<label>48</label>
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Giannakidou</surname> <given-names>A</given-names>
</name>
<name>
<surname>Mari</surname> <given-names>A</given-names>
</name>
</person-group>. <article-title>&#x201c;Chapter 7. Propositional attitudes of emotion: gradability and nonveridicality&#x201d;</article-title>. In: <person-group person-group-type="editor">
<name>
<surname>Giannakidou</surname> <given-names>A</given-names>
</name>
<name>
<surname>Mari</surname> <given-names>A</given-names>
</name>
</person-group>, editors. <source>Truth and Veridicality in Grammar and Thought: Mood, Modality, and Propositional Attitudes</source>. <publisher-name>University of Chicago Press</publisher-name>, <publisher-loc>Chicago</publisher-loc> (<year>2021</year>). p. <fpage>273</fpage>&#x2013;<lpage>308</lpage>.</citation>
</ref>
<ref id="B49">
<label>49</label>
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Hirschberg</surname> <given-names>J</given-names>
</name>
</person-group>. <article-title>&#x201c;Pragmatics and prosody&#x201d;</article-title>. In: <person-group person-group-type="editor">
<name>
<surname>Huang</surname> <given-names>Y</given-names>
</name>
</person-group>, editor. <source>The Oxford Handbook of Pragmatics, Oxford Handbooks</source>. <publisher-name>Oxford Academic</publisher-name>, <publisher-loc>online edn</publisher-loc> (<year>2017</year>). p. <page-range>532&#x2013;49</page-range>. Available at: <uri xlink:href="https://academic.oup.com/edited-volume/28055">https://academic.oup.com/edited-volume/28055</uri>.</citation>
</ref>
<ref id="B50">
<label>50</label>
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Oh</surname> <given-names>I</given-names>
</name>
</person-group>. <source>Modeling Believable Human-Computer Interaction with an Embodied Conversational Agent: Face-to-Face Communication of Uncertainty</source>. <publisher-loc>USA</publisher-loc>: <publisher-name>Rutgers University</publisher-name> (<year>2006</year>).</citation>
</ref>
<ref id="B51">
<label>51</label>
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Wollermann</surname> <given-names>C</given-names>
</name>
</person-group>. <source>Prosodie, nonverbale Signale, Unsicherheit und Kontext &#x2013; Studien zur pragmatischen Fokusinterpretation. [dissertation]</source>. <publisher-loc>Essen</publisher-loc>: <publisher-name>University of Duisburg-Essen</publisher-name> (<year>2012</year>).</citation>
</ref>
<ref id="B52">
<label>52</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rozin</surname> <given-names>P</given-names>
</name>
<name>
<surname>Cohen</surname> <given-names>AB</given-names>
</name>
</person-group>. <article-title>High frequency of facial expressions corresponding to confusion, concentration, and worry in an analysis of naturally occurring facial expressions of Americans</article-title>. <source>Emotion</source>. (<year>2003</year>) <volume>3</volume>:<fpage>68</fpage>&#x2013;<lpage>75</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1037/1528-3542.3.1.68</pub-id>
</citation>
</ref>
<ref id="B53">
<label>53</label>
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Kuhlthau</surname> <given-names>CC</given-names>
</name>
</person-group>. <source>Seeking Meaning: A Process Approach to Library and Information Services</source>. <publisher-loc>Norwood</publisher-loc>: <publisher-name>Ablex Publishing</publisher-name> (<year>1993</year>).</citation>
</ref>
<ref id="B54">
<label>54</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>van der Bles</surname> <given-names>M</given-names>
</name>
<name>
<surname>van der Linden</surname> <given-names>S</given-names>
</name>
<name>
<surname>Freeman</surname> <given-names>ALJ</given-names>
</name>
<name>
<surname>Mitchell</surname> <given-names>J</given-names>
</name>
<name>
<surname>Galvao</surname> <given-names>AB</given-names>
</name>
<name>
<surname>Zaval</surname> <given-names>L</given-names>
</name>
<etal/>
</person-group>. <article-title>Communicating uncertainty about facts, numbers and science</article-title>. <source>R Soc Open Sci</source>. (<year>2019</year>) <volume>6</volume>:<elocation-id>181870</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1098/rsos.181870</pub-id>
</citation>
</ref>
<ref id="B55">
<label>55</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Carruthers</surname> <given-names>P</given-names>
</name>
</person-group>. <article-title>Are epistemic emotions metacognitive</article-title>? <source>Philos Psychol</source>. (<year>2017</year>) <volume>30</volume>:<fpage>58</fpage>&#x2013;<lpage>78</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1080/09515089.2016.1262536</pub-id>
</citation>
</ref>
<ref id="B56">
<label>56</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wollermann</surname> <given-names>C</given-names>
</name>
<name>
<surname>Schr&#xf6;der</surname> <given-names>B</given-names>
</name>
<name>
<surname>Schade</surname> <given-names>U</given-names>
</name>
</person-group>. <article-title>Audiovisual prosody of uncertainty: an overview</article-title>. <source>J Theory Res Educ</source>. (<year>2014</year>) <volume>9</volume>:<page-range>137&#x2013;57</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.6092/issn.1970-2221/4295</pub-id>
</citation>
</ref>
<ref id="B57">
<label>57</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Smith</surname> <given-names>VL</given-names>
</name>
<name>
<surname>Clark</surname> <given-names>HH</given-names>
</name>
</person-group>. <article-title>On the course of answering questions</article-title>. <source>J Mem Lang</source>. (<year>1993</year>) <volume>32</volume>:<fpage>25</fpage>&#x2013;<lpage>38</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1006/jmla.1993.1002</pub-id>
</citation>
</ref>
<ref id="B58">
<label>58</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hart</surname> <given-names>JT</given-names>
</name>
</person-group>. <article-title>Memory and the feeling-of-knowing experience</article-title>. <source>J Educ Psychol</source>. (<year>1967</year>) <volume>56</volume>:<page-range>208&#x2013;16</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1037/h0022263</pub-id>
</citation>
</ref>
<ref id="B59">
<label>59</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Brennan</surname> <given-names>SE</given-names>
</name>
<name>
<surname>Williams</surname> <given-names>M</given-names>
</name>
</person-group>. <article-title>The feeling of another&#x2019;s knowing: Prosody and filled pauses as cues to listeners about the metacognitive states of speakers</article-title>. <source>J Mem Lang</source>. (<year>1995</year>) <volume>34</volume>:<page-range>383&#x2013;98</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1006/jmla.1995.1017</pub-id>
</citation>
</ref>
<ref id="B60">
<label>60</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Swerts</surname> <given-names>M</given-names>
</name>
<name>
<surname>Krahmer</surname> <given-names>E</given-names>
</name>
</person-group>. <article-title>Audiovisual prosody and feeling of knowing</article-title>. <source>J Mem Lang</source>. (<year>2005</year>) <volume>53</volume>:<fpage>81</fpage>&#x2013;<lpage>94</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.jml.2005.02.003</pub-id>
</citation>
</ref>
<ref id="B61">
<label>61</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Krahmer</surname> <given-names>E</given-names>
</name>
<name>
<surname>Swerts</surname> <given-names>M</given-names>
</name>
</person-group>. <article-title>How children and adults produce and perceive uncertainty in audiovisual speech</article-title>. <source>Lang Speech</source>. (<year>2005</year>) <volume>48</volume>:<fpage>29</fpage>&#x2013;<lpage>53</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1177/00238309050480010201</pub-id>
</citation>
</ref>
<ref id="B62">
<label>62</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Heyes</surname> <given-names>CM</given-names>
</name>
<name>
<surname>Frith</surname> <given-names>CD</given-names>
</name>
</person-group>. <article-title>The cultural evolution of mind reading</article-title>. <source>Science</source>. (<year>2014</year>) <volume>344</volume>:<elocation-id>1243091</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1126/science.1243091</pub-id>
</citation>
</ref>
<ref id="B63">
<label>63</label>
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Astington</surname> <given-names>JW</given-names>
</name>
<name>
<surname>Dack</surname> <given-names>LA</given-names>
</name>
</person-group>. <article-title>Theory of mind</article-title>. In: <person-group person-group-type="editor">
<name>
<surname>Benson</surname> <given-names>JB</given-names>
</name>
</person-group>, editor. <source>Encyclopedia of Infant and Early Childhood Development</source>. <publisher-loc>Amsterdam</publisher-loc>: <publisher-name>Elsevier</publisher-name> (<year>2008</year>). p. <page-range>365&#x2013;79</page-range>. Available at: <uri xlink:href="https://www.sciencedirect.com/referencework/9780128165119/encyclopedia-of-infant-and-early-childhood-development#book-info">https://www.sciencedirect.com/referencework/9780128165119/encyclopedia-of-infant-and-early-childhood-development#book-info</uri>.</citation>
</ref>
<ref id="B64">
<label>64</label>
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Leslie</surname> <given-names>AM</given-names>
</name>
</person-group>. <article-title>Theory of mind</article-title>. In: <person-group person-group-type="editor">
<name>
<surname>Smelser</surname> <given-names>NJ</given-names>
</name>
<name>
<surname>Baltes</surname> <given-names>PB</given-names>
</name>
</person-group>, editors. <source>International Encyclopedia of the Social &amp; Behavioral Sciences</source>. <publisher-loc>Amsterdam</publisher-loc>: <publisher-name>Elsevier</publisher-name> (<year>2001</year>). p. <page-range>15652&#x2013;6</page-range>. Available at: <uri xlink:href="https://www.sciencedirect.com/referencework/9780080430768/international-encyclopedia-of-the-social-and-behavioral-sciences#book-info">https://www.sciencedirect.com/referencework/9780080430768/international-encyclopedia-of-the-social-and-behavioral-sciences#book-info</uri>.</citation>
</ref>
<ref id="B65">
<label>65</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kamp-Becker</surname> <given-names>I</given-names>
</name>
<name>
<surname>B&#xf6;lte</surname> <given-names>S</given-names>
</name>
</person-group>. <article-title>Autismus</article-title>. <source>M&#xfc;nchen: Ernst Reinhardt Verlag 3. vollst. &#xfc;berarb. Aufl</source>. (<year>2021</year>). doi:&#xa0;<pub-id pub-id-type="doi">10.1026/1616-3443/a000477</pub-id>
</citation>
</ref>
<ref id="B66">
<label>66</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Baron-Cohen</surname> <given-names>S</given-names>
</name>
<name>
<surname>Leslie</surname> <given-names>AM</given-names>
</name>
<name>
<surname>Frith</surname> <given-names>U</given-names>
</name>
</person-group>. <article-title>Does the autistic child have a &#x2018;theory of mind&#x2019;</article-title>? <source>Cognition</source>. (<year>1985</year>) <volume>21</volume>:<fpage>37</fpage>&#x2013;<lpage>46</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/0010-0277(85)90022-8</pub-id>
</citation>
</ref>
<ref id="B67">
<label>67</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Happ&#xe9;</surname> <given-names>FGE</given-names>
</name>
</person-group>. <article-title>The role of age and verbal ability in the theory of mind task performance of subjects with autism</article-title>. <source>Child Dev</source>. (<year>1995</year>) <volume>66</volume>(<issue>3</issue>):<page-range>843&#x2013;55</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.2307/1131954</pub-id>
</citation>
</ref>
<ref id="B68">
<label>68</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wimmer</surname> <given-names>H</given-names>
</name>
<name>
<surname>Perner</surname> <given-names>J</given-names>
</name>
</person-group>. <article-title>Beliefs about beliefs: Representation and constraining function of wrong beliefs in young children&#x2019;s understanding of deception</article-title>. <source>Cognition</source>. (<year>1983</year>) <volume>13</volume>:<page-range>103&#x2013;28</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/0010-0277(83)90004-5</pub-id>
</citation>
</ref>
<ref id="B69">
<label>69</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gabriel</surname> <given-names>TE</given-names>
</name>
<name>
<surname>Oberger</surname> <given-names>R</given-names>
</name>
<name>
<surname>Schmoeger</surname> <given-names>M</given-names>
</name>
<name>
<surname>Deckert</surname> <given-names>M</given-names>
</name>
<name>
<surname>Vockh</surname> <given-names>S</given-names>
</name>
</person-group>. <article-title>Cognitive and affective Theory of Mind in adolescence: developmental aspects and associated neuropsychological variables</article-title>. <source>Psychol Res</source>. (<year>2021</year>) <volume>85</volume>:<page-range>533&#x2013;53</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s00426-019-01263-6</pub-id>
</citation>
</ref>
<ref id="B70">
<label>70</label>
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Chevallier</surname> <given-names>C</given-names>
</name>
</person-group>. <article-title>"Theory of mind"</article-title>. In: <person-group person-group-type="editor">
<name>
<surname>Volkmar</surname> <given-names>FR</given-names>
</name>
</person-group>, editor. <source>Encyclopedia of autism spectrum disorders</source>. <publisher-name>Springer International Publishing</publisher-name>, <publisher-loc>Cham</publisher-loc> (<year>2021</year>).</citation>
</ref>
<ref id="B71">
<label>71</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tager-Flusberg</surname> <given-names>H</given-names>
</name>
<name>
<surname>Sullivan</surname> <given-names>K</given-names>
</name>
</person-group>. <article-title>A second look at second-order belief attribution in autism</article-title>. <source>J Autism Dev Disord</source>. (<year>1994</year>) <volume>24</volume>:<page-range>577&#x2013;86</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/BF02172139</pub-id>
</citation>
</ref>
<ref id="B72">
<label>72</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Iao</surname> <given-names>LS</given-names>
</name>
<name>
<surname>Leekam</surname> <given-names>SR</given-names>
</name>
</person-group>. <article-title>Nonspecificity and theory of mind: new evidence from a nonverbal false-sign task and children with autism spectrum disorders</article-title>. <source>J Exp Child Psychol</source>. (<year>2014</year>) <volume>122</volume>:<fpage>1</fpage>&#x2013;<lpage>20</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.jecp.2013.11.017</pub-id>
</citation>
</ref>
<ref id="B73">
<label>73</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shamay-Tsoory</surname> <given-names>SG</given-names>
</name>
<name>
<surname>Aharon-Peretz</surname> <given-names>J</given-names>
</name>
</person-group>. <article-title>Dissociable prefrontal networks for cognitive and affective theory of mind: A lesion study</article-title>. <source>Neuropsychologia</source>. (<year>2007</year>) <volume>45</volume>:<page-range>3054&#x2013;67</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.neuropsychologia.2007.05.021</pub-id>
</citation>
</ref>
<ref id="B74">
<label>74</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Raimo</surname> <given-names>S</given-names>
</name>
<name>
<surname>Cropano</surname> <given-names>M</given-names>
</name>
<name>
<surname>Rold&#xe1;n-Tapia</surname> <given-names>MD</given-names>
</name>
<name>
<surname>Ammendola</surname> <given-names>L</given-names>
</name>
<name>
<surname>Malangone</surname> <given-names>D</given-names>
</name>
<name>
<surname>Santangelo</surname> <given-names>G</given-names>
</name>
</person-group>. <article-title>Cognitive and affective theory of mind across adulthood</article-title>. <source>Brain Sci</source>. (<year>2022</year>) <volume>12</volume>:<elocation-id>899</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/brainsci12070899</pub-id>
</citation>
</ref>
<ref id="B75">
<label>75</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Begeer</surname> <given-names>S</given-names>
</name>
<name>
<surname>De Rosnay</surname> <given-names>M</given-names>
</name>
<name>
<surname>Lunenburg</surname> <given-names>P</given-names>
</name>
<name>
<surname>Stegge</surname> <given-names>H</given-names>
</name>
<name>
<surname>Terwogt</surname> <given-names>MM</given-names>
</name>
</person-group>. <article-title>Understanding of emotions based on counterfactual reasoning in children with autism spectrum disorders</article-title>. <source>Autism</source>. (<year>2014</year>) <volume>18</volume>:<page-range>301&#x2013;10</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1177/1362361312468798</pub-id>
</citation>
</ref>
<ref id="B76">
<label>76</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Epstude</surname> <given-names>K</given-names>
</name>
<name>
<surname>Roese</surname> <given-names>NJ</given-names>
</name>
</person-group>. <article-title>The functional theory of counterfactual thinking</article-title>. <source>Pers Soc Psychol Rev</source>. (<year>2008</year>) <volume>12</volume>:<page-range>168&#x2013;92</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1177/1088868308316091</pub-id>
</citation>
</ref>
<ref id="B77">
<label>77</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Scheeren</surname> <given-names>AM</given-names>
</name>
<name>
<surname>de Rosnay</surname> <given-names>M</given-names>
</name>
<name>
<surname>Koot</surname> <given-names>HM</given-names>
</name>
<name>
<surname>Begeer</surname> <given-names>S</given-names>
</name>
</person-group>. <article-title>Rethinking theory of mind in high-functioning autism spectrum disorder</article-title>. <source>J Child Psychol Psychiatry Allied Discip</source>. (<year>2013</year>) <volume>54</volume>:<page-range>628&#x2013;35</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/jcpp.12007</pub-id>
</citation>
</ref>
<ref id="B78">
<label>78</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kimhi</surname> <given-names>Y</given-names>
</name>
</person-group>. <article-title>Theory of mind abilities and deficits in autism spectrum disorders</article-title>. <source>Top Lang Disord</source>. (<year>2014</year>) <volume>34</volume>:<page-range>329&#x2013;43</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1097/TLD.0000000000000033</pub-id>
</citation>
</ref>
<ref id="B79">
<label>79</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Meylan</surname> <given-names>A</given-names>
</name>
</person-group>. <article-title>Epistemic emotions: a natural kind</article-title>? <source>Philos Inquiries</source>. (<year>2014</year>) <volume>2</volume>:<page-range>173&#x2013;90</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.4454/philinq.v2i1.83</pub-id>
</citation>
</ref>
<ref id="B80">
<label>80</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Silva</surname> <given-names>L</given-names>
</name>
</person-group>. <article-title>Epistemic emotions justified</article-title>. <source>Philosophies</source>. (<year>2022</year>) <volume>7</volume>:<elocation-id>104</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/philosophies7050104</pub-id>
</citation>
</ref>
<ref id="B81">
<label>81</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Morriss</surname> <given-names>J</given-names>
</name>
<name>
<surname>Tupitsa</surname> <given-names>E</given-names>
</name>
<name>
<surname>Dodd</surname> <given-names>HF</given-names>
</name>
<name>
<surname>Hirsch</surname> <given-names>CR</given-names>
</name>
</person-group>. <article-title>Uncertainty makes me emotional: uncertainty as an elicitor and modulator of emotional states</article-title>. <source>Front Psychol</source>. (<year>2022</year>) <volume>13</volume>:<elocation-id>777025</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fpsyg.2022.777025</pub-id>
</citation>
</ref>
<ref id="B82">
<label>82</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Baron-Cohen</surname> <given-names>S</given-names>
</name>
<name>
<surname>Wheelwright</surname> <given-names>S</given-names>
</name>
<name>
<surname>Hill</surname> <given-names>J</given-names>
</name>
<name>
<surname>Raste</surname> <given-names>Y</given-names>
</name>
<name>
<surname>Plumb</surname> <given-names>I</given-names>
</name>
</person-group>. <article-title>The &#x201c;Reading the mind in the eyes&#x201d; Test revised version: A study with normal adults, and adults with Asperger syndrome or high-functioning autism</article-title>. <source>J Child Psychol Psychiatry</source>. (<year>2001</year>) <volume>42</volume>:<page-range>241&#x2013;51</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1017/S0021963001006643</pub-id>
</citation>
</ref>
<ref id="B83">
<label>83</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pe&#xf1;uelas-Calvo</surname> <given-names>I</given-names>
</name>
<name>
<surname>Sareen</surname> <given-names>A</given-names>
</name>
<name>
<surname>Sevilla-Llewellyn-Jones</surname> <given-names>J</given-names>
</name>
<name>
<surname>Fern&#xe1;ndez-Berrocal</surname> <given-names>P</given-names>
</name>
</person-group>. <article-title>The &#x201c;Reading the mind in the eyes&#x201d; Test in autism-spectrum disorders comparison with healthy controls: A systematic review and meta-analysis</article-title>. <source>J Autism Dev Disord</source>. (<year>2018</year>) <volume>49</volume>:<page-range>1048&#x2013;61</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s10803-018-3814-4</pub-id>
</citation>
</ref>
<ref id="B84">
<label>84</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Livingston</surname> <given-names>LA</given-names>
</name>
<name>
<surname>Colvert</surname> <given-names>E</given-names>
</name>
<collab>the Social Relationships Study Team</collab>
<name>
<surname>Bolton</surname> <given-names>P</given-names>
</name>
<name>
<surname>Happ&#xe9;</surname> <given-names>F</given-names>
</name>
</person-group>. <article-title>Good social skills despite poor theory of mind: exploring compensation in autism spectrum disorder</article-title>. <source>J Child Psychol Psychiatry</source>. (<year>2019</year>) <volume>60</volume>:<page-range>102&#x2013;10</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/jcpp.12886</pub-id>
</citation>
</ref>
<ref id="B85">
<label>85</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Corbett</surname> <given-names>BA</given-names>
</name>
<name>
<surname>Schwartzman</surname> <given-names>JM</given-names>
</name>
<name>
<surname>Libsack</surname> <given-names>EJ</given-names>
</name>
<name>
<surname>Muscatello</surname> <given-names>RA</given-names>
</name>
<name>
<surname>Lerner</surname> <given-names>MD</given-names>
</name>
<name>
<surname>Simmons</surname> <given-names>GL</given-names>
</name>
<etal/>
</person-group>. <article-title>Camouflaging in autism: examining sex-based and compensatory models in social cognition and communication</article-title>. <source>Autism Res</source>. (<year>2021</year>) <volume>14</volume>:<page-range>127&#x2013;42</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1002/aur.2440</pub-id>
</citation>
</ref>
<ref id="B86">
<label>86</label>
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Rose</surname> <given-names>RL</given-names>
</name>
</person-group>. (<year>2017</year>). <article-title>Silent and filled pauses and speech planning in first and second language production</article-title>, in: <conf-name>Proceedings of the Workshop on Disfluency in Spontaneous Speech</conf-name>, <conf-loc>Royal Institute of Technology: Stockholm Sweden</conf-loc>, <conf-date>2017 18&#x2013;19 August</conf-date>.</citation>
</ref>
<ref id="B87">
<label>87</label>
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Fox Tree</surname> <given-names>JE</given-names>
</name>
</person-group>. <article-title>Disfluencies in spoken language</article-title>. In: <person-group person-group-type="editor">
<name>
<surname>Nadel</surname> <given-names>L</given-names>
</name>
</person-group>, editor. <source>Encyclopedia of Cognitive Science</source>, vol. <volume>1</volume>. <publisher-name>Nature Publishing Group</publisher-name>, <publisher-loc>London</publisher-loc> (<year>2003</year>). p. <page-range>983&#x2013;6</page-range>.</citation>
</ref>
<ref id="B88">
<label>88</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Belz</surname> <given-names>M</given-names>
</name>
</person-group>. <article-title>Defining filler particles: A phonetic account of the terminology, form, and grammatical classification of &#x201c;Filled pauses&#x201d;</article-title>. <source>Languages</source>. (<year>2023</year>) <volume>8</volume>:<elocation-id>57</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/languages8010057</pub-id>
</citation>
</ref>
<ref id="B89">
<label>89</label>
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Be&#x148;u&#x161;</surname> <given-names>&#x160;</given-names>
</name>
</person-group>. <article-title>Variability and stability in collaborative dialogues: turn-taking and filled pauses</article-title>. In: <source>Proceedings of Interspeech 2009</source>. <publisher-loc>Brighton, UK</publisher-loc> (<year>2009</year>). p. <page-range>796&#x2013;9</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.21437/Interspeech.2009-180</pub-id>
</citation>
</ref>
<ref id="B90">
<label>90</label>
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Gyarmathy</surname> <given-names>D</given-names>
</name>
<name>
<surname>Horv&#xe1;th</surname> <given-names>V</given-names>
</name>
</person-group>. (<year>2019</year>). <article-title>Pausing strategies with regard to speech style</article-title>, in: <conf-name>Proceedings of the Workshop on Disfluency in Spontaneous Speech 2019</conf-name>, <conf-loc>Budapest: E&#xf6;tv&#xf6;s Lor&#xe1;nd University</conf-loc>, pp. <fpage>27</fpage>&#x2013;<lpage>30</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.21862/diss-09-008-gyar-horv</pub-id>
</citation>
</ref>
<ref id="B91">
<label>91</label>
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Belz</surname> <given-names>M</given-names>
</name>
</person-group>. <article-title>Die Phonetik von &#xe4;h und &#xe4;hm</article-title>. In: <source>Akustische Variation von F&#xfc;llpartikeln im Deutschen</source>. <publisher-name>J.B. Metzler</publisher-name>, <publisher-loc>Berlin</publisher-loc> (<year>2021</year>). doi:&#xa0;<pub-id pub-id-type="doi">10.1007/978-3-662-62812-6</pub-id>
</citation>
</ref>
<ref id="B92">
<label>92</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wehrle</surname> <given-names>S</given-names>
</name>
<name>
<surname>Grice</surname> <given-names>M</given-names>
</name>
<name>
<surname>Vogeley</surname> <given-names>K</given-names>
</name>
</person-group>. <article-title>Filled Pauses Produced by Autistic Adults Differ in Prosodic Realization, but not Rate or Lexical Type</article-title>. <source>J Autism Dev Disord</source>. (<year>2023</year>) <volume>54</volume>:<page-range>2513&#x2013;25</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s10803-023-06000-y</pub-id>
</citation>
</ref>
<ref id="B93">
<label>93</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Betz</surname> <given-names>S</given-names>
</name>
<name>
<surname>Zarrie&#xdf;</surname> <given-names>S</given-names>
</name>
<name>
<surname>Sz&#xe9;kely</surname> <given-names>&#xc9;</given-names>
</name>
<name>
<surname>Wagner</surname> <given-names>P</given-names>
</name>
</person-group>. <article-title>The greennn tree &#x2014; Lengthening position influences uncertainty perception</article-title>. <source>Proc Interspeech</source>. (<year>2019</year>) <volume>2019</volume>:<page-range>3990&#x2013;4</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.21437/Interspeech.2019-2572</pub-id>
</citation>
</ref>
<ref id="B94">
<label>94</label>
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Lickley</surname> <given-names>RJ</given-names>
</name>
</person-group>. <article-title>Fluency and disfluency</article-title>. In: <person-group person-group-type="editor">
<name>
<surname>Redford</surname> <given-names>MA</given-names>
</name>
</person-group>, editor. <source>The Handbook of Speech Production</source>. <publisher-name>Wiley Online Library</publisher-name>, <publisher-loc>Chichester</publisher-loc> (<year>2015</year>). p. <page-range>445&#x2013;74</page-range>.</citation>
</ref>
<ref id="B95">
<label>95</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tits</surname> <given-names>N</given-names>
</name>
</person-group>. <article-title>Controlling the emotional expressiveness of synthetic speech: a deep learning approach</article-title>. <source>4OR</source>. (<year>2022</year>) <volume>20</volume>:<page-range>165&#x2013;66</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s10288-021-00473-2</pub-id>
</citation>
</ref>
<ref id="B96">
<label>96</label>
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Murray</surname> <given-names>IR</given-names>
</name>
<name>
<surname>Arnott</surname> <given-names>JL</given-names>
</name>
</person-group>. (<year>1996</year>). <article-title>Synthesizing emotions in speech: is it time to get excited</article-title>?, in: <conf-name>Proceedings of the International Conference on Spoken Language Processing</conf-name>, <conf-loc>Philadelphia, PA. New Castle, Delaware: University of Delaware</conf-loc>, <conf-date>1996 Oct 3-6</conf-date>, pp. <page-range>1816&#x2013;9</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.21437/ICSLP.1996-461</pub-id>
</citation>
</ref>
<ref id="B97">
<label>97</label>
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Marsi</surname> <given-names>E</given-names>
</name>
<name>
<surname>van Rooden</surname> <given-names>F</given-names>
</name>
</person-group>. (<year>2007</year>). <article-title>Expressing uncertainty with a talking head in a multimodal question-answering system</article-title>, in: <conf-name>Proceedings of the Workshop on Multimodal Output Generation</conf-name>, <conf-loc>University of Aberdeen, United Kingdom. Enschede, Netherlands: Centre for Telematics and Information Technology</conf-loc>, <conf-date>2007 Jan 25-26</conf-date>, pp. <page-range>105&#x2013;16</page-range>.</citation>
</ref>
<ref id="B98">
<label>98</label>
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Sz&#xe9;kely</surname> <given-names>&#xc9;</given-names>
</name>
<name>
<surname>Mendelson</surname> <given-names>J</given-names>
</name>
<name>
<surname>Gustafson</surname> <given-names>J</given-names>
</name>
</person-group>. (<year>2017</year>). <article-title>Synthesizing uncertainty: The interplay of vocal effort and hesitation disfluencies</article-title>, in: <conf-name>Proceedings of Interspeech</conf-name>, <conf-loc>Stockholm, Sweden</conf-loc>, <conf-date>2017 August 20-24</conf-date>, pp. <page-range>804&#x2013;8</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.21437/Interspeech.2017-1507</pub-id>
</citation>
</ref>
<ref id="B99">
<label>99</label>
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Adell</surname> <given-names>J</given-names>
</name>
<name>
<surname>Bonafonte</surname> <given-names>A</given-names>
</name>
<name>
<surname>Escudero-Mancebo</surname> <given-names>D</given-names>
</name>
</person-group>. (<year>2010</year>). <article-title>Modelling filled pauses prosody to synthesize disfluent speech</article-title>, in: <conf-name>Proceedings of Speech Prosody</conf-name>, <conf-loc>Chicago, IL</conf-loc>, <conf-date>2010, May 11-14</conf-date>, Vol. <volume>100624</volume>, pp. <fpage>1</fpage>&#x2013;<lpage>4</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.21437/SpeechProsody.2010-75</pub-id>
</citation>
</ref>
<ref id="B100">
<label>100</label>
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Andersson</surname> <given-names>S</given-names>
</name>
<name>
<surname>Georgila</surname> <given-names>K</given-names>
</name>
<name>
<surname>Traum</surname> <given-names>D</given-names>
</name>
<name>
<surname>Aylett</surname> <given-names>M</given-names>
</name>
<name>
<surname>Clark</surname> <given-names>RAJ</given-names>
</name>
</person-group>. (<year>2010</year>). <article-title>Prediction and realization of conversational characteristics by utilizing spontaneous speech for unit selection</article-title>, in: <conf-name>Proceedings of Speech Prosody</conf-name>, <conf-loc>Chicago, IL</conf-loc>, <conf-date>2010; May 11-14</conf-date>, Vol. <volume>100116</volume>, pp. <fpage>1</fpage>&#x2013;<lpage>4</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.21437/SpeechProsody.2010-89</pub-id>
</citation>
</ref>
<ref id="B101">
<label>101</label>
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Campbell</surname> <given-names>N</given-names>
</name>
<name>
<surname>Black</surname> <given-names>AW</given-names>
</name>
</person-group>. <article-title>Prosody and the selection of source units for concatenative synthesis</article-title>. In: <person-group person-group-type="editor">
<name>
<surname>van Santen</surname> <given-names>JPH</given-names>
</name>
<name>
<surname>Sproat</surname> <given-names>RW</given-names>
</name>
<name>
<surname>Olive</surname> <given-names>JP</given-names>
</name>
<name>
<surname>Hirschberg</surname> <given-names>J</given-names>
</name>
</person-group>, editors. <source>Progress in speech synthesis</source>. <publisher-name>Springer Verlag</publisher-name>, <publisher-loc>New York</publisher-loc> (<year>1996</year>). p. <page-range>279&#x2013;92</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/978-1-4612-1894-4_22</pub-id>
</citation>
</ref>
<ref id="B102">
<label>102</label>
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>H&#xf6;nemann</surname> <given-names>A</given-names>
</name>
<name>
<surname>Wagner</surname> <given-names>P</given-names>
</name>
</person-group>. (<year>2016</year>). <article-title>Synthesizing attitudes in German</article-title>, in: <conf-name>Proceedings of the 16th Speech Science and Technology Conference 2016</conf-name>, <conf-loc>Parramatta; Australia. Australia: Australasian Speech Science and Technology Association</conf-loc>, <conf-date>2016 December 6-9</conf-date>, pp. <page-range>209&#x2013;13</page-range>.</citation>
</ref>
<ref id="B103">
<label>103</label>
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Betz</surname> <given-names>S</given-names>
</name>
</person-group>. <source>Hesitations in Spoken Dialogue Systems. [dissertation]</source>. <publisher-loc>Bielefeld</publisher-loc>: <publisher-name>Universit&#xe4;t Bielefeld</publisher-name> (<year>2020</year>).</citation>
</ref>
<ref id="B104">
<label>104</label>
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Dall</surname> <given-names>R</given-names>
</name>
<name>
<surname>Yamagishi</surname> <given-names>J</given-names>
</name>
<name>
<surname>King</surname> <given-names>S</given-names>
</name>
</person-group>. (<year>2014</year>). <article-title>Rating naturalness in speech synthesis: the effect of style and expectation</article-title>, in: <conf-name>Proceedings of Speech Prosody 2014</conf-name>, <conf-loc>Dublin, Ireland</conf-loc>, <conf-date>May 20-23</conf-date>, pp. <page-range>1012&#x2013;16</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.21437/SpeechProsody.2014-192</pub-id>
</citation>
</ref>
<ref id="B105">
<label>105</label>
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Wagner</surname> <given-names>P</given-names>
</name>
<name>
<surname>Beskow</surname> <given-names>J</given-names>
</name>
<name>
<surname>Betz</surname> <given-names>S</given-names>
</name>
<name>
<surname>Edlund</surname> <given-names>J</given-names>
</name>
<name>
<surname>Gustafson</surname> <given-names>J</given-names>
</name>
<name>
<surname>Henter</surname> <given-names>GE</given-names>
</name>
<etal/>
</person-group>. (<year>2019</year>). <article-title>Speech synthesis evaluation &#x2014; State-of-the-art assessment and suggestion for a novel research program</article-title>, in: <conf-name>Proc. 10th ISCA Workshop on Speech Synthesis (SSW 10)</conf-name>, <conf-loc>Vienna, Austria</conf-loc>, <conf-date>September 20-22</conf-date>. pp. <page-range>105&#x2013;10</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.21437/SSW.2019-19</pub-id>
</citation>
</ref>
<ref id="B106">
<label>106</label>
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Lasarcyk</surname> <given-names>E</given-names>
</name>
<name>
<surname>Wollermann</surname> <given-names>C</given-names>
</name>
<name>
<surname>Schr&#xf6;der</surname> <given-names>B</given-names>
</name>
<name>
<surname>Schade</surname> <given-names>U</given-names>
</name>
</person-group>. (<year>2013</year>). <article-title>On the modelling of prosodic cues in synthetic speech: what are the effects on perceived uncertainty and naturalness?</article-title>, in: <conf-name>Proceedings of the 10th International Workshop on Natural Language Processing and Cognitive Science</conf-name>, <conf-loc>Marseille, France. International Speech Communication Association</conf-loc>, <conf-date>2013 October 15-16</conf-date>, pp. <page-range>117&#x2013;28</page-range>.</citation>
</ref>
<ref id="B107">
<label>107</label>
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Wollermann</surname> <given-names>C</given-names>
</name>
<name>
<surname>Lasarcyk</surname> <given-names>E</given-names>
</name>
<name>
<surname>Schade</surname> <given-names>U</given-names>
</name>
<name>
<surname>Schr&#xf6;der</surname> <given-names>B</given-names>
</name>
</person-group>. (<year>2013</year>). <article-title>Disfluencies and uncertainty perception - evidence from a human-machine scenario</article-title>, in: <conf-name>Proceedings of Diss 2013 the 6th Workshop on Disfluency in Spontaneous Speech</conf-name>, <conf-loc>Stockholm, Sweden. Sweden: Department of Speech, Music and Hearing, KTH</conf-loc>, <conf-date>2013 August 21-23</conf-date>, pp. <page-range>73&#x2013;6</page-range>.</citation>
</ref>
<ref id="B108">
<label>108</label>
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Wollermann</surname> <given-names>C</given-names>
</name>
<name>
<surname>Lasarcyk</surname> <given-names>E</given-names>
</name>
</person-group>. (<year>2007</year>). <article-title>Modeling and perceiving of (Un)Certainty in articulatory speech synthesis</article-title>, in: <conf-name>Proceedings of the 6th ISCA Workshop on Speech Synthesis</conf-name>, <conf-loc>Bonn, Germany. Pittsburgh: International Speech Communication Association</conf-loc>, <conf-date>2007 August 22-24</conf-date>. pp. <page-range>40&#x2013;5</page-range>.</citation>
</ref>
<ref id="B109">
<label>109</label>
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Bellinghausen</surname> <given-names>C</given-names>
</name>
<name>
<surname>Fangmeier</surname> <given-names>T</given-names>
</name>
<name>
<surname>Schr&#xf6;der</surname> <given-names>B</given-names>
</name>
<name>
<surname>Keller</surname> <given-names>J</given-names>
</name>
<name>
<surname>Drechsel</surname> <given-names>S</given-names>
</name>
<name>
<surname>Birkholz</surname> <given-names>P</given-names>
</name>
<etal/>
</person-group>. (<year>2019</year>). <article-title>On the role of disfluent speech for uncertainty in articulatory speech synthesis</article-title>, in: <conf-name>Proceedings of DiSS 2019: The 9th Workshop on Disfluency in Spontaneous Speech</conf-name>, <conf-loc>Budapest, Hungary. Hungary: E&#xf6;tv&#xf6;s Lor&#xe1;nd University (ELTE)</conf-loc>, <conf-date>2019 September 12-13</conf-date>, pp. <fpage>39</fpage>&#x2013;<lpage>42</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.21862/diss-09-011-bell-etal</pub-id>
</citation>
</ref>
<ref id="B110">
<label>110</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Birkholz</surname> <given-names>P</given-names>
</name>
</person-group>. <article-title>Modeling consonant-vowel coarticulation for articulatory speech synthesis</article-title>. <source>PLoS One</source>. (<year>2013</year>) <volume>8</volume>:<elocation-id>e60603</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1371/journal.pone.0060603</pub-id>
</citation>
</ref>
<ref id="B111">
<label>111</label>
<citation citation-type="web">
<person-group person-group-type="author">
<collab>Praat</collab>
</person-group>. <source>Praat: doing phonetics by computer</source> (<year>2023</year>). Available online at: <uri xlink:href="https://www.fon.hum.uva.nl/praat/">https://www.fon.hum.uva.nl/praat/</uri> (Accessed <access-date>August 28, 2024</access-date>).</citation>
</ref>
<ref id="B112">
<label>112</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Field</surname> <given-names>AP</given-names>
</name>
<name>
<surname>Wilcox</surname> <given-names>RR</given-names>
</name>
</person-group>. <article-title>Robust statistical methods: A primer for clinical psychology and experimental psychopathology researchers</article-title>. <source>Behav Res Ther</source>. (<year>2017</year>) <volume>98</volume>:<fpage>19</fpage>&#x2013;<lpage>38</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.brat.2017.05.013</pub-id>
</citation>
</ref>
<ref id="B113">
<label>113</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mair</surname> <given-names>P</given-names>
</name>
<name>
<surname>Wilcox</surname> <given-names>R</given-names>
</name>
</person-group>. <article-title>Robust statistical methods in R using the WRS2 package</article-title>. <source>Behav Res Methods</source>. (<year>2020</year>) <volume>52</volume>:<page-range>464&#x2013;88</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.3758/s13428-019-01246-w</pub-id>
</citation>
</ref>
<ref id="B114">
<label>114</label>
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Wilcox</surname> <given-names>RR</given-names>
</name>
</person-group>. <source>Introduction to Robust Estimation and Hypothesis Testing</source>. <edition>5th ed</edition>. <publisher-loc>Elsevier</publisher-loc>: <publisher-name>Academic Press</publisher-name> (<year>2022</year>).</citation>
</ref>
<ref id="B115">
<label>115</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wilcox</surname> <given-names>RR</given-names>
</name>
<name>
<surname>Rousselet</surname> <given-names>GA</given-names>
</name>
</person-group>. <article-title>An updated guide to robust statistical methods in neuroscience</article-title>. <source>Curr Protoc</source>. (<year>2023</year>) <volume>3</volume>:<fpage>1</fpage>&#x2013;<lpage>31</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1002/cpz1.719</pub-id>
</citation>
</ref>
<ref id="B116">
<label>116</label>
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Tan</surname> <given-names>X</given-names>
</name>
</person-group>. <article-title>Neural text-to-speech synthesis</article-title>. In: <source>Artificial Intelligence: Foundations, Theory, and Algorithms</source>. <publisher-name>Springer Nature</publisher-name>, <publisher-loc>Singapore</publisher-loc> (<year>2023</year>).</citation>
</ref>
<ref id="B117">
<label>117</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ding</surname> <given-names>H</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>Y</given-names>
</name>
</person-group>. <article-title>Speech prosody in mental disorders</article-title>. <source>Annu Rev Linguistics</source>. (<year>2023</year>) <volume>9</volume>:<page-range>335&#x2013;55</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1146/annurev-linguistics-030421-065139</pub-id>
</citation>
</ref>
<ref id="B118">
<label>118</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Robin</surname> <given-names>J</given-names>
</name>
<name>
<surname>Harrison</surname> <given-names>JE</given-names>
</name>
<name>
<surname>Kaufman</surname> <given-names>LD</given-names>
</name>
<name>
<surname>Rudzicz</surname> <given-names>F</given-names>
</name>
<name>
<surname>Simpson</surname> <given-names>W</given-names>
</name>
<name>
<surname>Yancheva</surname> <given-names>M</given-names>
</name>
</person-group>. <article-title>Evaluation of speech-based digital biomarkers: review and recommendations</article-title>. <source>Digit Biomark</source>. (<year>2020</year>) <volume>4</volume>:<fpage>99</fpage>&#x2013;<lpage>108</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1159/000510820</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>