<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article article-type="brief-report" dtd-version="2.3" xml:lang="en" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Virtual Real.</journal-id>
<journal-title>Frontiers in Virtual Reality</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Virtual Real.</abbrev-journal-title>
<issn pub-type="epub">2673-4192</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1377210</article-id>
<article-id pub-id-type="doi">10.3389/frvir.2024.1377210</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Virtual Reality</subject>
<subj-group>
<subject>Brief Research Report</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Towards believable and educational conversations with virtual patients</article-title>
<alt-title alt-title-type="left-running-head">Graf et al.</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/frvir.2024.1377210">10.3389/frvir.2024.1377210</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Graf</surname>
<given-names>Linda</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2607837/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Sykownik</surname>
<given-names>Philipp</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1018154/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Gradl-Dietsch</surname>
<given-names>Gertraud</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2521696/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Masuch</surname>
<given-names>Maic</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/912346/overview"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>Entertainment Computing Group</institution>, <institution>University of Duisburg-Essen</institution>, <addr-line>Duisburg</addr-line>, <country>Germany</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Department of Child and Adolescent Psychiatry and Psychotherapy</institution>, <institution>University Hospital Essen of the University of Duisburg-Essen</institution>, <addr-line>Essen</addr-line>, <addr-line>North Rhine-Westphalia</addr-line>, <country>Germany</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1083353/overview">Bastian Kordyaka</ext-link>, University of Bremen, Germany</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2644146/overview">Silvia Dopler</ext-link>, University of Applied Sciences Upper Austria, Austria</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2674125/overview">Isabella Saccardi</ext-link>, Utrecht University, Netherlands</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Linda Graf, <email>linda.graf@uni-due.de</email>
</corresp>
</author-notes>
<pub-date pub-type="epub">
<day>21</day>
<month>05</month>
<year>2024</year>
</pub-date>
<pub-date pub-type="collection">
<year>2024</year>
</pub-date>
<volume>5</volume>
<elocation-id>1377210</elocation-id>
<history>
<date date-type="received">
<day>26</day>
<month>01</month>
<year>2024</year>
</date>
<date date-type="accepted">
<day>08</day>
<month>04</month>
<year>2024</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2024 Graf, Sykownik, Gradl-Dietsch and Masuch.</copyright-statement>
<copyright-year>2024</copyright-year>
<copyright-holder>Graf, Sykownik, Gradl-Dietsch and Masuch</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>Virtual Reality (VR) technology allows the design and application of realistic but adaptive learning environments in medical education. In particular, virtual patient systems have logistical and methodological advantages compared to non-computerized interventions. However, evidence for their effectiveness is fragmented as any educational domain introduces its requirements regarding learning goals, measurements of learning outcomes, and application design. In this context, we present preliminary results of evaluating a VR training application for conducting a clinical interview to diagnose mental disorders in children and adolescents using virtual patients. The evaluation focuses on design elements related to the virtual patient&#x2019;s appearance and natural language capabilities. Our results indicate that our virtual patient design is highly believable and that our dialog system is satisfying. However, conversational flow requires optimization. We discuss design directions and potential enhancements for learner-virtual patient interactions in VR and address future operations to evaluate the effectiveness of our approach.</p>
</abstract>
<kwd-group>
<kwd>virtual patients</kwd>
<kwd>emotional virtual agents</kwd>
<kwd>embodied digital technology</kwd>
<kwd>adaptive virtual environments</kwd>
<kwd>medical education</kwd>
<kwd>agent design</kwd>
<kwd>human-agent interaction</kwd>
<kwd>virtual reality</kwd>
</kwd-group>
<contract-sponsor id="cn001">Universit&#xe4;t Duisburg-Essen<named-content content-type="fundref-id">10.13039/501100008349</named-content>
</contract-sponsor>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Virtual Reality and Human Behaviour</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec id="s1">
<title>1 Introduction</title>
<p>Extended reality systems like VR have become increasingly relevant as a means for medical education <xref ref-type="bibr" rid="B32">Kononowicz et al., 2015</xref>, <xref ref-type="bibr" rid="B31">Kononowicz et al., 2019</xref>; <xref ref-type="bibr" rid="B11">Campillos-Llanos et al., 2020</xref>; <xref ref-type="bibr" rid="B39">Pantziaras et al., 2015</xref>; <xref ref-type="bibr" rid="B35">Mavrogiorgou et al., 2022</xref>; <xref ref-type="bibr" rid="B21">Graf et al., 2023b</xref>. Utilizing its high degree of sensory immersion and natural interaction affordances, VR enables the simulation of face-to-face interaction scenarios within an adaptive learning environment that is cost-effective, scalable, and applicable in a standardized way for different learners. Further, VR-simulated medical scenarios provide training opportunities in a less stressful learning environment using embodied digital technologies like virtual patients (VP) <xref ref-type="bibr" rid="B3">Barry Issenberg et al., 2005</xref>; <xref ref-type="bibr" rid="B14">Cook et al., 2010</xref>. A VP is &#x201c;a specific type of computer program that simulates real-life clinical scenarios; learners emulate the roles of healthcare providers to obtain a history, conduct a physical exam, and make diagnostic and therapeutic decisions&#x201d; <xref ref-type="bibr" rid="B12">Candler (2007)</xref>. In a real-world educational context, access to patients for means of training is usually limited, and therefore, it is not feasible to provide a large group of students with individual patient contact. Thus, VPs are already used in medical education and show several advantages <xref ref-type="bibr" rid="B40">Plackett et al., 2022</xref>; <xref ref-type="bibr" rid="B30">Kocaballi et al., 2019</xref>. Compared to conventional interventions, like simulation patients (i.e., role plays with actors), VPs are independent of student schedules <xref ref-type="bibr" rid="B14">Cook et al. 
(2010)</xref>, or the ability of the actors to portray the patients authentically <xref ref-type="bibr" rid="B46">Wuendrich et al. (2012)</xref>. In particular, in pediatric contexts, the lack of children simulation patients introduces a fundamental challenge in training interaction with young patients. Besides those benefits, research is still ongoing to assess the learning effectiveness of using VPs. Several review articles report that VP systems show positive learning effects on clinical reasoning and knowledge acquisition in comparison with no practical intervention, but relatively small or no effects compared to conventional or non-computerized interventions <xref ref-type="bibr" rid="B40">Plackett et al., 2022</xref>; <xref ref-type="bibr" rid="B14">Cook et al., 2010</xref>; <xref ref-type="bibr" rid="B37">Milne-Ives et al., 2020</xref>. Thereby, design elements like specific feedback mechanisms <xref ref-type="bibr" rid="B36">McGaghie et al., 2010</xref>; <xref ref-type="bibr" rid="B3">Barry Issenberg et al., 2005</xref> and the level of interactivity <xref ref-type="bibr" rid="B14">Cook et al. (2010)</xref> have been discussed as crucial factors for a positive learning outcome. However, the reviews conclude that it is hard to generalize the results, as on the one hand, the VP systems show a great variety in the design, their aim, as well as in the measurement of the learning outcome (e.g., clinical reasoning) <xref ref-type="bibr" rid="B37">Milne-Ives et al., 2020</xref>; <xref ref-type="bibr" rid="B14">Cook et al., 2010</xref>; <xref ref-type="bibr" rid="B40">Plackett et al., 2022</xref>. The evaluation of specific design elements of VP systems has received less attention in research so far. Our work addresses this gap and explores design elements expected to convey a sense of a &#x201c;genuine&#x201d; social interaction, which can enhance learning motivation when using systems with virtual tutor agents <xref ref-type="bibr" rid="B4">Baylor (2011)</xref>. 
Specifically, we focus on design elements related to the <italic>appearance of a VP</italic> and its <italic>natural language capabilities</italic> and investigate whether these elements are decisive for the perceived believability of the interaction between learners and a VP in a specific educational context. In the following, we review related work on virtual patient systems and their design. Then, we describe our VR application and its interim evaluation. The paper concludes by discussing our preliminary findings regarding future research implications.</p>
<p>This brief research report presents preliminary results of the ongoing development of an educational VR application for learning how to conduct a clinical interview for diagnosis of mental disorders in children and adolescents using VPs. In an interim evaluation, we assessed a VP&#x2019;s believability and conversational flow and its potential to promote future learning outcomes based on how users rate the design elements 1) VP&#x2019;s appearance and 2) its conversational capabilities.</p>
<sec id="s1-1">
<title>1.1 Learning effects of virtual patient systems in medical education</title>
<p>VP systems can provide explicit medical skills training recommended for health professionals&#x2019; education to reduce the impact of future diagnostic errors and potential patient harm <xref ref-type="bibr" rid="B13">Cleland et al., 2009</xref>; <xref ref-type="bibr" rid="B2">Balogh et al., 2015</xref>. Several research projects are investigating the use of VP systems in the education of medical students <xref ref-type="bibr" rid="B11">Campillos-Llanos et al., 2020</xref>; <xref ref-type="bibr" rid="B39">Pantziaras et al., 2015</xref>; <xref ref-type="bibr" rid="B35">Mavrogiorgou et al., 2022</xref>; <xref ref-type="bibr" rid="B21">Graf et al., 2023b</xref>. Thereby, the art of VPs can vary from chatbots <xref ref-type="bibr" rid="B10">Cameron et al. (2019)</xref> to embodied conversation virtual agents <xref ref-type="bibr" rid="B11">Campillos-Llanos et al., 2020</xref>; <xref ref-type="bibr" rid="B39">Pantziaras et al., 2015</xref>. They can be accessible via different devices like computers <xref ref-type="bibr" rid="B39">Pantziaras et al., 2015</xref>; <xref ref-type="bibr" rid="B11">Campillos-Llanos et al., 2020</xref> or VR headsets <xref ref-type="bibr" rid="B35">Mavrogiorgou et al., 2022</xref>; <xref ref-type="bibr" rid="B21">Graf et al., 2023b</xref>. Several review articles have investigated the effectiveness of VP systems over the past years <xref ref-type="bibr" rid="B14">Cook et al., 2010</xref>; <xref ref-type="bibr" rid="B37">Milne-Ives et al., 2020</xref>; <xref ref-type="bibr" rid="B36">McGaghie et al., 2010</xref>; <xref ref-type="bibr" rid="B40">Plackett et al., 2022</xref>; <xref ref-type="bibr" rid="B30">Kocaballi et al., 2019</xref>; <xref ref-type="bibr" rid="B27">Isaza-Restrepo et al., 2018</xref>. A systematic review by <xref ref-type="bibr" rid="B14">Cook et al. (2010)</xref> evaluated computerized VPs, especially in educating health professionals on the learning outcome. 
They also focused on the design features of the respective virtual patients. Their review included 48 articles, including VPs for medicine students, nurses, and other health professionals. Their results show that VPs show positive learning effects on clinical reasoning and knowledge acquisition compared to no intervention but relatively small effects compared to non-computerized interventions. Regarding the design features, they could show that repetition, extended feedback from the VP system, and explicitly contrasting cases can improve learning outcomes. Furthermore, features essential for the students were natural case progression (including collecting data, offering more and less restricted options, and adapting to the actions of learners), case realism, realistic dialogue flow, and working together in a group of students. Another later review by <xref ref-type="bibr" rid="B40">Plackett et al. (2022)</xref> also investigated the effectiveness of VPs, especially regarding clinical reasoning skills. They included 19 research articles covering VP systems from a range of disciplines. Only 58% of the reviewed studies reported significant positive effects of the VP systems on clinical reasoning skills, while 21% indicated mixed effects and 21% no effects. However, compared to other teaching methods (i.e., tutorials), 75% of the students showed no effects. Thus, VP systems seem to outperform having no intervention but not other teaching interventions regarding improved clinical reasoning skills. Their review also identified two main intervention features in VP systems. Most of the VP systems (68%) use feedback on the learners&#x2019; performance and thus align with recommendations from studies about simulation-based learning <xref ref-type="bibr" rid="B42">Schubach et al., 2017</xref>; <xref ref-type="bibr" rid="B27">Isaza-Restrepo et al., 2018</xref>. 50% implement a high level of interactivity, requiring the learners to gather information from the VP. 
Another review by <xref ref-type="bibr" rid="B37">Milne-Ives et al. (2020)</xref>, focused on evaluating conversational agents in healthcare that are supported by artificial intelligence. Again, the review indicates positive or mixed effectiveness (76.7%) of the VP systems. Additionally, the majority of the reviewed VP systems seems to have good usability (90%) and user satisfaction (83.9%). Further, qualitative user feedback revealed that the most common complaint with conversational agents was poor comprehension due to a lack of vocabulary, inaccurate voice recognition, or improper word input error management. Users disliked the repetitive conversations, and the conversational agents frequently had to ask questions more than once to process the response. Furthermore, negative aspects were the difficulty of empathizing with the VP and the lack of representation of the situation&#x2019;s complexity by the agent. They liked that VPs provided a risk-free learning environment, as they were not actual patients.</p>
<p>There are just as many disciplines for VP systems as there are in the education of medical students, not only in practicing ambulatory medicine <xref ref-type="bibr" rid="B9">Buysse et al. (2002)</xref>, medical ethics <xref ref-type="bibr" rid="B19">Fleetwood et al. (2000)</xref>, but also for mental health assessment skills <xref ref-type="bibr" rid="B45">Washburn et al. (2016)</xref> or diagnostics skills <xref ref-type="bibr" rid="B35">Mavrogiorgou et al., 2022</xref>. <xref ref-type="bibr" rid="B39">Pantziaras et al. (2015)</xref> developed an interactive desktop application where medical assistants conducted a psychiatric interview. The VP responded with pre-recorded video sequences. They can also physically examine the patient and order laboratory and imaging examinations. The learners then draw up a differential diagnosis and a treatment plan. In addition, they receive feedback from the patient regarding the consultation and from a virtual consultant who refers to the clinical performance. Their results show that the acquisition of basic knowledge in the field of psychiatry was improved. <xref ref-type="bibr" rid="B35">Mavrogiorgou et al. (2022)</xref> also developed a VP system for adult psychiatry using VR and embodied agents that allows students to interview an embodied VP using natural language input and output. However, this system still needs to be evaluated.</p>
</sec>
<sec id="s1-2">
<title>1.2 Design elements of virtual patient systems</title>
<p>To design embodied virtual agents in a learning context, <xref ref-type="bibr" rid="B18">Doering et al. (2008)</xref> developed a framework that implies an agent should be attentive and responsive during the interaction and ready to respond. It should be able to reply to queries and obtain feedback. The messages it communicates should be adapted to the user&#x2019;s experience and needs and contain congruent verbal and non-verbal elements. Finally, the agent should awaken believability and trust. The believability of virtual characters describes the acceptance that someone or something in a virtual world is perceived as real <xref ref-type="bibr" rid="B1">Allbeck and Badler (2001)</xref>. Aspects that play an essential role in increasing the believability of virtual characters can be their appearance, body language, and voice <xref ref-type="bibr" rid="B33">Lim and Aylett 2007</xref>; <xref ref-type="bibr" rid="B15">Demeure et al., 2011</xref> or interactivity <xref ref-type="bibr" rid="B29">Knoppel 2009</xref>; <xref ref-type="bibr" rid="B16">De Rosis et al., 2003</xref>. <xref ref-type="bibr" rid="B5">Baylor and Kim (2009)</xref> showed in their study that a visible and physically present agent positively influenced users&#x2019; motivation compared to a voice or a text box. Thereby, the appearance of virtual characters affects a player&#x2019;s perception. For example, while <xref ref-type="bibr" rid="B5">Baylor and Kim (2009)</xref> showed that realistically designed agents were more beneficial, as cartoon-style agents reduced motivation in users, <xref ref-type="bibr" rid="B20">Graf et al. (2023a)</xref> showed that a comic-like and even animal-like virtual character could influence the emotional experience, as well as the motivation and performance of the players. Again, <xref ref-type="bibr" rid="B47">Zibrek et al. 
(2018)</xref> showed that participants were more concerned with a realistically rendered character than with characters rendered in less realistic styles. Considering the <italic>uncanny valley</italic> effect is crucial when choosing a degree of realism. It describes the sudden change of a user&#x2019;s evaluation of an artificial human from positive to negative if it approaches photorealism but still has subtle characteristics that limit its realism <xref ref-type="bibr" rid="B38">Mori et al. (2012)</xref>. Besides the appearance, an appropriate display of emotions is crucial for the believability of virtual agents. A study by <xref ref-type="bibr" rid="B33">Lim and Aylett (2007)</xref> showed that virtual agents showing appropriate emotions are more believable than those showing no emotions. Studies showed that learners liked VPs showing empathy and when having a personality <xref ref-type="bibr" rid="B10">Cameron et al., 2019</xref>; <xref ref-type="bibr" rid="B17">Dimeff et al., 2020</xref> or disliked it when it was missing <xref ref-type="bibr" rid="B34">Ly et al., 2017</xref>; <xref ref-type="bibr" rid="B8">Borja-Hart et al., 2019</xref>. <xref ref-type="bibr" rid="B14">Cook et al. (2010)</xref> define interactivity as the &#x201c;degree to which the course design encouraged learners to engage cognitively.&#x201d; Former research results are inconclusive about the effect of interactivity on learning outcomes <xref ref-type="bibr" rid="B24">Homer and Plass (2014)</xref>. According to studies, increased interactivity can encourage more engaged users and deeper learning, but it can also increase cognitive load, which can impede learning <xref ref-type="bibr" rid="B28">Kalet et al., 2012</xref>; <xref ref-type="bibr" rid="B24">Homer and Plass 2014</xref>. 
In the VP context, studies showed that learners liked the interactivity of the VPs <xref ref-type="bibr" rid="B26">Hudlicka 2013</xref>; <xref ref-type="bibr" rid="B34">Ly et al., 2017</xref> or wished for more interactivity <xref ref-type="bibr" rid="B10">Cameron et al., 2019</xref>; <xref ref-type="bibr" rid="B23">H&#xe5;vik et al., 2019</xref>.</p>
</sec>
</sec>
<sec sec-type="materials|methods" id="s2">
<title>2 Materials and methods</title>
<sec id="s2-1">
<title>2.1 Procedure</title>
<p>We evaluated our application with medical students. Each student had a conversation with a 14-year-old female VP suffering from depression using the Meta Quest Pro. Before entering the virtual world, the catalog was shown and explained to the participants. They interacted with the VP in the virtual world for 25.13&#x2013;54.56&#xa0;min (<italic>M</italic> &#x3d; 33.7, <italic>SD</italic> &#x3d; 11.7) while they had to ask all 58 questions from the catalog. Therefore, the time they spent in VR was at least the time they needed to ask all the given questions. The total number of questions for each participant could differ as participants could also try to ask questions not included in the catalog. It could take longer depending on how long it took the participants to ask the questions. After that, they filled out question items regarding the believability of the dialog between them and the VP and its appearance. In the end, we conducted a 10-min interview with each participant to identify the advantages and pitfalls of their conversations with the virtual patient and how believable they perceived the situation. Furthermore, we tracked the progress of the questions, the answers given by the VP, and whether and how many hints the participants had asked for while using the application. As shown in <xref ref-type="fig" rid="F1">Figure 1</xref>, for example, in the category <italic>habits and consumption behavior</italic>, the first hint shows the keyword <italic>alcohol</italic>, which means that the user should ask a question regarding the VP&#x2019;s alcohol consumption. The second hint then shows the specific question <italic>Do you drink alcohol?</italic>
</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>Left: The tablet shows students the category of question and how many questions belong to the category by the filling circles. Furthermore, it shows the level one and level two hint button, that students can press for help. Right: The VP sitting on the virtual couch in front of a student.</p>
</caption>
<graphic xlink:href="frvir-05-1377210-g001.tif"/>
</fig>
</sec>
<sec id="s2-2">
<title>2.2 Participants</title>
<p>We recruited five medical students (3 female, 2 male) aged 23&#x2013;26 (<italic>M</italic> &#x3d; 24.2, <italic>SD</italic> &#x3d; 1.3) in their 7th to 12th clinical semester via advertisement on a digital bulletin board; previous experience in child and adolescent psychiatry was not mandatory. Two students had prior experience in psychiatry diagnostics and relatively little VR (<italic>M</italic> &#x3d; 3.6, <italic>SD</italic> &#x3d; 0.89, <italic>Mdn</italic> &#x3d; 4.00), gaming (<italic>M</italic> &#x3d; 3.8, <italic>SD</italic> &#x3d; 2.17, <italic>Mdn</italic> &#x3d; 3.00), or experiences with virtual agents (<italic>M</italic> &#x3d; 2.0, <italic>SD</italic> &#x3d; 1.23, <italic>Mdn</italic> &#x3d; 2.00) measured on a scale from 1 (&#x3d; no experience at all) to 7 (&#x3d; a lot experience). The participants filled out the questionnaire and were interviewed in German.</p>
</sec>
<sec id="s2-3">
<title>2.3 VR application</title>
<p>We designed a VR application for the teaching of conducting clinical interviews and diagnosing mental disorders in child and adolescent psychiatry using embodied virtual agents as patients. The application is structured by following a catalog of questions we created based on the AMDP system together with a child and adolescent psychiatrist (see the catalog in the <xref ref-type="sec" rid="s12">Supplementary Material</xref>). The AMDP system<xref ref-type="fn" rid="fn1">
<sup>1</sup>
</xref> is an international standard for the methodical documentation of psychiatric diagnoses, developed by a German association for methodology and documentation in psychiatry. The catalog covers 17 categories relating to different symptoms or characteristics (e.g., <italic>habits and consumption behavior</italic> or <italic>affective disorders</italic>). Each category then contains one to six subcategories (e.g., <italic>alcohol, drug, and media consumption</italic>, or <italic>aggressiveness and mood swing</italic>) the students need to address in their interview. A pre-defined sequence of the question catalog guides the students through the conversation. However, asking about any symptoms and repeating individual questions is possible. The application uses natural language understanding (NLU) as input using Wit.ai<xref ref-type="fn" rid="fn2">
<sup>2</sup>
</xref> by Meta. Wit.ai converts the user&#x2019;s question into text and then assigns it to an existing intention, which outputs a corresponding voice output from a selection of prerecorded audios (see <xref ref-type="fig" rid="F2">Figure 2</xref>). Based on the virtual patient&#x2019;s answers, the students eventually have to decide on a psychiatric diagnosis. The VP is an embodied virtual model of a teenager with stylized aesthetics. We chose a stylized yet realistic approach compared to photorealism due to the risk of the <italic>uncanny valley</italic> effect <xref ref-type="bibr" rid="B38">Mori et al. (2012)</xref>. Furthermore, studies show that photorealism is unnecessary to achieve behavioral realism <xref ref-type="bibr" rid="B7">Blascovich et al. (2002)</xref> or believability <xref ref-type="bibr" rid="B21">Graf et al. (2023b)</xref>. Further, we decided against an even more stylized design, as in previous discussions, medical students indicated the preference for a realistic VP <xref ref-type="bibr" rid="B21">Graf et al. (2023b)</xref>. Accordingly, we made the facial features, skin, hair, and clothes childlike. We also mapped the prerecorded facial expressions of a real actress onto the agent&#x2019;s face and created a corresponding body language in the form of animations. We used Elevenlabs<xref ref-type="fn" rid="fn3">
<sup>3</sup>
</xref>, a generative voice AI, to synthesize a natural voice for the VP&#x2019;s versatile responses retaining the emotionality of a human voice. We also created a user interface integrated into a virtual tablet the users have in the virtual world (see <xref ref-type="fig" rid="F1">Figure 1</xref>). The tablet gives the students an overview of the catalog of questions. It displays the current category and allows students to ask for two levels of hints. The first hint shows the keyword of the subcategory of questions, and the second hint shows a sample question if students do not know how to ask for the respective symptom. We chose this two-level hint system so that students can use the application regardless of their previous knowledge of child and adolescent psychiatry.</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>This is a visualization of the VR application and how teachers and students interact with it. Through a web interface, the teacher can choose a patient with whom the student will interact. The student can ask questions using natural language input. This input is transcribed by using Wit.ai, allocating the recognized intention to a certain response of the current VP. The allocated responses then trigger a respective audio and animation in the VP.</p>
</caption>
<graphic xlink:href="frvir-05-1377210-g002.tif"/>
</fig>
</sec>
<sec id="s2-4">
<title>2.4 Measures</title>
<sec id="s2-4-1">
<title>2.4.1 Quantitative measures</title>
<p>To measure the believability of the dialog and the VP and its appearance, we used self-formulated items measured on a scale from 1 (&#x3d; do not agree at all) to 7 (&#x3d; fully agree), see <xref ref-type="table" rid="T1">Table 1</xref>. Furthermore, we collected demographic data such as age and gender as well as previous experience with a single-item each &#x201c;<italic>Please rate how familiar you are with the concept of [virtual reality/virtual agents/games]</italic>&#x201d; on a scale from 1 (&#x3d; not at all familiar) to 7 (&#x3d; very familiar). Therefore, we gave participants the following definition of a virtual agent: <italic>virtual agents are the visual representation of a character (e.g., a person) whose behavior is controlled by a computer algorithm</italic>.</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>Descriptive values of the quantitative data regarding believability of the dialog and the VP&#x2019;s appearance.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left"/>
<th align="center">
<italic>M</italic>(<italic>SD</italic>)</th>
<th align="center">
<italic>Mdn</italic>
</th>
<th align="center">
<italic>Range</italic>
</th>
<th align="center">Scale</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td colspan="5" align="left">
<bold>Believability of the dialog</bold>
</td>
</tr>
<tr>
<td align="left">1. I perceived the conversation with the VP as believable</td>
<td align="center">4.2 (1.92)</td>
<td align="center">5.00</td>
<td align="center">1&#x2013;6</td>
<td align="center">1&#x2013;7</td>
</tr>
<tr>
<td align="left">2. The conversation with the VP felt real</td>
<td align="center">3.6 (1.52)</td>
<td align="center">4.00</td>
<td align="center">1&#x2013;5</td>
<td align="center">1&#x2013;7</td>
</tr>
<tr>
<td align="left">3. The VP recognizes my question and answers it appropriately</td>
<td align="center">3.8 (1.64)</td>
<td align="center">3.00</td>
<td align="center">2&#x2013;6</td>
<td align="center">1&#x2013;7</td>
</tr>
<tr>
<td align="left">4. The VP is too slow in answering the questions</td>
<td align="center">4.2 (1.64)</td>
<td align="center">5.00</td>
<td align="center">2&#x2013;6</td>
<td align="center">1&#x2013;7</td>
</tr>
<tr>
<td align="left">5. I am satisfied with the accuracy of the VP&#x2019;s answer</td>
<td align="center">5.2 (1.92)</td>
<td align="center">6.00</td>
<td align="center">2&#x2013;7</td>
<td align="center">1&#x2013;7</td>
</tr>
<tr>
<td align="left">6. I had difficulties because the VP did not recognize my questions</td>
<td align="center">4.8 (1.64)</td>
<td align="center">5.00</td>
<td align="center">2&#x2013;6</td>
<td align="center">1&#x2013;7</td>
</tr>
<tr>
<td align="left">7. I had to use a lot of tips to get through the conversation</td>
<td align="center">6.0 (1.00)</td>
<td align="center">6.00</td>
<td align="center">5&#x2013;7</td>
<td align="center">1&#x2013;7</td>
</tr>
<tr>
<td align="left">8. Overall, I am satisfied with the VP&#x2019;s voice recognition system</td>
<td align="center">4.4 (1.82)</td>
<td align="center">5.00</td>
<td align="center">2&#x2013;6</td>
<td align="center">1&#x2013;7</td>
</tr>
<tr>
<td colspan="5" align="left">
<bold>Believability of the VP&#x2019;s appearance</bold>
</td>
</tr>
<tr>
<td align="left">1. Overall, I perceived the VP as believable</td>
<td align="center">5.0 (2.35)</td>
<td align="center">6.00</td>
<td align="center">1&#x2013;7</td>
<td align="center">1&#x2013;7</td>
</tr>
<tr>
<td align="left">2. I perceived the VP&#x2019;s emotional facial expressions as believable</td>
<td align="center">4.4 (1.52)</td>
<td align="center">4.00</td>
<td align="center">3&#x2013;6</td>
<td align="center">1&#x2013;7</td>
</tr>
<tr>
<td align="left">3. I perceived the VP&#x2019;s posture as believable</td>
<td align="center">5.6 (2.07)</td>
<td align="center">6.00</td>
<td align="center">2&#x2013;7</td>
<td align="center">1&#x2013;7</td>
</tr>
<tr>
<td align="left">4. I perceived the VP&#x2019;s voice as believable</td>
<td align="center">5.4 (1.95)</td>
<td align="center">6.00</td>
<td align="center">2&#x2013;7</td>
<td align="center">1&#x2013;7</td>
</tr>
<tr>
<td align="left">5. I perceived the virtual character as appropriate for the situation</td>
<td align="center">6.8 (0.45)</td>
<td align="center">7.00</td>
<td align="center">6&#x2013;7</td>
<td align="center">1&#x2013;7</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s2-4-2">
<title>2.4.2 Qualitative measures</title>
<p>To further evaluate the believability and investigate the pitfalls and advantages of our application, we conducted a semi-structured 10-min interview with each participant at the end of the evaluation. Five questions provided the basis for the interview:<list list-type="simple">
<list-item>
<p>1. How did you like the application?</p>
</list-item>
<list-item>
<p>2. How believable did you perceive the conversation between you and the virtual patient?</p>
</list-item>
<list-item>
<p>3. How did you generally like the way the conversation was conducted?</p>
<list list-type="simple">
<list-item>
<p>&#x2009;&#x2009;&#x2009;&#x2009;&#x2009;a. Were there any problems you noticed during the interview?</p>
</list-item>
<list-item>
<p>&#x2009;&#x2009;&#x2009;&#x2009;&#x2009;b. For example, were there any questions you asked that the patient did not recognize or answered inappropriately?</p>
</list-item>
</list>
</list-item>
<list-item>
<p>4. To what extent can this application be a useful addition to your studies?</p>
</list-item>
<list-item>
<p>5. Was there anything that you would have liked to have been added to the application?</p>
</list-item>
</list>
</p>
</sec>
<sec id="s2-4-3">
<title>2.4.3 Objective measures</title>
<p>Furthermore, we tracked the progress of the questions in a downloadable interaction log from the VR headset&#x2019;s memory storage after the session. The log included the answers given by the VP and whether and how many hints the participants had asked for while using the application. Based on this interaction log, we have defined different types of errors: <italic>concept</italic> and <italic>system errors</italic>. A <italic>concept error</italic> appears when the VP&#x2019;s answer does not match the participant&#x2019;s question because the question is not part of the catalog, so there is no implemented intention in Wit.ai. As a <italic>system error</italic>, we define errors when the VP&#x2019;s answer does not match the participant&#x2019;s question because Wit.ai allocated the answer to a wrong intention. When the VP&#x2019;s answer matched the participant&#x2019;s question, we defined this turn as the correct allocation. We also counted how often participants used a hint when receiving a correct allocation or errors. Thereby, we differentiate between level one hints (H1), the keyword display, level two hints (H2), the display of the sample question, and the use of both hints.</p>
</sec>
</sec>
</sec>
<sec sec-type="results" id="s3">
<title>3 Results</title>
<sec id="s3-1">
<title>3.1 Quantitative results</title>
<p>
<xref ref-type="table" rid="T1">Table 1</xref> shows the descriptive values of the quantitative results regarding the believability of the dialog between participants and the VP and of the VP&#x2019;s appearance. The believability of the dialog was rated on average at <italic>M</italic> &#x3d; 4.2 (<italic>SD</italic> &#x3d; 1.92). But there was strong agreement that many hints were taken during the conversation. Satisfaction with the accuracy of the VP&#x2019;s answers was rated particularly high (<italic>M</italic> &#x3d; 5.2, <italic>SD</italic> &#x3d; 1.92), and the lowest score was given to the question of how real the conversation with the VP felt (<italic>M</italic> &#x3d; 3.6, <italic>SD</italic> &#x3d; 1.52). The believability of the VP&#x2019;s appearance received higher values. Here, the highest agreement was that the VP was appropriate for the situation (<italic>M</italic> &#x3d; 6.8, <italic>SD</italic> &#x3d; 0.45), and the lowest agreement was assigned to the VP&#x2019;s facial expression (<italic>M</italic> &#x3d; 4.4, <italic>SD</italic> &#x3d; 1.52).</p>
<p>We analyzed the interaction logs (<xref ref-type="table" rid="T2">Table 2</xref>) and counted an average of 19.8 errors (28.62%) out of an average total of 68.8 asked questions. All participants, except participant (p01), asked all given questions and more. Participant (p01) had to stop early because the application crashed due to an internet connection error. The natural language understanding achieved 71.38% correct allocations on average. Overall, we found more errors defined as <italic>concept errors</italic> (20.3%) than <italic>system errors</italic> (8.32%). It can also be observed that a high number of hints are used for the correctly allocated turns (<italic>no errors</italic>). On average, both hints were used 13 times for the correct allocation, while both hints were used one time on average for <italic>concept errors</italic> and one time for <italic>system errors</italic>. With an average value of 30 times, participants used the level 2 hint (sample question) most frequently for the correct allocation. On average, up to three hints were used when errors appeared.</p>
<table-wrap id="T2" position="float">
<label>TABLE 2</label>
<caption>
<p>Amount of errors for all participants and average, tracked by the interaction log. H1 &#x3d; hint 1 (keyword), H2 &#x3d; hint 2 (sample question), Both &#x3d; both hints.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Participants</th>
<th align="center">P01</th>
<th align="center">P02</th>
<th align="center">P03</th>
<th align="center">P04</th>
<th align="center">P05</th>
<th align="center">Average</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">Time spent in VR (minutes)</td>
<td align="center">53.56</td>
<td align="center">28.33</td>
<td align="center">32.50</td>
<td align="center">29.35</td>
<td align="center">25.13</td>
<td align="center">33.70</td>
</tr>
<tr>
<td align="left">Amount of Asked Questions</td>
<td align="center">56</td>
<td align="center">79</td>
<td align="center">69</td>
<td align="center">75</td>
<td align="center">65</td>
<td align="center">68.8</td>
</tr>
<tr>
<td align="left">Total Amount of H1</td>
<td align="center">24 (42.9%)</td>
<td align="center">5 (6.33%)</td>
<td align="center">29 (42%)</td>
<td align="center">22 (29.3%)</td>
<td align="center">31 (47.7%)</td>
<td align="center">22.2 (33.6%)</td>
</tr>
<tr>
<td align="left">Total Amount of H2</td>
<td align="center">27 (48.2%)</td>
<td align="center">45 (57%)</td>
<td align="center">25 (36.2%)</td>
<td align="center">19 (25.3%)</td>
<td align="center">53 (81.5%)</td>
<td align="center">33.8 (49.6%)</td>
</tr>
<tr>
<td align="left">Total Amount of Both</td>
<td align="center">14 (25%)</td>
<td align="center">3 (3.79%)</td>
<td align="center">16 (23.2%)</td>
<td align="center">15 (20%)</td>
<td align="center">25 (38.5%)</td>
<td align="center">14.6 (22.1%)</td>
</tr>
<tr>
<td align="left">Total Amount of Errors</td>
<td align="center">19 (33.9%)</td>
<td align="center">32 (40.5%)</td>
<td align="center">19 (27.5%)</td>
<td align="center">17 (22.7%)</td>
<td align="center">12 (18.5%)</td>
<td align="center">19.8 (28.6%)</td>
</tr>
<tr>
<td align="left">Wrong intention allocation</td>
<td align="center">5 (8.93%)</td>
<td align="center">7 (8.86%)</td>
<td align="center">3 (4.35%)</td>
<td align="center">10 (13.3%)</td>
<td align="center">4 (6.15%)</td>
<td align="center">5.8 (8.32%)</td>
</tr>
<tr>
<td colspan="7" align="left">(<italic>System Error</italic>)</td>
</tr>
<tr>
<td align="left">H1</td>
<td align="center">1</td>
<td align="center">0</td>
<td align="center">2</td>
<td align="center">1</td>
<td align="center">1</td>
<td align="center">1</td>
</tr>
<tr>
<td align="left">H2</td>
<td align="center">1</td>
<td align="center">6</td>
<td align="center">1</td>
<td align="center">0</td>
<td align="center">4</td>
<td align="center">2</td>
</tr>
<tr>
<td align="left">Both</td>
<td align="center">1</td>
<td align="center">0</td>
<td align="center">1</td>
<td align="center">0</td>
<td align="center">1</td>
<td align="center">1</td>
</tr>
<tr>
<td align="left">Not-Existing Intention</td>
<td align="center">14 (25.0%)</td>
<td align="center">25 (31.7%)</td>
<td align="center">16 (23.19%)</td>
<td align="center">7 (9.33%)</td>
<td align="center">7 (9.33%)</td>
<td align="center">14 (20.3%)</td>
</tr>
<tr>
<td colspan="7" align="left">(<italic>Concept Error</italic>)</td>
</tr>
<tr>
<td align="left">H1</td>
<td align="center">7</td>
<td align="center">3</td>
<td align="center">3</td>
<td align="center">0</td>
<td align="center">3</td>
<td align="center">3</td>
</tr>
<tr>
<td align="left">H2</td>
<td align="center">3</td>
<td align="center">4</td>
<td align="center">1</td>
<td align="center">0</td>
<td align="center">1</td>
<td align="center">2</td>
</tr>
<tr>
<td align="left">Both</td>
<td align="center">2</td>
<td align="center">1</td>
<td align="center">1</td>
<td align="center">0</td>
<td align="center">0</td>
<td align="center">1</td>
</tr>
<tr>
<td align="left">Correct Allocation</td>
<td align="center">37 (66.0%)</td>
<td align="center">47 (59.5%)</td>
<td align="center">50 (72.5%)</td>
<td align="center">58 (77.3%)</td>
<td align="center">53 (81.5%)</td>
<td align="center">49 (71.4%)</td>
</tr>
<tr>
<td colspan="7" align="left">(<italic>No Error</italic>)</td>
</tr>
<tr>
<td align="left">H1</td>
<td align="center">16</td>
<td align="center">2</td>
<td align="center">24</td>
<td align="center">21</td>
<td align="center">27</td>
<td align="center">18</td>
</tr>
<tr>
<td align="left">H2</td>
<td align="center">23</td>
<td align="center">35</td>
<td align="center">23</td>
<td align="center">19</td>
<td align="center">48</td>
<td align="center">30</td>
</tr>
<tr>
<td align="left">Both</td>
<td align="center">11</td>
<td align="center">2</td>
<td align="center">14</td>
<td align="center">15</td>
<td align="center">24</td>
<td align="center">13</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s3-2">
<title>3.2 Qualitative results</title>
<p>We transcribed all interviews and formed categories using the MAXQDA<xref ref-type="fn" rid="fn4">
<sup>4</sup>
</xref>. After one researcher categorized the answers into categories, another researcher independently checked whether they would assign the aspects to the same categories. We did not calculate inter-rater agreement. In total, we derived five categories.</p>
<sec id="s3-2-1">
<title>3.2.1 Flow of conversation</title>
<p>The participants identified problems in the flow of the conversation, such as the strict categories that one had to follow throughout the dialog (p01). Participant (p02) mentioned that they thought it was strange to ask every patient the same questions and, especially, that not all given questions fit the depressive young girl. They also said that they would ask an open-ended question to a patient first, not to direct the patient&#x2019;s answer. Two reported repeated answers to differently formulated questions (p02, p04) and that the answers did not always fit the questions (p04). Two participants thought the system would not understand them when their formulation of a question differed too much from the given sample question (Hint 2) (p04) or when they formulated questions too long (p05). Participant (p04) also criticized that symptoms the VP mentioned had to be asked for explicitly, and additionally, that each student had their way of structuring such conversations with patients.</p>
<p>There were also positive statements about the dialog. Two mentioned that the system understood their questions well, and, therefore, the dialog proceeded well (p03, p05). One participant did not feel narrowed by the straight structure of the catalog of questions but praised that they could repeat questions and even ask questions that were not next in the sequence (p03).</p>
</sec>
<sec id="s3-2-2">
<title>3.2.2 Believability</title>
<p>All participants evaluated the believability of the virtual patient well. They highlight the voice (p01, p03, p04, p05), the posture (p02, p03, p04), as well as the mimic and facial expressions (p02, p04) of the patient. One participant said they &#x201c;did not want to put their foot in their mouth, even though it was a computer&#x201d; (p03), and another said they realized that the VP adjusted their facial expressions according to the respective questions (p05).</p>
<p>We asked the participants to rate the dialog between them and the VP to identify what already works well and what does not. A participant rated positively that, in general, the dialog felt believable, as they could communicate well with the VP (p01). The principle of asking questions and receiving answers felt natural and pleasant like in a real dialog (p03), and one could imagine that a dialog in real life would proceed similarly (p05). Nonetheless, participants also rated aspects negatively. They mainly mentioned that they had to ask particular questions and follow the predefined questions of the catalog, which felt unnatural to them (p02, p05). One added that they would usually go deeper into the symptoms and ask them further questions, and as this was impossible, it felt less natural to them (p04). They also said they were more focused on formulating the questions so the system could understand them than formulating the question for a young patient (p04).</p>
</sec>
<sec id="s3-2-3">
<title>3.2.3 General aspects</title>
<p>Participants named different positive aspects of the application. Three of them mentioned that they were positively surprised by how well the VP appropriately answered the questions, even when participants formulated long questions (p01, p04, p05). Four participants said they liked the general idea of the application as a learning tool, especially for beginners. They found it helpful that the application guided them through the procedure of an anamnesis dialog by asking questions one after the other in a schematic sequence (p01, p02, p04, p05). Four participants highlighted the advantages compared to simulation patients (p01, p02, p03, p05); for example, the application provides a calmer surrounding and time, and it would be more believable compared to actors who do not do an excellent job or compared to classmates who you know are not patients. Furthermore, students would be more independent from the actors, and they could all practice independently or even in parallel. It would provide reasonable access for students with social phobia or inhibitions of patient contact (p01, p02). Two liked being together with the patient in the great virtual environment (p03, p05) and praised how well the application&#x2019;s control system for the user was designed (p05).</p>
</sec>
<sec id="s3-2-4">
<title>3.2.4 Accepted limitations and possible reasons for errors</title>
<p>Some participants mentioned limitations they accepted and why they think errors occur. One said even though it could be beneficial to ask more open-ended questions, the participant admitted that this would be more difficult to implement (p01) and that a strict sequence of questions could also be helpful for beginners (p05). Similarly, one participant also thought that the facial expression of the VP could be better, but that facial expressions are very complex, which is also difficult to simulate (p03). Furthermore, a few participants attributed recognition errors to their behavior. For example, when a &#x201c;Hm&#x201d; was recognized as a question, one participant thought that they should have stopped saying it (p03), or others saw the cause of the errors in their excessively long and convoluted sentences (p01, p02, p03). One also cited his lack of knowledge in psychiatry as the reason for the errors, which they could not compensate for even with the hints (p01).</p>
</sec>
<sec id="s3-2-5">
<title>3.2.5 Future wishes</title>
<p>Two participants explicitly mentioned expanding the possibility to ask questions more freely and individually (p02, p05). Another suggested that the diagnosis could be made by having to answer questions about the patient at the end (p04). Another person suggested that one should not always press the level 2 hint (sample question); otherwise, one could be tempted to use it all the time (p02). Instead, you could only see them every few minutes, so you must consider the question yourself. To create further motivation, you could receive points at the end the fewer times you have been given a tip. Participant (p05) would like more time and space in the interview for transitions so that they do not have to go from question to question. This would give the conversation more credibility because you would first have to gain the patient&#x2019;s trust. They also suggested more direct feedback from the patient, e.g., if you have said something stupid, the VP tells you directly. The patient could also ask the students a question (p05).</p>
</sec>
</sec>
</sec>
<sec sec-type="discussion" id="s4">
<title>4 Discussion</title>
<p>We presented the evaluation results of a VR training application that simulates diagnostic interviews with embodied VPs. We let participants rate two design elements associated with the VP&#x2019;s believability: its appearance and natural language-based dialog system.</p>
<p>The VP&#x2019;s appearance was rated well in terms of believability. The quantitative results show moderate values, which are confirmed, in particular, by the qualitative results. Participants highlighted the patient&#x2019;s voice, posture, and facial expression and rated it appropriate for the context. The second design element, the natural language-based dialog system, was also rated moderately regarding believability, but the qualitative results were unclear. For instance, asking questions and perceiving answers felt like in a real dialog; however, the fact that they had to ask particular questions and follow the predefined catalog felt unnatural. Also, the dialog&#x2019;s believability was limited by the impossibility of going deeper and asking further questions. However, the specification of the questions was considered helpful for beginners. The interaction logs indicate that system errors occur significantly less frequently and are more at a conceptual level. This was because participants felt disturbed or irritated by the given sequence. Subjectively, however, the participants attributed the errors more to the system, as they sought the cause, for example, in questions that were too long and confusing. Furthermore, fewer errors occur when participants use many hints, which again indicates that conceptual problems, rather than the speech recognition system, cause errors more often. Overall, participants were impressed by the VP&#x2019;s current functionality and believability and consider this type of application a valuable learning tool for medical education.</p>
<p>Based on our results, our virtual patient has a highly believable appearance design and a satisfying dialog system. In the current state, the most significant limitation for increased believability is the catalog&#x2019;s predefined questions, which prevent the natural flow of asking questions. However, the participants tended to accept certain restrictions if they are plausible in the given learning context, i.e., learning a set of standard questions. The participants see great potential here, especially for beginners. The findings from quantitative and qualitative results strengthen our belief in our approach.</p>
<sec id="s4-1">
<title>4.1 Design directions and enhancements for believable conversations</title>
<p>To our knowledge, only a few research articles focused on believability of simulation patients (human actors) <xref ref-type="bibr" rid="B6">Baylor et al. (2017)</xref> or virtual patients <xref ref-type="bibr" rid="B41">Rizzo et al. (2011)</xref>, though without investigating individual design aspects of the VR system. One significant finding was that the predefined questions of the catalog limited the participants in the flow of their conversation with the VP, which resulted in a student-VP interaction that felt less natural. Our design aimed to create a natural conversation using natural language input and to guide the students through the clinical interview by presenting the question catalog. The fact that more concept errors occurred shows that we needed to consider more questions on a conceptual level in advance. It resulted in participants asking questions to which the VP had no answer. Even after revising the catalog of questions and implementing a more accessible design, such errors could still occur. It is difficult to predict and thus prepare prefabricated answers to all possible questions. One enhancement could be the generation of missing answers to unforeseen questions using AI. Studies show that dynamic response behaviors of virtual agents were rated more positively compared to predefined ones <xref ref-type="bibr" rid="B43">Toader et al., 2019</xref>; <xref ref-type="bibr" rid="B25">Hsu and Lin 2023</xref>. However, the accuracy of the statements generated by the AI needs to be better and apparent beforehand <xref ref-type="bibr" rid="B44">Wang et al. (2023)</xref>. This limitation poses a particular problem in the education context and, especially in a psychiatric context, statements invented by the AI could be over- or misinterpreted. Generative AI, such as ChatGPT, also usually follows certain restrictions, such as not making statements about suicidal behavior to protect users. 
Therefore, future research should validate the adjustment of the prompts precisely to monitor response behavior in the best possible way regarding the teaching content.</p>
</sec>
<sec id="s4-2">
<title>4.2 Limitations and future work</title>
<p>We want to have a critical look at our evaluation. To measure the believability of the design elements, we did not use validated questionnaires but custom items applicable to our use case. Other researchers may consider using a scale like the one of <xref ref-type="bibr" rid="B22">Guo et al. (2023)</xref>. This and the small number of medical students who tested our application must be considered when inferring conclusions from our results for other projects. Further, we have yet to assert the effectiveness of our system. In the future, we want to enhance, in particular, the dialog system and evaluate further elements like a feedback system (e.g., the VP giving verbal feedback during the dialog) or playful elements (e.g., receiving points for correct diagnoses) and their impact on believability and eventually students&#x2019; learning outcomes.</p>
</sec>
</sec>
<sec sec-type="conclusion" id="s5">
<title>5 Conclusion</title>
<p>We presented the progress in our attempt to design a VR training application for conducting a clinical interview to diagnose mental disorders using embodied VPs. We have focused on the believability of the VP system as a decisive factor in the system&#x2019;s eventual learning success. If VP systems are to provide advantages over simulation patients whose authenticity is questionable, the believability of the VPs must be considered. Users must perceive the VP as an actual patient suffering from the disease to behave genuinely towards them. Hence, believable VPs are the only way to ensure a real simulation of the situation. Due to the complex and varied use cases, it is difficult to generalize the evaluation results of individual VP systems. By focusing on individual design aspects of the application, which are then application-independent, we want to identify application-independent components that will help design future VP systems. With our preliminary findings, we want to show the technical basis for a believable component, such as a dialog system. Accordingly, our contribution lies in the methodological approach of examining individual design aspects for their believability in order to improve future VP systems. In the future, we will revise the design of the dialog system to allow more freedom and individuality when asking questions. Afterward, we will evaluate the updated version with medical students.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s6">
<title>Data availability statement</title>
<p>The raw data supporting the conclusion of this article will be made available by the authors, without undue reservation.</p>
</sec>
<sec id="s7">
<title>Ethics statement</title>
<p>The requirement of ethical approval was waived. The studies were conducted in accordance with the local legislation and institutional requirements. The participants provided their written informed consent to participate in this study. Written informed consent was obtained from the individual(s) for the publication of any potentially identifiable images or data included in this article.</p>
</sec>
<sec id="s8">
<title>Author contributions</title>
<p>LG: Conceptualization, Data curation, Formal Analysis, Funding acquisition, Investigation, Methodology, Project administration, Supervision, Validation, Visualization, Writing&#x2013;original draft. PS: Visualization, Writing&#x2013;review and editing. GG-D: Conceptualization, Funding acquisition, Supervision, Writing&#x2013;review and editing. MM: Funding acquisition, Resources, Supervision, Writing&#x2013;review and editing.</p>
</sec>
<sec sec-type="funding-information" id="s9">
<title>Funding</title>
<p>The author(s) declare that financial support was received for the research, authorship, and/or publication of this article. We acknowledge support by the Open Access Publication Fund of the University of Duisburg-Essen. As part of the project &#x201c;DEVIA&#x201d;, this work was supported by the Robert-Enke-Stiftung.</p>
</sec>
<sec sec-type="COI-statement" id="s10">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s11">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec id="s12">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/frvir.2024.1377210/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/frvir.2024.1377210/full&#x23;supplementary-material</ext-link>
</p>
<supplementary-material xlink:href="Table1.pdf" id="SM1" mimetype="application/pdf" xmlns:xlink="http://www.w3.org/1999/xlink"/>
</sec>
<fn-group>
<fn id="fn1">
<label>1</label>
<p>AMDP &#x3d; Arbeitsgemeinschaft f&#xfc;r Methodik und Dokumentation in der Psychiatrie, <ext-link ext-link-type="uri" xlink:href="https://www.amdp.de/">https://www.amdp.de/</ext-link>, 03/10/2024.</p>
</fn>
<fn id="fn2">
<label>2</label>
<p>
<ext-link ext-link-type="uri" xlink:href="https://wit.ai/">https://wit.ai/</ext-link>, 03/10/2024.</p>
</fn>
<fn id="fn3">
<label>3</label>
<p>
<ext-link ext-link-type="uri" xlink:href="https://elevenlabs.io/">https://elevenlabs.io/</ext-link>, 03/10/2024.</p>
</fn>
<fn id="fn4">
<label>4</label>
<p>
<ext-link ext-link-type="uri" xlink:href="https://www.maxqda.com/de/">https://www.maxqda.com/de/</ext-link>, 03/10/2024.</p>
</fn>
</fn-group>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Allbeck</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Badler</surname>
<given-names>N. I.</given-names>
</name>
</person-group> (<year>2001</year>). <article-title>Consistent communication with control</article-title>. <source>Cent. Hum. Model. Simul.</source> <volume>85</volume>.</citation>
</ref>
<ref id="B2">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Balogh</surname>
<given-names>E. P.</given-names>
</name>
<name>
<surname>Miller</surname>
<given-names>B. T.</given-names>
</name>
<name>
<surname>Ball</surname>
<given-names>J. R.</given-names>
</name>
</person-group> (<year>2015</year>). <source>Improving diagnosis in health care</source>.</citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Barry Issenberg</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Mcgaghie</surname>
<given-names>W. C.</given-names>
</name>
<name>
<surname>Petrusa</surname>
<given-names>E. R.</given-names>
</name>
<name>
<surname>Lee Gordon</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Scalese</surname>
<given-names>R. J.</given-names>
</name>
</person-group> (<year>2005</year>). <article-title>Features and uses of high-fidelity medical simulations that lead to effective learning: a BEME systematic review</article-title>. <source>Med. Teach.</source> <volume>27</volume>, <fpage>10</fpage>&#x2013;<lpage>28</lpage>. <pub-id pub-id-type="doi">10.1080/01421590500046924</pub-id>
</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Baylor</surname>
<given-names>A. L.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>The design of motivational agents and avatars</article-title>. <source>Educ. Technol. Res. Dev.</source> <volume>59</volume>, <fpage>291</fpage>&#x2013;<lpage>300</lpage>. <pub-id pub-id-type="doi">10.1007/s11423-011-9196-3</pub-id>
</citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Baylor</surname>
<given-names>A. L.</given-names>
</name>
<name>
<surname>Kim</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>Designing nonverbal communication for pedagogical agents: when less is more</article-title>. <source>Comput. Hum. Behav.</source> <volume>25</volume>, <fpage>450</fpage>&#x2013;<lpage>457</lpage>. <pub-id pub-id-type="doi">10.1016/j.chb.2008.10.008</pub-id>
</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Baylor</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Burns</surname>
<given-names>M. I.</given-names>
</name>
<name>
<surname>Struijk</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Herron</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Mach</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Yorkston</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Assessing the believability of standardized patients trained to portray communication disorders</article-title>. <source>Am. J. Speech-Language Pathology</source> <volume>26</volume>, <fpage>791</fpage>&#x2013;<lpage>805</lpage>. <pub-id pub-id-type="doi">10.1044/2017_ajslp-16-0068</pub-id>
</citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Blascovich</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Loomis</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Beall</surname>
<given-names>A. C.</given-names>
</name>
<name>
<surname>Swinth</surname>
<given-names>K. R.</given-names>
</name>
<name>
<surname>Hoyt</surname>
<given-names>C. L.</given-names>
</name>
<name>
<surname>Bailenson</surname>
<given-names>J. N.</given-names>
</name>
</person-group> (<year>2002</year>). <article-title>TARGET ARTICLE: immersive virtual environment technology as a methodological tool for social psychology</article-title>. <source>Psychol. Inq.</source> <volume>13</volume>, <fpage>103</fpage>&#x2013;<lpage>124</lpage>. <pub-id pub-id-type="doi">10.1207/s15327965pli1302_01</pub-id>
</citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Borja-Hart</surname>
<given-names>N. L.</given-names>
</name>
<name>
<surname>Spivey</surname>
<given-names>C. A.</given-names>
</name>
<name>
<surname>George</surname>
<given-names>C. M.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Use of virtual patient software to assess student confidence and ability in communication skills and virtual patient impression: a mixed-methods approach</article-title>. <source>Curr. Pharm. Teach. Learn.</source> <volume>11</volume>, <fpage>710</fpage>&#x2013;<lpage>718</lpage>. <pub-id pub-id-type="doi">10.1016/j.cptl.2019.03.009</pub-id>
</citation>
</ref>
<ref id="B9">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Buysse</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Van Maele</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>De Moor</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2002</year>). &#x201c;<article-title>The dynamic patient simulator: learning process, first results and students&#x2019; satisfaction</article-title>,&#x201d; in <source>E-Health in Belgium and in The Netherlands</source> (<publisher-name>IOS Press</publisher-name>), <fpage>19</fpage>&#x2013;<lpage>24</lpage>.</citation>
</ref>
<ref id="B10">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Cameron</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Cameron</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Megaw</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Bond</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Mulvenna</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>O&#x2019;Neill</surname>
<given-names>S.</given-names>
</name>
<etal/>
</person-group> (<year>2019</year>). &#x201c;<article-title>Assessing the usability of a chatbot for mental health care</article-title>,&#x201d; in <source>Internet science: INSCI 2018 international workshops, St. Petersburg, Russia, October 24&#x2013;26, 2018, revised selected papers 5</source> (<publisher-name>Springer</publisher-name>), <fpage>121</fpage>&#x2013;<lpage>132</lpage>.</citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Campillos-Llanos</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Thomas</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Bilinski</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Zweigenbaum</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Rosset</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Designing a virtual patient dialogue system based on terminology-rich resources: challenges and evaluation</article-title>. <source>Nat. Lang. Eng.</source> <volume>26</volume>, <fpage>183</fpage>&#x2013;<lpage>220</lpage>. <pub-id pub-id-type="doi">10.1017/s1351324919000329</pub-id>
</citation>
</ref>
<ref id="B12">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Candler</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2007</year>). &#x201c;<article-title>Effective use of educational technology in medical education</article-title>,&#x201d; in <source>Colloquium on educational technology: recommendations and guidelines for medical educators</source> (<publisher-loc>Washington, DC</publisher-loc>: <publisher-name>AAMC Institute for Improving Medical Education</publisher-name>).</citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cleland</surname>
<given-names>J. A.</given-names>
</name>
<name>
<surname>Abe</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Rethans</surname>
<given-names>J.-J.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>The use of simulated patients in medical education: AMEE Guide No 42</article-title>. <source>Med. Teach.</source> <volume>31</volume>, <fpage>477</fpage>&#x2013;<lpage>486</lpage>. <pub-id pub-id-type="doi">10.1080/01421590903002821</pub-id>
</citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cook</surname>
<given-names>D. A.</given-names>
</name>
<name>
<surname>Erwin</surname>
<given-names>P. J.</given-names>
</name>
<name>
<surname>Triola</surname>
<given-names>M. M.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>Computerized virtual patients in health professions education: a systematic review and meta-analysis</article-title>. <source>Acad. Med.</source> <volume>85</volume>, <fpage>1589</fpage>&#x2013;<lpage>1602</lpage>. <pub-id pub-id-type="doi">10.1097/acm.0b013e3181edfe13</pub-id>
</citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Demeure</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Niewiadomski</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Pelachaud</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>How is believability of a virtual agent related to warmth, competence, personification, and embodiment?</article-title> <source>Presence</source> <volume>20</volume>, <fpage>431</fpage>&#x2013;<lpage>448</lpage>. <pub-id pub-id-type="doi">10.1162/pres_a_00065</pub-id>
</citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>De Rosis</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Pelachaud</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Poggi</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Carofiglio</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>De Carolis</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2003</year>). <article-title>From Greta&#x2019;s mind to her face: modelling the dynamics of affective states in a conversational embodied agent</article-title>. <source>Int. J. human-computer Stud.</source> <volume>59</volume>, <fpage>81</fpage>&#x2013;<lpage>118</lpage>. <pub-id pub-id-type="doi">10.1016/s1071-5819(03)00020-x</pub-id>
</citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dimeff</surname>
<given-names>L. A.</given-names>
</name>
<name>
<surname>Jobes</surname>
<given-names>D. A.</given-names>
</name>
<name>
<surname>Chalker</surname>
<given-names>S. A.</given-names>
</name>
<name>
<surname>Piehl</surname>
<given-names>B. M.</given-names>
</name>
<name>
<surname>Duvivier</surname>
<given-names>L. L.</given-names>
</name>
<name>
<surname>Lok</surname>
<given-names>B. C.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>A novel engagement of suicidality in the emergency department: virtual collaborative assessment and management of suicidality</article-title>. <source>General Hosp. psychiatry</source> <volume>63</volume>, <fpage>119</fpage>&#x2013;<lpage>126</lpage>. <pub-id pub-id-type="doi">10.1016/j.genhosppsych.2018.05.005</pub-id>
</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Doering</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Veletsianos</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Yerasimou</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>Conversational agents and their longitudinal affordances on communication and interaction</article-title>. <source>J. Interact. Learn. Res.</source> <volume>19</volume>, <fpage>251</fpage>&#x2013;<lpage>270</lpage>.</citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fleetwood</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Vaught</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Feldman</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Gracely</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Kassutto</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Novack</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2000</year>). <article-title>Medethex online: a computer-based learning program in medical ethics and communication skills</article-title>. <source>Teach. Learn. Med.</source> <volume>12</volume>, <fpage>96</fpage>&#x2013;<lpage>104</lpage>. <pub-id pub-id-type="doi">10.1207/s15328015tlm1202_7</pub-id>
</citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Graf</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Abramowski</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Born</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Masuch</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2023a</year>). <article-title>Emotional virtual characters for improving motivation and performance in VR exergames</article-title>. <source>Proc. ACM Human-Computer Interact.</source> <volume>7</volume>, <fpage>1115</fpage>&#x2013;<lpage>1135</lpage>. <pub-id pub-id-type="doi">10.1145/3611063</pub-id>
</citation>
</ref>
<ref id="B21">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Graf</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Gradl-Dietsch</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Masuch</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2023b</year>). &#x201c;<article-title>Depressed virtual agents: development of a playful VR application for the training of child and adolescent psychiatry students</article-title>,&#x201d; in <source>Proceedings of the 23rd ACM international conference on intelligent virtual agents</source>, <fpage>1</fpage>&#x2013;<lpage>3</lpage>.</citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Guo</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Adamo</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Mousas</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Developing a scale for measuring the believability of virtual agents</article-title>.
</citation>
</ref>
<ref id="B23">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>H&#xe5;vik</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Wake</surname>
<given-names>J. D.</given-names>
</name>
<name>
<surname>Flobak</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Lundervold</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Guribye</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2019</year>). &#x201c;<article-title>A conversational interface for self-screening for ADHD in adults</article-title>,&#x201d; in <source>Internet science: INSCI 2018 international workshops, St. Petersburg, Russia, October 24&#x2013;26, 2018, revised selected papers 5</source> (<publisher-name>Springer</publisher-name>), <fpage>133</fpage>&#x2013;<lpage>144</lpage>.</citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Homer</surname>
<given-names>B. D.</given-names>
</name>
<name>
<surname>Plass</surname>
<given-names>J. L.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Level of interactivity and executive functions as predictors of learning in computer-based chemistry simulations</article-title>. <source>Comput. Hum. Behav.</source> <volume>36</volume>, <fpage>365</fpage>&#x2013;<lpage>375</lpage>. <pub-id pub-id-type="doi">10.1016/j.chb.2014.03.041</pub-id>
</citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hsu</surname>
<given-names>C.-L.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>J. C.-C.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Understanding the user satisfaction and loyalty of customer service chatbots</article-title>. <source>J. Retail. Consumer Serv.</source> <volume>71</volume>, <fpage>103211</fpage>. <pub-id pub-id-type="doi">10.1016/j.jretconser.2022.103211</pub-id>
</citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hudlicka</surname>
<given-names>E.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Virtual training and coaching of health behavior: example from mindfulness meditation training</article-title>. <source>Patient Educ. Couns.</source> <volume>92</volume>, <fpage>160</fpage>&#x2013;<lpage>166</lpage>. <pub-id pub-id-type="doi">10.1016/j.pec.2013.05.007</pub-id>
</citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Isaza-Restrepo</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>G&#xf3;mez</surname>
<given-names>M. T.</given-names>
</name>
<name>
<surname>Cifuentes</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Arg&#xfc;ello</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>The virtual patient as a learning tool: a mixed quantitative qualitative study</article-title>. <source>BMC Med. Educ.</source> <volume>18</volume>, <fpage>1</fpage>&#x2013;<lpage>10</lpage>. <pub-id pub-id-type="doi">10.1186/s12909-018-1395-8</pub-id>
</citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kalet</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Song</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Sarpel</surname>
<given-names>U.</given-names>
</name>
<name>
<surname>Schwartz</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Brenner</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Ark</surname>
<given-names>T.</given-names>
</name>
<etal/>
</person-group> (<year>2012</year>). <article-title>Just enough, but not too much interactivity leads to better clinical skills performance after a computer assisted learning module</article-title>. <source>Med. Teach.</source> <volume>34</volume>, <fpage>833</fpage>&#x2013;<lpage>839</lpage>. <pub-id pub-id-type="doi">10.3109/0142159x.2012.706727</pub-id>
</citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Knoppel</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>Gaze patterns for a storytelling embodied conversational agent</article-title>. <source>Capita Sel</source>.</citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kocaballi</surname>
<given-names>A. B.</given-names>
</name>
<name>
<surname>Berkovsky</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Quiroz</surname>
<given-names>J. C.</given-names>
</name>
<name>
<surname>Laranjo</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Tong</surname>
<given-names>H. L.</given-names>
</name>
<name>
<surname>Rezazadegan</surname>
<given-names>D.</given-names>
</name>
<etal/>
</person-group> (<year>2019</year>). <article-title>The personalization of conversational agents in health care: systematic review</article-title>. <source>J. Med. Internet Res.</source> <volume>21</volume>, <fpage>e15360</fpage>. <pub-id pub-id-type="doi">10.2196/15360</pub-id>
</citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kononowicz</surname>
<given-names>A. A.</given-names>
</name>
<name>
<surname>Woodham</surname>
<given-names>L. A.</given-names>
</name>
<name>
<surname>Edelbring</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Stathakarou</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Davies</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Saxena</surname>
<given-names>N.</given-names>
</name>
<etal/>
</person-group> (<year>2019</year>). <article-title>Virtual patient simulations in health professions education: systematic review and meta-analysis by the digital health education collaboration</article-title>. <source>J. Med. Internet Res.</source> <volume>21</volume>, <fpage>e14676</fpage>. <pub-id pub-id-type="doi">10.2196/14676</pub-id>
</citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kononowicz</surname>
<given-names>A. A.</given-names>
</name>
<name>
<surname>Zary</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Edelbring</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Corral</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Hege</surname>
<given-names>I.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Virtual patients-what are we talking about? a framework to classify the meanings of the term in healthcare education</article-title>. <source>BMC Med. Educ.</source> <volume>15</volume>, <fpage>11</fpage>&#x2013;<lpage>17</lpage>. <pub-id pub-id-type="doi">10.1186/s12909-015-0296-3</pub-id>
</citation>
</ref>
<ref id="B33">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Lim</surname>
<given-names>M. Y.</given-names>
</name>
<name>
<surname>Aylett</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2007</year>). &#x201c;<article-title>Feel the difference: a guide with attitude</article-title>,&#x201d; in <source>International workshop on intelligent virtual agents</source> (<publisher-name>Springer</publisher-name>), <fpage>317</fpage>&#x2013;<lpage>330</lpage>.</citation>
</ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ly</surname>
<given-names>K. H.</given-names>
</name>
<name>
<surname>Ly</surname>
<given-names>A.-M.</given-names>
</name>
<name>
<surname>Andersson</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>A fully automated conversational agent for promoting mental well-being: a pilot RCT using mixed methods</article-title>. <source>Internet interv.</source> <volume>10</volume>, <fpage>39</fpage>&#x2013;<lpage>46</lpage>. <pub-id pub-id-type="doi">10.1016/j.invent.2017.10.002</pub-id>
</citation>
</ref>
<ref id="B35">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mavrogiorgou</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>B&#xf6;hme</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Hooge</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Pfeiffer</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Juckel</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Virtuelle Realit&#xe4;t in der Lehre im Fach Psychiatrie und Psychotherapie</article-title>. <source>Der Nervenarzt</source> <volume>93</volume>, <fpage>728</fpage>&#x2013;<lpage>734</lpage>. <pub-id pub-id-type="doi">10.1007/s00115-021-01227-5</pub-id>
</citation>
</ref>
<ref id="B36">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>McGaghie</surname>
<given-names>W. C.</given-names>
</name>
<name>
<surname>Issenberg</surname>
<given-names>S. B.</given-names>
</name>
<name>
<surname>Petrusa</surname>
<given-names>E. R.</given-names>
</name>
<name>
<surname>Scalese</surname>
<given-names>R. J.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>A critical review of simulation-based medical education research: 2003&#x2013;2009</article-title>. <source>Med. Educ.</source> <volume>44</volume>, <fpage>50</fpage>&#x2013;<lpage>63</lpage>. <pub-id pub-id-type="doi">10.1111/j.1365-2923.2009.03547.x</pub-id>
</citation>
</ref>
<ref id="B37">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Milne-Ives</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>de Cock</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Lim</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Shehadeh</surname>
<given-names>M. H.</given-names>
</name>
<name>
<surname>de Pennington</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Mole</surname>
<given-names>G.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>The effectiveness of artificial intelligence conversational agents in health care: systematic review</article-title>. <source>J. Med. Internet Res.</source> <volume>22</volume>, <fpage>e20346</fpage>. <pub-id pub-id-type="doi">10.2196/20346</pub-id>
</citation>
</ref>
<ref id="B38">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mori</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>MacDorman</surname>
<given-names>K. F.</given-names>
</name>
<name>
<surname>Kageki</surname>
<given-names>N.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>The uncanny valley [from the field]</article-title>. <source>IEEE Robotics automation Mag.</source> <volume>19</volume>, <fpage>98</fpage>&#x2013;<lpage>100</lpage>. <pub-id pub-id-type="doi">10.1109/mra.2012.2192811</pub-id>
</citation>
</ref>
<ref id="B39">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pantziaras</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Fors</surname>
<given-names>U.</given-names>
</name>
<name>
<surname>Ekblad</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Training with virtual patients in transcultural psychiatry: do the learners actually learn?</article-title> <source>J. Med. Internet Res.</source> <volume>17</volume>, <fpage>e46</fpage>. <pub-id pub-id-type="doi">10.2196/jmir.3497</pub-id>
</citation>
</ref>
<ref id="B40">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Plackett</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Kassianos</surname>
<given-names>A. P.</given-names>
</name>
<name>
<surname>Mylan</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Kambouri</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Raine</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Sheringham</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>The effectiveness of using virtual patient educational tools to improve medical students&#x2019; clinical reasoning skills: a systematic review</article-title>. <source>BMC Med. Educ.</source> <volume>22</volume>, <fpage>365</fpage>. <pub-id pub-id-type="doi">10.1186/s12909-022-03410-x</pub-id>
</citation>
</ref>
<ref id="B41">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Rizzo</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Kenny</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Parsons</surname>
<given-names>T. D.</given-names>
</name>
</person-group> (<year>2011</year>). <source>Intelligent virtual patients for training clinical skills</source>, <volume>8</volume>. <publisher-name>JVRB-Journal of Virtual Reality and Broadcasting</publisher-name>.</citation>
</ref>
<ref id="B42">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Schubach</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Goos</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Fabry</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Vach</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Boeker</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Virtual patients in the acquisition of clinical reasoning skills: does presentation mode matter? a quasi-randomized controlled trial</article-title>. <source>BMC Med. Educ.</source> <volume>17</volume>, <fpage>1</fpage>&#x2013;<lpage>13</lpage>. <pub-id pub-id-type="doi">10.1186/s12909-017-1004-2</pub-id>
</citation>
</ref>
<ref id="B43">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Toader</surname>
<given-names>D.-C.</given-names>
</name>
<name>
<surname>Boca</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Toader</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>M&#x103;celaru</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Toader</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Ighian</surname>
<given-names>D.</given-names>
</name>
<etal/>
</person-group> (<year>2019</year>). <article-title>The effect of social presence and chatbot errors on trust</article-title>. <source>Sustainability</source> <volume>12</volume>, <fpage>256</fpage>. <pub-id pub-id-type="doi">10.3390/su12010256</pub-id>
</citation>
</ref>
<ref id="B44">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Guo</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Ethical considerations of using ChatGPT in health care</article-title>. <source>J. Med. Internet Res.</source> <volume>25</volume>, <fpage>e48009</fpage>. <pub-id pub-id-type="doi">10.2196/48009</pub-id>
</citation>
</ref>
<ref id="B45">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Washburn</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Bordnick</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Rizzo</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>A pilot feasibility study of virtual patient simulation to enhance social work students&#x2019; brief mental health assessment skills</article-title>. <source>Soc. work health care</source> <volume>55</volume>, <fpage>675</fpage>&#x2013;<lpage>693</lpage>. <pub-id pub-id-type="doi">10.1080/00981389.2016.1210715</pub-id>
</citation>
</ref>
<ref id="B46">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wuendrich</surname>
<given-names>M. S.</given-names>
</name>
<name>
<surname>Nissen</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Feige</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Philipsen</surname>
<given-names>A. S.</given-names>
</name>
<name>
<surname>Voderholzer</surname>
<given-names>U.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Portrayal of psychiatric disorders: are simulated patients authentic?</article-title> <source>Acad. Psychiatry</source> <volume>36</volume>, <fpage>501</fpage>. <pub-id pub-id-type="doi">10.1176/appi.ap.11090163</pub-id>
</citation>
</ref>
<ref id="B47">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zibrek</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Kokkinara</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>McDonnell</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>The effect of realistic appearance of virtual characters in immersive environments-does the character&#x2019;s personality play a role?</article-title> <source>IEEE Trans. Vis. Comput. Graph.</source> <volume>24</volume>, <fpage>1681</fpage>&#x2013;<lpage>1690</lpage>. <pub-id pub-id-type="doi">10.1109/tvcg.2018.2794638</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>