<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.3" xml:lang="en" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Robot. AI</journal-id>
<journal-title>Frontiers in Robotics and AI</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Robot. AI</abbrev-journal-title>
<issn pub-type="epub">2296-9144</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">783863</article-id>
<article-id pub-id-type="doi">10.3389/frobt.2022.783863</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Robotics and AI</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Conveying Intention by Motions With Awareness of Information Asymmetry</article-title>
<alt-title alt-title-type="left-running-head">Fukuchi et&#x20;al.</alt-title>
<alt-title alt-title-type="right-running-head">Intention-Conveying Motions and Information Asymmetry</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Fukuchi</surname>
<given-names>Yosuke</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1365653/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Osawa</surname>
<given-names>Masahiko</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Yamakawa</surname>
<given-names>Hiroshi</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
<xref ref-type="aff" rid="aff5">
<sup>5</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/336861/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Takahashi</surname>
<given-names>Tatsuji</given-names>
</name>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
<xref ref-type="aff" rid="aff5">
<sup>5</sup>
</xref>
<xref ref-type="aff" rid="aff6">
<sup>6</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/154265/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Imai</surname>
<given-names>Michita</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>Faculty of Science and Technology</institution>, <institution>Keio University</institution>, <addr-line>Yokohama</addr-line>, <country>Japan</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Nihon University</institution>, <addr-line>Tokyo</addr-line>, <country>Japan</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>The Whole Brain Architecture Initiative</institution>, <addr-line>Tokyo</addr-line>, <country>Japan</country>
</aff>
<aff id="aff4">
<sup>4</sup>
<institution>School of Engineering</institution>, <institution>University of Tokyo</institution>, <addr-line>Tokyo</addr-line>, <country>Japan</country>
</aff>
<aff id="aff5">
<sup>5</sup>
<institution>RIKEN Center for Advanced Intelligence Project</institution>, <addr-line>Tokyo</addr-line>, <country>Japan</country>
</aff>
<aff id="aff6">
<sup>6</sup>
<institution>School of Science and Engineering</institution>, <institution>Tokyo Denki University</institution>, <addr-line>Tokyo</addr-line>, <country>Japan</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/88605/overview">Salvatore Maria Anzalone</ext-link>, Universit&#xe9; Paris 8, France</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1343715/overview">Konstantinos Tsiakas</ext-link>, Delft University of Technology, Netherlands</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1573539/overview">Gloria Beraldo</ext-link>, National Research Council (CNR), Italy</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Yosuke Fukuchi, <email>fukuchi@ailab.ics.keio.ac.jp</email>
</corresp>
<fn fn-type="other">
<p>This article was submitted to Human-Robot Interaction, a section of the journal Frontiers in Robotics and&#x20;AI</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>16</day>
<month>02</month>
<year>2022</year>
</pub-date>
<pub-date pub-type="collection">
<year>2022</year>
</pub-date>
<volume>9</volume>
<elocation-id>783863</elocation-id>
<history>
<date date-type="received">
<day>27</day>
<month>09</month>
<year>2021</year>
</date>
<date date-type="accepted">
<day>10</day>
<month>01</month>
<year>2022</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2022 Fukuchi, Osawa, Yamakawa, Takahashi and Imai.</copyright-statement>
<copyright-year>2022</copyright-year>
<copyright-holder>Fukuchi, Osawa, Yamakawa, Takahashi and Imai</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these&#x20;terms.</p>
</license>
</permissions>
<abstract>
<p>Humans sometimes attempt to infer an artificial agent&#x2019;s mental state based on mere observations of its behavior. From the agent&#x2019;s perspective, it is important to choose actions with awareness of how its behavior will be considered by humans. Previous studies have proposed computational methods to generate such publicly self-aware motion to allow an agent to convey a certain intention by motions that can lead a human observer to infer what the agent is aiming to do. However, little consideration has been given to the effect of information asymmetry between the agent and a human, or to the gaps in their beliefs due to different observations from their respective perspectives. This paper claims that information asymmetry is a key factor for conveying intentions with motions. To validate the claim, we developed a novel method to generate intention-conveying motions while considering information asymmetry. Our method utilizes a Bayesian public self-awareness model that effectively simulates the inference of an agent&#x2019;s mental states as attributed to the agent by an observer in a partially observable domain. We conducted two experiments to investigate the effects of information asymmetry when conveying intentions with motions by comparing the motions from our method with those generated without considering information asymmetry in a manner similar to previous work. The results demonstrate that by taking information asymmetry into account, an agent can effectively convey its intention to human observers.</p>
</abstract>
<kwd-group>
<kwd>Bayesian theory of mind</kwd>
<kwd>public self-awareness</kwd>
<kwd>PublicSelf model</kwd>
<kwd>human-agent collaboration</kwd>
<kwd>legible motion</kwd>
<kwd>reinforcement learning</kwd>
<kwd>explainable AI</kwd>
</kwd-group>
</article-meta>
</front>
<body>
<sec id="s1">
<title>1 Introduction</title>
<p>Theory of mind is the ability to infer other people&#x2019;s mental states, such as their beliefs, desires, and intentions, from their actions. By attributing mental states to others, people attempt to interpret their past behavior and predict their future actions (<xref ref-type="bibr" rid="B32">Premack and Woodruff, 1978</xref>). The ability to infer others&#x2019; minds in this way serves as a basis for social interaction (<xref ref-type="bibr" rid="B25">Marchesi et&#x20;al., 2019</xref>). In cooperation, for example, a worker requires mutual understanding of what another worker is intending to do to decide how to act or whether to help that person in a given situation (<xref ref-type="bibr" rid="B16">Hayes and Scassellati, 2013</xref>). Theory of mind enables workers to quickly understand each other with a reduced amount of explicit communication.</p>
<p>The targets of theory of mind include not only other humans but sometimes also artifacts (<xref ref-type="bibr" rid="B15">Gergely et&#x20;al., 1995</xref>; <xref ref-type="bibr" rid="B34">Schellen and Wykowska, 2019</xref>), regardless of whether they actually possess mental states similar to those of humans. This phenomenon can be utilized to facilitate natural and efficient interactions between humans and artificial agents, such as seeking human help without verbal cues (<xref ref-type="bibr" rid="B2">Cha and Mataric, 2016</xref>), although it may also have undesirable effects. For example, humans may make false inferences regarding what an agent is intending to do based on mere observation of its behavior. Such misunderstandings can lead to failure of collaboration or even serious accidents. In this context, autonomous artificial agents need to act with <italic>public self-awareness</italic>, or inference of how their behavior will be considered by their observers (<xref ref-type="bibr" rid="B12">Feningstein, 1975</xref>; <xref ref-type="bibr" rid="B11">Falewicz and Bak, 2015</xref>).</p>
<p>Previous studies have proposed computational methods for enabling autonomous agents to act with awareness of an observer&#x2019;s theory of mind. Dragan et&#x20;al. formalized the problem of an artificial agent&#x2019;s inference of the goal attributed to it by a human observer and proposed a method to generate motion that conveys a goal-directed agent&#x2019;s specific intention to a human observer to either lead or mislead human inference of what the agent is aiming to do (<xref ref-type="bibr" rid="B9">Dragan and Srinivasa, 2014</xref>; <xref ref-type="bibr" rid="B8">Dragan et&#x20;al., 2015b</xref>). Motion that conveys an agent&#x2019;s true intention is specifically called <italic>legible</italic> motion. <xref ref-type="fig" rid="F1">Figure&#x20;1</xref> illustrates an example. The blue agent intends to retrieve the apple in the environment. The original motion (<xref ref-type="fig" rid="F1">Figure&#x20;1A</xref>) is the result of an attempt to choose efficient motion to achieve its goal without considering its observer&#x2019;s theory of mind. The agent moves straight and then turns toward the apple just in front of the observer. The observer cannot judge which fruit the agent intends to retrieve when the agent is moving straight toward the observer; thus, it is difficult for the observer to quickly predict the agent&#x2019;s intention by observing the agent&#x2019;s behavior. By contrast, with legible motion (<xref ref-type="fig" rid="F1">Figure&#x20;1B</xref>), the agent moves toward the side corresponding to the apple from the beginning, excluding the possibility of interpretation that the agent intends to retrieve the pear. Although the time required for the agent to retrieve the apple is increased, the observer can more quickly and correctly infer that the agent intends to retrieve the apple than in the case of the original motion. 
Previous work on legible motion has successfully demonstrated the effectiveness of endowing an artificial agent with awareness of human theory-of-mind inference with respect to its behavior during human-robot collaboration (<xref ref-type="bibr" rid="B7">Dragan et&#x20;al., 2015a</xref>).</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>An example in which legible motion serves to improve the predictability of an agent&#x2019;s intention. The blue agent is initially at the far side of the room and intends to get the apple. The agent&#x2019;s movements are captured from a fixed observation point. Legible motion reduces ambiguity early on, enabling the observer to infer the true intention of the agent. <bold>(A)</bold> Original motion (t &#x3d; 0, 17, 34, 50). <bold>(B)</bold> Legible motion (t &#x3d; 0, 17, 34, 58).</p>
</caption>
<graphic xlink:href="frobt-09-783863-g001.tif"/>
</fig>
<p>Previous studies assumed simple situations in which both the agent and its observer can easily share information about the environment, so they do not have to consider information asymmetry or differences in what each individual observes. <xref ref-type="fig" rid="F2">Figure&#x20;2</xref> shows a simple example in which the previous method does not work due to information asymmetry. An apple and a pear are both present on the right side of the observer (<xref ref-type="fig" rid="F2">Figure&#x20;2A</xref>). If the observer could observe both the apple and the pear, the motion curved to the apple could be effective because it would prevent the observer from mistaking the agent&#x2019;s target for the pear. However, when we see the motion from the perspective of the observer who cannot observe the pear, the early motion makes it appear as though the agent is ignoring the apple (<xref ref-type="fig" rid="F2">Figure&#x20;2B</xref>). This example suggests that legible motion with the false assumption that the actor and the observer share beliefs about the environment does not always improve the legibility of an agent&#x2019;s behavior and can sometimes even make it more difficult for an observer to infer the intention that the agent aims to convey.</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>An example in which FalseProjective legible motion, which does not consider information asymmetry, fails to effectively convey the agent&#x2019;s intention. FalseProjective legible motion results in an unhelpful detour because it does not consider the fact that the human does not know where a pear is. <bold>(A)</bold> Legible motion in the agent&#x2019;s belief. <bold>(B)</bold> FalseProjective legible motion from the human&#x2019;s limited observation.</p>
</caption>
<graphic xlink:href="frobt-09-783863-g002.tif"/>
</fig>
<p>Our main claim here is that information asymmetry is a critical factor in generating motions that convey certain intentions. To formalize this claim, we developed a novel method to generate legible motion with awareness of information asymmetry. This method is based on our previously proposed PublicSelf model, which is a computational model of public self-awareness that infers the mental states attributed to the agent by an observer (<xref ref-type="bibr" rid="B14">Fukuchi et&#x20;al., 2018</xref>). By explicitly distinguishing the observations and beliefs of an acting agent and its observer, PublicSelf can accurately predict a human&#x2019;s inference of an agent&#x2019;s mind in information-asymmetric situations with partial observability, but it was not applied to generating an agent&#x2019;s behavior. To validate our claim, we conducted a simulation study and a user study to compare the legible motion generated with our method, PublicSelf legible motion, with <italic>FalseProjective</italic> legible motion, which does not consider information asymmetry. The results showed that PublicSelf legible motion improves the predictability of an agent&#x2019;s intentions compared with FalseProjective legible motion, indicating that information asymmetry is a critical problem when conveying intentions by motions and that our formalization can effectively address this problem.</p>
<p>This paper is structured as follows. <xref ref-type="sec" rid="s2">Section 2</xref> presents the background, explains the problem of conveying intentions by motions, and summarizes previous studies. <xref ref-type="sec" rid="s3">Section 3</xref> proposes our method of generating legible motion using PublicSelf. <xref ref-type="sec" rid="s4">Section 4</xref> describes the implementation of PublicSelf and the generation of legible motion. <xref ref-type="sec" rid="s5">Section 5</xref> reports on the two experiments conducted to evaluate PublicSelf legible motion and discusses the results. <xref ref-type="sec" rid="s6">Section 6</xref> discusses directions for future work. <xref ref-type="sec" rid="s7">Section 7</xref> concludes this&#x20;paper.</p>
</sec>
<sec id="s2">
<title>2 Background</title>
<sec id="s2-1">
<title>2.1 Explainability of Intelligent Agents&#x2019; Behavior in Human-Agent Collaboration</title>
<p>In this paper, the term intelligent agent (IA) refers to an autonomous and goal-directed artificial agent that utilizes machine learning (ML) methods such as deep reinforcement learning (DRL) to achieve certain objectives. With the recent development of ML, IAs have achieved good performance in complex tasks (<xref ref-type="bibr" rid="B27">Mnih et&#x20;al., 2015</xref>; <xref ref-type="bibr" rid="B36">Silver et&#x20;al., 2017</xref>), and an increasing number of studies are focusing on the application to real-world robots (<xref ref-type="bibr" rid="B21">Kahn et&#x20;al., 2018</xref>; <xref ref-type="bibr" rid="B22">Kalashnikov et&#x20;al., 2018</xref>). Introducing ML methods can be a promising approach to realize effective goal-directed human-agent collaboration.</p>
<p>However, many challenges remain that hinder collaboration between people and IAs. One of the major difficulties is the lack of explainability of an agent&#x2019;s future behavior. Decision-making modules that utilize modern ML tend to be a black box (<xref ref-type="bibr" rid="B13">Fukuchi et&#x20;al., 2017</xref>; <xref ref-type="bibr" rid="B17">Hayes and Shah, 2017</xref>). In particular, the DRL model embeds the control logic in high-dimensional parameter space and usually does not provide human-comprehensible expressions of the agent&#x2019;s plans, goals, or intentions. Therefore, most people cannot understand what an IA is aiming to do. The ability to understand and predict a human coworker&#x2019;s behavior can help robots to effectively collaborate with people (<xref ref-type="bibr" rid="B23">Lasota and Shah, 2015</xref>; <xref ref-type="bibr" rid="B18">Huang and Mutlu, 2016</xref>). Similarly, humans should also be able to better understand their coworker agents&#x2019; future behavior.</p>
<p>Previous studies have proposed methods to explain diverse aspects of an IA&#x2019;s decision making. Saliency maps are commonly used to explain the reason a deep learning module made a specific decision based on its input modality, and they are also applied to an IA&#x2019;s policy model (<xref ref-type="bibr" rid="B37">Tamagnini et&#x20;al., 2017</xref>; <xref ref-type="bibr" rid="B19">Iyer et&#x20;al., 2018</xref>; <xref ref-type="bibr" rid="B28">Mott et&#x20;al., 2019</xref>). While many approaches focus on explanations for AI practitioners, Cruz et&#x20;al. proposed a method for end users (<xref ref-type="bibr" rid="B4">Cruz et&#x20;al., 2021</xref>). Their method enables an IA to explain its next action with the probability of success. Hayes et&#x20;al. proposed a natural language question answering system that can handle some templates of questions about an IA&#x2019;s policy such as &#x201c;when do you {action}?&#x201d; and &#x201c;what will you do when {state}?&#x201d; (<xref ref-type="bibr" rid="B17">Hayes and Shah, 2017</xref>).</p>
</sec>
<sec id="s2-2">
<title>2.2 Inference of IA Minds by Humans</title>
<p>Humans sometimes consider an artificial agent to have a mind and thus to be a valid target for theory-of-mind inference. This attitude is called the &#x201c;intentional stance&#x201d; (<xref ref-type="bibr" rid="B5">Dennett, 1987</xref>). Many studies have investigated which characteristics of an artificial agent lead people to adopt the intentional stance. Human-likeness is a factor that leads humans to adopt an intentional stance (<xref ref-type="bibr" rid="B38">Wimmer and Perner, 1983</xref>; <xref ref-type="bibr" rid="B30">Perez-Osorio and Wykowska, 2020</xref>), but even geometrical figures can be targets of the intentional stance (<xref ref-type="bibr" rid="B15">Gergely et&#x20;al., 1995</xref>) when they appear to be goal-directed (<xref ref-type="bibr" rid="B31">Premack and Premack, 1997</xref>), rational (<xref ref-type="bibr" rid="B15">Gergely et&#x20;al., 1995</xref>), self-propelled (<xref ref-type="bibr" rid="B24">Luo and Baillargeon, 2005</xref>), or in violation of Newtonian laws (<xref ref-type="bibr" rid="B35">Scholl and Tremoulet, 2000</xref>).</p>
<p>IAs have many of these characteristics, which suggests that people can adopt an intentional stance toward them. For example, a rational agent is expected to take actions that maximize its own utility (<xref ref-type="bibr" rid="B20">Jara-Ettinger et&#x20;al., 2015</xref>), and the RL framework is designed to address exactly the problem of utility maximization. The utility function for a general RL agent is designed to drive that agent to achieve certain goals in an efficient (or rational) manner; positive rewards encourage the RL agent to achieve certain tasks, while negative costs urge the agent to choose more efficient actions.</p>
</sec>
<sec id="s2-3">
<title>2.3&#x20;Self-Awareness for Explainable IAs</title>
<p>A person can infer another person&#x2019;s beliefs, goals, or intentions even without explicit communication. In effect, people can infer the minds of others simply by observing their behavior (<xref ref-type="bibr" rid="B1">Baker et&#x20;al., 2017</xref>). However, there is also a risk of misunderstanding or of forming false beliefs about other people based on such observations. When a person adopts the intentional stance with regard to an IA and attempts to infer the agent&#x2019;s mind, the same problem can&#x20;arise.</p>
<p>In this context, objective self-awareness, or the ability of a person to recognize themselves as an object of attention (<xref ref-type="bibr" rid="B10">Duval and Wicklund, 1972</xref>), becomes an important component for an IA to help humans correctly understand its behavior. Objective self-awareness is considered to have two aspects: private self-awareness and public self-awareness (<xref ref-type="bibr" rid="B12">Feningstein, 1975</xref>; <xref ref-type="bibr" rid="B11">Falewicz and Bak, 2015</xref>). A privately self-aware person is self-reflective and attentive to their own thoughts. A publicly self-aware person, on the other hand, focuses on the self as a social object and is attentive to how he or she appears to others. If an IA has a private self-awareness model, it can reveal <italic>&#x3b9;</italic>&#x2a;, the intention that is going to be achieved by the agent. A public self-awareness model will also enable the agent to infer the intention <italic>&#x3b9;</italic> that a human observer will attribute to it based on its behavior. On this basis, the agent can select an action <italic>a</italic> that will lead the observer to infer the agent&#x2019;s true intention <inline-formula id="inf999">
<mml:math id="m999">
<mml:mrow>
<mml:mi>a</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>argmax</mml:mi>
<mml:mi>a</mml:mi>
</mml:msub>
<mml:mi>P</mml:mi>
<mml:mo>(</mml:mo>
<mml:mi>&#x3b9;</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>&#x3b9;</mml:mi>
<mml:mo>&#x2a;</mml:mo>
<mml:mo>&#x7c;</mml:mo>
<mml:mi>a</mml:mi>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>. Similarly, an agent can also select actions that will mislead the observer to infer a particular false intention.</p>
</sec>
<sec id="s2-4">
<title>2.4 Computational Theory of Mind Model</title>
<p>Multiagent systems (MASs) represent one research field that aims to introduce the concept of theory of mind to artificial agents. Theory of mind ability enables agents to choose their actions based on what another agent is going to do, resulting in better performance in both cooperative and competitive situations (<xref ref-type="bibr" rid="B39">Zettlemoyer et&#x20;al., 2009</xref>; <xref ref-type="bibr" rid="B33">Raileanu et&#x20;al., 2018</xref>).</p>
<p>One of the major challenges of inferring another&#x2019;s mind is its multiply nested structure. Here, we formalize the nested inference structure using belief-desire-intention logic (<xref ref-type="bibr" rid="B3">Cohen and Levesque, 1990</xref>). Suppose that there are two agents, an actor agent that performs actions, and an observer agent that attempts to infer the actor&#x2019;s intention based on its actions. Let us call the latter a first-order inference and denote the inferred intention <italic>&#x3b9;</italic>
<sup>1</sup>:<disp-formula id="equ1">
<mml:math id="m1">
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mtext>BEL</mml:mtext>
<mml:mspace width="0.28em"/>
<mml:mi>o</mml:mi>
<mml:mi>b</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>v</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>r</mml:mi>
<mml:mspace width="0.28em"/>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi mathvariant="normal">I</mml:mi>
<mml:mi mathvariant="normal">N</mml:mi>
<mml:mi mathvariant="normal">T</mml:mi>
<mml:mi mathvariant="normal">E</mml:mi>
<mml:mi mathvariant="normal">N</mml:mi>
<mml:mi mathvariant="normal">D</mml:mi>
<mml:mspace width="0.28em"/>
<mml:mi>a</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>r</mml:mi>
<mml:mspace width="0.28em"/>
<mml:msup>
<mml:mrow>
<mml:mi>&#x3b9;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
<mml:mo>,</mml:mo>
</mml:math>
</disp-formula>where (BEL <italic>i X</italic>) means agent <italic>i</italic> believes <italic>X</italic>, and (INTEND <italic>i &#x3b9;</italic>) means agent <italic>i</italic> intends to achieve <italic>&#x3b9;</italic>. Actors can also have second-order beliefs, that is, beliefs about the observer&#x2019;s belief. For example,<disp-formula id="equ2">
<mml:math id="m2">
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mtext>BEL</mml:mtext>
<mml:mspace width="0.28em"/>
<mml:mi>a</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>r</mml:mi>
<mml:mspace width="0.28em"/>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mtext>BEL</mml:mtext>
<mml:mspace width="0.28em"/>
<mml:mi>o</mml:mi>
<mml:mi>b</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>v</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>r</mml:mi>
<mml:mspace width="0.28em"/>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi mathvariant="normal">I</mml:mi>
<mml:mi mathvariant="normal">N</mml:mi>
<mml:mi mathvariant="normal">T</mml:mi>
<mml:mi mathvariant="normal">E</mml:mi>
<mml:mi mathvariant="normal">N</mml:mi>
<mml:mi mathvariant="normal">D</mml:mi>
<mml:mspace width="0.28em"/>
<mml:mi>a</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>r</mml:mi>
<mml:mspace width="0.28em"/>
<mml:msup>
<mml:mrow>
<mml:mi>&#x3b9;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
</mml:math>
</disp-formula>
</p>
<p>means that the actor believes that &#x201c;the observer believes that the actor intends to achieve <italic>&#x3b9;</italic>
<sup>2</sup>.&#x201d; In this paper, a superscript <italic>k</italic> means that the corresponding variable represents a <italic>k</italic>-th order belief. We can consider an arbitrary order of inference by repeating this manipulation.</p>
<p>Zettlemoyer et&#x20;al. proposed sparse distributions over sequences (SDS) filtering (<xref ref-type="bibr" rid="B39">Zettlemoyer et&#x20;al., 2009</xref>), an algorithm to compute the nested belief. SDS filtering can efficiently solve the problem of sequential inference about nested beliefs by utilizing a <italic>sequence distribution</italic>, which represents the probability distribution of an agent&#x2019;s belief about the environment and the other agents&#x2019; beliefs given a set of possible sequences of states.</p>
<p>The computational theory-of-mind model has also been studied in the field of cognitive science. Baker et&#x20;al. proposed the Bayesian theory of mind (BToM) model (<xref ref-type="bibr" rid="B1">Baker et&#x20;al., 2017</xref>). The BToM describes an observer agent&#x2019;s first-order inference of an actor agent&#x2019;s mental state, such as a belief, desire, or intention, while observing the actor agent&#x2019;s behavior. <xref ref-type="disp-formula" rid="e1">Equation 1</xref> describes the inference model:<disp-formula id="e1">
<mml:math id="m3">
<mml:mtable class="aligned">
<mml:mtr>
<mml:mtd columnalign="right"/>
<mml:mtd columnalign="left">
<mml:mspace width="1em"/>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>b</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3b9;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>:</mml:mo>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="right">
<mml:mo>&#x221d;</mml:mo>
</mml:mtd>
<mml:mtd columnalign="left">
<mml:munder>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mtable class="subarray-c" columnalign="center">
<mml:mtr>
<mml:mtd>
<mml:msubsup>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:msub>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>a</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>b</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3b9;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msubsup>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:munder>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3b9;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>b</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3b9;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x22c5;</mml:mo>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>b</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>b</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x22c5;</mml:mo>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x22c5;</mml:mo>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="right"/>
<mml:mtd columnalign="left">
<mml:mspace width="1em"/>
<mml:mspace width="1em"/>
<mml:mo>&#x22c5;</mml:mo>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>a</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x22c5;</mml:mo>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>a</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>b</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3b9;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x22c5;</mml:mo>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>b</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3b9;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>:</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>,</mml:mo>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:math>
<label>(1)</label>
</disp-formula>where <italic>a</italic> is an action performed by the actor, and belief <italic>b</italic>
<sub>
<italic>t</italic>
</sub> is a probability distribution representing the probability that the environmental state is <italic>s</italic>
<sub>
<italic>t</italic>
</sub> given past observations <italic>o</italic>
<sub>:<italic>t</italic>
</sub>, i.e.,&#x20;<italic>b</italic>
<sub>
<italic>t</italic>
</sub>(<italic>s</italic>
<sub>
<italic>t</italic>
</sub>) &#x3d; <italic>P</italic>(<italic>s</italic>
<sub>
<italic>t</italic>
</sub>&#x7c;<italic>o</italic>
<sub>:<italic>t</italic>
</sub>). The observer attributes mental states to the actor at time <italic>t</italic>, such as observation <inline-formula id="inf1">
<mml:math id="m4">
<mml:msubsup>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msubsup>
</mml:math>
</inline-formula>, belief <inline-formula id="inf2">
<mml:math id="m5">
<mml:msubsup>
<mml:mrow>
<mml:mi>b</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msubsup>
</mml:math>
</inline-formula>, desire <italic>d</italic>
<sup>1</sup>, and intention <inline-formula id="inf3">
<mml:math id="m6">
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3b9;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msubsup>
</mml:math>
</inline-formula> based on the observer&#x2019;s observation history <italic>o</italic>
<sub>:<italic>t</italic>
</sub> from times 0, 1, &#x2026;, <italic>t</italic>. Experiments demonstrated that BToM accurately captures human mental state judgments.</p>
<p>The PublicSelf model extended BToM to an actor&#x2019;s second-order belief inference (<xref ref-type="bibr" rid="B14">Fukuchi et&#x20;al., 2018</xref>). We can consider PublicSelf as a computational model of an actor&#x2019;s public self-awareness. PublicSelf can be represented in the form of&#x20;a Bayesian network (<xref ref-type="fig" rid="F3">Figure&#x20;3</xref>). From the actor&#x2019;s observations <italic>o</italic>
<sub>:<italic>t</italic>
</sub>, the probabilities with which belief <inline-formula id="inf4">
<mml:math id="m7">
<mml:msubsup>
<mml:mrow>
<mml:mi>b</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
</mml:math>
</inline-formula>, desire <italic>d</italic>
<sup>2</sup>, and intention <inline-formula id="inf5">
<mml:math id="m8">
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3b9;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
</mml:math>
</inline-formula> will be attributed to the actor can be calculated:<disp-formula id="e2">
<mml:math id="m9">
<mml:mtable class="aligned">
<mml:mtr>
<mml:mtd columnalign="right"/>
<mml:mtd columnalign="left">
<mml:mspace width="1em"/>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>b</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3b9;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>:</mml:mo>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="right"/>
<mml:mtd columnalign="left">
<mml:mo>&#x221d;</mml:mo>
<mml:munder>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mtable class="subarray-c" columnalign="center">
<mml:mtr>
<mml:mtd>
<mml:msubsup>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>b</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:msub>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>a</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:msubsup>
<mml:mrow>
<mml:mi>b</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3b9;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>b</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msubsup>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:munder>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3b9;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>b</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3b9;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x22c5;</mml:mo>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>b</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>b</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x22c5;</mml:mo>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>b</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x22c5;</mml:mo>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>b</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>b</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="right"/>
<mml:mtd columnalign="left">
<mml:mspace width="17.0pt"/>
<mml:mo>&#x22c5;</mml:mo>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x22c5;</mml:mo>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x22c5;</mml:mo>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>a</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x22c5;</mml:mo>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>a</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>b</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3b9;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x22c5;</mml:mo>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>b</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3b9;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>:</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>.</mml:mo>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:math>
<label>(2)</label>
</disp-formula>
</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>A graphical representation of PublicSelf. PublicSelf distinguishes mental states that are based on an actor agent&#x2019;s actual observations, first-level beliefs of an observer&#x2019;s mental state, and second-level beliefs attributed to the actor by the observer, which makes it possible to consider information asymmetry.</p>
</caption>
<graphic xlink:href="frobt-09-783863-g003.tif"/>
</fig>
<p>In PublicSelf, first, a belief about the environment is constructed based on the actor&#x2019;s own visual observations. That is, an observation of an object <italic>o</italic>
<sub>
<italic>t</italic>
</sub> increases the likelihood of the actor&#x2019;s belief of possible environment states <italic>s</italic> in which the object&#x20;exists at the observed position. Then, the observer&#x2019;s belief about the environment, denoted by <italic>b</italic>
<sup>1</sup>, is considered, i.e.,&#x20;<inline-formula id="inf6">
<mml:math id="m10">
<mml:msubsup>
<mml:mrow>
<mml:mi>b</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>:</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>, where <italic>o</italic>
<sup>1</sup> is an inference concerning the observer&#x2019;s observations. From <italic>b</italic>
<sup>1</sup>, one can then estimate <italic>o</italic>
<sup>2</sup> and <italic>b</italic>
<sup>2</sup>, the observation and belief attributed to the actor by the observer, where <inline-formula id="inf7">
<mml:math id="m11">
<mml:msubsup>
<mml:mrow>
<mml:mi>b</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
<p>The important point of PublicSelf is that it distinguishes mental states that are based on an actor agent&#x2019;s actual observations, first-level beliefs of an observer&#x2019;s mental state, and second-level beliefs attributed to the actor by the observer. This ability allows PublicSelf to infer the intention the observer attributes to the actor while considering information asymmetry. A user study demonstrated that PublicSelf enables an IA to accurately infer the mental state attributed to it by a human observer in&#x20;situations where partial observability causes information asymmetry between an actor and its observer. However, previous work on PublicSelf focused only on the accuracy with which it infers the mental states attributed to an IA by a human observer, and PublicSelf has not previously been applied to generating an agent&#x2019;s actions based on the inference.</p>
</sec>
<sec id="s2-5">
<title>2.5 Generating Motions That Convey Intentions</title>
<p>Dragan et&#x20;al. proposed a method for generating an artificial agent&#x2019;s behavior with awareness of a human observer&#x2019;s theory of mind, specifically, behaviors that communicate the agent&#x2019;s intention to a human observer. In particular, <italic>legible motion</italic> aims to allow an observer to quickly and correctly infer an agent&#x2019;s intention (<xref ref-type="bibr" rid="B7">Dragan et&#x20;al., 2015a</xref>). By means of legible motion, an agent attempts to increase the probability that the intention an observer attributes to it will match its true intention.</p>
<p>In previous studies on the generation of legible motion, it was assumed that the environment is limited and that both the human observer and the artificial agent share complete information about the environment, such as what exists and where it exists. However, most actual collaboration scenarios are subject to uncertainty, and information asymmetry typically exists between human and artificial agents, meaning that one agent may possess information that the other does not. Different observations result in different beliefs, which is important to consider when modeling human theory of mind. Information asymmetry is deliberately employed in daily social acts including deception, and many related psychological experiments, such as false-belief tasks, have been performed (<xref ref-type="bibr" rid="B38">Wimmer and Perner, 1983</xref>). Therefore, this paper claims that an artificial agent needs to handle information asymmetry between the agent and a human observer when generating publicly self-aware behavior.</p>
<p>To validate our claim, we compare PublicSelf legible motion that considers information asymmetry with legible motion that does not account for information asymmetry, which we call <italic>FalseProjective</italic>.</p>
<p>With FalseProjective, an actor does not distinguish its own belief regarding the environment from an observer&#x2019;s belief and falsely identifies its own belief with that of the&#x20;observer.</p>
<p>The work by Nikolaidis et&#x20;al. is the most closely related to this paper&#x2019;s concept (<xref ref-type="bibr" rid="B29">Nikolaidis et&#x20;al., 2016</xref>). They proposed a method for generating legible motion considering the effect of a human observer&#x2019;s viewpoint. However, their focus was on depth uncertainty and occlusion of a robot arm and did not target the differences in beliefs about the world state such as what is where in the environment.</p>
</sec>
</sec>
<sec id="s3">
<title>3 Conveying Intentions by Motions With Awareness of Information Asymmetry</title>
<p>The claim of this paper is that we need to consider information asymmetry to generate motions that convey a certain intention to other agents. To validate this claim, we develop a method for generating such motions by extending the PublicSelf model and compare the generated motions with those that do not consider information asymmetry in an approach similar to that in previous&#x20;work.</p>
<table-wrap id="alg1" position="float">
<label>Algorithm 1</label>
<caption>
<p>Generating PublicSelf legible motion.</p>
</caption>
<table>
<tbody>
<tr>
<td>
<inline-graphic xlink:href="frobt-09-783863-fx1.tif"/>
</td>
</tr>
</tbody>
</table>
</table-wrap>
<table-wrap id="alg2" position="float">
<label>Algorithm 2</label>
<caption>
<p>Updating the PublicSelf&#x20;model.</p>
</caption>
<table>
<tbody>
<tr>
<td>
<inline-graphic xlink:href="frobt-09-783863-fx2.tif"/>
</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>A brief description of our method is as follows: By extracting specific probabilities under the conditions of each action <italic>a</italic>
<sub>
<italic>t</italic>
</sub> in the summation of <xref ref-type="disp-formula" rid="e2">Eq. 2</xref>, we can calculate <italic>P</italic>(<italic>b</italic>
<sup>2</sup>, <italic>d</italic>
<sup>2</sup>, <italic>&#x3b9;</italic>
<sup>2</sup>&#x7c;<italic>o</italic>, <italic>a</italic>), the probability that a human observer will attribute mental states (<italic>b</italic>
<sup>2</sup>, <italic>d</italic>
<sup>2</sup>, <italic>&#x3b9;</italic>
<sup>2</sup>) to an actor given that actor&#x2019;s specific action <italic>a</italic>. On this basis, we can select the action that will most effectively express the actor&#x2019;s mental state:<disp-formula id="e3">
<mml:math id="m12">
<mml:mi>a</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>argmax</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>b</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi>&#x3b9;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:mi>o</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>a</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>.</mml:mo>
</mml:math>
<label>(3)</label>
</disp-formula>
</p>
<p>In particular, by marginalizing over <italic>b</italic>
<sup>2</sup> and <italic>d</italic>
<sup>2</sup>, we can obtain the action that will most effectively express the agent&#x2019;s intention, or the most legible action. Because PublicSelf captures the differences in observations (<italic>o</italic>, <italic>o</italic>
<sup>1</sup>, <italic>o</italic>
<sup>2</sup>) and beliefs (<italic>b</italic>, <italic>b</italic>
<sup>1</sup>, <italic>b</italic>
<sup>2</sup>) between an actor and an observer agent, we can generate motions while considering information asymmetry.</p>
<p>
<xref ref-type="other" rid="alg1">Algorithm 1</xref> presents the procedure for generating legible motion with PublicSelf. Here, <italic>&#x3c0;</italic>(<italic>a</italic>&#x2223;<italic>s</italic>) is a probability distribution of taking the actions <italic>a</italic> given a state <italic>s</italic>, which corresponds to a typical formulation of an actor&#x2019;s decision-making model in reinforcement learning. We do not simply choose an action based on <xref ref-type="disp-formula" rid="e3">Eq. 3</xref>. In other words, we do not adopt actions that are unlikely to be chosen under the actor&#x2019;s original policy. Instead, we calculate the increase in the probability that an observer will infer the actor&#x2019;s true intention that is achieved by changing the actor&#x2019;s original action to the action identified by PublicSelf as the most legible; we do not change the actor&#x2019;s action if the legible action does not increase the probability sufficiently to balance the cost of taking that legible action. In the situation presented in <xref ref-type="fig" rid="F1">Figure&#x20;1</xref>, for example, the actor would perform quite a wide turn and take a long time to achieve the original goal if all actions were selected strictly in accordance with <xref ref-type="disp-formula" rid="e3">Eq.&#x20;3</xref>.</p>
</sec>
<sec id="s4">
<title>4 Fetchfruit Task and Implementation</title>
<sec id="s4-1">
<title>4.1 Environment of the FetchFruit Task</title>
<p>We developed the FetchFruit task in a simulated environment (<xref ref-type="fig" rid="F1">Figures 1</xref>, <xref ref-type="fig" rid="F2">2</xref>) and implemented our method of generating legible motions for an IA in this simulated environment. The environment is a square room containing an apple, a pear, an actor, and a human observer. The initial positions of the actor and the observer are&#x20;fixed.</p>
<p>The actor&#x2019;s actions are driven by a policy for the retrieval of an apple or a pear. Our method is independent of the implementation of the policy model as long as the probability of the actor taking each action can be calculated. In this paper, we use the asynchronous advantage actor-critic (A3C) algorithm (<xref ref-type="bibr" rid="B26">Mnih et&#x20;al., 2016</xref>), which is one of the most representative algorithms for DRL. Every 0.5&#xa0;s, the actor selects an action from the action space <italic>A</italic>, which is composed of three discrete actions: accelerate forward, turn clockwise, and turn counterclockwise.</p>
<p>The environmental state <italic>s</italic>
<sub>
<italic>t</italic>
</sub> is composed of the locations of the apple and pear, the area that is within the observer&#x2019;s sight, and the actor&#x2019;s state, which includes the actor&#x2019;s location, velocity, and direction as well as the area within the actor&#x2019;s sight. In this paper, only the actor&#x2019;s state changes over&#x20;time.</p>
<p>The human observer does not move and observes the environment from a fixed viewpoint. The observer can acquire only information that is in his/her field of view. That is, the observer can know where a fruit is only if s/he can see it. Similarly, the actor&#x2019;s location, velocity, and direction are provided when the observer can see the actor, but none of the above is provided when the actor is out of the observer&#x2019;s field of&#x20;view.</p>
</sec>
<sec id="s4-2">
<title>4.2 PublicSelf Model</title>
<p>
<xref ref-type="other" rid="alg2">Algorithm 2</xref> shows how <xref ref-type="disp-formula" rid="e2">Eq. 2</xref> is calculated in our implementation. There are infinite possible environmental states because <italic>s</italic>
<sub>
<italic>t</italic>
</sub> consists of continuous values and is subject to uncertainty due to partial observability. We solve the problem by sampling possible states to simplify the state space, which is analogous to the sequential Monte Carlo method (<xref ref-type="bibr" rid="B6">Doucet et&#x20;al., 2001</xref>). We first randomly sample <italic>n</italic> states <italic>s</italic>
<sub>0,0</sub>, <italic>s</italic>
<sub>0,1</sub>, &#x2026;, <italic>s</italic>
<sub>0,<italic>n</italic>&#x2212;1</sub> from among the possible states. The implementation of PublicSelf is based on the SDS filtering concept (see <xref ref-type="sec" rid="s2-4">Section 2.4</xref>). PublicSelf includes four filters for inferring the beliefs and desires that the observer will attribute to the actor:<disp-formula id="equ3">
<mml:math id="m13">
<mml:mtable class="eqnarray-star">
<mml:mtr>
<mml:mtd columnalign="right">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a6;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>:</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mtd>
<mml:mtd columnalign="left">
<mml:mo>&#x221d;</mml:mo>
</mml:mtd>
<mml:mtd columnalign="left">
<mml:msup>
<mml:mrow>
<mml:mi>b</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>:</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>,</mml:mo>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="right">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a6;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>:</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>:</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mtd>
<mml:mtd columnalign="left">
<mml:mo>&#x221d;</mml:mo>
</mml:mtd>
<mml:mtd columnalign="left">
<mml:munder>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>b</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:munder>
<mml:msubsup>
<mml:mrow>
<mml:mi>b</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>:</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x22c5;</mml:mo>
<mml:mi>&#x3b4;</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>b</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi>B</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>o</mml:mi>
<mml:mi>b</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>:</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>,</mml:mo>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="right">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a6;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>:</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>:</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mtd>
<mml:mtd columnalign="left">
<mml:mo>&#x221d;</mml:mo>
</mml:mtd>
<mml:mtd columnalign="left">
<mml:munder>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>b</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:munder>
<mml:msubsup>
<mml:mrow>
<mml:mi>b</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>:</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x22c5;</mml:mo>
<mml:mi>&#x3b4;</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>b</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi>B</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>a</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>:</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>,</mml:mo>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="right">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>:</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mtd>
<mml:mtd columnalign="left">
<mml:mo>&#x221d;</mml:mo>
</mml:mtd>
<mml:mtd columnalign="left">
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>:</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>.</mml:mo>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:math>
</disp-formula>
</p>
<p>The delta function <italic>&#x3b4;</italic>(<italic>&#x3b1;</italic>, <italic>&#x3b2;</italic>) returns a value of 1 when <italic>&#x3b1;</italic> &#x3d; <italic>&#x3b2;</italic> and a value of 0 otherwise. <italic>B</italic>
<sup>
<italic>obs</italic>
</sup>(<italic>s</italic>
<sub>:<italic>t</italic>
</sub>) and <italic>B</italic>
<sup>
<italic>act</italic>
</sup>(<italic>s</italic>
<sub>:<italic>t</italic>
</sub>) return the observer&#x2019;s and actor&#x2019;s beliefs, respectively, given the history of the environmental states. Here, for simplicity, we will suppose that the transitioning of the environmental state is a Markov process and <italic>s</italic>
<sub>:<italic>t</italic>
</sub> can be denoted as <italic>s</italic>
<sub>
<italic>t</italic>
</sub>. <inline-formula id="inf8">
<mml:math id="m14">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a6;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> denotes the actor&#x2019;s belief regarding the environment. <inline-formula id="inf9">
<mml:math id="m15">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a6;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> is the probability that the observer will believe that the environmental state is <inline-formula id="inf10">
<mml:math id="m16">
<mml:msubsup>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:math>
</inline-formula> when the actual environmental state is <italic>s</italic>
<sub>
<italic>t</italic>
</sub>. <inline-formula id="inf11">
<mml:math id="m17">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a6;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> is the probability that the observer will infer that the actor believes that the environmental state is <inline-formula id="inf12">
<mml:math id="m18">
<mml:msubsup>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:math>
</inline-formula> when the actual state is <italic>s</italic>
<sub>
<italic>t</italic>
</sub>. <inline-formula id="inf13">
<mml:math id="m19">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> represents the probability with which desire <italic>d</italic>
<sup>2</sup> will be attributed to the actor by the observer given state <italic>s</italic>
<sub>
<italic>t</italic>
</sub>. These filters are initialized as uniform distributions.</p>
<p>Based on the actor&#x2019;s observation <italic>o</italic>
<sub>
<italic>t</italic>
</sub>, <inline-formula id="inf14">
<mml:math id="m20">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a6;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> is updated by multiplying it by the observation probability <italic>P</italic>(<italic>o</italic>
<sub>
<italic>t</italic>
</sub>&#x7c;<italic>s</italic>
<sub>
<italic>t</italic>
</sub>) because <italic>P</italic>(<italic>s</italic>
<sub>
<italic>t</italic>
</sub>&#x7c;<italic>o</italic>
<sub>
<italic>t</italic>
</sub>) &#x221d; <italic>P</italic>(<italic>s</italic>
<sub>
<italic>t</italic>
</sub>&#x7c;<italic>o</italic>
<sub>
<italic>t</italic>&#x2212;1</sub>) &#x22c5; <italic>P</italic>(<italic>o</italic>
<sub>
<italic>t</italic>
</sub>&#x7c;<italic>s</italic>
<sub>
<italic>t</italic>
</sub>). We can similarly update <inline-formula id="inf15">
<mml:math id="m21">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a6;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf16">
<mml:math id="m22">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a6;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> by estimating the actor&#x2019;s and observer&#x2019;s observations <italic>o</italic>
<sup>1</sup> and <italic>o</italic>
<sup>2</sup> under each <italic>s</italic>
<sub>
<italic>t</italic>
</sub> and multiplying them by the observation probabilities <inline-formula id="inf17">
<mml:math id="m23">
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf18">
<mml:math id="m24">
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>:<disp-formula id="equ4">
<mml:math id="m25">
<mml:mtable class="align-star" columnalign="left">
<mml:mtr>
<mml:mtd columnalign="right">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a6;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mtd>
<mml:mtd columnalign="left">
<mml:mo>&#x2190;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a6;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x22c5;</mml:mo>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>,</mml:mo>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="right">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a6;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mtd>
<mml:mtd columnalign="left">
<mml:mo>&#x2190;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a6;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x22c5;</mml:mo>
<mml:munder>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:munder>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x22c5;</mml:mo>
<mml:mi>&#x3b4;</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi>O</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>o</mml:mi>
<mml:mi>b</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>,</mml:mo>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="right">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a6;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mtd>
<mml:mtd columnalign="left">
<mml:mo>&#x2190;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a6;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x22c5;</mml:mo>
<mml:munder>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:munder>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x22c5;</mml:mo>
<mml:mi>&#x3b4;</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi>O</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>a</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>,</mml:mo>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:math>
</disp-formula>where <italic>O</italic>
<sup>
<italic>obs</italic>
</sup>(<italic>s</italic>
<sub>
<italic>t</italic>
</sub>) and <italic>O</italic>
<sup>
<italic>act</italic>
</sup>(<italic>s</italic>
<sub>
<italic>t</italic>
</sub>) return the observer&#x2019;s and actor&#x2019;s observations, respectively, under&#x20;<italic>s</italic>
<sub>
<italic>t</italic>
</sub>.</p>
<p>During the update process, an <italic>s</italic>&#x20;&#x2208; <italic>S</italic>
<sub>
<italic>t</italic>
</sub> may appear such that <inline-formula id="inf19">
<mml:math id="m26">
<mml:msub>
<mml:mrow>
<mml:mo movablelimits="false" form="prefix">&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>b</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:msub>
<mml:msup>
<mml:mrow>
<mml:mi>b</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0</mml:mn>
</mml:math>
</inline-formula>, which means that the observer no longer thinks that the actor holds any belief about <italic>s</italic> at all. We resample <italic>S</italic>
<sub>
<italic>t</italic>
</sub> by removing such states <italic>s</italic> with zero probability and making branches of samples with high probability.</p>
<p>The actor&#x2019;s state branches depending on the actor&#x2019;s choice of action. Let <inline-formula id="inf20">
<mml:math id="m27">
<mml:msubsup>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>a</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msubsup>
</mml:math>
</inline-formula> be a set of predicted environmental states to which the actor&#x2019;s action <italic>a</italic>
<sub>
<italic>t</italic>
</sub> will lead from <italic>s</italic>&#x20;&#x2208; <italic>S</italic>
<sub>
<italic>t</italic>
</sub>, and let <italic>S</italic>
<sub>
<italic>t</italic>&#x2b;1</sub> be the union of the states predicted under each action, <inline-formula id="inf21">
<mml:math id="m28">
<mml:msub>
<mml:mrow>
<mml:mo>&#x22c3;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msubsup>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:math>
</inline-formula>. The function <italic>Pred</italic> : <italic>S</italic>
<sub>
<italic>t</italic>
</sub> &#xd7; <italic>A</italic>&#x20;&#x2192; <italic>S</italic>
<sub>
<italic>t</italic>&#x2b;1</sub> returns the states at time <italic>t</italic>&#x20;&#x2b; 1 to which the environment transitions with each action from <italic>S</italic>
<sub>
<italic>t</italic>
</sub>. In this study, we trained a model for <italic>Pred</italic> by means of supervised learning. The new values of the filters &#x3a6;<sub>
<italic>t</italic>&#x2b;1</sub> are inherited from the previous values:<disp-formula id="equ5">
<mml:math id="m29">
<mml:mtable class="align-star" columnalign="left">
<mml:mtr>
<mml:mtd columnalign="right">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a6;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>d</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mi>a</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mtd>
<mml:mtd columnalign="left">
<mml:mo>&#x2190;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a6;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>,</mml:mo>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="right">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a6;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>d</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi>a</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>,</mml:mo>
<mml:mi>P</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>d</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mi>a</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mtd>
<mml:mtd columnalign="left">
<mml:mo>&#x2190;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a6;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x22c5;</mml:mo>
<mml:mi>&#x3b4;</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>a</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:mi>a</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>,</mml:mo>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="right">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a6;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>d</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi>a</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>,</mml:mo>
<mml:mi>P</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>d</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mi>a</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mtd>
<mml:mtd columnalign="left">
<mml:mo>&#x2190;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a6;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x22c5;</mml:mo>
<mml:mi>&#x3b4;</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>a</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:mi>a</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>.</mml:mo>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:math>
</disp-formula>
</p>
<p>We assume that the actor can have only two intentions, namely, <italic>&#x3b9;</italic>
<sub>
<italic>a</italic>
</sub> and <italic>&#x3b9;</italic>
<sub>
<italic>p</italic>
</sub>, which are the intention to retrieve an apple and that to retrieve a pear, respectively. We also consider that the desire to retrieve a fruit directly generates the intention to retrieve it, that is, <inline-formula id="inf22">
<mml:math id="m30">
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>&#x3b9;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b9;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>&#x3b9;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b9;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:math>
</inline-formula>, where <italic>d</italic>
<sub>
<italic>a</italic>
</sub> and <italic>d</italic>
<sub>
<italic>p</italic>
</sub> are the utility functions when the actor&#x2019;s target is an apple and a pear, respectively. Then, <inline-formula id="inf23">
<mml:math id="m31">
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>a</mml:mi>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>a</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3b9;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>a</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> simplifies to <inline-formula id="inf24">
<mml:math id="m32">
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>a</mml:mi>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>a</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>, which can be estimated with a model-free RL algorithm.<disp-formula id="equ6">
<mml:math id="m33">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:mi>P</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>d</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mi>a</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x2190;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x22c5;</mml:mo>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>a</mml:mi>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
</p>
<p>To generate legible motion, we need to determine <italic>P</italic>(<italic>&#x3b9;</italic>
<sup>2</sup>&#x7c;<italic>a</italic>), the probability that the observer will attribute an intention <italic>&#x3b9;</italic>
<sup>2</sup> to the actor given action <italic>a</italic>. In our implementation, we equate <italic>&#x3b9;</italic>
<sup>2</sup> with <italic>d</italic>
<sup>2</sup> and calculate <italic>P</italic>(<italic>d</italic>
<sup>2</sup>&#x7c;<italic>a</italic>) instead of <italic>P</italic>(<italic>&#x3b9;</italic>
<sup>2</sup>&#x7c;<italic>a</italic>). <italic>P</italic>(<italic>d</italic>
<sup>2</sup>&#x7c;<italic>a</italic>) can be obtained from the four filters as follows:<disp-formula id="equ7">
<mml:math id="m34">
<mml:mtable class="align-star" columnalign="left">
<mml:mtr>
<mml:mtd columnalign="right"/>
<mml:mtd columnalign="left">
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:mi>a</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="right">
<mml:mo>&#x3d;</mml:mo>
</mml:mtd>
<mml:mtd columnalign="left">
<mml:munder>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:munder>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:munder>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2208;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:munder>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a6;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:munder>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2033;</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2208;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:munder>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a6;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2033;</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x22c5;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a6;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2033;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>.</mml:mo>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:math>
</disp-formula>
</p>
</sec>
<sec id="s4-3">
<title>4.3 Generating FalseProjective Legible Motion</title>
<p>Since the PublicSelf model infers the actor&#x2019;s own belief <italic>b</italic>
<sup>0</sup>, PublicSelf can also be used to generate FalseProjective legible motion. A filter <inline-formula id="inf25">
<mml:math id="m35">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>, which represents the probability of an actor&#x2019;s own desire <italic>d</italic>
<sup>0</sup> estimated independently of the observer, enables us to generate FalseProjective legible motion in a manner similar to the generation of PublicSelf legible motion.</p>
</sec>
<sec id="s4-4">
<title>4.4 Generated Motion</title>
<p>Here, we present the motion generated in the example FetchFruit scenarios. We consider five example scenarios: Center, Side-Visible, Side-Invisible, Blind-Inside, and Blind-Outside. Table ?? <!-- NOTE(review): unresolved table cross-reference; insert the proper <xref ref-type="table"> once the settings table id is known --> summarizes the settings for these scenarios, which cover all 2 &#xd7; 2 possibilities with regard to whether the observer can see the actor&#x2019;s target and/or the nontarget objects. In every scenario, the target and nontarget objects are adjacent to each other. The actor is assumed to observe the positions of both fruits from its initial position; consequently, the actor does not need to explore the environment but simply moves to its target. We generated three types of motion in each of the example scenarios: the original motion, the FalseProjective legible motion, and the PublicSelf legible motion.</p>
<p>In the Center scenario (<xref ref-type="fig" rid="F4">Figure&#x20;4</xref>), the apple and pear are immediately in front of the observer. It is difficult to quickly infer the actor&#x2019;s intention from the original motion because the actor first moves straight to the point between the apple and the pear (<xref ref-type="fig" rid="F4">Figure&#x20;4A</xref>), whereas the FalseProjective motion enables the observer to infer the actor&#x2019;s intention more quickly because of the agent&#x2019;s curved movement toward its actual target (<xref ref-type="fig" rid="F4">Figure&#x20;4B</xref>). The PublicSelf legible motion follows the same route as the FalseProjective motion.</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>Motion in the Center scenario. Blue: Original. Orange: FalseProjective. Green: PublicSelf. FalseProjective and PublicSelf let the observer correctly infer the actor&#x2019;s target by showing the curved movement from the beginning. <bold>(A)</bold>Original motion. <bold>(B)</bold>FalseProjective/PublicSelf. <bold>(C)</bold>Bird&#x2019;s-eye&#x20;view.</p>
</caption>
<graphic xlink:href="frobt-09-783863-g004.tif"/>
</fig>
<p>In the Side-Visible scenario (<xref ref-type="fig" rid="F5">Figure&#x20;5</xref>), the human observer can observe only the apple. With the original motion trajectory, the actor moves directly to the apple. Because the observer cannot see the pear next to the apple, the original motion presents much less ambiguity than in the Center scenario. Here, PublicSelf generates the same trajectory as that for the original motion. The FalseProjective motion, on the other hand, follows a different trajectory; the actor first moves forward toward the observer and then follows a curved trajectory toward the apple. This motion would avoid ambiguity if the observer knew the location of the pear; however, due to the observer&#x2019;s limited view, this motion may instead lead the observer to believe that the actor&#x2019;s target is behind him or her; thus, the actor&#x2019;s intention is less clear in this case than it would be if the actor moved directly to the&#x20;apple.</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>Motion in the Side-Visible scenario. FalseProjective shows a curved movement to avoid the observer inferring that the actor intends to retrieve the pear, but is less effective than the straightforward movement of original and PublicSelf due to the observer&#x2019;s limited view. <bold>(A)</bold>Original motion/PublicSelf. <bold>(B)</bold>FalseProjective. <bold>(C)</bold>Bird&#x2019;s-eye&#x20;view.</p>
</caption>
<graphic xlink:href="frobt-09-783863-g005.tif"/>
</fig>
<p>
<xref ref-type="fig" rid="F6">Figure&#x20;6</xref> shows the results in the Side-Invisible scenario. With the original motion trajectory, the actor first begins to move to the space between the apple and pear and then shows a gently curved motion. This early motion could mislead the observer into thinking that the actor intends to retrieve the pear, whereas through the PublicSelf legible motion, the actor can convey that the pear is less likely to be its target by making a detour to avoid causing the observer to misunderstand the actor&#x2019;s intention. Here, the PublicSelf legible motion is the same as the FalseProjective motion.</p>
<fig id="F6" position="float">
<label>FIGURE 6</label>
<caption>
<p>Motion in the Side-Invisible scenario. FalseProjective and PublicSelf motion trajectories successfully avoid misleading the observer into considering that the actor is moving toward the pear. <bold>(A)</bold>Original motion. <bold>(B)</bold>FalseProjective/PublicSelf. <bold>(C)</bold>Bird&#x2019;s-eye&#x20;view.</p>
</caption>
<graphic xlink:href="frobt-09-783863-g006.tif"/>
</fig>
<p>In the two Blind scenarios (<xref ref-type="fig" rid="F7">Figures 7</xref>, <xref ref-type="fig" rid="F8">8</xref>), the actor&#x2019;s motion does not convey any information to the observer, who does not know the location of either the apple or the pear; therefore, the FalseProjective motion merely introduces a detour and increases the time required to retrieve the apple. By contrast, the PublicSelf legible motion does not change the actor&#x2019;s actions because PublicSelf can infer that changing the motion would have no effect on the observer&#x2019;s inference.</p>
<fig id="F7" position="float">
<label>FIGURE 7</label>
<caption>
<p>Motion in the Blind-Inside scenario. Because the observer cannot see any target candidates, the detour of FalseProjective provides no information about the actor&#x2019;s target. <bold>(A)</bold>Original motion/PublicSelf. <bold>(B)</bold>FalseProjective. <bold>(C)</bold>Bird&#x2019;s-eye&#x20;view.</p>
</caption>
<graphic xlink:href="frobt-09-783863-g007.tif"/>
</fig>
<fig id="F8" position="float">
<label>FIGURE 8</label>
<caption>
<p>Motion in the Blind-Outside scenario. <bold>(A)</bold>Original motion/PublicSelf. <bold>(B)</bold>FalseProjective. <bold>(C)</bold>Bird&#x2019;s-eye&#x20;view.</p>
</caption>
<graphic xlink:href="frobt-09-783863-g008.tif"/>
</fig>
</sec>
</sec>
<sec id="s5">
<title>5 Experiments</title>
<sec id="s5-1">
<title>5.1 Simulation Study</title>
<sec id="s5-1-1">
<title>5.1.1 Overview</title>
<p>The simulation study aimed to investigate the scalability of PublicSelf legible motions. We prepared additional FetchFruit scenarios beyond the examples in <xref ref-type="sec" rid="s4-4">Section 4.4</xref>, and compared the legibility of the three motion types using artificial observers that were trained to classify an actor&#x2019;s intentions from observations of its motion. We also analyzed the effects of information asymmetry on generating legible motions to investigate whether PublicSelf legible motion could effectively handle information asymmetry.</p>
</sec>
<sec id="s5-1-2">
<title>5.1.2 Procedure</title>
<p>We built two FetchFruit datasets for 1) training the artificial observers and 2) evaluating PublicSelf legible motion. Both are composed of the captured motions of an actor agent from the observer&#x2019;s viewpoint and ground-truth labels of the object that the actor intended to retrieve. An apple and a pear were randomly spawned within the field of view of the actor agent, but they were not necessarily within the observer&#x2019;s field of view. The datasets do not include conditions in which both an apple and a pear are in the observer&#x2019;s field of view because an observer cannot distinguish an actor&#x2019;s intention in such conditions. We prepared all three motion patterns for each fruit&#x2019;s position conditions. For the training datasets, we used only trials in which all three types of motions were completely identical to eliminate bias with regard to the motion type; 1,847 conditions satisfied this requirement. For the evaluation of PublicSelf, we excluded conditions in which the three motion patterns were all identical to focus on the differences between the motions. We acquired 695 conditions for the evaluation dataset.</p>
<p>The artificial observers were deep-learning classification models that were composed of convolutional layers, a long short-term memory layer, and fully connected layers. They were trained to infer whether the actor&#x2019;s target was the apple or pear based on sequences of captured images as a supervised learning problem using the training dataset.</p>
<p>We used the average probability of the inference of five classification models as the score for the legibility of each trajectory. The interrater reliability of the classification models was 0.936 (ICC(3,&#x20;<italic>k</italic>)).</p>
<p>Depending on the trajectories, the lengths of different trials could be different even when the initial positions were the same. Therefore, to compare different motion types under the same condition, we aligned the lengths of all trials with the same initial position by truncating them at the time when the shortest trial&#x20;ended.</p>
</sec>
<sec id="s5-1-3">
<title>5.1.3 Hypotheses</title>
<p>We expected that PublicSelf legible motions could adaptively handle various scenarios and thus the artificial observers would be able to infer the actor&#x2019;s intentions more accurately from PublicSelf than from other motion types:</p>
<p>
<bold>H1</bold> Legibility scores of PublicSelf are higher than those of the other motions.</p>
<p>In particular, if the differences in observations and beliefs between the actor and the observer are a key factor for generating legible motion in&#x20;situations with information asymmetry, and if PublicSelf could successfully capture these differences, then we expected that PublicSelf would show better performance than FalseProjective in&#x20;situations with information asymmetry but not in symmetric&#x20;situations.</p>
<p>
<bold>H2</bold> Legibility scores of PublicSelf are higher than those of FalseProjective in&#x20;situations <italic>with</italic> information asymmetry but show no differences in&#x20;situations <italic>without</italic> information asymmetry.</p>
<p>Here, in the FetchFruit task, information asymmetry refers to the situation in which the observer does not know the locations of both an apple and a pear while the actor&#x20;does.</p>
</sec>
<sec id="s5-1-4">
<title>5.1.4 Results</title>
<p>
<bold>R1</bold> <xref ref-type="fig" rid="F9">Figure&#x20;9</xref> shows the averaged legibility scores for each motion type. Overall, PublicSelf legible motion showed higher scores than the other motions (3 &#x2264; <italic>t</italic>) except for time step <italic>t</italic>&#x20;&#x3d; 1, 2, where original motion scored the highest. For statistical analysis, we conducted a Friedman test to compare the motion types for each time step. Because multiple testing inflates the type I error rate, <italic>p</italic> values were adjusted with the Holm-Sidak method. The results showed statistically significant effects of motion type at 3 &#x2264; <italic>t</italic>&#x20;&#x2264; 12 (<italic>p</italic>&#x20;&#x3d; 0.036 at <italic>t</italic>&#x20;&#x3d; 3 and <italic>p</italic>&#x20;&#x3c; 0.01 at 4 &#x2264; <italic>t</italic>). As post hoc analysis, we conducted multiple comparisons among the three motions for each time step using Wilcoxon signed-rank tests with the Holm-Sidak adjustments. <xref ref-type="fig" rid="F9">Figure&#x20;9</xref> shows the results of the post hoc comparisons. Both FalseProjective and PublicSelf recorded significantly higher scores than the original motion at 4 &#x2264; <italic>t</italic>&#x20;&#x2264; 12, and the scores of PublicSelf were significantly higher than those of FalseProjective at 6 &#x2264; <italic>t</italic>&#x20;&#x2264; 10 and marginally significant at <italic>t</italic>&#x20;&#x3d; 5. The maximum effect sizes <italic>r</italic> of the Wilcoxon signed-rank tests were 0.23 at <italic>t</italic>&#x20;&#x3d; 12 between original and FalseProjective, 0.29 at <italic>t</italic>&#x20;&#x3d; 12 between original and PublicSelf, and 0.17 at <italic>t</italic>&#x20;&#x3d; 9 between FalseProjective and PublicSelf. From these results, we considered that we can accept&#x20;<bold>H1</bold>.</p>
<fig id="F9" position="float">
<label>FIGURE 9</label>
<caption>
<p>Changes in scores for each motion. Left: Original, Center: FalseProjective, Right: PublicSelf. X marks indicate the mean values. The symbols represent the results of multiple comparisons with Wilcoxon signed-rank tests (&#x2a;&#x2a;: <italic>p</italic>&#x20;&#x3c; 0.01, &#x2a;: <italic>p</italic>&#x20;&#x3c; 0.05, &#x2020;: <italic>p</italic>&#x20;&#x3c; 0.1). On average, PublicSelf scored higher than the other motions.</p>
</caption>
<graphic xlink:href="frobt-09-783863-g009.tif"/>
</fig>
<p>
<bold>R2</bold> We further investigated the effect of information asymmetry on the results. <xref ref-type="fig" rid="F10">Figures 10</xref>, <xref ref-type="fig" rid="F11">11</xref> illustrate the differences in scores between FalseProjective and PublicSelf in&#x20;situations with and without information asymmetry. In situations with information asymmetry, PublicSelf recorded better scores than FalseProjective, and Wilcoxon signed-rank tests revealed that there were significant differences at 5 &#x2264; <italic>t</italic>&#x20;&#x2264; 10. However, there was little difference in&#x20;situations without information asymmetry. We found no significant differences between the two motions. These results support our hypothesis&#x20;<bold>H2</bold>.</p>
<fig id="F10" position="float">
<label>FIGURE 10</label>
<caption>
<p>Legibility scores of FalseProjective and PublicSelf in&#x20;situations <italic>with</italic> information asymmetry. Left: FalseProjective, Right: PublicSelf.</p>
</caption>
<graphic xlink:href="frobt-09-783863-g010.tif"/>
</fig>
<fig id="F11" position="float">
<label>FIGURE 11</label>
<caption>
<p>Legibility scores of FalseProjective and PublicSelf in&#x20;situations <italic>without</italic> information asymmetry. No significant difference was found between FalseProjective and PublicSelf.</p>
</caption>
<graphic xlink:href="frobt-09-783863-g011.tif"/>
</fig>
</sec>
<sec id="s5-1-5">
<title>5.1.5 Summary of Simulation Study</title>
<p>The first results (<bold>R1</bold>) demonstrated that PublicSelf scored higher than the other two motions for the large dataset. This result supports <bold>H1</bold> and suggests that PublicSelf legible motion could work robustly even in scenarios other than the examples. In addition, the second results (<bold>R2</bold>) supported <bold>H2</bold>. We found significant improvements from PublicSelf compared to FalseProjective in&#x20;situations with information asymmetry but did not in&#x20;situations without information asymmetry. From this result, we conclude that information asymmetry is a critical factor when conveying intentions with motions and that we can successfully address it by explicitly introducing the differences between the observations and beliefs of the actor and those of the observer to the&#x20;model.</p>
</sec>
</sec>
<sec id="s5-2">
<title>5.2 User Study</title>
<sec id="s5-2-1">
<title>5.2.1 Overview</title>
<p>In a user study, we investigated human inference of an actor agent&#x2019;s mind against the three motion types to verify that PublicSelf legible motion is effective for human observers. We compared the accuracy of the inferences among the three motion types. We also looked into the participants&#x2019; psychological perceptions of each motion type with a simple questionnaire.</p>
</sec>
<sec id="s5-2-2">
<title>5.2.2 Procedure</title>
<p>Twelve undergraduate and graduate students (6 female and 6 male; aged 20&#x2013;24, <italic>M</italic>&#x20;&#x3d; 22.6, SD &#x3d; 1.83) were recruited with compensation of 750 JPY and asked to predict whether an actor would reach an apple or a pear while observing its movement. A user interface displayed the actor&#x2019;s motion at ten frames per second, and the participants observed the actor and pushed the F key or the J key to, respectively, indicate whether they believed that the actor intended to retrieve the apple or the pear. The participants could also express that they could not determine which fruit the actor intended to retrieve by pushing neither key. The correspondence between the keys and the answers was randomly chosen for each participant.</p>
<p>Before the experiment, we instructed the participants that there would always be one apple and one pear at random locations in the room, while the initial positions of the observer and actor would be fixed. We also told them that the goal of the actor would be determined randomly for each scenario and that the actor might intend to retrieve either&#x20;fruit.</p>
<p>After familiarization, the participants were presented with nine scenarios for each motion type: original, FalseProjective, and PublicSelf. The order effect was fully counterbalanced. These nine scenarios included the example scenarios presented in <xref ref-type="fig" rid="F4">Figures 4</xref>&#x2013;<xref ref-type="fig" rid="F8">8</xref>, and the other scenarios were fake scenarios in which the locations of the apple and pear were randomized. We included these fake scenarios to decrease the possibility that the participants would notice that they were being presented with the same scenarios for each motion&#x20;type.</p>
<p>We collected subjective measures by means of a simple questionnaire after the inference session by asking three Likert-scale questions:</p>
<p>Q1. It was easy to predict which fruit the agent was going to retrieve (Legibility&#x20;1).</p>
<p>Q2. The agent moved in a manner that made its intention clear (Legibility&#x20;2).</p>
<p>Q3. The agent&#x2019;s behavior was consistent (Consistency).</p>
<p>Q1 and Q2 were questions adopted in a previous legible motion study (<xref ref-type="bibr" rid="B7">Dragan et&#x20;al., 2015a</xref>) to ask participants whether they thought the actor&#x2019;s motion was legible. We added the question about the consistency of the agent&#x2019;s behavior to investigate the observers&#x2019; perceptions of the unique behavior of FalseProjective legible motions in Side-Visible because we hypothesized that FalseProjective&#x2019;s roundabout behavior would be perceived as inconsistent from the observer&#x2019;s perspective.</p>
</sec>
<sec id="s5-2-3">
<title>5.2.3 Hypotheses</title>
<p>We investigated the results for the human inferences using two metrics: rapidity and accuracy. Higher rapidity means that the motion presents less ambiguity, thus the participants required less time to infer the actor&#x2019;s true intention. Higher accuracy means that the motion resulted in fewer incorrect inferences.</p>
<p>
<xref ref-type="table" rid="T1">Tables 1</xref>, <xref ref-type="table" rid="T2">2</xref> summarize our hypotheses on human inferences. In the Center scenario, we hypothesized that FalseProjective and PublicSelf legible motions will result in better performance in the rapidity metrics because they present much less ambiguity than original motion does. On the other hand, we felt that there would be no difference in the accuracy metrics because none of the motions would mislead observers to make wrong inferences. In the Side-Visible scenario, we considered that FalseProjective, which acted with extra consideration of the actor and the non-target object, would be less rapid than the original and PublicSelf legible motions, which showed straightforward movements to the actor&#x2019;s target object. In addition, we expected that FalseProjective motion&#x2019;s early movement would mislead the observers to think that the actor was ignoring the target object and therefore would result in lower accuracy. A similar hypothesis about the accuracy metrics was formulated for Side-Invisible, in which the original motion could lead to incorrect inferences. We also considered that FalseProjective and PublicSelf legible motion would enable observers to rapidly infer the actor&#x2019;s correct intention by exaggerating that the object in the actor&#x2019;s view was not the target.</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>Hypotheses on the <italic>rapidity</italic> measure.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left"/>
<th align="center">Original</th>
<th align="center">FalseProjective</th>
<th align="center">PublicSelf</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">Center</td>
<td align="center">&#x2717;</td>
<td align="center">&#x2713;</td>
<td align="center">&#x2713;</td>
</tr>
<tr>
<td align="left">Side-Visible</td>
<td align="center">&#x2713;</td>
<td align="center">&#x2717;</td>
<td align="center">&#x2713;</td>
</tr>
<tr>
<td align="left">Side-Invisible</td>
<td align="center">&#x2717;</td>
<td align="center">&#x2713;</td>
<td align="center">&#x2713;</td>
</tr>
</tbody>
</table>
</table-wrap>
<table-wrap id="T2" position="float">
<label>TABLE 2</label>
<caption>
<p>Hypotheses on the <italic>accuracy</italic> measure.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left"/>
<th align="center">Original</th>
<th align="center">FalseProjective</th>
<th align="center">PublicSelf</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">Center</td>
<td align="center">&#x2713;</td>
<td align="center">&#x2713;</td>
<td align="center">&#x2713;</td>
</tr>
<tr>
<td align="left">Side-Visible</td>
<td align="center">&#x2713;</td>
<td align="center">&#x2717;</td>
<td align="center">&#x2713;</td>
</tr>
<tr>
<td align="left">Side-Invisible</td>
<td align="center">&#x2717;</td>
<td align="center">&#x2713;</td>
<td align="center">&#x2713;</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>In terms of the subjective metrics, we expected that the PublicSelf legible motion would earn the best legibility scores among the three motion types. We assumed that original motion would earn the worst legibility because of its ambiguous nature. The FalseProjective legible motion was expected to be better than the original motion from the perspective of ambiguity but worse than the PublicSelf legible motion due to the unnecessary detours in the Side-Invisible and Blind-Inside scenarios. We also considered that FalseProjective legible motion would earn worse consistency scores than the others because the detours that mislead participants into wrong inferences could be perceived as inconsistent from the observer&#x2019;s perspective. We did not expect that participants would sense inconsistency in original motion because it provided only ambiguity and did not mislead observers.</p>
</sec>
<sec id="s5-2-4">
<title>5.2.4 Results</title>
<p>
<xref ref-type="fig" rid="F12">Figure&#x20;12</xref> shows the results for the participants&#x2019; inferences in the Center, Side-Visible, and Side-Invisible scenarios. Here, we adopted the number of correct answers as the measure of rapidity, and the number of wrong answers as the measure of accuracy. <xref ref-type="table" rid="T3">Tables 3</xref>, <xref ref-type="table" rid="T4">4</xref> summarize the results for our hypotheses.</p>
<fig id="F12" position="float">
<label>FIGURE 12</label>
<caption>
<p>Participants&#x2019; inference. The upper lines show the percentages of participants whose answers were correct and indicate the rapidity of their inference. The lower lines show incorrect cases, indicating the accuracy measure. PublicSelf enabled participants to both correctly and rapidly infer the actor&#x2019;s intentions in the three scenarios. <bold>(A)</bold>Center. <bold>(B)</bold>Side-Visible. <bold>(C)</bold>Side-Invisible.</p>
</caption>
<graphic xlink:href="frobt-09-783863-g012.tif"/>
</fig>
<table-wrap id="T3" position="float">
<label>TABLE 3</label>
<caption>
<p>Results for the <italic>rapidity</italic> measure.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left"/>
<th align="center">Original</th>
<th align="center">FalseProjective</th>
<th align="center">PublicSelf</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">Center</td>
<td align="center">&#x2717;</td>
<td align="center">&#x2713;</td>
<td align="center">&#x2713;</td>
</tr>
<tr>
<td align="left">Side-Visible</td>
<td align="center">&#x2713;</td>
<td align="center">&#x2717;</td>
<td align="center">&#x2713;</td>
</tr>
<tr>
<td align="left">Side-Invisible</td>
<td align="center">&#x2717;</td>
<td align="center">&#x2717;</td>
<td align="center">&#x2713;</td>
</tr>
</tbody>
</table>
</table-wrap>
<table-wrap id="T4" position="float">
<label>TABLE 4</label>
<caption>
<p>Results for the <italic>accuracy</italic> measure.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left"/>
<th align="center">Original</th>
<th align="center">FalseProjective</th>
<th align="center">PublicSelf</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">Center</td>
<td align="center">&#x2713;</td>
<td align="center">&#x2713;</td>
<td align="center">&#x2713;</td>
</tr>
<tr>
<td align="left">Side-Visible</td>
<td align="center">&#x2713;</td>
<td align="center">&#x2713;</td>
<td align="center">&#x2713;</td>
</tr>
<tr>
<td align="left">Side-Invisible</td>
<td align="center">&#x2717;</td>
<td align="center">&#x25b3;</td>
<td align="center">&#x25b3;</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Let us first focus on the rapidity measures. The results for the Center scenario were consistent with our expectations. The FalseProjective and PublicSelf legible motion trajectories could lead the participants to rapidly comprehend the actor&#x2019;s true intention, while for the original motion, it was not until the actor turned toward its target that the participants comprehended the actor&#x2019;s intention. The results in Side-Visible also supported our hypothesis. The FalseProjective legible motion failed to let participants infer the actor&#x2019;s intention rapidly, while participants identified correct intentions more rapidly when provided with the original and PublicSelf motions. In Side-Invisible, PublicSelf allowed more rapid inference than original, but unexpectedly, FalseProjective could not improve rapidity. A possible explanation of this result is that participants became careful and held their judgments while watching FalseProjective motions. Two participants reported that they felt FalseProjective motions were somehow roundabout, while no participant mentioned PublicSelf detours. Such perception can delay a participant&#x2019;s presentation of their judgment, resulting in less rapidity. In summary, 1) PublicSelf enabled human participants to rapidly infer the actor&#x2019;s intention by considering information asymmetry between the actor and observer. 2) FalseProjective motion&#x2019;s roundabout behavior seemed to make participants delay presentation of their judgments.</p>
<p>From the perspective of accuracy, few answers were wrong in the Center and Side-Visible scenarios for each motion type, and we found very few wrong answers for FalseProjective. This result is against our prior hypothesis but supports suggestion 2) in the last paragraph. That is, participants avoided wrong answers by delaying presentation of their judgment. In Side-Invisible, the original motion led to incorrect inferences, and all participants presented wrong judgments within 2,000&#xa0;ms. On the other hand, the FalseProjective and PublicSelf motion trajectories did reduce the number of incorrect answers compared to the original motion, although some participants still had wrong answers. In summary, 3) FalseProjective and PublicSelf could reduce the number of wrong inferences compared to the original motion. 4) In terms of the accuracy of human observers&#x2019; inferences, we did not find differences between FalseProjective and PublicSelf.</p>
<p>
<xref ref-type="fig" rid="F13">Figure&#x20;13</xref> shows the results for the subjective measures. The one-way repeated measures ANOVA showed that there were significant differences between the motion types for Q1 (<italic>F</italic>(2, 22) &#x3d; 4.52, <italic>p</italic>&#x20;&#x3c; 0.05, <italic>&#x3b7;</italic>
<sup>2</sup> &#x3d; 0.21) and Q2 (<italic>F</italic>(2, 22) &#x3d; 4.83, <italic>p</italic>&#x20;&#x3c; 0.05, <italic>&#x3b7;</italic>
<sup>2</sup> &#x3d; 0.26). For Q1 and Q2, post hoc Tukey tests revealed that the FalseProjective and PublicSelf legible motion trajectories were rated significantly higher than the original motion trajectories (<italic>p</italic>&#x20;&#x3c; 0.05). Two participants reported that the original motion in the Center scenario gave a strongly negative impression due to its illegibility. Contrary to our expectations, we did not find a significant difference between the FalseProjective and PublicSelf trajectories. Although two participants provided negative comments on FalseProjective&#x2019;s roundabout behaviors in the Side-Invisible and Blind scenarios, it seems that their effects were limited compared to FalseProjective&#x2019;s successful behavior in Center and Side-Visible. Similarly, the results did not show that the participants thought FalseProjective was inconsistent compared to the other motions. As a result, the subjective measures demonstrated participants&#x2019; positive perceptions toward both FalseProjective and PublicSelf motions.</p>
<fig id="F13" position="float">
<label>FIGURE 13</label>
<caption>
<p>Subjective measures. The error bars indicate standard errors. Both FalseProjective and PublicSelf earned higher scores in the legibility measures, but no statistical difference was found between FalseProjective and PublicSelf.</p>
</caption>
<graphic xlink:href="frobt-09-783863-g013.tif"/>
</fig>
</sec>
<sec id="s5-2-5">
<title>5.2.5 Summary of User Study</title>
<p>We found that legible motion generated with awareness of information asymmetry between an actor and an observer could successfully allow human observers to <italic>rapidly</italic> infer the actor&#x2019;s intention compared to original motion, which does not consider human inference of the actor agent&#x2019;s mind, and FalseProjective legible motion, which ignores information asymmetry. We did not find that FalseProjective motion misled human observers to wrong inferences, but the results suggest that FalseProjective&#x2019;s roundabout behavior made the participants more cautious, which delayed their judgments.</p>
</sec>
</sec>
</sec>
<sec id="s6">
<title>6 Future Work</title>
<p>The legible motion generated with PublicSelf has been shown to be effective in the simple situations presented here, which indicates that given awareness of the information asymmetry between an artificial agent and a human observer, PublicSelf can successfully convey an actor&#x2019;s certain intention. However, further evaluation is required to discuss the robustness of PublicSelf legible motion. Although the simulation study over 2,500 conditions is complementary to the user study with a small number of conditions, the data-driven artificial observers have an inference structure different from people, which can yield different results. For example, in our previous study (<xref ref-type="bibr" rid="B14">Fukuchi et&#x20;al., 2018</xref>), people occasionally doubted the assumption that the actor intends to get either an apple or a pear and lost confidence in their answers, which never happens in simulation. Moreover, people can be affected by former trials while the artificial observers do not change after the training phase. In this paper, we controlled such effects by randomizing the order of episodes and providing a small number of episodes, but to build an agent that can develop a long-term relationship with users, we need continuous experiments with more episodes. The small number and limited demographic variety of participants are also limitations of the user&#x20;study.</p>
<p>In addition, extensions of our method will be required for practical applications. One possible challenge is the calculations in PublicSelf. In this paper, we assumed the observer to be doing nothing other than observing the actor&#x2019;s behavior. However, in an actual human-agent cooperation scenario, the humans involved also move, perform actions, and affect the environment, thereby making predictions of human observations and environmental transitions much more difficult. Another problem is the assumption of initial knowledge. The actor agent was assumed to always know the locations of the apple and pear, but in actuality, the actor, as well as the observer, will typically be subject to uncertainty. Although PublicSelf theoretically should also work in such situations, the strategy for exhibiting publicly self-aware behavior becomes more complex. For example, the actor must judge whether generating publicly self-aware behavior is possible in a given situation. Placing obstacles in an actor&#x2019;s path can highlight interesting aspects of the strategy. It dramatically increases the complexity of an observer&#x2019;s expectations about the environment or agents such as the area that an agent can perceive or possible paths that an actor can choose. An actor may need feedback to know such expectations or communication to align them. Tuning the thresholds for balancing the improvement of legibility and the pursuit of an actor&#x2019;s true goal (<xref ref-type="other" rid="alg1">Algorithm 1</xref>) was done by hand in this paper. However, it will be a problem because the best threshold can differ depending on the situation or the importance of conveying an intention against pursuing it. Reinforcement learning can be a promising approach to enable an actor to balance them automatically.</p>
</sec>
<sec sec-type="conclusion" id="s7">
<title>7 Conclusion</title>
<p>This paper focused on conveying an artificial agent&#x2019;s certain intentions by motions. The main claim of this paper was that it is important to handle information asymmetry between an actor and its observer. To formalize this idea, we developed a method for generating motions that convey an agent&#x2019;s intention with the awareness of information asymmetry using our previously proposed PublicSelf model. We conducted a simulation study and a user study to validate our claim. In both experiments, we focused on legible motion, which conveys an actor&#x2019;s true intention to its observer. We compared PublicSelf&#x2019;s legible motion with FalseProjective motion, which was generated without considering information asymmetry in an approach similar to those taken in previous studies. As a result, PublicSelf legible motion could successfully allow observers to quickly infer an actor&#x2019;s intentions while FalseProjective sometimes compromised an observer&#x2019;s predictions of an actor&#x2019;s intentions in&#x20;situations with information asymmetry. This result suggests that by considering information asymmetry, an agent can more effectively convey intentions with motions.</p>
</sec>
</body>
<back>
<sec id="s8">
<title>Data Availability Statement</title>
<p>The raw data supporting the conclusions of this article will be made available by the authors, without undue reservation.</p>
</sec>
<sec id="s9">
<title>Ethics Statement</title>
<p>Ethical review and approval was not required for the study on human participants in accordance with the local legislation and institutional requirements. Written informed consent for participation was not required for this study in accordance with the national legislation and the institutional requirements.</p>
</sec>
<sec id="s10">
<title>Author Contributions</title>
<p>All authors contributed to the conceptualization of this research. YF developed the software, conducted the experiments, and wrote the first draft in consultation with MO. HY, TT, and MI contributed to experimental designs, research discussions, and critical reviews.</p>
</sec>
<sec id="s11">
<title>Funding</title>
<p>This research is supported by the Research Grant of Keio Leading-edge Laboratory of Science and Technology and JST CREST Grant Number JPMJCR19A1, Japan.</p>
</sec>
<sec sec-type="COI-statement" id="s12">
<title>Conflict of Interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s13">
<title>Publisher&#x2019;s Note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Baker</surname>
<given-names>C. L.</given-names>
</name>
<name>
<surname>Jara-Ettinger</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Saxe</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Tenenbaum</surname>
<given-names>J.&#x20;B.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Rational Quantitative Attribution of Beliefs, Desires and Percepts in Human Mentalizing</article-title>. <source>Nat. Hum. Behav</source>. <comment>1, 0064 EP</comment>. <pub-id pub-id-type="doi">10.1038/s41562-017-0064</pub-id> </citation>
</ref>
<ref id="B2">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Cha</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Mataric</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2016</year>). &#x201c;<article-title>Using Nonverbal Signals to Request Help during Human-Robot Collaboration</article-title>,&#x201d; in <conf-name>Proceeding of the 2016 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)</conf-name>, <conf-loc>Daejeon, Korea (South)</conf-loc>, <conf-date>9-14 Oct. 2016</conf-date> (<publisher-name>IEEE</publisher-name>), <fpage>5070</fpage>&#x2013;<lpage>5076</lpage>. <pub-id pub-id-type="doi">10.1109/IROS.2016.7759744</pub-id> </citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cohen</surname>
<given-names>P. R.</given-names>
</name>
<name>
<surname>Levesque</surname>
<given-names>H. J.</given-names>
</name>
</person-group> (<year>1990</year>). <article-title>Intention Is Choice with Commitment</article-title>. <source>Artif. Intelligence</source> <volume>42</volume>, <fpage>213</fpage>&#x2013;<lpage>261</lpage>. <pub-id pub-id-type="doi">10.1016/0004-3702(90)90055-5</pub-id> </citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cruz</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Dazeley</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Vamplew</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Moreira</surname>
<given-names>I.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Explainable Robotic Systems: Understanding Goal-Driven Actions in a Reinforcement Learning Scenario</article-title>. <source>Neural Comput. Appl</source>. <pub-id pub-id-type="doi">10.1007/s00521-021-06425-5</pub-id> </citation>
</ref>
<ref id="B5">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Dennett</surname>
<given-names>D. C.</given-names>
</name>
</person-group> (<year>1987</year>). <source>The Intentional Stance</source>. <publisher-loc>Cambridge, MA</publisher-loc>: <publisher-name>MIT Press</publisher-name>. </citation>
</ref>
<ref id="B6">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Doucet</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Freitas</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Gordon</surname>
<given-names>N.</given-names>
</name>
</person-group> (<year>2001</year>). <source>An Introduction to Sequential Monte Carlo Methods</source>. <publisher-loc>New York, NY</publisher-loc>: <publisher-name>Springer</publisher-name>, <fpage>3</fpage>&#x2013;<lpage>14</lpage>. <pub-id pub-id-type="doi">10.1007/978-1-4757-3437-9_1</pub-id> </citation>
</ref>
<ref id="B7">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Dragan</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Bauman</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Forlizzi</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Srinivasa</surname>
<given-names>S. S.</given-names>
</name>
</person-group> (<year>2015a</year>). &#x201c;<article-title>Effects of Robot Motion on Human-Robot Collaboration</article-title>,&#x201d; in <conf-name>Proceedings of the Tenth Annual ACM/IEEE International Conference on Human-Robot Interaction</conf-name>, <conf-loc>Portland, OR, USA</conf-loc>, <conf-date>2-5 March 2015</conf-date> (<publisher-name>IEEE</publisher-name>), <fpage>51</fpage>&#x2013;<lpage>58</lpage>. <pub-id pub-id-type="doi">10.1145/2696454.2696473</pub-id> </citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dragan</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Holladay</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Srinivasa</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2015b</year>). <article-title>Deceptive Robot Motion: Synthesis, Analysis and Experiments</article-title>. <source>Auton. Robot</source> <volume>39</volume>, <fpage>331</fpage>&#x2013;<lpage>345</lpage>. <pub-id pub-id-type="doi">10.1007/s10514-015-9458-8</pub-id> </citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dragan</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Srinivasa</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Integrating Human Observer Inferences into Robot Motion Planning</article-title>. <source>Auton. Robot</source> <volume>37</volume>, <fpage>351</fpage>&#x2013;<lpage>368</lpage>. <pub-id pub-id-type="doi">10.1007/s10514-014-9408-x</pub-id> </citation>
</ref>
<ref id="B10">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Duval</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Wicklund</surname>
<given-names>R. A.</given-names>
</name>
</person-group> (<year>1972</year>). <source>A Theory of Objective Self-Awareness</source>. <publisher-loc>New York, NY</publisher-loc>: <publisher-name>Academic Press</publisher-name>. </citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Falewicz</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Bak</surname>
<given-names>W.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Private vs. Public Self-Consciousness and Self-Discrepancies</article-title>. <source>cipp</source> <volume>1</volume>, <fpage>58</fpage>&#x2013;<lpage>64</lpage>. <pub-id pub-id-type="doi">10.5114/cipp.2016.55762</pub-id> </citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fenigstein</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Scheier</surname>
<given-names>M. F.</given-names>
</name>
<name>
<surname>Buss</surname>
<given-names>A. H.</given-names>
</name>
</person-group> (<year>1975</year>). <article-title>Public and Private Self-Consciousness: Assessment and Theory</article-title>. <source>J.&#x20;Consulting Clin. Psychol.</source> <volume>43</volume>, <fpage>522</fpage>&#x2013;<lpage>527</lpage>. <pub-id pub-id-type="doi">10.1037/h0076760</pub-id> </citation>
</ref>
<ref id="B13">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Fukuchi</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Osawa</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Yamakawa</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Imai</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2017</year>). &#x201c;<article-title>Autonomous Self-Explanation of Behavior for Interactive Reinforcement Learning Agents</article-title>,&#x201d; in <conf-name>Proceedings of the 5th International Conference on Human Agent Interaction</conf-name>, <conf-loc>New York, NY, USA</conf-loc> (<publisher-loc>New York, NY</publisher-loc>: <publisher-name>ACM</publisher-name>), <fpage>97</fpage>&#x2013;<lpage>101</lpage>. <comment>HAI &#x2019;17</comment>. <pub-id pub-id-type="doi">10.1145/3125739.3125746</pub-id> </citation>
</ref>
<ref id="B14">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Fukuchi</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Osawa</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Yamakawa</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Takahashi</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Imai</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2018</year>). &#x201c;<article-title>Bayesian Inference of Self-Intention Attributed by Observer</article-title>,&#x201d; in <source>Proceedings of the 6th International Conference on Human-Agent Interaction</source> (<publisher-loc>New York, NY, USA</publisher-loc>: <publisher-name>ACM</publisher-name>), <fpage>3</fpage>&#x2013;<lpage>10</lpage>. <comment>HAI &#x2019;18</comment>. <pub-id pub-id-type="doi">10.1145/3284432.3284438</pub-id> </citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gergely</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>N&#xe1;dasdy</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Csibra</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>B&#xed;r&#xf3;</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>1995</year>). <article-title>Taking the Intentional Stance at 12&#x20;Months of Age</article-title>. <source>Cognition</source> <volume>56</volume>, <fpage>165</fpage>&#x2013;<lpage>193</lpage>. <pub-id pub-id-type="doi">10.1016/0010-0277(95)00661-H</pub-id> </citation>
</ref>
<ref id="B16">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Hayes</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Scassellati</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2013</year>). &#x201c;<article-title>Challenges in Shared-Environment Human-Robot Collaboration</article-title>,&#x201d; in <source>Collaborative Manipulation Workshop at the ACM/IEEE International Conference on Human-Robot Interaction (HRI 2013)</source>. <publisher-loc>New York, NY</publisher-loc>: <publisher-name>ACM</publisher-name>, <volume>8</volume>, <fpage>9</fpage>. </citation>
</ref>
<ref id="B17">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Hayes</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Shah</surname>
<given-names>J.&#x20;A.</given-names>
</name>
</person-group> (<year>2017</year>). &#x201c;<article-title>Improving Robot Controller Transparency through Autonomous Policy Explanation</article-title>,&#x201d; in <conf-name>Proceeding pf the 2017&#x20;12th ACM/IEEE International Conference on Human-Robot Interaction (HRI)</conf-name>, <conf-loc>Vienna, Austria</conf-loc>, <conf-date>6-9 March 2017</conf-date> (<publisher-name>IEEE</publisher-name>), <fpage>303</fpage>&#x2013;<lpage>312</lpage>. <pub-id pub-id-type="doi">10.1145/2909824.3020233</pub-id> </citation>
</ref>
<ref id="B18">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Huang</surname>
<given-names>C.-M.</given-names>
</name>
<name>
<surname>Mutlu</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2016</year>). &#x201c;<article-title>Anticipatory Robot Control for Efficient Human-Robot Collaboration</article-title>,&#x201d; in <conf-name>The Eleventh ACM/IEEE International Conference on Human Robot Interaction</conf-name>, <conf-loc>Christchurch, New&#x20;Zealand</conf-loc>, <conf-date>7-10 March 2016</conf-date> (<publisher-name>IEEE</publisher-name>), <fpage>83</fpage>&#x2013;<lpage>90</lpage>. <comment>HRI &#x2019;16</comment>. <pub-id pub-id-type="doi">10.1109/HRI.2016.7451737</pub-id> </citation>
</ref>
<ref id="B19">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Iyer</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Lewis</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Sundar</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Sycara</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2018</year>). &#x201c;<article-title>Transparency and Explanation in Deep Reinforcement Learning Neural Networks</article-title>,&#x201d; in <source>Proceedings of the 2018 AAAI/ACM Conference on AI, Ethics, and Society</source> (<publisher-loc>New York, NY, USA</publisher-loc>: <publisher-name>Association for Computing Machinery</publisher-name>), <fpage>144</fpage>&#x2013;<lpage>150</lpage>. <comment>AIES &#x2019;18</comment>. <pub-id pub-id-type="doi">10.1145/3278721.3278776</pub-id> </citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jara-Ettinger</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Gweon</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Tenenbaum</surname>
<given-names>J.&#x20;B.</given-names>
</name>
<name>
<surname>Schulz</surname>
<given-names>L. E.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Children&#x27;s Understanding of the Costs and Rewards Underlying Rational Action</article-title>. <source>Cognition</source> <volume>140</volume>, <fpage>14</fpage>&#x2013;<lpage>23</lpage>. <pub-id pub-id-type="doi">10.1016/j.cognition.2015.03.006</pub-id> </citation>
</ref>
<ref id="B21">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Kahn</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Villaflor</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Ding</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Abbeel</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Levine</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2018</year>). &#x201c;<article-title>Self-supervised Deep Reinforcement Learning with Generalized Computation Graphs for Robot Navigation</article-title>,&#x201d; in <conf-name>Proceeding of the 2018 IEEE International Conference on Robotics and Automation (ICRA)</conf-name>, <conf-loc>Brisbane, QLD, Australia</conf-loc>, <conf-date>21-25 May 2018</conf-date> (<publisher-name>IEEE</publisher-name>), <fpage>5129</fpage>&#x2013;<lpage>5136</lpage>. <pub-id pub-id-type="doi">10.1109/ICRA.2018.8460655</pub-id> </citation>
</ref>
<ref id="B22">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Kalashnikov</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Irpan</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Pastor</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Ibarz</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Herzog</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Jang</surname>
<given-names>E.</given-names>
</name>
<etal/>
</person-group> (<year>2018</year>). &#x201c;<article-title>Scalable Deep Reinforcement Learning for Vision-Based Robotic Manipulation</article-title>,&#x201d; in <source>Proceedings of the 2nd Conference on Robot Learning</source>. Editors <person-group person-group-type="editor">
<name>
<surname>Billard</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Dragan</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Peters</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Morimoto</surname>
<given-names>J.</given-names>
</name>
</person-group>. <publisher-loc>Cambridge, MA</publisher-loc>: <publisher-name>ML Research Press</publisher-name>, <volume>87</volume>, <fpage>651</fpage>&#x2013;<lpage>673</lpage>. <comment>(PMLR) Proceedings of Machine Learning Research</comment>. </citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lasota</surname>
<given-names>P. A.</given-names>
</name>
<name>
<surname>Shah</surname>
<given-names>J.&#x20;A.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Analyzing the Effects of Human-Aware Motion Planning on Close-Proximity Human-Robot Collaboration</article-title>. <source>Hum. Factors</source> <volume>57</volume>, <fpage>21</fpage>&#x2013;<lpage>33</lpage>. <pub-id pub-id-type="doi">10.1177/0018720814565188</pub-id> </citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Luo</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Baillargeon</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2005</year>). <article-title>Can a Self-Propelled Box Have a Goal?: Psychological Reasoning in 5-Month-Old Infants</article-title>. <source>Psychol. Sci.</source> <volume>16</volume>, <fpage>601</fpage>&#x2013;<lpage>608</lpage>. <pub-id pub-id-type="doi">10.1111/j.1467-9280.2005.01582.x</pub-id> </citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Marchesi</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Ghiglino</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Ciardo</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Perez-Osorio</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Baykara</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Wykowska</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Do we Adopt the Intentional Stance toward Humanoid Robots?</article-title> <source>Front. Psychol.</source> <volume>10</volume>, <fpage>450</fpage>. <pub-id pub-id-type="doi">10.3389/fpsyg.2019.00450</pub-id> </citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mnih</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Badia</surname>
<given-names>A. P.</given-names>
</name>
<name>
<surname>Mirza</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Graves</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Harley</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Lillicrap</surname>
<given-names>T. P.</given-names>
</name>
<etal/>
</person-group> (<year>2016</year>). <article-title>Asynchronous Methods for Deep Reinforcement Learning</article-title>. <source>Proc. 33rd Int. Conf. Int. Conf. Machine Learn.</source> <volume>48</volume>, <fpage>1928</fpage>&#x2013;<lpage>1937</lpage>. <comment>(JMLR.org), ICML&#x2019;16</comment>. </citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mnih</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Kavukcuoglu</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Silver</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Rusu</surname>
<given-names>A. A.</given-names>
</name>
<name>
<surname>Veness</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Bellemare</surname>
<given-names>M. G.</given-names>
</name>
<etal/>
</person-group> (<year>2015</year>). <article-title>Human-level Control through Deep Reinforcement Learning</article-title>. <source>Nature</source> <volume>518</volume>, <fpage>529</fpage>&#x2013;<lpage>533</lpage>. <pub-id pub-id-type="doi">10.1038/nature14236</pub-id> </citation>
</ref>
<ref id="B28">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Mott</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Zoran</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Chrzanowski</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Wierstra</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Rezende</surname>
<given-names>D. J.</given-names>
</name>
</person-group> (<year>2019</year>). <source>Towards Interpretable Reinforcement Learning Using Attention Augmented Agents</source>. <publisher-loc>Red Hook, NY, USA</publisher-loc>: <publisher-name>Curran Associates Inc.</publisher-name> </citation>
</ref>
<ref id="B29">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Nikolaidis</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Dragan</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Srinivasa</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2016</year>). &#x201c;<article-title>Viewpoint-based Legibility Optimization</article-title>,&#x201d; in <conf-name>The Eleventh ACM/IEEE International Conference on Human Robot Interaction</conf-name>, <conf-loc>Christchurch, New&#x20;Zealand</conf-loc>, <conf-date>7-10 March 2016</conf-date> (<publisher-name>IEEE</publisher-name>), <fpage>271</fpage>&#x2013;<lpage>278</lpage>. <comment>HRI &#x2019;16</comment>. <pub-id pub-id-type="doi">10.1109/HRI.2016.7451762</pub-id> </citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Perez-Osorio</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Wykowska</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Adopting the Intentional Stance toward Natural and Artificial Agents</article-title>. <source>Philos. Psychol.</source> <volume>33</volume>, <fpage>369</fpage>&#x2013;<lpage>395</lpage>. <pub-id pub-id-type="doi">10.1080/09515089.2019.1688778</pub-id> </citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Premack</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Premack</surname>
<given-names>A. J.</given-names>
</name>
</person-group> (<year>1997</year>). <article-title>Motor Competence as Integral to Attribution of Goal</article-title>. <source>Cognition</source> <volume>63</volume>, <fpage>235</fpage>&#x2013;<lpage>242</lpage>. <pub-id pub-id-type="doi">10.1016/s0010-0277(96)00790-1</pub-id> </citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Premack</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Woodruff</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>1978</year>). <article-title>Does the Chimpanzee Have a Theory of Mind?</article-title> <source>Behav. Brain Sci.</source> <volume>1</volume>, <fpage>515</fpage>&#x2013;<lpage>526</lpage>. <pub-id pub-id-type="doi">10.1017/S0140525X00076512</pub-id> </citation>
</ref>
<ref id="B33">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Raileanu</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Denton</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Szlam</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Fergus</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2018</year>). &#x201c;<article-title>Modeling Others Using Oneself in Multi-Agent Reinforcement Learning</article-title>,&#x201d; in <source>Proceedings of the 35th International Conference on Machine Learning</source>. Editors <person-group person-group-type="editor">
<name>
<surname>Dy</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Krause</surname>
<given-names>A.</given-names>
</name>
</person-group>, <volume>80</volume>, <fpage>4257</fpage>&#x2013;<lpage>4266</lpage>. <comment>(PMLR) Proceedings of Machine Learning Research</comment>. </citation>
</ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Schellen</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Wykowska</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Intentional Mindset toward Robots&#x2013;Open Questions and Methodological Challenges</article-title>. <source>Front. Robot. AI</source> <volume>5</volume>, <fpage>139</fpage>. <pub-id pub-id-type="doi">10.3389/frobt.2018.00139</pub-id> </citation>
</ref>
<ref id="B35">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Scholl</surname>
<given-names>B. J.</given-names>
</name>
<name>
<surname>Tremoulet</surname>
<given-names>P. D.</given-names>
</name>
</person-group> (<year>2000</year>). <article-title>Perceptual Causality and Animacy</article-title>. <source>Trends Cogn. Sci.</source> <volume>4</volume>, <fpage>299</fpage>&#x2013;<lpage>309</lpage>. <pub-id pub-id-type="doi">10.1016/s1364-6613(00)01506-0</pub-id> </citation>
</ref>
<ref id="B36">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Silver</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Schrittwieser</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Simonyan</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Antonoglou</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Guez</surname>
<given-names>A.</given-names>
</name>
<etal/>
</person-group> (<year>2017</year>). <article-title>Mastering the Game of Go without Human Knowledge</article-title>. <source>Nature</source> <volume>550</volume>, <fpage>354</fpage>&#x2013;<lpage>359</lpage>. <pub-id pub-id-type="doi">10.1038/nature24270</pub-id> </citation>
</ref>
<ref id="B37">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Tamagnini</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Krause</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Dasgupta</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Bertini</surname>
<given-names>E.</given-names>
</name>
</person-group> (<year>2017</year>). &#x201c;<article-title>Interpreting Black-Box Classifiers Using Instance-Level Visual Explanations</article-title>,&#x201d; in <source>Proceedings of the 2nd Workshop on Human-In-The-Loop Data Analytics</source> (<publisher-loc>New York, NY, USA</publisher-loc>: <publisher-name>Association for Computing Machinery</publisher-name>). <comment>HILDA&#x2019;17</comment>. <pub-id pub-id-type="doi">10.1145/3077257.3077260</pub-id> </citation>
</ref>
<ref id="B38">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wimmer</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Perner</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>1983</year>). <article-title>Beliefs about Beliefs: Representation and Constraining Function of Wrong Beliefs in Young Children&#x2019;s Understanding of Deception</article-title>. <source>Cognition</source> <volume>13</volume>, <fpage>103</fpage>&#x2013;<lpage>128</lpage>. <pub-id pub-id-type="doi">10.1016/0010-0277(83)90004-5</pub-id> </citation>
</ref>
<ref id="B39">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Zettlemoyer</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Milch</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Kaelbling</surname>
<given-names>L. P.</given-names>
</name>
</person-group> (<year>2009</year>). &#x201c;<article-title>Multi-agent Filtering with Infinitely Nested Beliefs</article-title>,&#x201d; in <source>Advances in Neural Information Processing Systems 21</source>. Editors <person-group person-group-type="editor">
<name>
<surname>Koller</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Schuurmans</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Bengio</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Bottou</surname>
<given-names>L.</given-names>
</name>
</person-group> (<publisher-loc>Cambridge, MA</publisher-loc>: <publisher-name>The MIT Press</publisher-name>), <fpage>1905</fpage>&#x2013;<lpage>1912</lpage>. </citation>
</ref>
</ref-list>
</back>
</article>