<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Psychiatry</journal-id>
<journal-title>Frontiers in Psychiatry</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Psychiatry</abbrev-journal-title>
<issn pub-type="epub">1664-0640</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fpsyt.2023.1080668</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Psychiatry</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Simulating developmental diversity: Impact of neural stochasticity on atypical flexibility and hierarchy</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name><surname>Soda</surname> <given-names>Takafumi</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="author-notes" rid="fn002"><sup>&#x02020;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/1864431/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Ahmadi</surname> <given-names>Ahmadreza</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<xref ref-type="author-notes" rid="fn002"><sup>&#x02020;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/1083627/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Tani</surname> <given-names>Jun</given-names></name>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/181/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Honda</surname> <given-names>Manabu</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/74656/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Hanakawa</surname> <given-names>Takashi</given-names></name>
<xref ref-type="aff" rid="aff5"><sup>5</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/1303751/overview"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Yamashita</surname> <given-names>Yuichi</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x0002A;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/13049/overview"/>
</contrib>
</contrib-group>
<aff id="aff1"><sup>1</sup><institution>Department of Information Medicine, National Institute of Neuroscience, National Center of Neurology and Psychiatry</institution>, <addr-line>Kodaira</addr-line>, <country>Japan</country></aff>
<aff id="aff2"><sup>2</sup><institution>Department of NCNP Brain Physiology and Pathology, Graduate School of Medical and Dental Sciences, Tokyo Medical and Dental University</institution>, <addr-line>Tokyo</addr-line>, <country>Japan</country></aff>
<aff id="aff3"><sup>3</sup><institution>Geobotica</institution>, <addr-line>Brisbane, QLD</addr-line>, <country>Australia</country></aff>
<aff id="aff4"><sup>4</sup><institution>Cognitive Neurorobotics Research Unit, Okinawa Institute of Science and Technology Graduate University</institution>, <addr-line>Okinawa</addr-line>, <country>Japan</country></aff>
<aff id="aff5"><sup>5</sup><institution>Integrated Neuroanatomy and Neuroimaging, Kyoto University Graduate School of Medicine</institution>, <addr-line>Kyoto</addr-line>, <country>Japan</country></aff>
<author-notes>
<fn fn-type="edited-by"><p>Edited by: Chong Chen, Yamaguchi University Graduate School of Medicine, Japan</p></fn>
<fn fn-type="edited-by"><p>Reviewed by: Yasuhiro Mochizuki, Waseda University, Japan; Simone Battaglia, University of Turin, Italy</p></fn>
<corresp id="c001">&#x0002A;Correspondence: Yuichi Yamashita <email>yamay&#x00040;ncnp.go.jp</email></corresp>
<fn fn-type="other" id="fn001"><p>This article was submitted to Psychopathology, a section of the journal Frontiers in Psychiatry</p></fn>
<fn fn-type="equal" id="fn002"><p>&#x02020;These authors have contributed equally to this work</p></fn></author-notes>
<pub-date pub-type="epub">
<day>15</day>
<month>03</month>
<year>2023</year>
</pub-date>
<pub-date pub-type="collection">
<year>2023</year>
</pub-date>
<volume>14</volume>
<elocation-id>1080668</elocation-id>
<history>
<date date-type="received">
<day>26</day>
<month>10</month>
<year>2022</year>
</date>
<date date-type="accepted">
<day>21</day>
<month>02</month>
<year>2023</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x000A9; 2023 Soda, Ahmadi, Tani, Honda, Hanakawa and Yamashita.</copyright-statement>
<copyright-year>2023</copyright-year>
<copyright-holder>Soda, Ahmadi, Tani, Honda, Hanakawa and Yamashita</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p></license> </permissions>
<abstract>
<sec>
<title>Introduction</title>
<p>Investigating the pathological mechanisms of developmental disorders is a challenge because the symptoms are a result of complex and dynamic factors such as neural networks, cognitive behavior, environment, and developmental learning. Recently, computational methods have started to provide a unified framework for understanding developmental disorders, enabling us to describe the interactions among those multiple factors underlying symptoms. However, this approach is still limited because most studies to date have focused on cross-sectional task performance and lacked the perspectives of developmental learning. Here, we proposed a new research method for understanding the mechanisms of the acquisition and its failures in hierarchical Bayesian representations using a state-of-the-art computational model, referred to as in silico neurodevelopment framework for atypical representation learning.</p></sec>
<sec>
<title>Methods</title>
<p>Simple simulation experiments were conducted using the proposed framework to examine whether manipulating the neural stochasticity and noise levels in external environments during the learning process can lead to the altered acquisition of hierarchical Bayesian representation and reduced flexibility.</p></sec>
<sec>
<title>Results</title>
<p>Networks with normal neural stochasticity acquired hierarchical representations that reflected the underlying probabilistic structures in the environment, including higher-order representation, and exhibited good behavioral and cognitive flexibility. When the neural stochasticity was high during learning, top-down generation using higher-order representation became atypical, although the flexibility did not differ from that of the normal stochasticity settings. However, when the neural stochasticity was low in the learning process, the networks demonstrated reduced flexibility and altered hierarchical representation. Notably, this altered acquisition of higher-order representation and flexibility was ameliorated by increasing the level of noise in external stimuli.</p></sec>
<sec>
<title>Discussion</title>
<p>These results demonstrated that the proposed method assists in modeling developmental disorders by bridging between multiple factors, such as the inherent characteristics of neural dynamics, acquisitions of hierarchical representation, flexible behavior, and external environment.</p></sec></abstract>
<kwd-group>
<kwd>autism spectrum disorder (ASD)</kwd>
<kwd>computational psychiatry</kwd>
<kwd>predictive coding</kwd>
<kwd>flexibility</kwd>
<kwd>representation learning</kwd>
<kwd>neural noise</kwd>
<kwd>Bayesian brain</kwd>
<kwd>neural network</kwd>
</kwd-group>
<contract-num rid="cn001">JP20H00001</contract-num>
<contract-num rid="cn001">JP20H00625</contract-num>
<contract-num rid="cn002">JPMJCR16E2</contract-num>
<contract-num rid="cn002">JPMJCR21P4</contract-num>
<contract-num rid="cn002">JPMJMS2031</contract-num>
<contract-num rid="cn002">JPMJSP2120</contract-num>
<contract-num rid="cn003">Intramural Research Grant (3-9, 4-6) for Neurological and Psychiatric Disorders of NCNP</contract-num>
<contract-sponsor id="cn001">Japan Society for the Promotion of Science<named-content content-type="fundref-id">10.13039/501100001691</named-content></contract-sponsor>
<contract-sponsor id="cn002">Japan Science and Technology Agency<named-content content-type="fundref-id">10.13039/501100002241</named-content></contract-sponsor>
<contract-sponsor id="cn003">National Center of Neurology and Psychiatry<named-content content-type="fundref-id">10.13039/501100009438</named-content></contract-sponsor>
<counts>
<fig-count count="11"/>
<table-count count="0"/>
<equation-count count="4"/>
<ref-count count="92"/>
<page-count count="20"/>
<word-count count="12361"/>
</counts>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="s1">
<title>1. Introduction</title>
<p>Developmental disorders, such as autism spectrum disorders (ASDs), present with various symptoms involving perceptual, behavioral, cognitive, and social dysfunctions, and elucidating their pathological mechanisms is a challenging task. A fundamental difficulty in understanding developmental disorders is the fact that their symptoms are the results of complex and dynamic processes involving multiple factors, including neural systems, cognitive behavior, environment, and developmental learning. At the levels of cognition and behavior, in addition to their symptoms, people with ASD were reported to show reduced performance in a wide range of cognitive and behavioral tasks (<xref ref-type="bibr" rid="B1">1</xref>&#x02013;<xref ref-type="bibr" rid="B4">4</xref>). At the level of the neural system, there are many findings related to the pathology of ASD, such as imbalance of neural excitations and inhibitions (<xref ref-type="bibr" rid="B5">5</xref>), altered variability in neural dynamics (<xref ref-type="bibr" rid="B6">6</xref>, <xref ref-type="bibr" rid="B7">7</xref>), alterations in alpha oscillations (<xref ref-type="bibr" rid="B8">8</xref>), and abnormalities in subcortical areas including frontolimbic circuit, brainstem including superior colliculus, and autonomic nervous system (<xref ref-type="bibr" rid="B9">9</xref>&#x02013;<xref ref-type="bibr" rid="B15">15</xref>). At the external environment level, it has been known that cognitive-behavioral interventions, such as structuring the environment and reducing stimulus ambiguity, alleviate symptoms of ASD (<xref ref-type="bibr" rid="B16">16</xref>, <xref ref-type="bibr" rid="B17">17</xref>). However, despite the accumulation of these findings, existing theories of atypical development remain fragmentary because the target symptoms and the levels of explanations for each of these findings are different (<xref ref-type="bibr" rid="B18">18</xref>).</p>
<p>To address this issue, computational study has been expected to play a key role (<xref ref-type="bibr" rid="B18">18</xref>&#x02013;<xref ref-type="bibr" rid="B21">21</xref>). This is because computational models can provide explanations bridging multiple levels in complex dynamical systems of the brain through quantitative simulations of the processes of neural, cognitive, and behavioral interactions that are difficult to observe and manipulate in actual biological systems.</p>
<p>One of the promising computational theories for developmental disorders is Bayesian brain hypothesis (<xref ref-type="bibr" rid="B22">22</xref>), also referred to as predictive coding theory (<xref ref-type="bibr" rid="B23">23</xref>, <xref ref-type="bibr" rid="B24">24</xref>), Bayesian cognitive modeling (<xref ref-type="bibr" rid="B25">25</xref>&#x02013;<xref ref-type="bibr" rid="B27">27</xref>), and free energy principle (<xref ref-type="bibr" rid="B28">28</xref>). In Bayesian brain hypothesis, the brain is considered to have the hierarchical Bayesian model that reflects the probabilistic structures in environment, and a hierarchical and probabilistic predictive process enables adaptive cognition and behavior. From the aspect of Bayesian brain hypothesis, it is proposed that symptoms of ASD are failures in Bayesian inference and abnormal acquisition of a hierarchical Bayesian model. Furthermore, the Bayesian brain hypothesis argued that these failures in inference and acquisition result from circular interactions between external stimuli and the internal brain dynamics in short- and long-term timescales (<xref ref-type="bibr" rid="B29">29</xref>&#x02013;<xref ref-type="bibr" rid="B32">32</xref>). However, most ASD studies using the Bayesian brain hypothesis have focused on cross-sectional (i.e., short-term) behavioral measures such as reasoning and decision making, and there have been few studies focusing on long-term effects of environmental interactions and the acquisition/developmental learning process. For example, some studies attempted to fit theoretically driven hierarchical Bayesian models to behavioral data, and group differences in estimated values of model parameters between healthy and atypical developmental groups were investigated (<xref ref-type="bibr" rid="B33">33</xref>&#x02013;<xref ref-type="bibr" rid="B35">35</xref>). 
In those studies, because a hierarchical Bayesian model has been constructed by researchers a priori, the process of acquiring a hierarchical Bayesian representation has not been examined.</p>
<p>Artificial neural networks, one of the computational modeling methods for brain function (<xref ref-type="bibr" rid="B36">36</xref>&#x02013;<xref ref-type="bibr" rid="B38">38</xref>), could help investigate the developmental learning process because neural network models acquire internal representations reflecting the external environment through synapse updating (<xref ref-type="bibr" rid="B39">39</xref>&#x02013;<xref ref-type="bibr" rid="B43">43</xref>). In particular, a hierarchical recurrent neural network (RNN) model (<xref ref-type="bibr" rid="B44">44</xref>&#x02013;<xref ref-type="bibr" rid="B46">46</xref>) has been widely applied for modeling higher cognitive function in the brain because this model has high similarity to the hierarchical system of the brain and capacity to reproduce complex dynamics. In addition to typical development (<xref ref-type="bibr" rid="B47">47</xref>&#x02013;<xref ref-type="bibr" rid="B49">49</xref>), some studies investigated developmental disorders (<xref ref-type="bibr" rid="B50">50</xref>&#x02013;<xref ref-type="bibr" rid="B53">53</xref>) and schizophrenia (<xref ref-type="bibr" rid="B54">54</xref>) as failures in the hierarchical neural system using hierarchical RNNs, and examined behavioral phenotypes and their relations to representations acquired in neural networks. These studies, referred to as neurorobotics, are promising for psychiatric research because they investigated the acquisition process of higher-order representations based on realistic and multidimensional sensorimotor sequence with the interaction of physical environment using a humanoid robot driven by an RNN (<xref ref-type="bibr" rid="B50">50</xref>&#x02013;<xref ref-type="bibr" rid="B52">52</xref>, <xref ref-type="bibr" rid="B54">54</xref>).</p>
<p>Recently, a neural network model that combines the properties of a hierarchical Bayesian model and RNN, referred to as predictive-coding-inspired variational recurrent neural network (PV-RNN), has been proposed (<xref ref-type="bibr" rid="B55">55</xref>). PV-RNN can embed complex stochastic sensorimotor signals in neural dynamics as a hierarchical Bayesian model through the developmental learning process. Therefore, PV-RNN can be considered a powerful tool for investigating the Bayesian brain hypothesis. Indeed, PV-RNN was useful for modeling uncertainty estimations (<xref ref-type="bibr" rid="B55">55</xref>), goal-oriented behavior (<xref ref-type="bibr" rid="B56">56</xref>), sensory attenuation (<xref ref-type="bibr" rid="B57">57</xref>), and social interaction (<xref ref-type="bibr" rid="B58">58</xref>&#x02013;<xref ref-type="bibr" rid="B60">60</xref>).</p>
<p>In this study, we propose a novel and useful framework using PV-RNN for the understanding of typical and atypical developmental process, referred to as &#x0201C;<italic>in silico</italic> neurodevelopment framework for atypical representation learning&#x0201D; (<xref ref-type="fig" rid="F1">Figure 1</xref>). The key point of the proposed framework is the integration of computational theory of hierarchical Bayesian models and neural network models as dynamical systems from the perspective of developmental learning. Specifically, in this framework, the developmental learning process of an agent is simulated in which the neural system acquires a hierarchical Bayesian representation in a self-organizing manner through interacting with the environment (<xref ref-type="fig" rid="F1">Figure 1A</xref>). Furthermore, by manipulating the inherent characteristics of neural dynamics and environmental factors, this framework can reproduce the diversity in the developmental process, including typical and atypical development and possible interventions (<xref ref-type="fig" rid="F1">Figure 1B</xref>). Namely, in the simulations, the environment generated observable signals based on the unobserved hierarchical and probabilistic generative process reflecting cognitive behavioral tasks. Through the developmental learning in this environment, the agent is required to acquire hierarchical Bayesian models reflecting the environment structures under various conditions. After this process, the performance of the agent in the cognitive behavioral tasks and the effects of manipulations are evaluated. In these ways, the relationships between the inherent characteristics of neural dynamics, acquisitions of hierarchical Bayesian representation, behavioral phenotypes, and the effects of environmental factors including possible interventions can be quantitatively analyzed.</p>
<fig id="F1" position="float">
<label>Figure 1</label>
<caption><p>The scheme of &#x0201C;<italic>in silico</italic> neurodevelopment framework for atypical representation learning&#x0201D; proposed in this study. <bold>(A)</bold> The agent modeled by the hierarchical Bayesian neural network model (PV-RNN) must learn the hierarchical and probabilistic structure hidden in the observations in the developmental learning process. <bold>(B)</bold> The inherent characteristics of neural dynamics and environmental factors are simulated as experimental manipulation to understand divergence in the developmental process. <italic>z</italic><sub><italic>t</italic></sub> and <italic>x</italic><sub><italic>t</italic></sub> represent latent and observed variables, respectively.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpsyt-14-1080668-g0001.tif"/>
</fig>
<p>As a proof of concept, we conducted a simulation experiment using the &#x0201C;<italic>in silico</italic> neurodevelopment framework for atypical representation learning&#x0201D; (<xref ref-type="fig" rid="F2">Figure 2</xref>). Specifically, we focused on the relationship between the acquisition of hierarchical and probabilistic representations reflecting environment structures and &#x0201C;reduced flexibility.&#x0201D; Indeed, reduced flexibility is one of the representative cognitive-behavioral phenotypes in ASD (<xref ref-type="bibr" rid="B2">2</xref>, <xref ref-type="bibr" rid="B61">61</xref>, <xref ref-type="bibr" rid="B62">62</xref>). Although many neural foundations related to reduced flexibility have been reported (<xref ref-type="bibr" rid="B2">2</xref>, <xref ref-type="bibr" rid="B62">62</xref>), the mechanism linking these neural alterations to reduced flexibility is not well understood. Therefore, in the simulations, we examined: (1) whether manipulating inherent characteristics of neural dynamics and external environment induces reduced flexibility; (2) whether these manipulations lead to the normal/abnormal acquisition of hierarchical Bayesian representations; (3) how the abnormalities in hierarchical Bayesian representation are related to reduced flexibility.</p>
<fig id="F2" position="float">
<label>Figure 2</label>
<caption><p><bold>(A)</bold> The simulation experiments based on the proposed framework. In the experiments, a flexibility task was used as the behavioral and cognitive task. To understand atypical developmental processes, (a) the stochasticity in the neural dynamics of the lower layer and (b) the noise level of the observation signal were manipulated. <italic>dist</italic>. represents distribution. <bold>(B)</bold> An example of training sequences in the simulation experiments. These sequences repeated state transitions to LEFT or RIGHT (&#x0201C;target state&#x0201D;). The probability of the transition from HOME to LEFT is determined by &#x0201C;transition bias.&#x0201D; Transition bias was set to 0.76 (LEFT-biased sequences), and &#x0201C;signal noise&#x0201D; was set to low (stable environment condition) in the presented sequence. In the test phase, the transition bias switched at the middle point in the sequence to quantify flexibility.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpsyt-14-1080668-g0002.tif"/>
</fig></sec>
<sec sec-type="materials and methods" id="s2">
<title>2. Materials and methods</title>
<sec>
<title>2.1. Overview</title>
<p>The simulation experiments based on the proposed framework consisted of two components including an environment (left side in <xref ref-type="fig" rid="F2">Figure 2A</xref>) and an agent (right side in <xref ref-type="fig" rid="F2">Figure 2A</xref>). The environment generated observable signals following the unobserved hierarchical and probabilistic generative process, which is designed to measure flexibility. The agent was required to embed the covert hierarchical structures of environment into neural dynamics using only the observed signals through the developmental learning process. After the learning process, the ability of flexibility was tested in this environment. In the experiments, stochasticity in the neural networks (i.e., agent side) and noise level of the observation signals (i.e., environment side) in the learning process were manipulated as inherent characteristics and external environmental factor, respectively. Then, we investigated whether the changes in these factors impacted on the acquisition of hierarchical representations and flexibility.</p>
</sec>
<sec>
<title>2.2. Environmental stimuli and task setting</title>
<p>The observable signals were two-dimensional trajectories of objects that mimic reaching movements (<xref ref-type="fig" rid="F2">Figure 2B</xref>) and were generated by three unobservable, hierarchical, and stochastic variables: &#x0201C;transition bias,&#x0201D; &#x0201C;target state,&#x0201D; and &#x0201C;signal noise&#x0201D; (left side of <xref ref-type="fig" rid="F2">Figure 2A</xref>). Specifically, these sequences repeated the state transitions from HOME to LEFT or RIGHT (target) and returned to HOME from LEFT or RIGHT. The transition bias represented the probability of the transition from HOME to LEFT, as a highest-order context in the environment. The target states (LEFT or RIGHT) were sampled from Bernoulli distributions parameterized by transition bias. The observable goal positions in one reaching movement were sampled from Gaussian distributions whose mean parameter corresponded to the center coordinates of each target state and variance parameter corresponded to signal noise.</p>
<p>For the training, two sets of nine sequences (18 sequences in total) with 512 steps were generated with nine different transition probabilities (0.98, 0.87, 0.76, 0.65, 0.54, 0.43, 0.32, 0.21, and 0.10). Asymmetry of transition bias was used to improve the divergence of variances in the sequences. The agent learned to reproduce these sequences with diverse transition probabilities through the developmental learning process. In the test phase, the &#x0201C;flexibility&#x0201D; of the agent was tested using unknown test sequences whose transition bias was switched at the middle of the sequences. Namely, for the test sequences, two sequences with different transition biases (256 steps) were connected in which the transition bias in the second half of the test sequence was randomly sampled from the values in opposite directions to the transition bias of the first half.</p>
<p>The flexibility of the agent was evaluated in terms of the capability to perceive and follow change in the observations and unobservable context (i.e., transition bias) in these unknown test sequences. This quantification was inspired by flexibility tasks, such as the Wisconsin card sorting task (<xref ref-type="bibr" rid="B61">61</xref>), in which participants are required to detect changes of a rule or context throughout the task. The flexibility of the agent was evaluated by using two types of performance measures: 1) how accurately the network predicted observations (behavioral flexibility) and 2) how accurately the network inferred unobservable transition bias of the current sequence (cognitive flexibility). The details of the signal generation and quantification methods are shown in <xref ref-type="supplementary-material" rid="SM1">Supplementary Methods 1.1</xref>, <xref ref-type="supplementary-material" rid="SM1">1.2</xref>, respectively.</p>
<p>The task settings presented here were designed to integrate motor control tasks and Wisconsin card sorting tasks. People with ASD have been reported to have alterations in sensorimotor processing (<xref ref-type="bibr" rid="B3">3</xref>, <xref ref-type="bibr" rid="B4">4</xref>, <xref ref-type="bibr" rid="B63">63</xref>), including the altered performance in the reaching movement task (<xref ref-type="bibr" rid="B3">3</xref>, <xref ref-type="bibr" rid="B63">63</xref>). Based on these findings, observation signals in the current task were synthetically created to mimic reaching behavior, including seeing an object, predicting the movements of the object, and reaching the object. The observation signals in our task setting correspond to the moves of the target object, and the outputs of the neural network model correspond to visual and proprioceptive signals. In addition, the current task also includes a component of cognitive function measured by the Wisconsin card sorting tasks, i.e., flexibility. Indeed, individuals with ASD have been also reported to have reduced performance in the flexibility task (<xref ref-type="bibr" rid="B2">2</xref>, <xref ref-type="bibr" rid="B61">61</xref>, <xref ref-type="bibr" rid="B62">62</xref>). This component was implemented in the form that rules of object transitions (i.e., the transition bias) were switched without any notifications, and the agent needs to discover the switch.</p>
</sec>
<sec>
<title>2.3. Neurocognitive model</title>
<sec>
<title>2.3.1. Architecture of PV-RNN</title>
<p>The task for the agent was to acquire an internal representation that reflects the abovementioned hidden environment structure and flexibly adapt to unknown sequences. According to the Bayesian brain hypothesis, this problem for the agent can be described as follows. The agent constructs the statistical model <italic>p</italic>(<italic>x</italic><sub>&#x02264; <italic>T</italic></sub>) &#x0003D; <italic>p</italic>(<italic>x</italic><sub>1</sub>, <italic>x</italic><sub>2</sub>, &#x02026;, <italic>x</italic><sub><italic>T</italic></sub>) approximating the true data distribution of the environment in which <italic>x</italic> and <italic>T</italic> represent the observed signals and length of sequences, respectively. The model of agent, PV-RNN (<xref ref-type="bibr" rid="B55">55</xref>), factorizes this distribution by introducing two latent variables, neural dynamic units <italic>d</italic><sub><italic>t</italic></sub> and probabilistic latent state units <italic>z</italic><sub><italic>t</italic></sub> (right side in <xref ref-type="fig" rid="F2">Figure 2A</xref>).</p>
<disp-formula id="E1"><mml:math id="M1"><mml:mtable columnalign="left"><mml:mtr><mml:mtd><mml:mi>p</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mo>&#x02264;</mml:mo><mml:mi>T</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mo>&#x0222B;</mml:mo><mml:mo>&#x000B7;</mml:mo><mml:mo>&#x000B7;</mml:mo><mml:mo>&#x000B7;</mml:mo><mml:mo>&#x0222B;</mml:mo><mml:mi>p</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mo>&#x02264;</mml:mo><mml:mi>T</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mo>&#x02264;</mml:mo><mml:mi>T</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mo>&#x02264;</mml:mo><mml:mi>T</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mi>d</mml:mi><mml:msub><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mo>&#x02264;</mml:mo><mml:mi>T</mml:mi></mml:mrow></mml:msub><mml:mi>d</mml:mi><mml:msub><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mo>&#x02264;</mml:mo><mml:mi>T</mml:mi></mml:mrow></mml:msub></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mo>=</mml:mo><mml:mo>&#x0222B;</mml:mo><mml:mo>&#x000B7;</mml:mo><mml:mo>&#x000B7;</mml:mo><mml:mo>&#x000B7;</mml:mo><mml:mo>&#x0222B;</mml:mo><mml:mi>p</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>|</mml:mo><mml:msub><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mi>p</mml:mi><mml:mrow><mml:mo 
stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mi>p</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mstyle displaystyle="true"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x0220F;</mml:mo></mml:mrow><mml:mrow><mml:mi>t</mml:mi><mml:mo>=</mml:mo><mml:mn>2</mml:mn></mml:mrow><mml:mrow><mml:mi>T</mml:mi></mml:mrow></mml:munderover></mml:mstyle><mml:mi>p</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>|</mml:mo><mml:msub><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mi>p</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>|</mml:mo><mml:msub><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi><mml:mo>-</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mi>p</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>|</mml:mo><mml:msub><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi><mml:mo>-</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub></mml:mrow><mml:mo 
stretchy="false">)</mml:mo></mml:mrow><mml:mi>d</mml:mi><mml:msub><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mo>&#x02264;</mml:mo><mml:mi>T</mml:mi></mml:mrow></mml:msub><mml:mi>d</mml:mi><mml:msub><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mo>&#x02264;</mml:mo><mml:mi>T</mml:mi></mml:mrow></mml:msub></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>This equation indicates that the PV-RNN constructs <italic>p</italic>(<italic>x</italic><sub>&#x02264; <italic>T</italic></sub>) using three components: prior distribution <italic>p</italic>(<italic>z</italic><sub><italic>t</italic></sub>|<italic>d</italic><sub><italic>t</italic>&#x02212;1</sub>), recurrent distribution <italic>p</italic>(<italic>d</italic><sub><italic>t</italic></sub>|<italic>d</italic><sub><italic>t</italic>&#x02212;1</sub>, <italic>z</italic><sub><italic>t</italic></sub>), and generative distribution <italic>p</italic>(<italic>x</italic><sub><italic>t</italic></sub>|<italic>d</italic><sub><italic>t</italic></sub>). In addition, to estimate the latent states based on observations, approximate posterior (inference) distribution <italic>q</italic>(<italic>z</italic><sub><italic>t</italic></sub>|<italic>d</italic><sub><italic>t</italic>&#x02212;1</sub>, <italic>a</italic><sub><italic>t</italic></sub>) was introduced. It should be noted that adaptive variables <italic>a</italic><sub><italic>t</italic></sub> are learnable parameters and save the error information about each training sequence. For the approximate posterior, the PV-RNN (<xref ref-type="bibr" rid="B55">55</xref>) uses <italic>q</italic>(<italic>z</italic><sub><italic>t</italic></sub>|<italic>d</italic><sub><italic>t</italic>&#x02212;1</sub>, <italic>a</italic><sub><italic>t</italic></sub>), instead of <italic>q</italic>(<italic>z</italic><sub><italic>t</italic></sub>|<italic>d</italic><sub><italic>t</italic>&#x02212;1</sub>, <italic>x</italic><sub><italic>t</italic></sub>) used in the variational recurrent neural network model (<xref ref-type="bibr" rid="B64">64</xref>). 
The use of <italic>q</italic>(<italic>z</italic><sub><italic>t</italic></sub>|<italic>d</italic><sub><italic>t</italic>&#x02212;1</sub>, <italic>a</italic><sub><italic>t</italic></sub>) is inspired by the predictive coding theory (<xref ref-type="bibr" rid="B24">24</xref>), namely the posterior of latent states is inferred not directly based on external inputs <italic>x</italic><sub><italic>t</italic></sub>, but based on prediction error.</p>
<p>These probabilistic distributions of mapping from the inputs to outputs were implemented in neural network models and refined through learning (update of synaptic weights). For example, the prior distribution <italic>p</italic>(<italic>z</italic><sub><italic>t</italic></sub>|<italic>d</italic><sub><italic>t</italic>&#x02212;1</sub>), assumed to follow a Gaussian distribution, was represented using the mean and variance units (top-right in <xref ref-type="fig" rid="F3">Figure 3</xref>). The neural network model corresponding to the prior distribution inferred the mean and variance of latent units <italic>z</italic><sub><italic>t</italic></sub> using neural dynamics of <italic>d</italic><sub><italic>t</italic></sub>.</p>
<fig id="F3" position="float">
<label>Figure 3</label>
<caption><p>The graphical representation of PV-RNN architecture <bold>(left)</bold>. PV-RNN constructs hierarchical generation process in which the higher layer has larger time constant (slow neural dynamics) while the lower layer has smaller time constant (fast neural dynamics), as shown in <bold>(bottom-right)</bold>. In <bold>(top-right)</bold>, the inference process of <italic>z</italic><sub><italic>t</italic></sub> is illustrated. The right superscripts of symbols (i.e., <italic>p</italic> and <italic>q</italic>) are used to distinguish prior and approximate posterior distributions. The inference of posterior latent units <inline-formula><mml:math id="M2"><mml:msubsup><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mi>q</mml:mi></mml:mrow></mml:msubsup></mml:math></inline-formula> is performed by propagating the errors in the reverse direction of arrows and updating the adaptive variables <italic>a</italic><sub><italic>t</italic></sub>. This figure is simplified to improve readability, and detailed and accurate information of PV-RNN is shown in <xref ref-type="supplementary-material" rid="SM1">Supplementary Methods 1.3</xref>, <xref ref-type="supplementary-material" rid="SM1">1.4</xref>.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpsyt-14-1080668-g0003.tif"/>
</fig>
<p>The neural network corresponding to recurrent distribution <italic>p</italic>(<italic>d</italic><sub><italic>t</italic></sub>|<italic>d</italic><sub><italic>t</italic>&#x02212;1</sub>, <italic>z</italic><sub><italic>t</italic></sub>) has a key role in the top-down and bottom-up flows of information in a hierarchical network (bottom-right in <xref ref-type="fig" rid="F3">Figure 3</xref>). It is well known that the brain has hierarchical properties such as different intrinsic neural timescales and distinctive anatomical connections, and the hierarchy may contribute to the complex cognitive functions (<xref ref-type="bibr" rid="B65">65</xref>, <xref ref-type="bibr" rid="B66">66</xref>). The hierarchical nature of the PV-RNN was implemented to imitate these biological findings by providing different time constants for each layer and restricting the connections between the higher and lower layer units [multiple timescale RNN: MTRNN (<xref ref-type="bibr" rid="B46">46</xref>, <xref ref-type="bibr" rid="B54">54</xref>)]. In addition, prior distribution <italic>p</italic>(<italic>z</italic><sub><italic>t</italic></sub>|<italic>d</italic><sub><italic>t</italic>&#x02212;1</sub>) and posterior distribution <italic>q</italic>(<italic>z</italic><sub><italic>t</italic></sub>|<italic>d</italic><sub><italic>t</italic>&#x02212;1</sub>, <italic>a</italic><sub><italic>t</italic></sub>) have similar restrictions on the connections between the layers. For example, <italic>z</italic><sub><italic>t</italic></sub> units in the higher layer are inferred only using <italic>d</italic><sub><italic>t</italic></sub> units in the higher layer. Considering this hierarchy, the data distribution <italic>p</italic>(<italic>x</italic><sub>&#x02264; <italic>T</italic></sub>) constructed by PV-RNN is factorized as follows (left side in <xref ref-type="fig" rid="F3">Figure 3</xref>):</p>
<disp-formula id="E2"><mml:math id="M8"><mml:mtable columnalign='left'><mml:mtr><mml:mtd><mml:mi>p</mml:mi><mml:mo stretchy='false'>(</mml:mo><mml:msub><mml:mi>x</mml:mi><mml:mrow><mml:mo>&#x02264;</mml:mo><mml:mi>T</mml:mi></mml:mrow></mml:msub><mml:mo stretchy='false'>)</mml:mo><mml:mo>=</mml:mo><mml:mstyle displaystyle='true'><mml:mrow><mml:mo>&#x0222B;</mml:mo><mml:mrow><mml:mn>...</mml:mn><mml:mstyle displaystyle='true'><mml:mrow><mml:mo>&#x0222B;</mml:mo><mml:mrow><mml:mi>p</mml:mi><mml:mo stretchy='false'>(</mml:mo><mml:msub><mml:mi>x</mml:mi><mml:mn>1</mml:mn></mml:msub><mml:mo>&#x0007C;</mml:mo><mml:msubsup><mml:mi>d</mml:mi><mml:mn>1</mml:mn><mml:mn>1</mml:mn></mml:msubsup><mml:mo stretchy='false'>)</mml:mo><mml:mstyle displaystyle='true'><mml:munderover><mml:mo>&#x0220F;</mml:mo><mml:mrow><mml:mi>l</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>L</mml:mi></mml:munderover><mml:mi>p</mml:mi></mml:mstyle><mml:mo stretchy='false'>(</mml:mo><mml:msubsup><mml:mi>d</mml:mi><mml:mn>1</mml:mn><mml:mi>l</mml:mi></mml:msubsup><mml:mo stretchy='false'>)</mml:mo><mml:mstyle displaystyle='true'><mml:munderover><mml:mo>&#x0220F;</mml:mo><mml:mrow><mml:mi>l</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>L</mml:mi></mml:munderover><mml:mrow><mml:mi>p</mml:mi><mml:mo stretchy='false'>(</mml:mo><mml:msubsup><mml:mi>z</mml:mi><mml:mn>1</mml:mn><mml:mi>l</mml:mi></mml:msubsup><mml:mo stretchy='false'>)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mrow></mml:mstyle></mml:mrow></mml:mrow></mml:mstyle></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mtext>&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;</mml:mtext><mml:mstyle 
displaystyle='true'><mml:munderover><mml:mo>&#x0220F;</mml:mo><mml:mrow><mml:mi>t</mml:mi><mml:mo>=</mml:mo><mml:mn>2</mml:mn></mml:mrow><mml:mi>T</mml:mi></mml:munderover><mml:mo>&#x0007B;</mml:mo></mml:mstyle><mml:mi>p</mml:mi><mml:mo stretchy='false'>(</mml:mo><mml:msub><mml:mi>x</mml:mi><mml:mi>t</mml:mi></mml:msub><mml:mo>&#x0007C;</mml:mo><mml:msubsup><mml:mi>d</mml:mi><mml:mi>t</mml:mi><mml:mn>1</mml:mn></mml:msubsup><mml:mo stretchy='false'>)</mml:mo></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mtext>&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;</mml:mtext><mml:mo>&#x0007B;</mml:mo><mml:mi>p</mml:mi><mml:mo stretchy='false'>(</mml:mo><mml:msubsup><mml:mi>d</mml:mi><mml:mi>t</mml:mi><mml:mn>1</mml:mn></mml:msubsup><mml:mo>&#x0007C;</mml:mo><mml:msubsup><mml:mi>d</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mo>&#x02212;</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mn>1</mml:mn></mml:msubsup><mml:mo>,</mml:mo><mml:msubsup><mml:mi>d</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mo>&#x02212;</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mn>2</mml:mn></mml:msubsup><mml:mo>,</mml:mo><mml:msubsup><mml:mi>z</mml:mi><mml:mi>t</mml:mi><mml:mn>1</mml:mn></mml:msubsup><mml:mo stretchy='false'>)</mml:mo><mml:mi>p</mml:mi><mml:mo stretchy='false'>(</mml:mo><mml:msubsup><mml:mi>d</mml:mi><mml:mi>t</mml:mi><mml:mi>L</mml:mi></mml:msubsup><mml:mo>&#x0007C;</mml:mo><mml:msubsup><mml:mi>d</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mo>&#x02212;</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>L</mml:mi><mml:mo>&#x02212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:msubsup><mml:mi>d</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mo>&#x02212;</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>L</mml:mi></mml:msubsup><mml:mo>,</mml:mo><mml:msubsup><mml:mi>z</mml:mi><mml:mi>t</mml:mi><mml:mi>L</mml:mi></mml:msubsup><mml:mo 
stretchy='false'>)</mml:mo></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mtext>&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;</mml:mtext><mml:mstyle displaystyle='true'><mml:munderover><mml:mo>&#x0220F;</mml:mo><mml:mrow><mml:mi>l</mml:mi><mml:mo>=</mml:mo><mml:mn>2</mml:mn></mml:mrow><mml:mrow><mml:mi>L</mml:mi><mml:mo>&#x02212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:munderover><mml:mrow><mml:mi>p</mml:mi><mml:mo stretchy='false'>(</mml:mo><mml:msubsup><mml:mi>d</mml:mi><mml:mi>t</mml:mi><mml:mi>l</mml:mi></mml:msubsup><mml:mo>&#x0007C;</mml:mo><mml:msubsup><mml:mi>d</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mo>&#x02212;</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>l</mml:mi><mml:mo>&#x02212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:msubsup><mml:mi>d</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mo>&#x02212;</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>l</mml:mi></mml:msubsup><mml:mo>,</mml:mo><mml:msubsup><mml:mi>d</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mo>&#x02212;</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>l</mml:mi><mml:mo>+</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:msubsup><mml:mi>z</mml:mi><mml:mi>t</mml:mi><mml:mi>l</mml:mi></mml:msubsup><mml:mo stretchy='false'>)</mml:mo></mml:mrow></mml:mstyle><mml:mo>&#x0007D;</mml:mo></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mtext>&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;&#x000A0;</mml:mtext><mml:mo>&#x0007B;</mml:mo><mml:mstyle displaystyle='true'><mml:munderover><mml:mo>&#x0220F;</mml:mo><mml:mrow><mml:mi>l</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>L</mml:mi></mml:munderover><mml:mi>p</mml:mi></mml:mstyle><mml:mo 
stretchy='false'>(</mml:mo><mml:msubsup><mml:mi>z</mml:mi><mml:mi>t</mml:mi><mml:mi>l</mml:mi></mml:msubsup><mml:mo>&#x0007C;</mml:mo><mml:msubsup><mml:mi>d</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mo>&#x02212;</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>l</mml:mi></mml:msubsup><mml:mo stretchy='false'>)</mml:mo><mml:mo>&#x0007D;</mml:mo><mml:mo>&#x0007D;</mml:mo><mml:mi>d</mml:mi><mml:msub><mml:mi>d</mml:mi><mml:mrow><mml:mo>&#x02264;</mml:mo><mml:mi>T</mml:mi></mml:mrow></mml:msub><mml:mi>d</mml:mi><mml:msub><mml:mi>z</mml:mi><mml:mrow><mml:mo>&#x02264;</mml:mo><mml:mi>T</mml:mi></mml:mrow></mml:msub></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>In this study, the number of layers was set to three. The numbers of <italic>d</italic><sub><italic>t</italic></sub> neural units and <italic>z</italic><sub><italic>t</italic></sub> units were set to (20, 10, 10) and (2, 2, 2), respectively, with the time constants set to (2, 8, 32). Because <italic>d</italic><sub><italic>t</italic></sub> was treated as a deterministic variable, the integral over <italic>d</italic><sub><italic>t</italic></sub> is omitted in the following. The detailed architecture and generative processes are provided in the <xref ref-type="supplementary-material" rid="SM1">Supplementary Method 1.3</xref>.</p></sec>
<sec>
<title>2.3.2. Loss function in the learning and test phase</title>
<p>Updates of synaptic weights in the learning phase and inference of latent states in the test phase follow the unified principle of minimizing the loss function. In the learning phase, losses were minimized by iteratively updating the synaptic weights and adaptive variables <italic>a</italic><sub><italic>t</italic></sub>. As a result of learning, PV-RNN was expected to acquire efficient mapping from observed sensorimotor signals to hierarchical Bayesian representations. On the other hand, during the test phase, inference of latent states in the posterior distribution was performed by modifying the adaptive variables <italic>a</italic><sub><italic>t</italic></sub> to minimize the losses while keeping the synaptic weights fixed, a procedure called &#x0201C;error regression&#x0201D; (<xref ref-type="bibr" rid="B49">49</xref>).</p>
<p>In mathematical terms, the model parameters, such as synaptic weights and adaptive variables, were adjusted to maximize the similarity between the statistical model <italic>p</italic>(<italic>x</italic><sub>&#x02264; <italic>T</italic></sub>) and the true data distribution of the environment. This is achieved by minimizing the negative of marginal log likelihood &#x02212;log<italic>p</italic>(<italic>x</italic>). Using variational inference (<xref ref-type="bibr" rid="B67">67</xref>),</p>
<disp-formula id="E3"><mml:math id="M4"><mml:mtable columnalign="left"><mml:mtr><mml:mtd><mml:mo>-</mml:mo><mml:mo class="qopname">log</mml:mo><mml:mi>p</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>&#x02264;</mml:mo><mml:mstyle displaystyle="true"><mml:munder class="msub"><mml:mrow><mml:mstyle displaystyle="true"><mml:munder accentunder="false"><mml:mrow><mml:mo>-</mml:mo><mml:mstyle displaystyle="true"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>t</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>T</mml:mi></mml:mrow></mml:munderover></mml:mstyle><mml:msub><mml:mrow><mml:mi>E</mml:mi></mml:mrow><mml:mrow><mml:mi>q</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>|</mml:mo><mml:msub><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi><mml:mo>-</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>a</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msub><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mo class="qopname">log</mml:mo><mml:mi>p</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>|</mml:mo><mml:msub><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo>]</mml:mo></mml:mrow></mml:mrow><mml:mo>&#x0FE38;</mml:mo></mml:munder></mml:mstyle></mml:mrow><mml:mrow><mml:mtext class="textrm" 
mathvariant="normal">Reconstruction</mml:mtext><mml:mo>&#x000A0;</mml:mo><mml:mtext>errors</mml:mtext></mml:mrow></mml:munder></mml:mstyle></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mtext>&#x02003;</mml:mtext><mml:mo>&#x0002B;</mml:mo><mml:mstyle displaystyle="true"><mml:munder class="msub"><mml:mrow><mml:mstyle displaystyle="true"><mml:munder accentunder="false"><mml:mrow><mml:mstyle displaystyle="true"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>t</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>T</mml:mi></mml:mrow></mml:munderover></mml:mstyle><mml:msub><mml:mrow><mml:mi>D</mml:mi></mml:mrow><mml:mrow><mml:mi>K</mml:mi><mml:mi>L</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mi>q</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>|</mml:mo><mml:msub><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi><mml:mo>-</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>a</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>|</mml:mo><mml:mo>|</mml:mo><mml:mi>p</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>|</mml:mo><mml:msub><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi><mml:mo>-</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo>]</mml:mo></mml:mrow></mml:mrow><mml:mo>&#x0FE38;</mml:mo></mml:munder></mml:mstyle></mml:mrow><mml:mrow><mml:mtext class="textrm" 
mathvariant="normal">Regularization&#x000A0;errors</mml:mtext></mml:mrow></mml:munder></mml:mstyle><mml:mo>.</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>The right-hand side in this inequality is called variational free energy, and its negative is equivalent to the evidence lower bound (<xref ref-type="bibr" rid="B55">55</xref>, <xref ref-type="bibr" rid="B68">68</xref>, <xref ref-type="bibr" rid="B69">69</xref>). The first term, also called the reconstruction errors, is the negative log likelihood and reflects the differences between the data observations and predictions generated by the model. The second term, in which <italic>D</italic><sub><italic>KL</italic></sub> represents Kullback-Leibler divergence, reflects the similarity between the prior distribution and posterior distribution and was proposed to have a regularization role (<xref ref-type="bibr" rid="B55">55</xref>). In PV-RNN, the weighting factor <italic>w</italic><sup><italic>l</italic></sup> for each hierarchy <italic>l</italic> was introduced to control the similarity between the prior distribution and posterior distribution as follows:</p>
<disp-formula id="E4"><mml:math id="M5"><mml:mtable columnalign="left"><mml:mtr><mml:mtd><mml:mi>L</mml:mi><mml:mi>o</mml:mi><mml:mi>s</mml:mi><mml:mi>s</mml:mi><mml:mo>=</mml:mo><mml:mo>-</mml:mo><mml:mstyle displaystyle="true"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>t</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>T</mml:mi></mml:mrow></mml:munderover></mml:mstyle><mml:msub><mml:mrow><mml:mi>E</mml:mi></mml:mrow><mml:mrow><mml:mi>q</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>|</mml:mo><mml:msub><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi><mml:mo>-</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>a</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msub><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mo class="qopname">log</mml:mo><mml:mi>p</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>|</mml:mo><mml:msubsup><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msubsup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo>]</mml:mo></mml:mrow></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mtext>&#x02003;</mml:mtext><mml:mo>&#x0002B;</mml:mo><mml:mstyle displaystyle="true"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>t</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>T</mml:mi></mml:mrow></mml:munderover></mml:mstyle><mml:mstyle displaystyle="true"><mml:munderover accentunder="false" 
accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>l</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>L</mml:mi></mml:mrow></mml:munderover></mml:mstyle><mml:msup><mml:mrow><mml:mi>w</mml:mi></mml:mrow><mml:mrow><mml:mi>l</mml:mi></mml:mrow></mml:msup><mml:msub><mml:mrow><mml:mi>D</mml:mi></mml:mrow><mml:mrow><mml:mi>K</mml:mi><mml:mi>L</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mi>q</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msubsup><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mi>l</mml:mi></mml:mrow></mml:msubsup><mml:mo>|</mml:mo><mml:msubsup><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi><mml:mo>-</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>l</mml:mi></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:msubsup><mml:mrow><mml:mi>a</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mi>l</mml:mi></mml:mrow></mml:msubsup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>|</mml:mo><mml:mo>|</mml:mo><mml:mi>p</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msubsup><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mi>l</mml:mi></mml:mrow></mml:msubsup><mml:mo>|</mml:mo><mml:msubsup><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi><mml:mo>-</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>l</mml:mi></mml:mrow></mml:msubsup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo>]</mml:mo></mml:mrow><mml:mo>.</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>The weighting factor <italic>w</italic><sup><italic>l</italic></sup>, referred to as &#x0201C;meta-prior,&#x0201D; was considered to control the stochasticity of neural dynamics (<xref ref-type="supplementary-material" rid="SM1">Supplementary Figures S1</xref>, <xref ref-type="supplementary-material" rid="SM1">S2</xref>) through the developmental learning process (<xref ref-type="bibr" rid="B55">55</xref>). In the developmental learning process, the neural dynamics is stochastic when the meta-prior is weak, while it is deterministic when the meta-prior is strong. In the test phase, the meta-prior plays a role in controlling the impact of the prior on the posterior; that is, a high meta-prior in the test phase leads to a strong effect of the prior on the posterior, while a low meta-prior weakens the effect. It is noted that the effects of the meta-prior differ between the learning and test phases because synaptic weights are fixed in the test phase, and only inferred latent units in the posterior are updated. All parameters of PV-RNN (the synaptic weights and adaptive variables <italic>a</italic><sub><italic>t</italic></sub>) were optimized using backpropagation through time by minimizing the loss function. As an optimizer, Adam (<xref ref-type="bibr" rid="B70">70</xref>) was used. The details of the loss derivation are provided in <xref ref-type="supplementary-material" rid="SM1">Supplementary Method 1.4</xref>.</p>
</sec></sec>
<sec>
<title>2.4. Simulations of diversity in neural development</title>
<p>We manipulated several parameters in the simulation of the learning phase to investigate the relationships between inherent characteristics of neural dynamics, hierarchical Bayesian representation, behavioral and cognitive flexibility, and external environmental factors. First, as the inherent characteristics of neural dynamics, the stochasticity of the network in the developmental learning process was manipulated; This was implemented by changing the value of the meta-prior that controls the balance of two terms (reconstruction errors and regularization errors) in the loss function. This manipulation was attempted based on the previous theoretical studies suggesting that the stochasticity of the network (high or low neural noise) contributes to autistic symptoms (<xref ref-type="bibr" rid="B71">71</xref>, <xref ref-type="bibr" rid="B72">72</xref>). In fact, some non-invasive studies have reported that participants with ASD showed altered neural noise (<xref ref-type="bibr" rid="B6">6</xref>, <xref ref-type="bibr" rid="B7">7</xref>, <xref ref-type="bibr" rid="B73">73</xref>, <xref ref-type="bibr" rid="B74">74</xref>). Based on these findings and hypotheses, we expected that autistic-like phenomena, i.e., reduced flexibility, would be observed under both weak (high stochasticity) and strong (low stochasticity) meta-prior conditions, and the reduced flexibility would be induced by an abnormality in acquired hierarchical Bayesian representation. As a specific simulation setting, the meta-prior in the lower layer was set to 0.1, 1.0, and 10 as the weak, normal, and strong meta-prior conditions, respectively; the meta-prior in other layers was set to 1.0.</p>
<p>The second manipulated parameter was the level of noise included in the environmental stimulus during the developmental learning process; this was motivated by the well-known observations that reducing ambiguity in stimulations and structuring the environment promote learning and improve behavioral and cognitive functions in children with ASD (<xref ref-type="bibr" rid="B16">16</xref>, <xref ref-type="bibr" rid="B17">17</xref>). The large noise condition and small noise condition were tested by changing the levels of signal noise corresponding to the changes in the ambiguity of the states (LEFT, RIGHT, and HOME). Based on the findings related to interventions for people with ASD (<xref ref-type="bibr" rid="B16">16</xref>, <xref ref-type="bibr" rid="B17">17</xref>), we hypothesized that less flexibility and alterations in the hierarchical Bayesian representations would be observed under the large noise condition (noisy environment) than under the small noise condition (stable environment).</p>
</sec>
<sec>
<title>2.5. Implementation and statistical analysis</title>
<p>Python and PyTorch (<xref ref-type="bibr" rid="B75">75</xref>) were used in the experimental simulation to generate training and test sequences and implement the neural network model. Both R (<xref ref-type="bibr" rid="B76">76</xref>) and Python were used for visualization and statistical analysis. Twenty networks were trained in each condition. In each analysis, values outside of 1.5 times the interquartile range in each condition were removed as outliers. Therefore, the number of networks included in each condition differed between analyses. To compare between meta-prior conditions, between-subject analyses of variance (ANOVA) were used (three levels: normal, strong, and weak meta-prior). The interaction effects of meta-prior and signal noise were analyzed using a three (meta-prior conditions) &#x000D7; two (stable and noisy environments) ANOVA. In <italic>post-hoc</italic> multiple comparisons, Shaffer&#x00027;s modified sequentially rejective Bonferroni procedure was used.</p></sec></sec>
<sec sec-type="results" id="s3">
<title>3. Results</title>
<sec>
<title>3.1. Behavioral and cognitive flexibility in hierarchical Bayesian RNN</title>
<p>A representative example of the generation of behavioral sequences and neural activities with the value of meta-prior referred to as the &#x0201C;normal meta-prior&#x0201D; condition is presented in <xref ref-type="fig" rid="F4">Figure 4</xref>. The output sequences of RNN and test sequences were seemingly concordant not only at the observation signal (xy-coordinate) level, but also at the state transition level (i.e., HOME/LEFT/RIGHT). This indicated that the network successfully predicted unknown observations and adapted to the changes in the observation signals based on hierarchical internal representations acquired through the developmental learning.</p>
<fig id="F4" position="float">
<label>Figure 4</label>
<caption><p>An example of flexibility tasks under normal meta-prior condition. In the top of figure, RNN generations and test sequence are plotted on two-dimensional plane <bold>(left)</bold> and along time axis <bold>(right)</bold>. The unit0 and unit1, unit2 and unit3, and unit4 and unit5 reflect the lower layer, middle layer, and higher layer, respectively. The latent units coding mean parameters of Gaussian distributions are plotted in the figure rather than <italic>z</italic><sub><italic>t</italic></sub> itself. In the figure, only the 128 steps before and after switching of the transition bias are plotted.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpsyt-14-1080668-g0004.tif"/>
</fig>
<p>Qualitative inspection indicated that the hierarchical representation of each latent unit played a different functional role. For example, the activities of latent units of the lower layer (unit0 and unit1) were synchronized with the y-axis in the behavioral trajectories. In the middle layer, unit2 was active when target states moved to RIGHT and unit3 was active in the opposite direction. The higher layer units, such as unit4 and unit5, appeared to be related to the probability of transitions to LEFT and RIGHT. Specifically, unit5 was active in generating LEFT-biased sequence (first half of <xref ref-type="fig" rid="F4">Figure 4</xref>), and unit4 was active in generating RIGHT-biased sequence (last half of <xref ref-type="fig" rid="F4">Figure 4</xref>). The distinct role of the middle layer and higher layer can be clearly observed in the last half of <xref ref-type="fig" rid="F4">Figure 4</xref>. In this period, unit4 was continuously active because of RIGHT-biased generation even when LEFT-transition occurred (probabilistic effect on outputs). In contrast, unit3 was only active when LEFT-transition occurred (direct effects on target states). These observations indicated that the PV-RNN with normal meta-prior condition acquired a hierarchical representation, which reflected the structures of the environment and was flexible enough to adapt not only to the observable stimulus changes but also to the unobservable context switching.</p>
<p>Under the strong meta-prior condition, the network failed to accurately predict the observations in the test phase. For example, the movement timing of the generated sequence did not match that of the test sequence (arrowheads in <xref ref-type="fig" rid="F5">Figure 5A</xref>). In addition, the network under the strong meta-prior condition was unable to respond to the changes in context (probability of transitions) in the target states and repeated previous output patterns (perseveration errors; arrows in <xref ref-type="fig" rid="F5">Figure 5A</xref>). Indeed, activities of higher layer units (unit4 and unit5) did not change at the point when transition bias switched in the test sequence. On the other hand, these failures in behavior including perseveration errors were not observed under the weak meta-prior condition (<xref ref-type="fig" rid="F5">Figure 5B</xref>). However, neural activities seemed to be relatively noisy and unstable, and the functional roles of each layer of latent units were not clear compared to the normal meta-prior.</p>
<fig id="F5" position="float">
<label>Figure 5</label>
<caption><p><bold>(A)</bold> Example of flexibility tasks under strong meta-prior condition. The arrows and arrowheads represent perseveration errors and timing mismatches, respectively. The latent units coding mean parameters of Gaussian distributions were plotted in figure rather than <italic>z</italic><sub><italic>t</italic></sub> itself. <bold>(B)</bold> Example of flexibility tasks under weak meta-prior condition. The range of color plot adjusted to activities of higher latent units although the max and min values in lower- and middle-units surpassed the ranges of those plotted. In the figures <bold>(A, B)</bold>, only the 128 steps before and after switching of the transition bias are plotted.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpsyt-14-1080668-g0005.tif"/>
</fig>
<p>To confirm this qualitative evaluation, two types of measure were introduced: behavioral and cognitive flexibility. Behavioral flexibility was the ability to accurately adapt to observable signal changes and quantified using the percentage of the agreement between the states of observations and the states of predictions by the networks. On the other hand, cognitive flexibility was evaluated using the correlations between true values of transition bias in the test sequences and the activities of latent units in the higher layer of the networks. Therefore, cognitive flexibility reflects the efficacy of representation learning in terms of passive inference for higher-order context and the &#x0201C;insight&#x0201D; for changes of higher-order hidden context (transition bias) in the environment.</p>
<p>Consistent with the qualitative evaluations, behavioral flexibility declined under the strong meta-prior condition [<italic>F</italic><sub>(2, 51)</sub> = 152.5871; <italic>p</italic> &#x0003C; 0.0001 using ANOVA, and <italic>t</italic><sub>(51)</sub> = 15.0647; <italic>p</italic> &#x0003C; 0.0001 at normal &#x0003E; strong, and <italic>t</italic><sub>(51)</sub> = 14.9831; <italic>p</italic> &#x0003C; 0.0001 at weak &#x0003E; strong in <italic>post-hoc</italic> tests; <xref ref-type="fig" rid="F6">Figure 6A</xref>]. Furthermore, cognitive flexibility was lower under the strong meta-prior condition than under the weak and normal meta-prior conditions [<italic>F</italic><sub>(2, 56)</sub> = 15.6619; <italic>p</italic> &#x0003C; 0.0001 using ANOVA, and <italic>t</italic><sub>(56)</sub> = 4.6497; <italic>p</italic> &#x0003C; 0.0001 at normal &#x0003E; strong, <italic>t</italic><sub>(56)</sub> = 5.0041; <italic>p</italic> &#x0003C; 0.0001 at weak &#x0003E; strong in <italic>post-hoc</italic> tests; <xref ref-type="fig" rid="F6">Figure 6B</xref>].</p>
<fig id="F6" position="float">
<label>Figure 6</label>
<caption><p>The quantitative evaluation about behavioral flexibility <bold>(A)</bold> and cognitive flexibility <bold>(B)</bold>. MP represents meta-prior.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpsyt-14-1080668-g0006.tif"/>
</fig>
</sec>
<sec>
<title>3.2. Hierarchical and probabilistic representation for active generation</title>
<p>To further examine the functional role of the latent units in each layer of PV-RNN, we adopted a deep learning technique called &#x0201C;latent space traversal (LST).&#x0201D; In the LST, the changes in the network predictions were investigated when the activity of a single target latent unit was intentionally manipulated (<xref ref-type="bibr" rid="B77">77</xref>, <xref ref-type="bibr" rid="B78">78</xref>). This makes it possible to functionally, causally, and operationally examine whether neural units code output information and to reject the possibility that the activity of higher layer units is passively responding to bottom-up signals. Therefore, the LST method focused on the decoding (active generation) ability while cognitive flexibility focused on the encoding ability (passive inference), although both were used for evaluation of representation learning.</p>
<p>The LST analysis was conducted as follows. One sequence of 1,024 time steps was generated by setting the activity of the target latent unit at a particular fixing value. This process was repeated by changing the fixing values ranging from &#x02013;1.0 to 1.0. Properties of the generated sequences were evaluated in terms of the ratios of time steps staying with HOME and the number of LEFT transitions, and so on. Examples of generated sequences using LST were shown in <xref ref-type="supplementary-material" rid="SM1">Supplementary Figure S3</xref>.</p>
<p>The LST under normal meta-prior condition demonstrated that the lower the activities in unit0 and unit1, the more time steps staying with HOME state (<xref ref-type="fig" rid="F7">Figure 7A</xref>), suggesting that the lower layer units (unit0 and unit1) coded the y-axis movement. Similarly, the manipulations in the activities of middle layer units (unit2 and unit3) and higher layer units (unit4) led to changes in the number of transitions to the LEFT state, suggesting that these units coded LEFT/RIGHT transitions (<xref ref-type="fig" rid="F7">Figure 7B</xref>). Note that the slope of the changes in the number of LEFT transitions induced by the higher layer unit manipulations is shallower than those induced by the middle layer unit manipulations. This observation suggests that the activity of the higher layer unit likely codes probabilistic information (i.e., transition bias), while the activities of middle layer units were directly associated with the target state (i.e., LEFT or RIGHT) in an all-or-nothing manner. In addition, LST analysis applied to the variance units demonstrated that the variances of generated sequences increased as the activities of variance unit in the lower layers increased (<xref ref-type="fig" rid="F7">Figure 7C</xref>). This unit seemed to code the amount of noise in the predicted signals (i.e., signal noise). These results suggested that the PV-RNN under the normal meta-prior condition could acquire hidden hierarchical and probabilistic structures of the environment in terms of not only passive inference but also active generation.</p>
<fig id="F7" position="float">
<label>Figure 7</label>
<caption><p>The results of latent space traversal under normal meta-prior condition. The properties of generated sequences (y-axis) changed depending on fixed activation values (x-axis) of one particular unit. Changes in the number of steps staying with HOME states <bold>(A)</bold> and the numbers of transition to LEFT states <bold>(B)</bold> were plotted. <bold>(C)</bold> Changes in the variances of generations were plotted when activities of units inferring variances of latent units were fixed. Irrelevant lines are plotted in a pale color to improve readability.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpsyt-14-1080668-g0007.tif"/>
</fig>
<p>The LST analysis with different meta-prior setting conditions demonstrated altered hierarchical representations. For example, under the strong meta-prior condition, lower layer units and higher layer units did not have distinct roles, and several levels of functions are intermingled in the middle layer. Namely, the activities of unit0 and unit1 (lower layer) did not have the effects on steps staying in HOME state and the number of LEFT transitions (<xref ref-type="fig" rid="F8">Figures 8A</xref>, <xref ref-type="fig" rid="F8">B</xref>). The activities of unit4 (higher layer) did not have clear effects on the sequence generations (i.e., association of activities and properties of generated sequences have several outliers) (<xref ref-type="fig" rid="F8">Figure 8B</xref>). On the other hand, changes in unit2 and unit3 (middle layer) had effects on the time steps staying HOME state (<xref ref-type="fig" rid="F8">Figure 8A</xref>) and in the LEFT transitions (<xref ref-type="fig" rid="F8">Figure 8B</xref>). These observations suggested that under the strong meta-prior condition, the representations in each layer were not good, which was consistent with the observations of poor behavioral and cognitive flexibilities.</p>
<fig id="F8" position="float">
<label>Figure 8</label>
<caption><p>The results of latent space traversal under strong <bold>(A, B)</bold> and weak meta-prior condition <bold>(C, D)</bold>. Irrelevant lines are plotted in a pale color to improve readability.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpsyt-14-1080668-g0008.tif"/>
</fig>
<p>On the other hand, under the weak meta-prior condition, unit1 and unit0 had very clear effects on the y-axis movements (<xref ref-type="fig" rid="F8">Figure 8C</xref>) and LEFT transition (<xref ref-type="fig" rid="F8">Figure 8D</xref>), respectively. However, unit2, unit3 (middle layer), unit4, and unit5 (higher layer) had no effects on generated sequences. Therefore, under weak meta-prior condition, it seemed that latent representations in the lower layer were effective, but those in higher layer were ineffective.</p>
<p>To quantitatively confirm these findings, we defined a measure referred to as &#x0201C;generative hierarchy,&#x0201D; which represents the total amount of the causal effect of a network in terms of active generation. Namely, the latent units of the network have stronger causal effects for output sequences when the generative hierarchy of a network is high. The detailed procedure is as follows: first, in the LST analysis, correlations between the manipulated values of a particular latent unit (horizontal axis in <xref ref-type="fig" rid="F7">Figures 7</xref>, <xref ref-type="fig" rid="F8">8</xref>) and behavioral properties of generated sequences including the number of transitions to each state, the number of stay steps in each state, and the variance in each state (i.e., the vertical axis in <xref ref-type="fig" rid="F7">Figures 7</xref>, <xref ref-type="fig" rid="F8">8</xref>) were calculated. The maximum value of the correlations over all properties was calculated based on the assumption that this value represents the efficacy of each latent unit on behavioral generation. Finally, the average of the efficacy of the latent units in each layer was used as the generative hierarchy of each layer in one network.</p>
<p><xref ref-type="fig" rid="F9">Figure 9</xref> depicts the generative hierarchy under each meta-prior condition. As expected, to sum up all the layers, generative hierarchy of the latent representations in normal meta-prior conditions seemed to be better than in other conditions. On the other hand, under weak meta-prior condition, the generative hierarchy in the middle and higher layer was poor, although that in the lower layer was comparable to normal meta-prior condition. The generative hierarchy under strong meta-prior condition was reduced in all layers compared to normal meta-prior condition except for noise representations of variance units.</p>
<fig id="F9" position="float">
<label>Figure 9</label>
<caption><p><bold>(A&#x02013;D)</bold> The generative hierarchy under each condition. MP represents meta-prior.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpsyt-14-1080668-g0009.tif"/>
</fig>
<p>These observations were confirmed by the following statistical analyses. The generative hierarchy was best under weak meta-prior condition in the lower layer [<xref ref-type="fig" rid="F9">Figure 9A</xref>; <italic>F</italic><sub>(2, 56)</sub> = 361.8663; <italic>p</italic> &#x0003C; 0.0001, <italic>t</italic><sub>(56)</sub> = 3.4104; <italic>p</italic> = 0.0012 at weak &#x0003E; normal, <italic>t</italic><sub>(56)</sub> = 24.8918; <italic>p</italic> &#x0003C; 0.0001 at weak &#x0003E; strong, and <italic>t</italic><sub>(56)</sub> = 21.1603; <italic>p</italic> &#x0003C; 0.0001 at normal &#x0003E; strong]. However, in the middle layer (<xref ref-type="fig" rid="F9">Figure 9B</xref>), generative hierarchy was better in the normal meta-prior condition than in the strong and weak meta-prior conditions [<italic>F</italic><sub>(2, 53)</sub> = 33.5184; <italic>p</italic> &#x0003C; 0.0001, <italic>t</italic><sub>(53)</sub> = 8.1367; <italic>p</italic> &#x0003C; 0.0001 at normal &#x0003E; weak, <italic>t</italic><sub>(53)</sub> = 3.5753; <italic>p</italic> = 0.0008 at normal &#x0003E; strong, <italic>t</italic><sub>(53)</sub> = 4.7977; <italic>p</italic> &#x0003C; 0.0001 at strong &#x0003E; weak]. Similar to the middle layer, in the higher layer (<xref ref-type="fig" rid="F9">Figure 9C</xref>), the normal meta-prior condition showed the best generative hierarchy [<italic>F</italic><sub>(2, 54)</sub> = 24.3196; <italic>p</italic> &#x0003C; 0.0001, <italic>t</italic><sub>(54)</sub> = 6.9503; <italic>p</italic> &#x0003C; 0.0001 at normal &#x0003E; weak, <italic>t</italic><sub>(54)</sub> = 3.2721; <italic>p</italic> = 0.0019 at normal &#x0003E; strong, and <italic>t</italic><sub>(54)</sub> = 3.8371; <italic>p</italic> = 0.0003 at strong &#x0003E; weak]. The differences in the generative hierarchy of variance units were not significant [<xref ref-type="fig" rid="F9">Figure 9D</xref>; <italic>F</italic><sub>(2, 57)</sub> = 0.0769; <italic>p</italic> = 0.9261].</p>
<p>These results suggested that the networks under a weak meta-prior condition generated sequences using only the lower layer; they did not have sufficient hierarchical and disentangled representations in terms of active generation, and the hierarchical representations were effective only during passive inference. On the other hand, the networks under a strong meta-prior condition showed abnormalities in hierarchical representations in terms of both active generation and passive inference.</p>
</sec>
<sec>
<title>3.3. The buffering effect of environment on representation learning</title>
<p>As an external environmental factor during the developmental learning process, the noise level of the observation signals was manipulated. This experiment is motivated by the well-known phenomenon in education and support for children with ASD, namely that reducing ambiguity in stimulations and the structuring environment promote learning and improve behavioral and cognitive functions (<xref ref-type="bibr" rid="B16">16</xref>, <xref ref-type="bibr" rid="B17">17</xref>). In this experiment, we manipulated the signal noise level included in the training and test sequences and examined the interaction effect between meta-prior and noise level on the representation learning. <xref ref-type="fig" rid="F10">Figure 10A</xref> illustrated a representative example of behavioral sequence for training under the &#x0201C;noisy&#x0201D; environment condition in which LEFT and RIGHT states were not clearly distinguishable, in contrast to &#x0201C;stable&#x0201D; signal noise condition (<xref ref-type="fig" rid="F2">Figure 2B</xref>).</p>
<fig id="F10" position="float">
<label>Figure 10</label>
<caption><p><bold>(A)</bold> The training sequence in noisy environment condition in which transition bias was set to 0.76 (LEFT-biased sequences). <bold>(B&#x02013;E)</bold> The interaction effect between the environment and meta-prior. The results of the statistical tests are shown only for the strong meta-prior condition.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpsyt-14-1080668-g0010.tif"/>
</fig>
<p>As described earlier, strong meta-prior condition showed reduced flexibilities (behavioral and cognitive: <xref ref-type="fig" rid="F6">Figure 6</xref>) and poor generative hierarchy (<xref ref-type="fig" rid="F9">Figure 9</xref>). However, in the noisy environment condition, the behavioral flexibility under strong meta-prior conditions was improved (<xref ref-type="fig" rid="F10">Figure 10B</xref>). Furthermore, the networks under strong meta-prior and noisy environment condition partly acquired the hierarchical representations, specifically in the middle (<xref ref-type="fig" rid="F10">Figure 10D</xref>) and higher layer (<xref ref-type="fig" rid="F10">Figure 10E</xref>). However, noisy environment under strong meta-prior condition did not induce the improvement of generative hierarchy in the lower layer (<xref ref-type="fig" rid="F10">Figure 10C</xref>).</p>
<p>These observations were confirmed by the following statistical analyses. There were significant main effects of meta-prior and environment [<italic>F</italic><sub>(2, 105)</sub> = 116.4491; <italic>p</italic> &#x0003C; 0.0001 in meta-prior, <italic>F</italic><sub>(1, 105)</sub> = 90.8178, <italic>p</italic> &#x0003C; 0.0001 in environment]. In addition, the interaction effect between the environment and meta-prior on behavioral flexibility was significant [interaction effect <italic>F</italic><sub>(2, 105)</sub> = 72.7390, <italic>p</italic> &#x0003C; 0.0001]. Furthermore, the difference of behavioral flexibility between environment conditions was significant under strong meta-prior condition [the simple effect of environment on strong meta-prior condition <italic>F</italic><sub>(1, 105)</sub> = 222.8984; <italic>p</italic> &#x0003C; 0.0001]. However, the interaction effects on cognitive flexibility were not significant [<italic>F</italic><sub>(2, 112)</sub> = 1.7649; <italic>p</italic> = 0.1759], although main effects of meta-prior [<italic>F</italic><sub>(2, 112)</sub> = 28.7819; <italic>p</italic> &#x0003C; 0.0001] and noise level of environment [<italic>F</italic><sub>(1, 112)</sub> = 5.5784; <italic>p</italic> = 0.0199] were significant. Moreover, the strong meta-prior condition under noisy environment improved generative hierarchy in the middle layer [interaction effect <italic>F</italic><sub>(2, 107)</sub> = 6.3325; <italic>p</italic> = 0.0025, and simple effect of environment on strong meta-prior <italic>F</italic><sub>(1, 107)</sub> = 16.1836; <italic>p</italic> = 0.0001] and in the higher layer [interaction effect <italic>F</italic><sub>(2, 106)</sub> = 7.1059; <italic>p</italic> = 0.0013, and simple effect of environment on strong meta-prior <italic>F</italic><sub>(1, 106)</sub> = 14.5995; <italic>p</italic> = 0.0002]. 
The interaction effect between the environment and meta-prior on lower representations was not significant [<italic>F</italic><sub>(2, 111)</sub> = 0.3530, <italic>p</italic> = 0.7033].</p>
<p>Therefore, under the strong meta-prior condition, increased signal noise improved the behavioral flexibility and acquisition of the hierarchical Bayesian representation.</p></sec></sec>
<sec sec-type="discussion" id="s4">
<title>4. Discussion</title>
<p>In this study, we proposed a new research framework for understanding the pathological mechanisms of the atypical developmental process, using state-of-the-art computational model, PV-RNN. This framework comprehensively includes simulations of the multiple factors related to developmental disorders, for example, the neural dynamics, hierarchical Bayesian representation, cognitive-behavioral phenotypes, developmental learning processes, and the environment. In this framework, these factors could be manipulated without any restriction and analyzed quantitatively.</p>
<p>As an example, in experiments using this framework, we analyzed the relationships between inherent characteristics of neural dynamics, hierarchical Bayesian representation, the properties of external stimulus, and inflexibility, which is cognitive-behavioral phenotype observed in patients with ASD. Particularly, this study investigated: (1) whether manipulating inherent characteristics of neural dynamics and external environment induces reduced flexibility; (2) whether these manipulations lead to the normal/abnormal acquisition of hierarchical Bayesian representations; and (3) how the abnormalities in hierarchical Bayesian representations are related to reduced flexibility. <xref ref-type="fig" rid="F11">Figure 11</xref> summarizes the results for these questions.</p>
<fig id="F11" position="float">
<label>Figure 11</label>
<caption><p>The results of simulation experiments were graphically summarized. The networks with normal neural stochasticity were able to acquire hierarchical representations, including higher-order representation, and exhibited good behavioral and cognitive flexibility. When the neural stochasticity was high in the learning process, top-down generation using higher-order representation (i.e., generative hierarchy) was impaired, although the flexibility did not differ from that of the normal settings. On the other hand, when the neural stochasticity was low in the learning process, the networks demonstrated reduced flexibility and abnormal hierarchical representation. However, this altered acquisition of higher-order representation and flexibility was ameliorated by increasing the level of noises in external stimuli.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpsyt-14-1080668-g0011.tif"/>
</fig>
<sec>
<title>4.1. Reduced flexibility and pathology of ASD</title>
<p>The normal and weak (high stochasticity) meta-prior conditions did not show reduced flexibility regardless of external environment condition. In contrast, the networks with strong meta-prior (low stochasticity) condition showed less behavioral and cognitive flexibility in the stable environment. On the other hand, the noisy environment improved the behavioral flexibility under strong meta-prior condition.</p>
<p>This result of reduced flexibility under strong meta-prior condition is consistent with the finding reported by Wirkuttis and Tani (<xref ref-type="bibr" rid="B59">59</xref>) that the PV-RNN with higher meta-prior had stronger intention and less flexible interaction with others because the top-down prior belief had more effects on generated behaviors than bottom-up sensory signals. In addition to reproducing this finding, we found that behavioral flexibility was improved by increasing stimulus noise under the strong meta-prior condition. From an information theory view of PV-RNN, the network with strong meta-prior condition underestimates the reconstruction errors and overestimates the regularization errors in the loss function compared to the other conditions. The reason why the flexibility improved under noisy environment was that increasing stimulus noise led to an increase of reconstruction errors, resulting in amelioration of the balance between the reconstruction and regularization errors. Therefore, the combination of appropriate meta-prior and noise levels in the environment seems to be important for the flexible behavior. An alternative explanation for this amelioration effect is that increasing stimulus noise worked similar to the machine learning techniques to improve generalization capability such as augmentation (<xref ref-type="bibr" rid="B79">79</xref>) or denoising (<xref ref-type="bibr" rid="B80">80</xref>).</p>
<p>The findings that low stochasticity dynamics was related to reduced flexibility may provide new insights into the hypothesis that neural noise is involved in the formation of ASD. Previous theoretical studies have proposed conflicting hypotheses: one is that there is more noise in the brain of people with ASD (<xref ref-type="bibr" rid="B72">72</xref>) and another is that there is low noise in the brain of people with ASD (<xref ref-type="bibr" rid="B71">71</xref>). Our results support the hypothesis that low neural noise is associated with ASD. Furthermore, these results are consistent with the experimental findings using magnetic resonance imaging and electroencephalography that lower neural noise was associated with worse task performance in a typical developmental group (<xref ref-type="bibr" rid="B81">81</xref>, <xref ref-type="bibr" rid="B82">82</xref>) and that lower neural noise was observed in ASD (<xref ref-type="bibr" rid="B73">73</xref>, <xref ref-type="bibr" rid="B74">74</xref>). Moreover, in the <xref ref-type="supplementary-material" rid="SM1">Supplementary Results 2.3</xref>, we reported that some networks with low stochasticity dynamics generated sequences similar to restrictive and repeated behaviors. However, some studies have reported high neural noise in ASD (<xref ref-type="bibr" rid="B6">6</xref>, <xref ref-type="bibr" rid="B7">7</xref>). Indeed, much noise intuitively seemed to lead to unstable and chaotic predictions and reduced task performance. The reason why the network with higher stochastic dynamics did not show inflexibility is that the flexibility task required only one-step-ahead prediction. For this reason, even if the disturbance of network dynamics by neural noise occurred, the network could sufficiently modify the predictions using observations. 
If the networks were required to predict further ahead than one step, the noise would accumulate in the neural network, and the performance of the task would be likely to deteriorate (<xref ref-type="bibr" rid="B55">55</xref>). It remains unclear why the higher neural noise induced better task performance in typical development but more severe symptoms in ASD, and refining experimental settings may contribute to solving this question.</p>
<p>In addition, the amelioration effect of environmental noise for flexibility was a novel finding of the current study. Indeed, although the effects of environment in developmental learning in ASD have been clinically well known (<xref ref-type="bibr" rid="B16">16</xref>, <xref ref-type="bibr" rid="B17">17</xref>), there are few studies directly testing this topic from the computational aspect. For example, some studies discussed the environmental effects on mental disorders using computational theories only at the conceptual level (<xref ref-type="bibr" rid="B30">30</xref>, <xref ref-type="bibr" rid="B31">31</xref>, <xref ref-type="bibr" rid="B83">83</xref>). Our study demonstrated empirically that if the networks possessed risks for reduced flexibility, such as low stochasticity in neural dynamics, they could be ameliorated by increasing ambiguity in the external environment. On the other hand, clinical findings suggest that structuring the environment and removing ambiguity in stimulus were effective for people with ASD (<xref ref-type="bibr" rid="B16">16</xref>, <xref ref-type="bibr" rid="B17">17</xref>). Although these findings may seem contradictory, our findings do not necessarily conflict with clinical findings, because many exposure methods for anxiety disorders have suggested that increasing prediction errors was important for correcting mislearning (<xref ref-type="bibr" rid="B84">84</xref>). Given the hypothesis that individuals with ASD have a higher aversion to prediction errors, it is possible that these interventions, such as structuring the environment, do not contribute to learning, but only to emotional stabilization.</p>
</sec>
<sec>
<title>4.2. Acquisition of hierarchical and probabilistic representation</title>
<p>The current study demonstrated that stochasticity of neural dynamics (controlled by the level of meta-prior) was indeed associated with acquisition of the internal representations reflecting hierarchical and probabilistic environment structures. The neural network model under normal meta-prior condition could acquire the hierarchical and probabilistic representations in terms of passive inference (cognitive flexibility) and active generation (generative hierarchy). However, under weak meta-prior condition, there was an anomaly in the active decoding process rather than in the passive encoding process; namely, cognitive flexibility showed good performance, although the generative hierarchy in the higher layer showed poor scores. This may be because the learning of the prior distribution (used in the LST) did not progress as well as the posterior distribution (used in the test phase). As a property of PV-RNN, the posterior distribution learns more easily and quickly than the prior distribution because the posterior distribution can use adaptive variables <italic>a</italic><sub><italic>t</italic></sub> in addition to neural dynamics units <italic>d</italic><sub><italic>t</italic></sub>. Furthermore, under weak meta-prior condition, excessive neural noise might interfere with information transmission to the higher layer from the lower layer and inhibit learning in the higher layer.</p>
<p>These results of simulation experiments can provide several insights for understanding the altered uncertainty estimation process assumed in ASD (<xref ref-type="bibr" rid="B29">29</xref>&#x02013;<xref ref-type="bibr" rid="B32">32</xref>). The current experiment demonstrated that the mean unit in PV-RNN encoded higher-order probability (transition bias) in data sequences, and the variance units in the lower-order layer encoded sensorimotor noises (signal noise). This is not perfectly consistent with the predictive coding theory suggesting that the human brain represents uncertainty in the environment using the precision (inverse of variance) of Gaussian distribution (<xref ref-type="bibr" rid="B24">24</xref>). This inconsistency may be simply because of the fact that the higher-order hidden variables in the environment followed Bernoulli distribution and therefore neural networks did not need to use the variance units. However, there is still a possibility that the role of precision, as indicated by predictive coding theory, may be too normative. In fact, in a hierarchical neural network, estimation of precision can have broader effects beyond the weighting of information values assumed in the conceptual level of predictive coding theory such as disturbing neural dynamics observed in the weak meta-prior condition. Investigations using neural network implementation of predictive coding theory can contribute to further understanding of the roles of precision estimation and its alteration in developmental disorders.</p>
<p>The hierarchical Bayesian model has been treated as a very general and rational cognitive model for performing numerous tasks (<xref ref-type="bibr" rid="B26">26</xref>, <xref ref-type="bibr" rid="B27">27</xref>). However, a hierarchical Bayesian model has been constructed by researchers a priori, and acquisition of representations reflecting the hierarchical Bayesian model has not been sufficiently addressed in cognitive neuroscience (<xref ref-type="bibr" rid="B33">33</xref>&#x02013;<xref ref-type="bibr" rid="B35">35</xref>). In the area of machine learning and neurorobotics, although some studies focused on acquisition of hierarchical or probabilistic representations, these have some limitations. For example, some studies focusing on hierarchical representations did not assume sequential data because of using a variational auto-encoder (<xref ref-type="bibr" rid="B78">78</xref>, <xref ref-type="bibr" rid="B85">85</xref>, <xref ref-type="bibr" rid="B86">86</xref>) and did not use stochastic dynamics in RNN (<xref ref-type="bibr" rid="B42">42</xref>). Although there is research investigating internal representation using PV-RNN, the previous studies used lower-order probability (e.g., target state and signal noise) and did not consider explicitly higher-order probabilistic variables such as transition bias (<xref ref-type="bibr" rid="B55">55</xref>&#x02013;<xref ref-type="bibr" rid="B60">60</xref>). The current result showing that artificial neural network models can acquire hierarchical Bayesian representations in a self-organizing manner is a crucial step to understanding underlying mechanisms for embedding the hierarchical Bayesian model into the brain system through developmental learning. 
Furthermore, our proposed research framework has applicability to a wide range of behavioral and cognitive phenotypes if its latent cognitive processes can be described using the Bayesian method (<xref ref-type="bibr" rid="B25">25</xref>, <xref ref-type="bibr" rid="B26">26</xref>), for example, signal detection theory and drift-diffusion model in decision-making tasks.</p>
</sec>
<sec>
<title>4.3. Relationships between multiple developmental factors</title>
<p>It was observed that changes in the acquisition of hierarchical Bayesian representation did not necessarily induce inflexibility. Indeed, additional analysis demonstrated that the positive association between hierarchical representations (generative hierarchy) and behavioral flexibility was found only under strong meta-prior condition (<xref ref-type="supplementary-material" rid="SM1">Supplementary Results 2.1</xref>). However, under weak meta-prior condition, the behavioral and cognitive flexibility was comparable to normal meta-prior condition, but generative hierarchy in the higher layer was significantly lower.</p>
<p>This coexistence of good task performance and poor representation in the weak meta-prior condition is remarkable because the observable phenomena in performing tasks were equivalent while the underlying mechanism behind performing tasks was different between normal and weak meta-prior conditions. This finding is conceptualized as the issue of &#x0201C;equifinality&#x0201D; and &#x0201C;multifinality,&#x0201D; which are fundamental difficulties in understanding neurodevelopmental disorders (<xref ref-type="bibr" rid="B87">87</xref>). In particular, multiple factors leading to one developmental disorder exist (equifinality, for example, genetically distinct individuals may develop common social dysfunction), and conversely, the same cause may result in diverse and heterogeneous phenotypes (multifinality, for example, a particular gene can be associated with distinct psychiatric disorders).</p>
<p>From the aspect of equifinality, possible pathways other than the manipulations of meta-prior and signal noise leading to inflexibility were investigated (<xref ref-type="supplementary-material" rid="SM1">Supplementary Results 2.2</xref>&#x02013;<xref ref-type="supplementary-material" rid="SM1">2.4</xref>). Specifically, the effects of different learning lengths were tested, motivated by the theoretical hypothesis that autistic characteristics in perception and cognition can be understood as &#x0201C;over-learning/over-fitting&#x0201D; (<xref ref-type="bibr" rid="B88">88</xref>). This additional experiment showed that the excessive learning length led to reductions in behavioral and cognitive flexibility (<xref ref-type="supplementary-material" rid="SM1">Supplementary Results 2.2</xref>, <xref ref-type="supplementary-material" rid="SM1">2.3</xref>). Furthermore, from the aspect of episodic psychiatric disorders, even after normal development of hierarchical representation, altered flexibility can occur. To simulate this situation, we confirmed that hyper- and hypo-prior distributions (<xref ref-type="bibr" rid="B89">89</xref>) in the test phase can also induce inflexibility (<xref ref-type="supplementary-material" rid="SM1">Supplementary Results 2.4</xref>). Therefore, the reduced flexibility was caused both by alterations in the long-term developmental learning process (alterations of meta-prior, signal noise, and the learning length) and by abnormal prior influences in the short-term test phase. These simulations may contribute to constructing a unified explanation of inflexibility, which is a transdiagnostic phenotype observed in not only developmental disorders but also episodic mental disorders such as depression and schizophrenia.</p>
<p>It is also important that the simulations under strong meta-prior condition suggested that our proposed method can provide computational simulation frameworks for investigating multifinal phenomena, including treatment effects. Namely, the differences in external environmental stimulus induced the differences in generative hierarchy and flexibility under strong meta-prior condition, although settings of the individual network between environmental conditions were the same.</p>
<p>Equifinality and multifinality are widespread not only in developmental disorders but also in mental disorders and threaten the validity of the current diagnosis classification system (<xref ref-type="bibr" rid="B90">90</xref>). Resolving this problem may lead to the development of an effective intervention strategy that considers the individual differences (precision psychiatry), and research that addresses this equifinal and multifinal nature is desirable. We are convinced that the proposed research framework contributes to understanding the multiple pathways leading to mental disorders.</p>
</sec>
<sec>
<title>4.4. Limitation and future directions</title>
<p>The simulation experiments had some limitations, which should be investigated in future research. First, the proposed framework is limited to <italic>in silico</italic> simulation, and the findings obtained in the proposed framework are exploratory hypotheses. Therefore, the findings of the <italic>in silico</italic> simulation should be verified with real data. For example, findings in the current experiments suggest that flexibility and/or hierarchical representation are impaired under strong and weak meta-prior conditions, suggesting that ASD may be a heterogeneous disorder. Given that flexibility was significantly reduced and hierarchical representation learning was impaired under the strong meta-prior condition, the neural dynamics with severe ASD may be low stochastic (highly deterministic). Conversely, mild ASD individuals, whose performance in the flexibility task is close to the typical development group and who do not explicitly exhibit restricted and repetitive behaviors, may have high stochasticity in neural dynamics and may have problems with top-down predictions. These exploratory hypotheses could be verified using real data to refine the proposed framework.</p>
<p>The proposed framework has the potential to be extended for more diverse experimental settings beyond the simulations conducted in this study. For example, as mentioned above, cognitive-behavioral tasks other than the flexibility task are also applicable to the proposed framework. Furthermore, the direct effects of altered biological features other than meta-prior must be investigated in our framework, as prior works on ASD and schizophrenia using neural network models utilized various virtual lesions to the neural system (<xref ref-type="bibr" rid="B51">51</xref>&#x02013;<xref ref-type="bibr" rid="B54">54</xref>). Moreover, the sequential data also has room for improvement. In the current study, the sequential data was two-dimensional and insufficient to reflect the real environment and sensorimotor signals. To overcome this problem, using neurorobotics experiments in which humanoid robots are used to interact with the external world to collect sensorimotor (e.g., vision and proprioception) signals would be useful (<xref ref-type="bibr" rid="B51">51</xref>, <xref ref-type="bibr" rid="B52">52</xref>, <xref ref-type="bibr" rid="B54">54</xref>, <xref ref-type="bibr" rid="B91">91</xref>). Although the simulation experiments were still simple and were not sufficient to describe the interactions between multiple factors, these extended experiments based on the proposed framework will contribute to a deeper understanding of complex developmental processes.</p>
<p>In the simulation experiments, there were several technical issues. For example, the meta-prior, which was manipulated in experiments, was used as the hyper-parameter, which controls the stochasticity in neural dynamics. The relationship between meta-prior and stochasticity was confirmed in prior research (<xref ref-type="bibr" rid="B55">55</xref>) and in our simulations (<xref ref-type="supplementary-material" rid="SM1">Supplementary Figures S1</xref>, <xref ref-type="supplementary-material" rid="SM1">S2</xref>). However, meta-prior affects neural dynamics through mediating loss function rather than directly. Therefore, the process that meta-prior affected neural dynamics was more complex, and the roles of manipulating meta-prior required more careful discussion.</p>
<p>It was also unclear how to decide the meta-prior in the test phase, which affected the strength of prior belief. These values were decided by the experimenter&#x00027;s trial and error in our study. The experimental results suggest that appropriate prior strength is required for good performance in both behavioral and cognitive flexibility (<xref ref-type="supplementary-material" rid="SM1">Supplementary Results 2.4</xref>). This is probably because it is better to ignore the prior information and use a copy of the last observations to enhance only behavioral flexibility. On the other hand, when inferring latent states, such as cognitive flexibility, both higher-order prior knowledge and observation are important to avoid adapting to accidental changes rather than true context switching. Therefore, there was a trade-off between behavioral and cognitive flexibility, and the system controlling exact prior strength may exist in humans and animals. Mathematically, this calculation may be automatically executed using Bayesian optimization or the prediction errors in the previous time step, such as deep active inference (<xref ref-type="bibr" rid="B39">39</xref>).</p>
<p>Furthermore, the variances of metrics, particularly cognitive flexibility and generative hierarchy, were big even in the same condition. The unstable results of learning were reported in the deep learning domain and often observed in the representation learning (<xref ref-type="bibr" rid="B92">92</xref>). Reducing these high variances is a new and important topic that needs to be discussed in both artificial neural networks and cognitive neuroscience domains.</p>
</sec>
<sec>
<title>4.5. Conclusion</title>
<p>In this study, to understand the relationships among hierarchical Bayesian representation, neural dynamics, the environment, and behavioral phenotype in developmental disorders, we proposed a new framework combining PV-RNN and the environment with hierarchical generative process. Through the experiments using this framework, we investigated whether inflexibility resulted from various factors (e.g., stochasticity in neural dynamics and the level of noise included in the environmental stimulus) with a focus on hierarchical Bayesian representation learning. As a result, we found that the networks with normal stochastic dynamics acquired hierarchical and probabilistic representation reflecting the environmental structures and adapted flexibly to the new environment. Furthermore, we found that even if the networks possessed risks for reduced flexibility, such as low stochasticity in neural dynamics, they could be ameliorated by increasing ambiguity in the external environment. The networks with high stochastic dynamics had the hierarchical representations in terms of passive inference but did not have sufficient hierarchical and disentangled representations in terms of active generation. Therefore, our proposed method is useful for understanding atypical development such as reduced flexibility observed in ASD by bridging multiple factors including the neural dynamics, acquisitions of hierarchical representation, and the external environment.</p></sec></sec>
<sec sec-type="data-availability" id="s5">
<title>Data availability statement</title>
<p>The data presented in the study are deposited in the GitHub repository, which can be accessed via <ext-link ext-link-type="uri" xlink:href="https://github.com/ncnp-cpsy/SimulatingDevelopmentalDiversity.git">https://github.com/ncnp-cpsy/SimulatingDevelopmentalDiversity.git</ext-link>.</p></sec>
<sec sec-type="author-contributions" id="s6">
<title>Author contributions</title>
<p>TS, JT, and YY designed the experiment and analysis. TS performed the experiment and analyzed the data. AA developed the model and advised on the programming of the experiment. TS, AA, JT, MH, TH, and YY wrote the manuscript. All authors contributed to the article and approved the submitted version.</p></sec>
</body>
<back>
<sec sec-type="funding-information" id="s7">
<title>Funding</title>
<p>This work was partly supported by JSPS KAKENHI (JP20H00001 and JP20H00625), JST CREST (JPMJCR16E2 and JPMJCR21P4), JST Moonshot R&#x00026;D (JPMJMS2031), JST SPRING (JPMJSP2120), and Intramural Research Grant (3-9 and 4-6) for Neurological and Psychiatric Disorders of NCNP.</p>
</sec>

<sec sec-type="COI-statement" id="conf1">
<title>Conflict of interest</title>
<p>AA was employed by Geobotica. The remaining authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s8">
<title>Publisher&#x00027;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>

<sec sec-type="supplementary-material" id="s9">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fpsyt.2023.1080668/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fpsyt.2023.1080668/full#supplementary-material</ext-link></p>
<supplementary-material xlink:href="Data_Sheet_1.PDF" id="SM1" mimetype="application/pdf" xmlns:xlink="http://www.w3.org/1999/xlink"/>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<label>1.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Baron-Cohen</surname> <given-names>S</given-names></name> <name><surname>Leslie</surname> <given-names>AM</given-names></name> <name><surname>Frith</surname> <given-names>U</given-names></name></person-group>. <article-title>Does the autistic child have a &#x0201C;theory of mind&#x0201D;?</article-title> <source>Cognition</source>. (<year>1985</year>) <volume>21</volume>:<fpage>37</fpage>&#x02013;<lpage>46</lpage>. <pub-id pub-id-type="doi">10.1016/0010-0277(85)90022-8</pub-id><pub-id pub-id-type="pmid">2934210</pub-id></citation></ref>
<ref id="B2">
<label>2.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Dajani</surname> <given-names>DR</given-names></name> <name><surname>Uddin</surname> <given-names>LQ</given-names></name></person-group>. <article-title>Demystifying cognitive flexibility: implications for clinical and developmental neuroscience</article-title>. <source>Trends Neurosci</source>. (<year>2015</year>) <volume>38</volume>:<fpage>571</fpage>&#x02013;<lpage>8</lpage>. <pub-id pub-id-type="doi">10.1016/j.tins.2015.07.003</pub-id><pub-id pub-id-type="pmid">26343956</pub-id></citation></ref>
<ref id="B3">
<label>3.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Haswell</surname> <given-names>CC</given-names></name> <name><surname>Izawa</surname> <given-names>J</given-names></name> <name><surname>Dowell</surname> <given-names>LR</given-names></name> <name><surname>Mostofsky</surname> <given-names>SH</given-names></name> <name><surname>Shadmehr</surname> <given-names>R</given-names></name></person-group>. <article-title>Representation of internal models of action in the autistic brain</article-title>. <source>Nat Neurosci</source>. (<year>2009</year>) <volume>12</volume>:<fpage>970</fpage>&#x02013;<lpage>2</lpage>. <pub-id pub-id-type="doi">10.1038/nn.2356</pub-id><pub-id pub-id-type="pmid">19578379</pub-id></citation></ref>
<ref id="B4">
<label>4.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Marco</surname> <given-names>EJ</given-names></name> <name><surname>Hinkley</surname> <given-names>LB</given-names></name> <name><surname>Hill</surname> <given-names>SS</given-names></name> <name><surname>Nagarajan</surname> <given-names>SS</given-names></name></person-group>. <article-title>Sensory processing in autism: a review of neurophysiologic findings</article-title>. <source>Pediatr Res</source>. (<year>2011</year>) <volume>69</volume>:<fpage>48</fpage>&#x02013;<lpage>54</lpage>. <pub-id pub-id-type="doi">10.1203/PDR.0b013e3182130c54</pub-id><pub-id pub-id-type="pmid">21289533</pub-id></citation></ref>
<ref id="B5">
<label>5.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lee</surname> <given-names>E</given-names></name> <name><surname>Lee</surname> <given-names>J</given-names></name> <name><surname>Kim</surname> <given-names>E</given-names></name></person-group>. <article-title>Excitation/inhibition imbalance in animal models of autism spectrum disorders</article-title>. <source>Biol Psychiatry</source>. (<year>2017</year>) <volume>81</volume>:<fpage>838</fpage>&#x02013;<lpage>47</lpage>. <pub-id pub-id-type="doi">10.1016/j.biopsych.2016.05.011</pub-id><pub-id pub-id-type="pmid">27450033</pub-id></citation></ref>
<ref id="B6">
<label>6.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Dinstein</surname> <given-names>I</given-names></name> <name><surname>Heeger</surname> <given-names>DJ</given-names></name> <name><surname>Lorenzi</surname> <given-names>L</given-names></name> <name><surname>Minshew</surname> <given-names>NJ</given-names></name> <name><surname>Malach</surname> <given-names>R</given-names></name> <name><surname>Behrmann</surname> <given-names>M</given-names></name></person-group>. <article-title>Unreliable evoked responses in autism</article-title>. <source>Neuron</source>. (<year>2012</year>) <volume>75</volume>:<fpage>981</fpage>&#x02013;<lpage>91</lpage>. <pub-id pub-id-type="doi">10.1016/j.neuron.2012.07.026</pub-id><pub-id pub-id-type="pmid">22998867</pub-id></citation></ref>
<ref id="B7">
<label>7.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Milne</surname> <given-names>E</given-names></name></person-group>. <article-title>Increased intra-participant variability in children with autistic spectrum disorders: evidence from single-trial analysis of evoked EEG</article-title>. <source>Front Psychol</source>. (<year>2011</year>) <volume>2</volume>:<fpage>51</fpage>. <pub-id pub-id-type="doi">10.3389/fpsyg.2011.00051</pub-id><pub-id pub-id-type="pmid">21716921</pub-id></citation></ref>
<ref id="B8">
<label>8.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ippolito</surname> <given-names>G</given-names></name> <name><surname>Bertaccini</surname> <given-names>R</given-names></name> <name><surname>Tarasi</surname> <given-names>L</given-names></name> <name><surname>Di Gregorio</surname> <given-names>F</given-names></name> <name><surname>Trajkovic</surname> <given-names>J</given-names></name> <name><surname>Battaglia</surname> <given-names>S</given-names></name> <etal/></person-group>. <article-title>The role of alpha oscillations among the main neuropsychiatric disorders in the adult and developing human brain: evidence from the last 10 years of research</article-title>. <source>Biomedicines</source>. (<year>2022</year>) <volume>10</volume>:<fpage>3189</fpage>. <pub-id pub-id-type="doi">10.3390/biomedicines10123189</pub-id><pub-id pub-id-type="pmid">36551945</pub-id></citation></ref>
<ref id="B9">
<label>9.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Battaglia</surname> <given-names>S</given-names></name> <name><surname>Thayer</surname> <given-names>JF</given-names></name></person-group>. <article-title>Functional interplay between central and autonomic nervous systems in human fear conditioning</article-title>. <source>Trends Neurosci</source>. (<year>2022</year>) <volume>45</volume>:<fpage>504</fpage>&#x02013;<lpage>6</lpage>. <pub-id pub-id-type="doi">10.1016/j.tins.2022.04.003</pub-id><pub-id pub-id-type="pmid">35577621</pub-id></citation></ref>
<ref id="B10">
<label>10.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Burstein</surname> <given-names>O</given-names></name> <name><surname>Geva</surname> <given-names>R</given-names></name></person-group>. <article-title>The brainstem-informed autism framework: early life neurobehavioral markers</article-title>. <source>Front Integr Neurosci</source>. (<year>2021</year>) <volume>15</volume>:<fpage>759614</fpage>. <pub-id pub-id-type="doi">10.3389/fnint.2021.759614</pub-id><pub-id pub-id-type="pmid">34858145</pub-id></citation></ref>
<ref id="B11">
<label>11.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Huang</surname> <given-names>Y</given-names></name> <name><surname>Vangel</surname> <given-names>M</given-names></name> <name><surname>Chen</surname> <given-names>H</given-names></name> <name><surname>Eshel</surname> <given-names>M</given-names></name> <name><surname>Cheng</surname> <given-names>M</given-names></name> <name><surname>Lu</surname> <given-names>T</given-names></name> <etal/></person-group>. <article-title>The impaired subcortical pathway from superior colliculus to the amygdala in boys with autism spectrum disorder</article-title>. <source>Front Integr Neurosci</source>. (<year>2022</year>) <volume>16</volume>:<fpage>666439</fpage>. <pub-id pub-id-type="doi">10.3389/fnint.2022.666439</pub-id><pub-id pub-id-type="pmid">35784498</pub-id></citation></ref>
<ref id="B12">
<label>12.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Jure</surname> <given-names>R</given-names></name></person-group>. <article-title>Autism pathogenesis: the superior colliculus</article-title>. <source>Front Neurosci</source>. (<year>2019</year>) <volume>12</volume>:<fpage>1029</fpage>. <pub-id pub-id-type="doi">10.3389/fnins.2018.01029</pub-id><pub-id pub-id-type="pmid">30686990</pub-id></citation></ref>
<ref id="B13">
<label>13.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Jure</surname> <given-names>R</given-names></name></person-group>. <article-title>The &#x0201C;Primitive brain dysfunction&#x0201D; theory of autism: the superior colliculus role</article-title>. <source>Front Integr Neurosci</source>. (<year>2022</year>) <volume>16</volume>:<fpage>797391</fpage>. <pub-id pub-id-type="doi">10.3389/fnint.2022.797391</pub-id><pub-id pub-id-type="pmid">35712344</pub-id></citation></ref>
<ref id="B14">
<label>14.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kushki</surname> <given-names>A</given-names></name> <name><surname>Drumm</surname> <given-names>E</given-names></name> <name><surname>Pla Mobarak</surname> <given-names>M</given-names></name> <name><surname>Tanel</surname> <given-names>N</given-names></name> <name><surname>Dupuis</surname> <given-names>A</given-names></name> <name><surname>Chau</surname> <given-names>T</given-names></name> <etal/></person-group>. <article-title>Investigating the autonomic nervous system response to anxiety in children with autism spectrum disorders</article-title>. <source>PLoS ONE</source>. (<year>2013</year>) <volume>8</volume>:<fpage>e59730</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pone.0059730</pub-id><pub-id pub-id-type="pmid">23577072</pub-id></citation></ref>
<ref id="B15">
<label>15.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>London</surname> <given-names>E</given-names></name> <name><surname>Gaspar</surname> <given-names>P</given-names></name> <name><surname>Puelles</surname> <given-names>L</given-names></name> <name><surname>Jure</surname> <given-names>RE</given-names></name> <name><surname>Kulesza</surname> <given-names>RJ</given-names></name></person-group>. <article-title>The role of the brainstem and cerebellum in autism and related neurodevelopmental disorders (DD)</article-title>. <source>Front Integr Neurosci</source>. (<year>2022</year>) <volume>16</volume>:<fpage>957003</fpage>. <pub-id pub-id-type="doi">10.3389/978-2-83250-197-9</pub-id><pub-id pub-id-type="pmid">36118119</pub-id></citation></ref>
<ref id="B16">
<label>16.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Virues-Ortega</surname> <given-names>J</given-names></name> <name><surname>Julio</surname> <given-names>FM</given-names></name> <name><surname>Pastor-Barriuso</surname> <given-names>R</given-names></name></person-group>. <article-title>The TEACCH program for children and adults with autism: a meta-analysis of intervention studies</article-title>. <source>Clin Psychol Rev</source>. (<year>2013</year>) <volume>33</volume>:<fpage>940</fpage>&#x02013;<lpage>53</lpage>. <pub-id pub-id-type="doi">10.1016/j.cpr.2013.07.005</pub-id><pub-id pub-id-type="pmid">23988454</pub-id></citation></ref>
<ref id="B17">
<label>17.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wong</surname> <given-names>C</given-names></name> <name><surname>Odom</surname> <given-names>SL</given-names></name> <name><surname>Hume</surname> <given-names>KA</given-names></name> <name><surname>Cox</surname> <given-names>AW</given-names></name> <name><surname>Fettig</surname> <given-names>A</given-names></name> <name><surname>Kucharczyk</surname> <given-names>S</given-names></name> <etal/></person-group>. <article-title>Evidence-based practices for children, youth, and young adults with autism spectrum disorder: a comprehensive review</article-title>. <source>J Autism Dev Disord</source>. (<year>2015</year>) <volume>45</volume>:<fpage>1951</fpage>&#x02013;<lpage>66</lpage>. <pub-id pub-id-type="doi">10.1007/s10803-014-2351-z</pub-id><pub-id pub-id-type="pmid">25578338</pub-id></citation></ref>
<ref id="B18">
<label>18.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Montague</surname> <given-names>PR</given-names></name> <name><surname>Dolan</surname> <given-names>RJ</given-names></name> <name><surname>Friston</surname> <given-names>KJ</given-names></name> <name><surname>Dayan</surname> <given-names>P</given-names></name></person-group>. <article-title>Computational psychiatry</article-title>. <source>Trends Cogn Sci</source>. (<year>2012</year>) <volume>16</volume>:<fpage>72</fpage>&#x02013;<lpage>80</lpage>. <pub-id pub-id-type="doi">10.1016/j.tics.2011.11.018</pub-id><pub-id pub-id-type="pmid">22177032</pub-id></citation></ref>
<ref id="B19">
<label>19.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Friston</surname> <given-names>KJ</given-names></name> <name><surname>Stephan</surname> <given-names>KE</given-names></name> <name><surname>Montague</surname> <given-names>R</given-names></name> <name><surname>Dolan</surname> <given-names>RJ</given-names></name></person-group>. <article-title>Computational psychiatry: the brain as a phantastic organ</article-title>. <source>Lancet Psychiatry</source>. (<year>2014</year>) <volume>1</volume>:<fpage>148</fpage>&#x02013;<lpage>58</lpage>. <pub-id pub-id-type="doi">10.1016/S2215-0366(14)70275-5</pub-id><pub-id pub-id-type="pmid">26360579</pub-id></citation></ref>
<ref id="B20">
<label>20.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Maia</surname> <given-names>TV</given-names></name> <name><surname>Frank</surname> <given-names>MJ</given-names></name></person-group>. <article-title>From reinforcement learning models to psychiatric and neurological disorders</article-title>. <source>Nat Neurosci</source>. (<year>2011</year>) <volume>14</volume>:<fpage>154</fpage>&#x02013;<lpage>62</lpage>. <pub-id pub-id-type="doi">10.1038/nn.2723</pub-id><pub-id pub-id-type="pmid">21270784</pub-id></citation></ref>
<ref id="B21">
<label>21.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>XJ</given-names></name> <name><surname>Krystal</surname> <given-names>JH</given-names></name></person-group>. <article-title>Computational psychiatry</article-title>. <source>Neuron</source>. (<year>2014</year>) <volume>84</volume>:<fpage>638</fpage>&#x02013;<lpage>54</lpage>. <pub-id pub-id-type="doi">10.1016/j.neuron.2014.10.018</pub-id><pub-id pub-id-type="pmid">25442941</pub-id></citation></ref>
<ref id="B22">
<label>22.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Knill</surname> <given-names>DC</given-names></name> <name><surname>Pouget</surname> <given-names>A</given-names></name></person-group>. <article-title>The Bayesian brain: the role of uncertainty in neural coding and computation</article-title>. <source>Trends Neurosci</source>. (<year>2004</year>) <volume>27</volume>:<fpage>712</fpage>&#x02013;<lpage>9</lpage>. <pub-id pub-id-type="doi">10.1016/j.tins.2004.10.007</pub-id><pub-id pub-id-type="pmid">15541511</pub-id></citation></ref>
<ref id="B23">
<label>23.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rao</surname> <given-names>RP</given-names></name> <name><surname>Ballard</surname> <given-names>DH</given-names></name></person-group>. <article-title>Predictive coding in the visual cortex: a functional interpretation of some extra-classical receptive-field effects</article-title>. <source>Nat Neurosci</source>. (<year>1999</year>) <volume>2</volume>:<fpage>79</fpage>&#x02013;<lpage>87</lpage>. <pub-id pub-id-type="doi">10.1038/4580</pub-id><pub-id pub-id-type="pmid">10195184</pub-id></citation></ref>
<ref id="B24">
<label>24.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Friston</surname> <given-names>KJ</given-names></name> <name><surname>Kiebel</surname> <given-names>S</given-names></name></person-group>. <article-title>Predictive coding under the free-energy principle</article-title>. <source>Philos Trans R Soc Lond B Biol Sci</source>. (<year>2009</year>) <volume>364</volume>:<fpage>1211</fpage>&#x02013;<lpage>21</lpage>. <pub-id pub-id-type="doi">10.1098/rstb.2008.0300</pub-id><pub-id pub-id-type="pmid">19528002</pub-id></citation></ref>
<ref id="B25">
<label>25.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ahn</surname> <given-names>WY</given-names></name> <name><surname>Haines</surname> <given-names>N</given-names></name> <name><surname>Zhang</surname> <given-names>L</given-names></name></person-group>. <article-title>Revealing neurocomputational mechanisms of reinforcement learning and decision-making with the hBayesDM package</article-title>. <source>Comput Psychiatry</source>. (<year>2017</year>) <volume>1</volume>:<fpage>24</fpage>. <pub-id pub-id-type="doi">10.1162/CPSY_a_00002</pub-id><pub-id pub-id-type="pmid">29601060</pub-id></citation></ref>
<ref id="B26">
<label>26.</label>
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Lee</surname> <given-names>MD</given-names></name> <name><surname>Wagenmakers</surname> <given-names>EJ</given-names></name></person-group>. <source>Bayesian Cognitive Modeling: A Practical Course</source>. <publisher-loc>Cambridge</publisher-loc>: <publisher-name>Cambridge University Press</publisher-name> (<year>2014</year>).</citation>
</ref>
<ref id="B27">
<label>27.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Tenenbaum</surname> <given-names>JB</given-names></name> <name><surname>Kemp</surname> <given-names>C</given-names></name> <name><surname>Griffiths</surname> <given-names>TL</given-names></name> <name><surname>Goodman</surname> <given-names>ND</given-names></name></person-group>. <article-title>How to grow a mind: statistics, structure, and abstraction</article-title>. <source>Science</source>. (<year>2011</year>) <volume>331</volume>:<fpage>1279</fpage>&#x02013;<lpage>1285</lpage>. <pub-id pub-id-type="doi">10.1126/science.1192788</pub-id><pub-id pub-id-type="pmid">21393536</pub-id></citation></ref>
<ref id="B28">
<label>28.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Friston</surname> <given-names>KJ</given-names></name></person-group>. <article-title>The free-energy principle: a unified brain theory?</article-title> <source>Nat Rev Neurosci</source>. (<year>2010</year>) <volume>11</volume>:<fpage>127</fpage>&#x02013;<lpage>38</lpage>. <pub-id pub-id-type="doi">10.1038/nrn2787</pub-id><pub-id pub-id-type="pmid">20068583</pub-id></citation></ref>
<ref id="B29">
<label>29.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Van de Cruys</surname> <given-names>S</given-names></name> <name><surname>Evers</surname> <given-names>K</given-names></name> <name><surname>Van der Hallen</surname> <given-names>R</given-names></name> <name><surname>Van Eylen</surname> <given-names>L</given-names></name> <name><surname>Boets</surname> <given-names>B</given-names></name> <name><surname>De-Wit</surname> <given-names>L</given-names></name> <etal/></person-group>. <article-title>Precise minds in uncertain worlds: predictive coding in autism</article-title>. <source>Psychol Rev</source>. (<year>2014</year>) <volume>121</volume>:<fpage>649</fpage>. <pub-id pub-id-type="doi">10.1037/a0037665</pub-id><pub-id pub-id-type="pmid">25347312</pub-id></citation></ref>
<ref id="B30">
<label>30.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Van de Cruys</surname> <given-names>S</given-names></name> <name><surname>Van der Hallen</surname> <given-names>R</given-names></name> <name><surname>Wagemans</surname> <given-names>J</given-names></name></person-group>. <article-title>Disentangling signal and noise in autism spectrum disorder</article-title>. <source>Brain Cogn</source>. (<year>2017</year>) <volume>112</volume>:<fpage>78</fpage>&#x02013;<lpage>83</lpage>. <pub-id pub-id-type="doi">10.1016/j.bandc.2016.08.004</pub-id><pub-id pub-id-type="pmid">27651171</pub-id></citation></ref>
<ref id="B31">
<label>31.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Haker</surname> <given-names>H</given-names></name> <name><surname>Schneebeli</surname> <given-names>M</given-names></name> <name><surname>Stephan</surname> <given-names>KE</given-names></name></person-group>. <article-title>Can Bayesian theories of autism spectrum disorder help improve clinical practice?</article-title> <source>Front Psychiatry</source>. (<year>2016</year>) <volume>7</volume>:<fpage>107</fpage>. <pub-id pub-id-type="doi">10.3389/fpsyt.2016.00107</pub-id><pub-id pub-id-type="pmid">27378955</pub-id></citation></ref>
<ref id="B32">
<label>32.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Pellicano</surname> <given-names>E</given-names></name> <name><surname>Burr</surname> <given-names>D</given-names></name></person-group>. <article-title>When the world becomes &#x00027;too real&#x00027;: a Bayesian explanation of autistic perception</article-title>. <source>Trends Cogn Sci</source>. (<year>2012</year>) <volume>16</volume>:<fpage>504</fpage>&#x02013;<lpage>10</lpage>. <pub-id pub-id-type="doi">10.1016/j.tics.2012.08.009</pub-id><pub-id pub-id-type="pmid">22959875</pub-id></citation></ref>
<ref id="B33">
<label>33.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Asakura</surname> <given-names>N</given-names></name> <name><surname>Inui</surname> <given-names>T</given-names></name></person-group>. <article-title>A Bayesian framework for false belief reasoning in children: a rational integration of theory-theory and simulation theory</article-title>. <source>Front Psychol</source>. (<year>2016</year>) <volume>7</volume>:<fpage>2019</fpage>. <pub-id pub-id-type="doi">10.3389/fpsyg.2016.02019</pub-id><pub-id pub-id-type="pmid">28082941</pub-id></citation></ref>
<ref id="B34">
<label>34.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Crawley</surname> <given-names>D</given-names></name> <name><surname>Zhang</surname> <given-names>L</given-names></name> <name><surname>Jones</surname> <given-names>EJ</given-names></name> <name><surname>Ahmad</surname> <given-names>J</given-names></name> <name><surname>Oakley</surname> <given-names>B</given-names></name> <name><surname>San Jose Caceres</surname> <given-names>A</given-names></name> <etal/></person-group>. <article-title>Modeling flexible behavior in childhood to adulthood shows age-dependent learning mechanisms and less optimal learning in autism in each age group</article-title>. <source>PLoS Biol</source>. (<year>2020</year>) <volume>18</volume>:<fpage>e3000908</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pbio.3000908</pub-id><pub-id pub-id-type="pmid">33108370</pub-id></citation></ref>
<ref id="B35">
<label>35.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lawson</surname> <given-names>RP</given-names></name> <name><surname>Mathys</surname> <given-names>C</given-names></name> <name><surname>Rees</surname> <given-names>G</given-names></name></person-group>. <article-title>Adults with autism overestimate the volatility of the sensory environment</article-title>. <source>Nat Neurosci</source>. (<year>2017</year>) <volume>20</volume>:<fpage>1293</fpage>&#x02013;<lpage>1299</lpage>. <pub-id pub-id-type="doi">10.1038/nn.4615</pub-id><pub-id pub-id-type="pmid">28758996</pub-id></citation></ref>
<ref id="B36">
<label>36.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Dayan</surname> <given-names>P</given-names></name> <name><surname>Hinton</surname> <given-names>GE</given-names></name> <name><surname>Neal</surname> <given-names>RM</given-names></name> <name><surname>Zemel</surname> <given-names>RS</given-names></name></person-group>. <article-title>The Helmholtz machine</article-title>. <source>Neural Comput</source>. (<year>1995</year>) <volume>7</volume>:<fpage>889</fpage>&#x02013;<lpage>904</lpage>. <pub-id pub-id-type="doi">10.1162/neco.1995.7.5.889</pub-id><pub-id pub-id-type="pmid">7584891</pub-id></citation></ref>
<ref id="B37">
<label>37.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yamins</surname> <given-names>DL</given-names></name> <name><surname>Hong</surname> <given-names>H</given-names></name> <name><surname>Cadieu</surname> <given-names>CF</given-names></name> <name><surname>Solomon</surname> <given-names>EA</given-names></name> <name><surname>Seibert</surname> <given-names>D</given-names></name> <name><surname>DiCarlo</surname> <given-names>JJ</given-names></name></person-group>. <article-title>Performance-optimized hierarchical models predict neural responses in higher visual cortex</article-title>. <source>Proc Natl Acad Sci USA</source>. (<year>2014</year>) <volume>111</volume>:<fpage>8619</fpage>&#x02013;<lpage>24</lpage>. <pub-id pub-id-type="doi">10.1073/pnas.1403112111</pub-id><pub-id pub-id-type="pmid">24812127</pub-id></citation></ref>
<ref id="B38">
<label>38.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sohn</surname> <given-names>H</given-names></name> <name><surname>Narain</surname> <given-names>D</given-names></name> <name><surname>Meirhaeghe</surname> <given-names>N</given-names></name> <name><surname>Jazayeri</surname> <given-names>M</given-names></name></person-group>. <article-title>Bayesian computation through cortical latent dynamics</article-title>. <source>Neuron</source>. (<year>2019</year>) <volume>103</volume>:<fpage>934</fpage>&#x02013;<lpage>47</lpage>. <pub-id pub-id-type="doi">10.1016/j.neuron.2019.06.012</pub-id><pub-id pub-id-type="pmid">31320220</pub-id></citation></ref>
<ref id="B39">
<label>39.</label>
<citation citation-type="confproc"><person-group person-group-type="author"><name><surname>Fountas</surname> <given-names>Z</given-names></name> <name><surname>Sajid</surname> <given-names>N</given-names></name> <name><surname>Mediano</surname> <given-names>P</given-names></name> <name><surname>Friston</surname> <given-names>KJ</given-names></name></person-group>. <article-title>Deep active inference agents using Monte-Carlo methods</article-title>. In: <source>Advances in Neural Information Processing Systems (Virtual)</source>, Vol. 33. (<year>2020</year>). p. <fpage>11662</fpage>&#x02013;<lpage>75</lpage>.<pub-id pub-id-type="pmid">27534393</pub-id></citation></ref>
<ref id="B40">
<label>40.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Pollock</surname> <given-names>E</given-names></name> <name><surname>Jazayeri</surname> <given-names>M</given-names></name></person-group>. <article-title>Engineering recurrent neural networks from task-relevant manifolds and dynamics</article-title>. <source>PLoS Comput Biol</source>. (<year>2020</year>) <volume>16</volume>:<fpage>e1008128</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pcbi.1008128</pub-id><pub-id pub-id-type="pmid">32785228</pub-id></citation></ref>
<ref id="B41">
<label>41.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Recanatesi</surname> <given-names>S</given-names></name> <name><surname>Farrell</surname> <given-names>M</given-names></name> <name><surname>Lajoie</surname> <given-names>G</given-names></name> <name><surname>Deneve</surname> <given-names>S</given-names></name> <name><surname>Rigotti</surname> <given-names>M</given-names></name> <name><surname>Shea-Brown</surname> <given-names>E</given-names></name></person-group>. <article-title>Predictive learning as a network mechanism for extracting low-dimensional latent space representations</article-title>. <source>Nat Commun</source>. (<year>2021</year>) <volume>12</volume>:<fpage>1</fpage>&#x02013;<lpage>13</lpage>. <pub-id pub-id-type="doi">10.1038/s41467-021-21696-1</pub-id><pub-id pub-id-type="pmid">33658520</pub-id></citation></ref>
<ref id="B42">
<label>42.</label>
<citation citation-type="web"><person-group person-group-type="author"><name><surname>Schaeffer</surname> <given-names>R</given-names></name> <name><surname>Khona</surname> <given-names>M</given-names></name> <name><surname>Meshulam</surname> <given-names>L</given-names></name> <collab>International Brain Laboratory</collab> <name><surname>Fiete</surname> <given-names>I</given-names></name></person-group>. <article-title>Reverse-engineering recurrent neural network solutions to a hierarchical inference task for mice</article-title>. In: Larochelle H, Ranzato M, Hadsell R, Balcan MF, Lin H, editors. <source>Advances in Neural Information Processing Systems. vol. 33</source>. Curran Associates, Inc. (<year>2020</year>) p. <fpage>4584</fpage>&#x02013;<lpage>96</lpage>. Available online at: <ext-link ext-link-type="uri" xlink:href="https://proceedings.neurips.cc/paper/2020/file/30f0641c041f03d94e95a76b9d8bd58f-Paper.pdf">https://proceedings.neurips.cc/paper/2020/file/30f0641c041f03d94e95a76b9d8bd58f-Paper.pdf</ext-link> (accessed March 3, 2023).</citation>
</ref>
<ref id="B43">
<label>43.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yang</surname> <given-names>GR</given-names></name> <name><surname>Joglekar</surname> <given-names>MR</given-names></name> <name><surname>Song</surname> <given-names>HF</given-names></name> <name><surname>Newsome</surname> <given-names>WT</given-names></name> <name><surname>Wang</surname> <given-names>XJ</given-names></name></person-group>. <article-title>Task representations in neural networks trained to perform many cognitive tasks</article-title>. <source>Nat Neurosci</source>. (<year>2019</year>) <volume>22</volume>:<fpage>297</fpage>&#x02013;<lpage>306</lpage>. <pub-id pub-id-type="doi">10.1038/s41593-018-0310-2</pub-id><pub-id pub-id-type="pmid">30643294</pub-id></citation></ref>
<ref id="B44">
<label>44.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Murata</surname> <given-names>S</given-names></name> <name><surname>Namikawa</surname> <given-names>J</given-names></name> <name><surname>Arie</surname> <given-names>H</given-names></name> <name><surname>Sugano</surname> <given-names>S</given-names></name> <name><surname>Tani</surname> <given-names>J</given-names></name></person-group>. <article-title>Learning to reproduce fluctuating time series by inferring their time-dependent stochastic properties: application in robot learning via tutoring</article-title>. <source>IEEE Trans Auton Ment Dev</source>. (<year>2013</year>) <volume>5</volume>:<fpage>298</fpage>&#x02013;<lpage>310</lpage>. <pub-id pub-id-type="doi">10.1109/TAMD.2013.2258019</pub-id></citation>
</ref>
<ref id="B45">
<label>45.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Murata</surname> <given-names>S</given-names></name> <name><surname>Yamashita</surname> <given-names>Y</given-names></name> <name><surname>Arie</surname> <given-names>H</given-names></name> <name><surname>Ogata</surname> <given-names>T</given-names></name> <name><surname>Sugano</surname> <given-names>S</given-names></name> <name><surname>Tani</surname> <given-names>J</given-names></name></person-group>. <article-title>Learning to perceive the world as probabilistic or deterministic via interaction with others: a neuro-robotics experiment</article-title>. <source>IEEE Trans Neural Netw Learn Syst</source>. (<year>2017</year>) <volume>28</volume>:<fpage>830</fpage>&#x02013;<lpage>48</lpage>. <pub-id pub-id-type="doi">10.1109/TNNLS.2015.2492140</pub-id><pub-id pub-id-type="pmid">26595928</pub-id></citation></ref>
<ref id="B46">
<label>46.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yamashita</surname> <given-names>Y</given-names></name> <name><surname>Tani</surname> <given-names>J</given-names></name></person-group>. <article-title>Emergence of functional hierarchy in a multiple timescale neural network model: a humanoid robot experiment</article-title>. <source>PLoS Comput Biol</source>. (<year>2008</year>) <volume>4</volume>:<fpage>e1000220</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pcbi.1000220</pub-id><pub-id pub-id-type="pmid">18989398</pub-id></citation></ref>
<ref id="B47">
<label>47.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Asada</surname> <given-names>M</given-names></name> <name><surname>MacDorman</surname> <given-names>KF</given-names></name> <name><surname>Ishiguro</surname> <given-names>H</given-names></name> <name><surname>Kuniyoshi</surname> <given-names>Y</given-names></name></person-group>. <article-title>Cognitive developmental robotics as a new paradigm for the design of humanoid robots</article-title>. <source>Robot Auton Syst</source>. (<year>2001</year>) <volume>37</volume>:<fpage>185</fpage>&#x02013;<lpage>93</lpage>. <pub-id pub-id-type="doi">10.1016/S0921-8890(01)00157-9</pub-id></citation>
</ref>
<ref id="B48">
<label>48.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Pfeifer</surname> <given-names>R</given-names></name> <name><surname>Lungarella</surname> <given-names>M</given-names></name> <name><surname>Iida</surname> <given-names>F</given-names></name></person-group>. <article-title>Self-organization, embodiment, and biologically inspired robotics</article-title>. <source>Science</source>. (<year>2007</year>) <volume>318</volume>:<fpage>1088</fpage>&#x02013;<lpage>93</lpage>. <pub-id pub-id-type="doi">10.1126/science.1145803</pub-id><pub-id pub-id-type="pmid">18006736</pub-id></citation></ref>
<ref id="B49">
<label>49.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Tani</surname> <given-names>J</given-names></name> <name><surname>Ito</surname> <given-names>M</given-names></name></person-group>. <article-title>Self-organization of behavioral primitives as multiple attractor dynamics: a robot experiment</article-title>. <source>IEEE Trans Syst Man Cybern A Syst Hum</source>. (<year>2003</year>) <volume>33</volume>:<fpage>481</fpage>&#x02013;<lpage>8</lpage>. <pub-id pub-id-type="doi">10.1109/TSMCA.2003.809171</pub-id></citation>
</ref>
<ref id="B50">
<label>50.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Idei</surname> <given-names>H</given-names></name> <name><surname>Murata</surname> <given-names>S</given-names></name> <name><surname>Chen</surname> <given-names>Y</given-names></name> <name><surname>Yamashita</surname> <given-names>Y</given-names></name> <name><surname>Tani</surname> <given-names>J</given-names></name> <name><surname>Ogata</surname> <given-names>T</given-names></name></person-group>. <article-title>A neurorobotics simulation of autistic behavior induced by unusual sensory precision</article-title>. <source>Comput Psychiatry</source>. (<year>2018</year>) <volume>2</volume>:<fpage>164</fpage>. <pub-id pub-id-type="doi">10.1162/CPSY_a_00019</pub-id><pub-id pub-id-type="pmid">30627669</pub-id></citation></ref>
<ref id="B51">
<label>51.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Idei</surname> <given-names>H</given-names></name> <name><surname>Murata</surname> <given-names>S</given-names></name> <name><surname>Yamashita</surname> <given-names>Y</given-names></name> <name><surname>Ogata</surname> <given-names>T</given-names></name></person-group>. <article-title>Homogeneous intrinsic neuronal excitability induces overfitting to sensory noise: a robot model of neurodevelopmental disorder</article-title>. <source>Front Psychiatry</source>. (<year>2020</year>) <volume>11</volume>:<fpage>762</fpage>. <pub-id pub-id-type="doi">10.3389/fpsyt.2020.00762</pub-id><pub-id pub-id-type="pmid">32903328</pub-id></citation></ref>
<ref id="B52">
<label>52.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Idei</surname> <given-names>H</given-names></name> <name><surname>Murata</surname> <given-names>S</given-names></name> <name><surname>Yamashita</surname> <given-names>Y</given-names></name> <name><surname>Ogata</surname> <given-names>T</given-names></name></person-group>. <article-title>Paradoxical sensory reactivity induced by functional disconnection in a robot model of neurodevelopmental disorder</article-title>. <source>Neural Netw</source>. (<year>2021</year>) <volume>138</volume>:<fpage>150</fpage>&#x02013;<lpage>63</lpage>. <pub-id pub-id-type="doi">10.1016/j.neunet.2021.01.033</pub-id><pub-id pub-id-type="pmid">33652371</pub-id></citation></ref>
<ref id="B53">
<label>53.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Takahashi</surname> <given-names>Y</given-names></name> <name><surname>Murata</surname> <given-names>S</given-names></name> <name><surname>Idei</surname> <given-names>H</given-names></name> <name><surname>Tomita</surname> <given-names>H</given-names></name> <name><surname>Yamashita</surname> <given-names>Y</given-names></name></person-group>. <article-title>Neural network modeling of altered facial expression recognition in autism spectrum disorders based on predictive processing framework</article-title>. <source>Sci Rep</source>. (<year>2021</year>) <volume>11</volume>:<fpage>1</fpage>&#x02013;<lpage>14</lpage>. <pub-id pub-id-type="doi">10.1038/s41598-021-94067-x</pub-id><pub-id pub-id-type="pmid">34465867</pub-id></citation></ref>
<ref id="B54">
<label>54.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yamashita</surname> <given-names>Y</given-names></name> <name><surname>Tani</surname> <given-names>J</given-names></name></person-group>. <article-title>Spontaneous prediction error generation in schizophrenia</article-title>. <source>PLoS ONE</source>. (<year>2012</year>) <volume>7</volume>:<fpage>e37843</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pone.0037843</pub-id><pub-id pub-id-type="pmid">22666398</pub-id></citation></ref>
<ref id="B55">
<label>55.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ahmadi</surname> <given-names>A</given-names></name> <name><surname>Tani</surname> <given-names>J</given-names></name></person-group>. <article-title>A novel predictive-coding-inspired variational RNN model for online prediction and recognition</article-title>. <source>Neural Comput</source>. (<year>2019</year>) <volume>31</volume>:<fpage>2025</fpage>&#x02013;<lpage>74</lpage>. <pub-id pub-id-type="doi">10.1162/neco_a_01228</pub-id><pub-id pub-id-type="pmid">31525309</pub-id></citation></ref>
<ref id="B56">
<label>56.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Matsumoto</surname> <given-names>T</given-names></name> <name><surname>Tani</surname> <given-names>J</given-names></name></person-group>. <article-title>Goal-directed planning for habituated agents by active inference using a variational recurrent neural network</article-title>. <source>Entropy</source>. (<year>2020</year>) <volume>22</volume>:<fpage>564</fpage>. <pub-id pub-id-type="doi">10.3390/e22050564</pub-id><pub-id pub-id-type="pmid">33286336</pub-id></citation></ref>
<ref id="B57">
<label>57.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Idei</surname> <given-names>H</given-names></name> <name><surname>Ohata</surname> <given-names>W</given-names></name> <name><surname>Yamashita</surname> <given-names>Y</given-names></name> <name><surname>Ogata</surname> <given-names>T</given-names></name> <name><surname>Tani</surname> <given-names>J</given-names></name></person-group>. <article-title>Emergence of sensory attenuation based upon the free-energy principle</article-title>. <source>Sci Rep</source>. (<year>2022</year>) <volume>12</volume>:<fpage>1</fpage>&#x02013;<lpage>15</lpage>. <pub-id pub-id-type="doi">10.1038/s41598-022-18207-7</pub-id><pub-id pub-id-type="pmid">36008463</pub-id></citation></ref>
<ref id="B58">
<label>58.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ohata</surname> <given-names>W</given-names></name> <name><surname>Tani</surname> <given-names>J</given-names></name></person-group>. <article-title>Investigation of the sense of agency in social cognition, based on frameworks of predictive coding and active inference: a simulation study on multimodal imitative interaction</article-title>. <source>Front Neurorobot</source>. (<year>2020</year>) <volume>14</volume>:<fpage>61</fpage>. <pub-id pub-id-type="doi">10.3389/fnbot.2020.00061</pub-id><pub-id pub-id-type="pmid">33013346</pub-id></citation></ref>
<ref id="B59">
<label>59.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wirkuttis</surname> <given-names>N</given-names></name> <name><surname>Tani</surname> <given-names>J</given-names></name></person-group>. <article-title>Leading or following? Dyadic robot imitative interaction using the active inference framework</article-title>. <source>IEEE Robot Autom Lett</source>. (<year>2021</year>) <volume>6</volume>:<fpage>6024</fpage>&#x02013;<lpage>31</lpage>. <pub-id pub-id-type="doi">10.1109/LRA.2021.3090015</pub-id></citation>
</ref>
<ref id="B60">
<label>60.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wirkuttis</surname> <given-names>N</given-names></name> <name><surname>Ohata</surname> <given-names>W</given-names></name> <name><surname>Tani</surname> <given-names>J</given-names></name></person-group>. <article-title>Turn-taking mechanisms in imitative interaction: robotic social interaction based on the free energy principle</article-title>. <source>Entropy</source>. (<year>2023</year>) <volume>25</volume>:<fpage>263</fpage>. <pub-id pub-id-type="doi">10.3390/e25020263</pub-id><pub-id pub-id-type="pmid">36832633</pub-id></citation></ref>
<ref id="B61">
<label>61.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Landry</surname> <given-names>O</given-names></name> <name><surname>Al-Taie</surname> <given-names>S</given-names></name></person-group>. <article-title>A meta-analysis of the Wisconsin card sort task in autism</article-title>. <source>J Autism Dev Disord</source>. (<year>2016</year>) <volume>46</volume>:<fpage>1220</fpage>&#x02013;<lpage>35</lpage>. <pub-id pub-id-type="doi">10.1007/s10803-015-2659-3</pub-id><pub-id pub-id-type="pmid">26614085</pub-id></citation></ref>
<ref id="B62">
<label>62.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Uddin</surname> <given-names>LQ</given-names></name></person-group>. <article-title>Cognitive and behavioural flexibility: neural mechanisms and clinical considerations</article-title>. <source>Nat Rev Neurosci</source>. (<year>2021</year>) <volume>22</volume>:<fpage>167</fpage>&#x02013;<lpage>79</lpage>. <pub-id pub-id-type="doi">10.1038/s41583-021-00428-w</pub-id><pub-id pub-id-type="pmid">33536614</pub-id></citation></ref>
<ref id="B63">
<label>63.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Izawa</surname> <given-names>J</given-names></name> <name><surname>Pekny</surname> <given-names>SE</given-names></name> <name><surname>Marko</surname> <given-names>MK</given-names></name> <name><surname>Haswell</surname> <given-names>CC</given-names></name> <name><surname>Shadmehr</surname> <given-names>R</given-names></name> <name><surname>Mostofsky</surname> <given-names>SH</given-names></name></person-group>. <article-title>Motor learning relies on integrated sensory inputs in ADHD, but over-selectively on proprioception in autism spectrum conditions</article-title>. <source>Autism Res</source>. (<year>2012</year>) <volume>5</volume>:<fpage>124</fpage>&#x02013;<lpage>36</lpage>. <pub-id pub-id-type="doi">10.1002/aur.1222</pub-id><pub-id pub-id-type="pmid">22359275</pub-id></citation></ref>
<ref id="B64">
<label>64.</label>
<citation citation-type="confproc"><person-group person-group-type="author"><name><surname>Chung</surname> <given-names>J</given-names></name> <name><surname>Kastner</surname> <given-names>K</given-names></name> <name><surname>Dinh</surname> <given-names>L</given-names></name> <name><surname>Goel</surname> <given-names>K</given-names></name> <name><surname>Courville</surname> <given-names>AC</given-names></name> <name><surname>Bengio</surname> <given-names>Y</given-names></name></person-group>. <article-title>A recurrent latent variable model for sequential data</article-title>. In: <source>Advances in Neural Information Processing Systems, Vol. 28.</source> Montreal, QC (<year>2015</year>).</citation>
</ref>
<ref id="B65">
<label>65.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Fuster</surname> <given-names>JM</given-names></name></person-group>. <article-title>The prefrontal cortex&#x02013;an update: time is of the essence</article-title>. <source>Neuron</source>. (<year>2001</year>) <volume>30</volume>:<fpage>319</fpage>&#x02013;<lpage>33</lpage>. <pub-id pub-id-type="doi">10.1016/S0896-6273(01)00285-9</pub-id><pub-id pub-id-type="pmid">11394996</pub-id></citation></ref>
<ref id="B66">
<label>66.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Murray</surname> <given-names>JD</given-names></name> <name><surname>Bernacchia</surname> <given-names>A</given-names></name> <name><surname>Freedman</surname> <given-names>DJ</given-names></name> <name><surname>Romo</surname> <given-names>R</given-names></name> <name><surname>Wallis</surname> <given-names>JD</given-names></name> <name><surname>Cai</surname> <given-names>X</given-names></name> <etal/></person-group>. <article-title>A hierarchy of intrinsic timescales across primate cortex</article-title>. <source>Nat Neurosci</source>. (<year>2014</year>) <volume>17</volume>:<fpage>1661</fpage>&#x02013;<lpage>3</lpage>. <pub-id pub-id-type="doi">10.1038/nn.3862</pub-id><pub-id pub-id-type="pmid">25383900</pub-id></citation></ref>
<ref id="B67">
<label>67.</label>
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Bishop</surname> <given-names>CM</given-names></name> <name><surname>Nasrabadi</surname> <given-names>NM</given-names></name></person-group>. <source>Pattern Recognition and Machine Learning</source>. <publisher-loc>New York, NY</publisher-loc>: <publisher-name>Springer</publisher-name> (<year>2006</year>).</citation>
</ref>
<ref id="B68">
<label>68.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kingma</surname> <given-names>DP</given-names></name> <name><surname>Welling</surname> <given-names>M</given-names></name></person-group>. <article-title>Auto-encoding variational Bayes</article-title>. <source>arXiv preprint arXiv:1312.6114</source>. (<year>2013</year>). <pub-id pub-id-type="doi">10.48550/arXiv.1312.6114</pub-id><pub-id pub-id-type="pmid">32176273</pub-id></citation></ref>
<ref id="B69">
<label>69.</label>
<citation citation-type="confproc"><person-group person-group-type="author"><name><surname>Rezende</surname> <given-names>DJ</given-names></name> <name><surname>Mohamed</surname> <given-names>S</given-names></name> <name><surname>Wierstra</surname> <given-names>D</given-names></name></person-group>. <article-title>Stochastic backpropagation and approximate inference in deep generative models</article-title>. In: <source>International Conference on Machine Learning</source>. <publisher-loc>Beijing</publisher-loc> (<year>2014</year>). p. <fpage>1278</fpage>&#x02013;<lpage>86</lpage>.</citation>
</ref>
<ref id="B70">
<label>70.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kingma</surname> <given-names>DP</given-names></name> <name><surname>Ba</surname> <given-names>J</given-names></name></person-group>. <article-title>Adam: a method for stochastic optimization</article-title>. <source>arXiv preprint arXiv:1412.6980</source>. (<year>2014</year>). <pub-id pub-id-type="doi">10.48550/arXiv.1412.6980</pub-id></citation>
</ref>
<ref id="B71">
<label>71.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Davis</surname> <given-names>G</given-names></name> <name><surname>Plaisted-Grant</surname> <given-names>K</given-names></name></person-group>. <article-title>Low endogenous neural noise in autism</article-title>. <source>Autism</source>. (<year>2015</year>) <volume>19</volume>:<fpage>351</fpage>&#x02013;<lpage>62</lpage>. <pub-id pub-id-type="doi">10.1177/1362361314552198</pub-id><pub-id pub-id-type="pmid">25662293</pub-id></citation></ref>
<ref id="B72">
<label>72.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Simmons</surname> <given-names>DR</given-names></name> <name><surname>Robertson</surname> <given-names>AE</given-names></name> <name><surname>McKay</surname> <given-names>LS</given-names></name> <name><surname>Toal</surname> <given-names>E</given-names></name> <name><surname>McAleer</surname> <given-names>P</given-names></name> <name><surname>Pollick</surname> <given-names>FE</given-names></name></person-group>. <article-title>Vision in autism spectrum disorders</article-title>. <source>Vision Res</source>. (<year>2009</year>) <volume>49</volume>:<fpage>2705</fpage>&#x02013;<lpage>39</lpage>. <pub-id pub-id-type="doi">10.1016/j.visres.2009.08.005</pub-id><pub-id pub-id-type="pmid">19682485</pub-id></citation></ref>
<ref id="B73">
<label>73.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Easson</surname> <given-names>AK</given-names></name> <name><surname>McIntosh</surname> <given-names>AR</given-names></name></person-group>. <article-title>BOLD signal variability and complexity in children and adolescents with and without autism spectrum disorder</article-title>. <source>Dev Cogn Neurosci</source>. (<year>2019</year>) <volume>36</volume>:<fpage>100630</fpage>. <pub-id pub-id-type="doi">10.1016/j.dcn.2019.100630</pub-id><pub-id pub-id-type="pmid">30878549</pub-id></citation></ref>
<ref id="B74">
<label>74.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hecker</surname> <given-names>L</given-names></name> <name><surname>Wilson</surname> <given-names>M</given-names></name> <name><surname>Tebartz van Elst</surname> <given-names>L</given-names></name> <name><surname>Kornmeier</surname> <given-names>J</given-names></name></person-group>. <article-title>Altered EEG variability on different time scales in participants with autism spectrum disorder: an exploratory study</article-title>. <source>Sci Rep</source>. (<year>2022</year>) <volume>12</volume>:<fpage>1</fpage>&#x02013;<lpage>12</lpage>. <pub-id pub-id-type="doi">10.1038/s41598-022-17304-x</pub-id><pub-id pub-id-type="pmid">35906301</pub-id></citation></ref>
<ref id="B75">
<label>75.</label>
<citation citation-type="web"><person-group person-group-type="author"><name><surname>Paszke</surname> <given-names>A</given-names></name> <name><surname>Gross</surname> <given-names>S</given-names></name> <name><surname>Massa</surname> <given-names>F</given-names></name> <name><surname>Lerer</surname> <given-names>A</given-names></name> <name><surname>Bradbury</surname> <given-names>J</given-names></name> <name><surname>Chanan</surname> <given-names>G</given-names></name> <etal/></person-group>. <article-title>PyTorch: an imperative style, high-performance deep learning library</article-title>. In: Wallach H, Larochelle H, Beygelzimer A, d&#x00027;Alch&#x000E9;-Buc F, Fox E, Garnett R, editors. <source>Advances in Neural Information Processing Systems 32</source>. Curran Associates, Inc. (<year>2019</year>). p. <fpage>8024</fpage>&#x02013;<lpage>35</lpage>. Available online at: <ext-link ext-link-type="uri" xlink:href="http://papers.neurips.cc/paper/9015-pytorch-an-imperative-style-high-performance-deep-learning-library.pdf">http://papers.neurips.cc/paper/9015-pytorch-an-imperative-style-high-performance-deep-learning-library.pdf</ext-link> (accessed March 3, 2023).</citation>
</ref>
<ref id="B76">
<label>76.</label>
<citation citation-type="web"><person-group person-group-type="author"><collab>R Core Team</collab></person-group>. <source>R: A Language and Environment for Statistical Computing</source>. <publisher-loc>Vienna</publisher-loc> (<year>2020</year>). Available online at: <ext-link ext-link-type="uri" xlink:href="https://www.R-project.org/">https://www.R-project.org/</ext-link> (accessed March 3, 2023).</citation>
</ref>
<ref id="B77">
<label>77.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Higgins</surname> <given-names>I</given-names></name> <name><surname>Matthey</surname> <given-names>L</given-names></name> <name><surname>Pal</surname> <given-names>A</given-names></name> <name><surname>Burgess</surname> <given-names>C</given-names></name> <name><surname>Glorot</surname> <given-names>X</given-names></name> <name><surname>Botvinick</surname> <given-names>M</given-names></name> <etal/></person-group>. <article-title>Beta-VAE: Learning basic visual concepts with a constrained variational framework</article-title>. In: <source>International Conference on Learning Representations</source>. San Juan (<year>2016</year>).</citation>
</ref>
<ref id="B78">
<label>78.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>Z</given-names></name> <name><surname>Murkute</surname> <given-names>JV</given-names></name> <name><surname>Gyawali</surname> <given-names>PK</given-names></name> <name><surname>Wang</surname> <given-names>L</given-names></name></person-group>. <article-title>Progressive learning and disentanglement of hierarchical representations</article-title>. <source>arXiv preprint arXiv:2002.10549</source>. (<year>2020</year>). <pub-id pub-id-type="doi">10.48550/arXiv.2002.10549</pub-id></citation>
</ref>
<ref id="B79">
<label>79.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Shorten</surname> <given-names>C</given-names></name> <name><surname>Khoshgoftaar</surname> <given-names>TM</given-names></name></person-group>. <article-title>A survey on image data augmentation for deep learning</article-title>. <source>J Big Data</source>. (<year>2019</year>) <volume>6</volume>:<fpage>1</fpage>&#x02013;<lpage>48</lpage>. <pub-id pub-id-type="doi">10.1186/s40537-019-0197-0</pub-id></citation>
</ref>
<ref id="B80">
<label>80.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Vincent</surname> <given-names>P</given-names></name> <name><surname>Larochelle</surname> <given-names>H</given-names></name> <name><surname>Bengio</surname> <given-names>Y</given-names></name> <name><surname>Manzagol</surname> <given-names>PA</given-names></name></person-group>. <article-title>Extracting and composing robust features with denoising autoencoders</article-title>. In: <source>Proceedings of the 25th International Conference on Machine Learning</source>. <publisher-loc>Helsinki</publisher-loc> (<year>2008</year>). p. <fpage>1096</fpage>&#x02013;<lpage>103</lpage>. <pub-id pub-id-type="doi">10.1145/1390156.1390294</pub-id></citation>
</ref>
<ref id="B81">
<label>81.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Armbruster-Gen&#x000E7;</surname> <given-names>DJ</given-names></name> <name><surname>Ueltzh&#x000F6;ffer</surname> <given-names>K</given-names></name> <name><surname>Fiebach</surname> <given-names>CJ</given-names></name></person-group>. <article-title>Brain signal variability differentially affects cognitive flexibility and cognitive stability</article-title>. <source>J Neurosci</source>. (<year>2016</year>) <volume>36</volume>:<fpage>3978</fpage>&#x02013;<lpage>87</lpage>. <pub-id pub-id-type="doi">10.1523/JNEUROSCI.2517-14.2016</pub-id><pub-id pub-id-type="pmid">27053205</pub-id></citation></ref>
<ref id="B82">
<label>82.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>McIntosh</surname> <given-names>AR</given-names></name> <name><surname>Kovacevic</surname> <given-names>N</given-names></name> <name><surname>Itier</surname> <given-names>RJ</given-names></name></person-group>. <article-title>Increased brain signal variability accompanies lower behavioral variability in development</article-title>. <source>PLoS Comput Biol</source>. (<year>2008</year>) <volume>4</volume>:<fpage>e1000106</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pcbi.1000106</pub-id><pub-id pub-id-type="pmid">18604265</pub-id></citation></ref>
<ref id="B83">
<label>83.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Benrimoh</surname> <given-names>D</given-names></name> <name><surname>Sheldon</surname> <given-names>A</given-names></name> <name><surname>Sibarium</surname> <given-names>E</given-names></name> <name><surname>Powers</surname> <given-names>AR</given-names></name></person-group>. <article-title>Computational mechanism for the effect of psychosis community treatment: a conceptual review from neurobiology to social interaction</article-title>. <source>Front Psychiatry</source>. (<year>2021</year>) <volume>12</volume>:<fpage>685390</fpage>. <pub-id pub-id-type="doi">10.3389/fpsyt.2021.685390</pub-id><pub-id pub-id-type="pmid">34385938</pub-id></citation></ref>
<ref id="B84">
<label>84.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Craske</surname> <given-names>MG</given-names></name> <name><surname>Treanor</surname> <given-names>M</given-names></name> <name><surname>Conway</surname> <given-names>CC</given-names></name> <name><surname>Zbozinek</surname> <given-names>T</given-names></name> <name><surname>Vervliet</surname> <given-names>B</given-names></name></person-group>. <article-title>Maximizing exposure therapy: an inhibitory learning approach</article-title>. <source>Behav Res Ther</source>. (<year>2014</year>) <volume>58</volume>:<fpage>10</fpage>&#x02013;<lpage>23</lpage>. <pub-id pub-id-type="doi">10.1016/j.brat.2014.04.006</pub-id><pub-id pub-id-type="pmid">24864005</pub-id></citation></ref>
<ref id="B85">
<label>85.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>S&#x000F8;nderby</surname> <given-names>CK</given-names></name> <name><surname>Raiko</surname> <given-names>T</given-names></name> <name><surname>Maal&#x000F8;e</surname> <given-names>L</given-names></name> <name><surname>S&#x000F8;nderby</surname> <given-names>SK</given-names></name> <name><surname>Winther</surname> <given-names>O</given-names></name></person-group>. <article-title>How to train deep variational autoencoders and probabilistic ladder networks</article-title>. <source>arXiv [Preprint]</source>. (<year>2016</year>). arXiv: 1602.02282.</citation>
</ref>
<ref id="B86">
<label>86.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Salakhutdinov</surname> <given-names>R</given-names></name></person-group>. <article-title>Learning deep generative models</article-title>. <source>Annu Rev Stat Appl</source>. (<year>2015</year>) <volume>2</volume>:<fpage>361</fpage>&#x02013;<lpage>85</lpage>. <pub-id pub-id-type="doi">10.1146/annurev-statistics-010814-020120</pub-id></citation>
</ref>
<ref id="B87">
<label>87.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Cicchetti</surname> <given-names>D</given-names></name> <name><surname>Rogosch</surname> <given-names>FA</given-names></name></person-group>. <article-title>Equifinality and multifinality in developmental psychopathology</article-title>. <source>Dev Psychopathol</source>. (<year>1996</year>) <volume>8</volume>:<fpage>597</fpage>&#x02013;<lpage>600</lpage>. <pub-id pub-id-type="doi">10.1017/S0954579400007318</pub-id></citation>
</ref>
<ref id="B88">
<label>88.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bakouie</surname> <given-names>F</given-names></name> <name><surname>Zendehrouh</surname> <given-names>S</given-names></name> <name><surname>Gharibzadeh</surname> <given-names>S</given-names></name></person-group>. <article-title>Does a kind of over-fitting occur in the brain of autistic patients?</article-title> <source>J Neuropsychiatry Clin Neurosci</source>. (<year>2009</year>) <volume>21</volume>:<fpage>343</fpage>. <pub-id pub-id-type="doi">10.1176/jnp.2009.21.3.343</pub-id><pub-id pub-id-type="pmid">19776319</pub-id></citation></ref>
<ref id="B89">
<label>89.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Adams</surname> <given-names>RA</given-names></name> <name><surname>Stephan</surname> <given-names>KE</given-names></name> <name><surname>Brown</surname> <given-names>HR</given-names></name> <name><surname>Frith</surname> <given-names>CD</given-names></name> <name><surname>Friston</surname> <given-names>KJ</given-names></name></person-group>. <article-title>The computational anatomy of psychosis</article-title>. <source>Front Psychiatry</source>. (<year>2013</year>) <volume>4</volume>:<fpage>47</fpage>. <pub-id pub-id-type="doi">10.3389/fpsyt.2013.00047</pub-id><pub-id pub-id-type="pmid">23750138</pub-id></citation></ref>
<ref id="B90">
<label>90.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Feczko</surname> <given-names>E</given-names></name> <name><surname>Fair</surname> <given-names>DA</given-names></name></person-group>. <article-title>Methods and challenges for assessing heterogeneity</article-title>. <source>Biol Psychiatry</source>. (<year>2020</year>) <volume>88</volume>:<fpage>9</fpage>&#x02013;<lpage>17</lpage>. <pub-id pub-id-type="doi">10.1016/j.biopsych.2020.02.015</pub-id><pub-id pub-id-type="pmid">32386742</pub-id></citation></ref>
<ref id="B91">
<label>91.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hwang</surname> <given-names>J</given-names></name> <name><surname>Kim</surname> <given-names>J</given-names></name> <name><surname>Ahmadi</surname> <given-names>A</given-names></name> <name><surname>Choi</surname> <given-names>M</given-names></name> <name><surname>Tani</surname> <given-names>J</given-names></name></person-group>. <article-title>Predictive coding-based deep dynamic neural network for visuomotor learning</article-title>. In: <source>2017 Joint IEEE International Conference on Development and Learning and Epigenetic Robotics (ICDL-EpiRob)</source>. Lisbon: IEEE (<year>2017</year>). p. <fpage>132</fpage>&#x02013;<lpage>9</lpage>. <pub-id pub-id-type="doi">10.1109/DEVLRN.2017.8329798</pub-id></citation>
</ref>
<ref id="B92">
<label>92.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Locatello</surname> <given-names>F</given-names></name> <name><surname>Bauer</surname> <given-names>S</given-names></name> <name><surname>Lucic</surname> <given-names>M</given-names></name> <name><surname>Raetsch</surname> <given-names>G</given-names></name> <name><surname>Gelly</surname> <given-names>S</given-names></name> <name><surname>Sch&#x000F6;lkopf</surname> <given-names>B</given-names></name> <etal/></person-group>. <article-title>Challenging common assumptions in the unsupervised learning of disentangled representations</article-title>. In: <source>International Conference on Machine Learning</source>. Long Beach, CA (<year>2019</year>). p. <fpage>4114</fpage>&#x02013;<lpage>24</lpage>.</citation>
</ref>
</ref-list> 
</back>
</article>