<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" article-type="review-article" dtd-version="1.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Neural Circuits</journal-id>
<journal-title-group>
<journal-title>Frontiers in Neural Circuits</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Neural Circuits</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">1662-5110</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fncir.2026.1781811</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Mini Review</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Social learning and exploration&#x2013;exploitation dilemma in decision-making</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Morishita</surname>
<given-names>Gota</given-names>
</name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x002A;</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/3339938"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Suzuki</surname>
<given-names>Shinsuke</given-names>
</name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x002A;</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/78389"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role>
</contrib>
</contrib-group>
<aff id="aff1"><label>1</label><institution>Centre for Brain, Mind and Markets, The University of Melbourne</institution>, <city>Parkville</city>, <state>VIC</state>, <country country="au">Australia</country></aff>
<aff id="aff2"><label>2</label><institution>Faculty of Social Data Science, Hitotsubashi University</institution>, <city>Kunitachi</city>, <country country="jp">Japan</country></aff>
<aff id="aff3"><label>3</label><institution>Brain Research Center, Hitotsubashi Institute for Advanced Study, Hitotsubashi University</institution>, <city>Kunitachi</city>, <country country="jp">Japan</country></aff>
<author-notes>
<corresp id="c001"><label>&#x002A;</label>Correspondence: Gota Morishita, <email xlink:href="mailto:gota.morishita@gmail.com">gota.morishita@gmail.com</email>; Shinsuke Suzuki, <email xlink:href="mailto:shinsuke.szk@gmail.com">shinsuke.szk@gmail.com</email></corresp>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-03-02">
<day>02</day>
<month>03</month>
<year>2026</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2026</year>
</pub-date>
<volume>20</volume>
<elocation-id>1781811</elocation-id>
<history>
<date date-type="received">
<day>06</day>
<month>01</month>
<year>2026</year>
</date>
<date date-type="rev-recd">
<day>20</day>
<month>02</month>
<year>2026</year>
</date>
<date date-type="accepted">
<day>20</day>
<month>02</month>
<year>2026</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x00A9; 2026 Morishita and Suzuki.</copyright-statement>
<copyright-year>2026</copyright-year>
<copyright-holder>Morishita and Suzuki</copyright-holder>
<license>
<ali:license_ref start_date="2026-03-02">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<p>This mini review examines the neurocomputational principles of social learning through the lens of the exploration&#x2013;exploitation dilemma. While the neural mechanisms of learning from others&#x2014;mediated by distinct signals in the ventromedial and lateral prefrontal cortices&#x2014;are well established, less is known about how these mechanisms interact with the fundamental trade-off between gathering information (&#x201C;exploration&#x201D;) and maximizing rewards (&#x201C;exploitation&#x201D;). We discuss how social environments shape this trade-off, leading to strategic behaviors such as informational free-riding or conformity. A central focus of this review is the issue of source selection: how agents decide whom to observe. We present recent evidence suggesting that, contrary to the predictions of optimal information-seeking theories, humans often exhibit a &#x201C;reliability-seeking&#x201D; bias, preferring to learn from consistent, exploitation-oriented partners rather than highly exploratory ones. We conclude by discussing the limitations of current paradigms, specifically the inherent confounding of social cues such as competence and predictability, and outline a computational framework for isolating the specific drivers of adaptive social decision-making.</p>
</abstract>
<kwd-group>
<kwd>computational model</kwd>
<kwd>decision-making</kwd>
<kwd>imitation</kwd>
<kwd>observational learning</kwd>
<kwd>reinforcement learning</kwd>
<kwd>reward</kwd>
<kwd>social cognition</kwd>
</kwd-group>
<funding-group>
<funding-statement>The author(s) declared that financial support was received for this work and/or its publication. This work was supported by JSPS KAKENHI Grant Number JP22K21357 (SS).</funding-statement>
</funding-group>
<counts>
<fig-count count="2"/>
<table-count count="0"/>
<equation-count count="0"/>
<ref-count count="57"/>
<page-count count="6"/>
<word-count count="5157"/>
</counts>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="sec1">
<title>Introduction</title>
<p>Adaptive decision-making to attain rewards is fundamental for animal survival. The reinforcement learning framework posits that individuals can acquire optimal behaviors that maximize cumulative rewards through trial and error (<xref ref-type="bibr" rid="ref44">Sutton and Barto, 1998</xref>). This learning is formalized as updating the value of a choice option based on the reward prediction error, defined as the discrepancy between the obtained reward and the current value of the chosen option. Furthermore, the reward prediction error is known to be encoded by dopaminergic neural activity (<xref ref-type="bibr" rid="ref40">Schultz et al., 1997</xref>; <xref ref-type="bibr" rid="ref16">Glimcher, 2011</xref>). In humans, the error signal is correlated with the blood-oxygenation-level-dependent (BOLD) signals in the ventral striatum (<xref ref-type="fig" rid="fig1">Figure 1a</xref>), a major projection site of dopamine (<xref ref-type="bibr" rid="ref32">O&#x2019;Doherty et al., 2004</xref>; <xref ref-type="bibr" rid="ref38">Rutledge et al., 2010</xref>). This framework is highly influential, as it successfully captures both behavior and neural activity of animals, including humans (<xref ref-type="bibr" rid="ref35">Rangel et al., 2008</xref>).</p>
<fig position="float" id="fig1">
<label>Figure 1</label>
<caption>
<p>Brain regions implicated in individual and social learning. <bold>(a)</bold> Ventral striatum. <bold>(b)</bold> Lateral prefrontal cortex (lPFC). <bold>(c)</bold> Ventromedial prefrontal cortex (vmPFC).</p>
</caption>
<graphic xlink:href="fncir-20-1781811-g001.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Three coronal brain MRI slices labeled a, b, and c show different anatomical regions marked with orange circles, highlighting bilateral or central brain areas at specified y-coordinates: Y equals ten for a and b, Y equals forty for c.</alt-text>
</graphic>
</fig>
<p>Individuals can learn not only from direct experience but also from the experiences of others&#x2014;a process known as social learning (<xref ref-type="bibr" rid="ref33">Olsson et al., 2020</xref>; <xref ref-type="bibr" rid="ref13">Gari&#x00E9;py et al., 2014</xref>; <xref ref-type="bibr" rid="ref2">Behrens et al., 2009</xref>). For example, Ad&#x00E9;lie penguins utilize social learning to assess predation risk before foraging (<xref ref-type="bibr" rid="ref8">Chamley, 2004</xref>). Rather than diving immediately, the group waits to observe the outcome of the first penguin&#x2019;s entry. If no predator appears, the others follow, inferring that the environment is safe. Analogously, humans rely on social learning in value-based decision-making. When selecting a restaurant, for instance, individuals often consult online reviews or peer recommendations. This practice leverages others&#x2019; experiences to identify high-quality restaurants while mitigating the risk of a poor outcome. Thus, social learning supports adaptive decision-making by enabling individuals to acquire optimal behaviors without relying solely on direct trial and error.</p>
<p>In this mini review, we discuss recent findings on the computational principles of social reinforcement learning. In particular, we focus on two under-explored issues: from whom individuals choose to learn, and how they adjust their social learning strategies to navigate the &#x201C;exploration&#x2013;exploitation dilemma,&#x201D; a key computational challenge in reinforcement learning. We believe that this review offers a new perspective on the social learning literature by re-examining it through the lens of this fundamental reinforcement learning challenge.</p>
<sec id="sec2">
<title>Exploration&#x2013;exploitation dilemma in individual learning</title>
<p>Before addressing social learning, we first characterize the exploration&#x2013;exploitation dilemma in individual contexts (<xref ref-type="bibr" rid="ref41">Schulz and Gershman, 2019</xref>; <xref ref-type="bibr" rid="ref52">Wilson et al., 2021</xref>; <xref ref-type="bibr" rid="ref18">Hills et al., 2015</xref>). This dilemma is best illustrated by the &#x201C;restaurant problem.&#x201D; Consider moving to a new city and searching for good dining options. After a period of trial and error, you identify a favorite. At that point, you face a choice: return to your favorite place (&#x201C;exploitation&#x201D;) or continue searching for potentially better options (&#x201C;exploration&#x201D;). If you explore too much, you forego the guaranteed pleasure of your favorite spot; if you exploit too much, you risk missing out on a superior experience. Due to this inherent trade-off, the problem is non-trivial. While computer science has proposed various algorithms to address it (<xref ref-type="bibr" rid="ref23">Lattimore and Szepesv&#x00E1;ri, 2020</xref>), no single definitive solution exists for all environments.</p>
<p>A simple algorithm is called &#x201C;random exploration.&#x201D; In this algorithm, exploration is implemented as stochasticity in decision-making. That is, an agent sometimes chooses the option with a lower estimated value with a certain probability, which is exploratory behavior. A more sophisticated version is called &#x201C;directed exploration.&#x201D; In this algorithm, an agent takes into account the uncertainty of value estimation for each of the available options, often defined by the Bayesian precision or approximated by the number of times the agent sampled the corresponding option. That is, the agent is more likely to choose an uncertain (unfamiliar) option. Consider the case of decision-making with two alternative options. If the agent exhibits no exploration, her choice is guided by the value difference without noise (<xref ref-type="fig" rid="fig2">Figure 2a</xref>). If she employs random exploration to some degree, she sometimes chooses the option with lower value (<xref ref-type="fig" rid="fig2">Figure 2b</xref>). In directed exploration, her choice is guided not only by the value difference but also by the uncertainty of value estimation (<xref ref-type="fig" rid="fig2">Figure 2c</xref>): the uncertain option is more likely to be chosen.</p>
<fig position="float" id="fig2">
<label>Figure 2</label>
<caption>
<p>Decision-making with different exploration strategies. <bold>(a)</bold> No exploration. The probability of choosing the uncertain option over the certain option is plotted as a function of the value difference. The agent always chooses the option with the higher estimated value (i.e., deterministic). <bold>(b)</bold> Random exploration. The agent generally favors the option with the higher value, but with added stochasticity (noise). Consequently, the lower-value option is occasionally chosen by chance. The format is the same as <bold>(a)</bold>. <bold>(c)</bold> Directed exploration. The agent considers the uncertainty of the value estimation. The agent is more likely to choose the uncertain option, even if its expected value is lower, to gain information. The format is the same as <bold>(a)</bold>.</p>
</caption>
<graphic xlink:href="fncir-20-1781811-g002.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Three-panel figure compares decision functions for choosing an uncertain option based on value difference. Panel a shows a step function at zero. Panel b displays a sigmoid curve crossing at zero. Panel c presents a step function at negative zero point five. All graphs plot probability of choosing uncertain option on the y-axis and value difference on the x-axis.</alt-text>
</graphic>
</fig>
<p>In neuroscience and psychology, a growing body of work has investigated how humans resolve the exploration&#x2013;exploitation dilemma. Behavioral studies indicate that people deploy at least two dissociable strategies (<xref ref-type="bibr" rid="ref15">Gershman, 2018</xref>; <xref ref-type="bibr" rid="ref53">Wilson et al., 2014</xref>): random exploration, which increases choice stochasticity to sample alternatives, and directed exploration, which preferentially targets informative options (e.g., those with higher uncertainty). Developmental evidence suggests that these two forms of exploration follow distinct developmental trajectories, implying separable underlying mechanisms (<xref ref-type="bibr" rid="ref42">Somerville et al., 2017</xref>). Computational modeling of behavior further demonstrates that people adjust the extent of the two types of exploration depending on uncertainty in their value estimates, in a manner consistent with Thompson sampling and the Upper Confidence Bound (UCB) algorithm (<xref ref-type="bibr" rid="ref15">Gershman, 2018</xref>). Moreover, neuroimaging and brain stimulation studies have linked directed exploration to neural computations in the ventrolateral prefrontal cortex (vlPFC), whereas random exploration has been associated with the dorsolateral prefrontal cortex (dlPFC) (<xref ref-type="bibr" rid="ref49">Tomov et al., 2020</xref>; <xref ref-type="bibr" rid="ref57">Zajkowski et al., 2017</xref>; <xref ref-type="bibr" rid="ref1">Badre et al., 2012</xref>). Together, these findings support a neurocomputational account in which multiple exploration systems&#x2014;implemented in distinct prefrontal circuits&#x2014;jointly support adaptive decision-making.</p>
</sec>
<sec id="sec3">
<title>Neurocomputational mechanisms of human social learning</title>
<p>A large body of research has examined the neurocomputational mechanisms underlying human social learning by combining functional magnetic resonance imaging (fMRI) with computational modeling (<xref ref-type="bibr" rid="ref10">Charpentier and O&#x2019;Doherty, 2018</xref>; <xref ref-type="bibr" rid="ref7">Burke et al., 2010</xref>). These studies have demonstrated that social learning is not a unitary process but relies on multiple complementary strategies implemented in distinct subregions of the prefrontal cortex (PFC) (<xref ref-type="bibr" rid="ref48">Suzuki and O&#x2019;Doherty, 2020</xref>).</p>
<p>The first strategy is learning from others&#x2019; rewards (often termed &#x201C;emulation&#x201D; of value). In this process, observers update their own value estimates of options by monitoring the outcomes received by others. This learning is driven by observational reward prediction errors&#x2014;the discrepancy between the observed reward and the observer&#x2019;s expectation. Crucially, neuroimaging evidence consistently localizes this signal to the ventromedial prefrontal cortex (vmPFC) (<xref ref-type="fig" rid="fig1">Figure 1c</xref>) (<xref ref-type="bibr" rid="ref7">Burke et al., 2010</xref>; <xref ref-type="bibr" rid="ref46">Suzuki et al., 2012</xref>), a region central to personal value processing. A meta-analysis has further confirmed that the vmPFC encodes reward prediction errors regardless of whether the recipient is the self or another, suggesting a shared neural representation for value updating (<xref ref-type="bibr" rid="ref26">Morelli et al., 2015</xref>).</p>
<p>The second strategy is learning from others&#x2019; actions (&#x201C;imitation&#x201D;). Unlike emulation, this process focuses on predicting the partner&#x2019;s behavior itself, independent of the immediate outcome. This form of learning is driven by action prediction errors&#x2014;the discrepancy between a partner&#x2019;s actual choice and the observer&#x2019;s prediction of that choice. Such signals are typically associated with activity in the lateral PFC (lPFC; <xref ref-type="fig" rid="fig1">Figure 1b</xref>), a region implicated in inferring others&#x2019; intentions or hidden states (<xref ref-type="bibr" rid="ref7">Burke et al., 2010</xref>; <xref ref-type="bibr" rid="ref46">Suzuki et al., 2012</xref>). Collectively, these findings suggest that these two distinct strategies, emulation and imitation, shape social learning.</p>
<p>Findings on the striatal roles in social learning to date are mixed. Some studies have reported significant coupling between observational reward prediction errors and neural activity in the dorsal and ventral striatum (<xref ref-type="bibr" rid="ref11">Cooper et al., 2012</xref>; <xref ref-type="bibr" rid="ref7">Burke et al., 2010</xref>), consistent with the striatum as a core neural locus of emulation. However, a meta-analysis did not implicate the striatum in encoding observational prediction errors (<xref ref-type="bibr" rid="ref26">Morelli et al., 2015</xref>).</p>
</sec>
<sec id="sec4">
<title>Modulation of social learning strategies</title>
<p>In social learning settings, individuals can free-ride on others&#x2019; exploration. By observing the outcomes of others&#x2019; choices, people can acquire novel information about unfamiliar options without sampling those options themselves. Crucially, learning from others&#x2019; exploration allows individuals to avoid the direct costs of exploration (i.e., foregoing the immediate benefits of exploiting the currently best-known option). Theoretical studies in economics have shown that such informational externalities can generate a free-rider problem when extending the individual reinforcement learning framework (i.e., the two-armed bandit task) to a multi-player setting (<xref ref-type="bibr" rid="ref4">Bolton and Harris, 1999</xref>; <xref ref-type="bibr" rid="ref21">Keller et al., 2005</xref>; <xref ref-type="bibr" rid="ref37">Rogers, 1988</xref>; <xref ref-type="bibr" rid="ref43">Suganuma et al., 2025</xref>). Specifically, when information produced by exploration is non-excludable, rational agents may strategically reduce their own exploration, relying instead on others to bear the cost. Consequently, the aggregate level of exploration falls below the socially optimal level. These mathematical analyses demonstrate that social learning does not necessarily yield socially ideal outcomes and can sometimes lead to a stagnation of exploration.</p>
<p>Empirical evidence regarding these predictions, however, remains mixed. Several studies utilizing a multi-player reinforcement learning task have observed a reduction in exploration (<xref ref-type="bibr" rid="ref50">Toyokawa et al., 2014</xref>; <xref ref-type="bibr" rid="ref54">Witt et al., 2024</xref>). In this set of experiments, participants exhibited lower levels of random and directed exploration in group contexts compared to individual contexts&#x2014;a pattern consistent with strategic free-riding, even while collective performance improved. In contrast, a recent study reported social conformity in exploration (<xref ref-type="bibr" rid="ref12">Danwitz and von Helversen, 2025</xref>). In their experiment, participants performed a task alongside agents exhibiting varying degrees of directed exploration. The results showed that exposure to highly exploratory others led participants to increase their own random and directed exploration. This suggests that social information can promote, rather than suppress, exploratory behavior. Together, these findings highlight that social learning can either attenuate or amplify exploration depending on the task structure and the observed behavior.</p>
<p>Beyond contexts focused specifically on exploration, a substantial literature has examined how people adapt their social learning strategies based on the characteristics of others (<xref ref-type="bibr" rid="ref30">Najar et al., 2020</xref>; <xref ref-type="bibr" rid="ref20">Kang et al., 2024</xref>; <xref ref-type="bibr" rid="ref19">Kang et al., 2021</xref>). For example, one study tested whether individuals modulate the degree of imitation&#x2014;learning from others&#x2019; actions&#x2014;depending on the quality of the social source. Computational modeling indicated that imitation is selectively upregulated when learning from high-performing others, consistent with the idea that observers weight social information by its inferred reliability (<xref ref-type="bibr" rid="ref30">Najar et al., 2020</xref>). Additionally, a study combining behavioral modeling with continuous theta-burst stimulation (cTBS) probed the neural mechanisms governing when imitation is deployed. Results suggest that imitation is prioritized when others&#x2019; actions are predictable, and that this predictability-dependent reliance is causally regulated by the dorsomedial prefrontal cortex (dmPFC) (<xref ref-type="bibr" rid="ref20">Kang et al., 2024</xref>). Social learning is also shaped by group membership; for instance, imitation typically increases for in-group relative to out-group members (<xref ref-type="bibr" rid="ref19">Kang et al., 2021</xref>). Notably, individual differences in this bias are captured by neural learning signals: the differential weighting of in-group information correlates with action prediction error encoding in the lateral prefrontal cortex (lPFC), suggesting that lPFC computations support selective updating based on social identity (<xref ref-type="bibr" rid="ref19">Kang et al., 2021</xref>). 
An open question is whether such social learning strategies rely on social-specific or domain-general mechanisms (<xref ref-type="bibr" rid="ref17">Heyes and Pearce, 2015</xref>). Although a recent behavioral study suggests that social learning strategies are updated via domain-general associative learning (<xref ref-type="bibr" rid="ref39">Schultner et al., 2025</xref>), further evidence is needed to draw a definitive conclusion and to clarify the underlying neural mechanisms.</p>
<p>Recent work has explored how the brain arbitrates between imitation and emulation. For instance, the ventral PFC has been shown to dynamically control the weights assigned to imitation versus higher-order emulation of others&#x2019; goals on a trial-by-trial basis, prioritizing the strategy that offers greater predictive reliability (<xref ref-type="bibr" rid="ref9">Charpentier et al., 2020</xref>). A subsequent study further revealed that individual differences in reliance on higher-order emulation were associated with autism-like traits in the general population (<xref ref-type="bibr" rid="ref56">Wu et al., 2024</xref>).</p>
<p>Another critical factor modulating social learning strategies is the observer&#x2019;s own decision confidence. From a Bayesian perspective, optimal information integration requires weighing sources according to their reliability. Consistent with this, a breadth of experimental work demonstrates that individuals rely more heavily on social information when their own estimation of the environment is uncertain&#x2014;a strategy often referred to as &#x201C;copy-when-uncertain&#x201D; (<xref ref-type="bibr" rid="ref27">Morgan et al., 2012</xref>). Specifically, when subjective confidence in one&#x2019;s own choice is low, the weight assigned to social signals increases, effectively acting as a compensatory mechanism (<xref ref-type="bibr" rid="ref34">Pescetelli et al., 2021</xref>; <xref ref-type="bibr" rid="ref12">Danwitz and von Helversen, 2025</xref>; <xref ref-type="bibr" rid="ref27">Morgan et al., 2012</xref>; <xref ref-type="bibr" rid="ref51">Toyokawa et al., 2017</xref>). However, this uncertainty-dependent modulation is not uniform across the population; substantial individual differences exist in how strictly agents adhere to this optimal weighting, with some individuals exhibiting persistent egocentric biases regardless of their own uncertainty (<xref ref-type="bibr" rid="ref12">Danwitz and von Helversen, 2025</xref>; <xref ref-type="bibr" rid="ref51">Toyokawa et al., 2017</xref>; <xref ref-type="bibr" rid="ref28">Morin et al., 2021</xref>).</p>
</sec>
<sec id="sec5">
<title>Partner selection in social learning</title>
<p>A relatively underexplored issue in social learning is partner selection. In typical laboratory experiments, participants are assigned a fixed partner and learn from the partner&#x2019;s experiences; they rarely have the opportunity to choose whom to observe. In real-world settings, however, individuals actively select their information sources. For example, when choosing a restaurant based on social media, one must decide whose opinions to trust. Such selection decisions can fundamentally shape the efficacy of social learning.</p>
<p>Our recent study examined whom people prefer to learn from, framing the question in terms of the exploration&#x2013;exploitation dilemma (<xref ref-type="bibr" rid="ref29">Morishita et al., 2025</xref>). Using a behavioral experiment combined with computational modeling, we tested two competing hypotheses. The first hypothesis posits that individuals preferentially learn from partners exhibiting a higher degree of random exploration. This strategy would be advantageous because the partner&#x2019;s exploration generates new information, allowing the learner to continue exploiting currently favorable options. The second hypothesis posits that individuals preferentially learn from partners with a lower degree of random exploration. This strategy would be advantageous when learners primarily rely on imitation (i.e., learning from others&#x2019; actions), as less exploratory partners behave more consistently and may therefore appear more successful and reliable.</p>
<p>The preregistered experiment supported the second, reliability-seeking hypothesis: participants exhibited a significant preference for learning from less exploratory partners over highly exploratory ones. Furthermore, subsequent computational analyses revealed that individual differences in this partner preference were linked to specific social learning styles. Participants who preferred less exploratory partners relied primarily on imitation (learning from others&#x2019; actions), whereas those who preferred highly exploratory partners relied more on emulation (learning from others&#x2019; rewards). These findings suggest that while there is a general bias toward stable, reliable partners, this preference is modulated by the observer&#x2019;s underlying learning strategy: imitators seek consistency, while emulators seek information.</p>
<p>Research indicates that partner selection in social learning is governed by multiple factors. First, people preferentially learn from successful individuals. For example, in an artifact-design task, participants imitated peers who achieved higher payoffs, consistent with a &#x201C;success-biased&#x201D; strategy (<xref ref-type="bibr" rid="ref25">Mesoudi, 2011</xref>). Related work suggests that learners also copy &#x201C;prestigious&#x201D; individuals&#x2014;defined as those who have been frequently copied by others in the past (<xref ref-type="bibr" rid="ref6">Brand et al., 2020</xref>)&#x2014;indicating that social influence is amplified by reputational cues beyond objective performance. Analogously, there is a robust tendency toward social conformity: following the majority. Across diverse paradigms&#x2014;ranging from perceptual and value-based decision-making (<xref ref-type="bibr" rid="ref45">Suzuki et al., 2015</xref>, <xref ref-type="bibr" rid="ref47">2016</xref>; <xref ref-type="bibr" rid="ref14">Garvert et al., 2015</xref>; <xref ref-type="bibr" rid="ref36">Reiter et al., 2019</xref>) to reinforcement learning (<xref ref-type="bibr" rid="ref7">Burke et al., 2010</xref>; <xref ref-type="bibr" rid="ref9">Charpentier et al., 2020</xref>)&#x2014;individuals systematically shift their choices toward group norms. Similar effects appear in preferential domains like food evaluation and face attractiveness (<xref ref-type="bibr" rid="ref22">Klucharev et al., 2009</xref>; <xref ref-type="bibr" rid="ref31">Nook and Zaki, 2015</xref>; <xref ref-type="bibr" rid="ref24">Levorsen et al., 2021</xref>). Together, these findings demonstrate that partner selection is shaped by multiple sources of influence: demonstrated success, socially conferred prestige, and majority norms.</p>
<p>A promising future direction is to investigate the neurocomputational mechanisms underlying reliability-seeking biases in partner selection&#x2014;preferences to learn from partners who are competitive, predictable, successful, and/or in the majority. Prior studies using a range of decision-making tasks in social contexts have implicated the medial prefrontal cortex (mPFC) and temporoparietal junction (TPJ) in tracking others&#x2019; expertise (<xref ref-type="bibr" rid="ref55">Wittmann et al., 2016</xref>; <xref ref-type="bibr" rid="ref5">Boorman et al., 2013</xref>), fidelity (<xref ref-type="bibr" rid="ref3">Behrens et al., 2008</xref>), and the majority&#x2019;s choice (<xref ref-type="bibr" rid="ref45">Suzuki et al., 2015</xref>). Furthermore, predictability-dependent social learning has been shown to be causally regulated by the dmPFC (<xref ref-type="bibr" rid="ref20">Kang et al., 2024</xref>). These results suggest that a network including the mPFC and TPJ may govern reliability-seeking preferences.</p>
</sec>
</sec>
<sec sec-type="discussion" id="sec6">
<title>Discussion</title>
<p>In this mini review, we summarized recent advances in the neurocomputational principles of social learning. In particular, we discussed how social learning strategies&#x2014;including the critical decision of from whom to learn&#x2014;are modulated by partner characteristics such as the exploration&#x2013;exploitation balance, decision quality, predictability, group membership, and social status. However, a significant open question remains: which specific partner attributes drive these strategic adjustments? In naturalistic settings, these characteristics are often empirically intertwined. For example, a lower level of random exploration (i.e., reduced noise) typically correlates with higher decision quality, greater predictability, and often prestige or majority status. Similarly, in-group membership frequently covaries with predictability, as agents possess richer prior knowledge about their own group&#x2019;s norms. Consequently, it is difficult to determine whether observed biases reflect sensitivity to competence, predictability, social identity, or a combination thereof, although a supplementary analysis in our study suggests that competence contributes more than predictability (<xref ref-type="bibr" rid="ref29">Morishita et al., 2025</xref>). Furthermore, few studies in the social learning literature to date have carefully distinguished between random and directed exploration (but see <xref ref-type="bibr" rid="ref12">Danwitz and von Helversen, 2025</xref>). Future work should therefore rigorously test the primary contribution of competence and/or disentangle these factors&#x2014;for example, by orthogonalizing competence, predictability, and status in experimental designs&#x2014;and develop computational models that separately parameterize beliefs about a partner&#x2019;s reliability versus their informational value. Such precision is essential to clarify how social learning is adaptively tuned and which neural computations implement these adjustments.</p>
</sec>
</body>
<back>
<sec sec-type="author-contributions" id="sec7">
<title>Author contributions</title>
<p>GM: Writing &#x2013; review &#x0026; editing, Writing &#x2013; original draft, Investigation, Conceptualization. SS: Writing &#x2013; original draft, Conceptualization, Investigation, Funding acquisition, Supervision, Writing &#x2013; review &#x0026; editing.</p>
</sec>
<sec sec-type="COI-statement" id="sec8">
<title>Conflict of interest</title>
<p>The author(s) declared that this work was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
<p>The author SS declared that they were an editorial board member of Frontiers, at the time of submission. This had no impact on the peer review process and the final decision.</p>
</sec>
<sec sec-type="ai-statement" id="sec9">
<title>Generative AI statement</title>
<p>The author(s) declared that Generative AI was used in the creation of this manuscript. Generative AI was used to assist with language proofreading.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p>
</sec>
<sec sec-type="disclaimer" id="sec10">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="ref1"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Badre</surname><given-names>D.</given-names></name> <name><surname>Doll</surname><given-names>B. B.</given-names></name> <name><surname>Long</surname><given-names>N. M.</given-names></name> <name><surname>Frank</surname><given-names>M. J.</given-names></name></person-group> (<year>2012</year>). <article-title>Rostrolateral prefrontal cortex and individual differences in uncertainty-driven exploration</article-title>. <source>Neuron</source> <volume>73</volume>, <fpage>595</fpage>&#x2013;<lpage>607</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.neuron.2011.12.025</pub-id>, <pub-id pub-id-type="pmid">22325209</pub-id></mixed-citation></ref>
<ref id="ref2"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Behrens</surname><given-names>T. E. J.</given-names></name> <name><surname>Hunt</surname><given-names>L. T.</given-names></name> <name><surname>Rushworth</surname><given-names>M. F. S.</given-names></name></person-group> (<year>2009</year>). <article-title>The computation of social behavior</article-title>. <source>Science</source> <volume>324</volume>, <fpage>1160</fpage>&#x2013;<lpage>1164</lpage>. doi: <pub-id pub-id-type="doi">10.1126/science.1169694</pub-id>, <pub-id pub-id-type="pmid">19478175</pub-id></mixed-citation></ref>
<ref id="ref3"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Behrens</surname><given-names>T. E. J.</given-names></name> <name><surname>Hunt</surname><given-names>L. T.</given-names></name> <name><surname>Woolrich</surname><given-names>M. W.</given-names></name> <name><surname>Rushworth</surname><given-names>M. F. S.</given-names></name></person-group> (<year>2008</year>). <article-title>Associative learning of social value</article-title>. <source>Nature</source> <volume>456</volume>, <fpage>245</fpage>&#x2013;<lpage>249</lpage>. doi: <pub-id pub-id-type="doi">10.1038/nature07538</pub-id>, <pub-id pub-id-type="pmid">19005555</pub-id></mixed-citation></ref>
<ref id="ref4"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Bolton</surname><given-names>P.</given-names></name> <name><surname>Harris</surname><given-names>C.</given-names></name></person-group> (<year>1999</year>). <article-title>Strategic experimentation</article-title>. <source>Econometrica</source> <volume>67</volume>, <fpage>349</fpage>&#x2013;<lpage>374</lpage>. doi: <pub-id pub-id-type="doi">10.1111/1468-0262.00022</pub-id></mixed-citation></ref>
<ref id="ref5"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Boorman</surname><given-names>E. D.</given-names></name> <name><surname>O&#x2019;Doherty</surname><given-names>J. P.</given-names></name> <name><surname>Adolphs</surname><given-names>R.</given-names></name> <name><surname>Rangel</surname><given-names>A.</given-names></name></person-group> (<year>2013</year>). <article-title>The behavioral and neural mechanisms underlying the tracking of expertise</article-title>. <source>Neuron</source> <volume>80</volume>, <fpage>1558</fpage>&#x2013;<lpage>1571</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.neuron.2013.10.024</pub-id></mixed-citation></ref>
<ref id="ref6"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Brand</surname><given-names>C. O.</given-names></name> <name><surname>Heap</surname><given-names>S.</given-names></name> <name><surname>Morgan</surname><given-names>T. J. H.</given-names></name> <name><surname>Mesoudi</surname><given-names>A.</given-names></name></person-group> (<year>2020</year>). <article-title>The emergence and adaptive use of prestige in an online social learning task</article-title>. <source>Sci. Rep.</source> <volume>10</volume>:<fpage>12095</fpage>. doi: <pub-id pub-id-type="doi">10.1038/s41598-020-68982-4</pub-id>, <pub-id pub-id-type="pmid">32694697</pub-id></mixed-citation></ref>
<ref id="ref7"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Burke</surname><given-names>C. J.</given-names></name> <name><surname>Tobler</surname><given-names>P. N.</given-names></name> <name><surname>Baddeley</surname><given-names>M.</given-names></name> <name><surname>Schultz</surname><given-names>W.</given-names></name></person-group> (<year>2010</year>). <article-title>Neural mechanisms of observational learning</article-title>. <source>Proc. Natl. Acad. Sci. USA</source> <volume>107</volume>, <fpage>14431</fpage>&#x2013;<lpage>14436</lpage>. doi: <pub-id pub-id-type="doi">10.1073/pnas.1003111107</pub-id>, <pub-id pub-id-type="pmid">20660717</pub-id></mixed-citation></ref>
<ref id="ref8"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Chamley</surname><given-names>C.</given-names></name></person-group> (<year>2004</year>). <source>Rational herds: economic models of social learning</source>. <publisher-loc>New York</publisher-loc>: <publisher-name>Cambridge University Press</publisher-name>.</mixed-citation></ref>
<ref id="ref9"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Charpentier</surname><given-names>C. J.</given-names></name> <name><surname>Iigaya</surname><given-names>K.</given-names></name> <name><surname>O&#x2019;Doherty</surname><given-names>J. P.</given-names></name></person-group> (<year>2020</year>). <article-title>A neuro-computational account of arbitration between choice imitation and goal emulation during human observational learning</article-title>. <source>Neuron</source> <volume>106</volume>, <fpage>687</fpage>&#x2013;<lpage>699.e7</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.neuron.2020.02.028</pub-id>, <pub-id pub-id-type="pmid">32187528</pub-id></mixed-citation></ref>
<ref id="ref10"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Charpentier</surname><given-names>C. J.</given-names></name> <name><surname>O&#x2019;Doherty</surname><given-names>J. P.</given-names></name></person-group> (<year>2018</year>). <article-title>The application of computational models to social neuroscience: promises and pitfalls</article-title>. <source>Soc. Neurosci.</source> <volume>13</volume>, <fpage>637</fpage>&#x2013;<lpage>647</lpage>. doi: <pub-id pub-id-type="doi">10.1080/17470919.2018.1518834</pub-id>, <pub-id pub-id-type="pmid">30173633</pub-id></mixed-citation></ref>
<ref id="ref11"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Cooper</surname><given-names>J. C.</given-names></name> <name><surname>Dunne</surname><given-names>S.</given-names></name> <name><surname>Furey</surname><given-names>T.</given-names></name> <name><surname>O&#x2019;Doherty</surname><given-names>J. P.</given-names></name></person-group> (<year>2012</year>). <article-title>Human dorsal striatum encodes prediction errors during observational learning of instrumental actions</article-title>. <source>J. Cogn. Neurosci.</source> <volume>24</volume>, <fpage>106</fpage>&#x2013;<lpage>118</lpage>. doi: <pub-id pub-id-type="doi">10.1162/jocn_a_00114</pub-id>, <pub-id pub-id-type="pmid">21812568</pub-id></mixed-citation></ref>
<ref id="ref12"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Danwitz</surname><given-names>L.</given-names></name> <name><surname>von Helversen</surname><given-names>B.</given-names></name></person-group> (<year>2025</year>). <article-title>Observational learning of exploration-exploitation strategies in bandit tasks</article-title>. <source>Cognition</source> <volume>259</volume>:<fpage>106124</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.cognition.2025.106124</pub-id></mixed-citation></ref>
<ref id="ref13"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Gari&#x00E9;py</surname><given-names>J.-F.</given-names></name> <name><surname>Watson</surname><given-names>K. K.</given-names></name> <name><surname>Du</surname><given-names>E.</given-names></name> <name><surname>Xie</surname><given-names>D. L.</given-names></name> <name><surname>Erb</surname><given-names>J.</given-names></name> <name><surname>Amasino</surname><given-names>D.</given-names></name> <etal/></person-group>. (<year>2014</year>). <article-title>Social learning in humans and other animals</article-title>. <source>Front. Neurosci.</source> <volume>8</volume>:<fpage>58</fpage>. doi: <pub-id pub-id-type="doi">10.3389/fnins.2014.00058</pub-id>, <pub-id pub-id-type="pmid">24765063</pub-id></mixed-citation></ref>
<ref id="ref14"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Garvert</surname><given-names>M. M.</given-names></name> <name><surname>Moutoussis</surname><given-names>M.</given-names></name> <name><surname>Kurth-Nelson</surname><given-names>Z.</given-names></name> <name><surname>Behrens</surname><given-names>T. E. J.</given-names></name> <name><surname>Dolan</surname><given-names>R. J.</given-names></name></person-group> (<year>2015</year>). <article-title>Learning-induced plasticity in medial prefrontal cortex predicts preference malleability</article-title>. <source>Neuron</source> <volume>85</volume>, <fpage>418</fpage>&#x2013;<lpage>428</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.neuron.2014.12.033</pub-id>, <pub-id pub-id-type="pmid">25611512</pub-id></mixed-citation></ref>
<ref id="ref15"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Gershman</surname><given-names>S. J.</given-names></name></person-group> (<year>2018</year>). <article-title>Deconstructing the human algorithms for exploration</article-title>. <source>Cognition</source> <volume>173</volume>, <fpage>34</fpage>&#x2013;<lpage>42</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.cognition.2017.12.014</pub-id></mixed-citation></ref>
<ref id="ref16"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Glimcher</surname><given-names>P. W.</given-names></name></person-group> (<year>2011</year>). <article-title>Understanding dopamine and reinforcement learning: the dopamine reward prediction error hypothesis</article-title>. <source>Proc. Natl. Acad. Sci. USA</source> <volume>108</volume>, <fpage>15647</fpage>&#x2013;<lpage>15654</lpage>. doi: <pub-id pub-id-type="doi">10.1073/pnas.1014269108</pub-id></mixed-citation></ref>
<ref id="ref17"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Heyes</surname><given-names>C.</given-names></name> <name><surname>Pearce</surname><given-names>J. M.</given-names></name></person-group> (<year>2015</year>). <article-title>Not-so-social learning strategies</article-title>. <source>Proc. R. Soc. B Biol. Sci.</source> <volume>282</volume>:<fpage>20141709</fpage>. doi: <pub-id pub-id-type="doi">10.1098/rspb.2014.1709</pub-id>, <pub-id pub-id-type="pmid">25608880</pub-id></mixed-citation></ref>
<ref id="ref18"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hills</surname><given-names>T. T.</given-names></name> <name><surname>Todd</surname><given-names>P. M.</given-names></name> <name><surname>Lazer</surname><given-names>D.</given-names></name> <name><surname>Redish</surname><given-names>A. D.</given-names></name> <name><surname>Couzin</surname><given-names>I. D.</given-names></name><collab id="coll1">Cognitive Search Research Group</collab></person-group> (<year>2015</year>). <article-title>Exploration versus exploitation in space, mind, and society</article-title>. <source>Trends Cogn. Sci.</source> <volume>19</volume>, <fpage>46</fpage>&#x2013;<lpage>54</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.tics.2014.10.004</pub-id>, <pub-id pub-id-type="pmid">25487706</pub-id></mixed-citation></ref>
<ref id="ref19"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kang</surname><given-names>P.</given-names></name> <name><surname>Burke</surname><given-names>C. J.</given-names></name> <name><surname>Tobler</surname><given-names>P. N.</given-names></name> <name><surname>Hein</surname><given-names>G.</given-names></name></person-group> (<year>2021</year>). <article-title>Why we learn less from observing outgroups</article-title>. <source>J. Neurosci.</source> <volume>41</volume>, <fpage>144</fpage>&#x2013;<lpage>152</lpage>. doi: <pub-id pub-id-type="doi">10.1523/JNEUROSCI.0926-20.2020</pub-id>, <pub-id pub-id-type="pmid">33203741</pub-id></mixed-citation></ref>
<ref id="ref20"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kang</surname><given-names>P.</given-names></name> <name><surname>Moisa</surname><given-names>M.</given-names></name> <name><surname>Lindstr&#x00F6;m</surname><given-names>B.</given-names></name> <name><surname>Soutschek</surname><given-names>A.</given-names></name> <name><surname>Ruff</surname><given-names>C. C.</given-names></name> <name><surname>Tobler</surname><given-names>P. N.</given-names></name></person-group> (<year>2024</year>). <article-title>Causal involvement of dorsomedial prefrontal cortex in learning the predictability of observable actions</article-title>. <source>Nat. Commun.</source> <volume>15</volume>:<fpage>8305</fpage>. doi: <pub-id pub-id-type="doi">10.1038/s41467-024-52559-0</pub-id>, <pub-id pub-id-type="pmid">39333062</pub-id></mixed-citation></ref>
<ref id="ref21"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Keller</surname><given-names>G.</given-names></name> <name><surname>Rady</surname><given-names>S.</given-names></name> <name><surname>Cripps</surname><given-names>M.</given-names></name></person-group> (<year>2005</year>). <article-title>Strategic experimentation with exponential bandits</article-title>. <source>Econometrica</source> <volume>73</volume>, <fpage>39</fpage>&#x2013;<lpage>68</lpage>. doi: <pub-id pub-id-type="doi">10.1111/j.1468-0262.2005.00564.x</pub-id></mixed-citation></ref>
<ref id="ref22"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Klucharev</surname><given-names>V.</given-names></name> <name><surname>Hyt&#x00F6;nen</surname><given-names>K.</given-names></name> <name><surname>Rijpkema</surname><given-names>M.</given-names></name> <name><surname>Smidts</surname><given-names>A.</given-names></name> <name><surname>Fern&#x00E1;ndez</surname><given-names>G.</given-names></name></person-group> (<year>2009</year>). <article-title>Reinforcement learning signal predicts social conformity</article-title>. <source>Neuron</source> <volume>61</volume>, <fpage>140</fpage>&#x2013;<lpage>151</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.neuron.2008.11.027</pub-id>, <pub-id pub-id-type="pmid">19146819</pub-id></mixed-citation></ref>
<ref id="ref23"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Lattimore</surname><given-names>T.</given-names></name> <name><surname>Szepesv&#x00E1;ri</surname><given-names>C.</given-names></name></person-group> (<year>2020</year>). <source>Bandit algorithms</source>. <publisher-loc>New York</publisher-loc>: <publisher-name>Cambridge University Press</publisher-name>.</mixed-citation></ref>
<ref id="ref24"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Levorsen</surname><given-names>M.</given-names></name> <name><surname>Ito</surname><given-names>A.</given-names></name> <name><surname>Suzuki</surname><given-names>S.</given-names></name> <name><surname>Izuma</surname><given-names>K.</given-names></name></person-group> (<year>2021</year>). <article-title>Testing the reinforcement learning hypothesis of social conformity</article-title>. <source>Hum. Brain Mapp.</source> <volume>42</volume>, <fpage>1328</fpage>&#x2013;<lpage>1342</lpage>. doi: <pub-id pub-id-type="doi">10.1002/hbm.25296</pub-id>, <pub-id pub-id-type="pmid">33245196</pub-id></mixed-citation></ref>
<ref id="ref25"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Mesoudi</surname><given-names>A.</given-names></name></person-group> (<year>2011</year>). <article-title>An experimental comparison of human social learning strategies: payoff-biased social learning is adaptive but underused</article-title>. <source>Evol. Hum. Behav.</source> <volume>32</volume>, <fpage>334</fpage>&#x2013;<lpage>342</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.evolhumbehav.2010.12.001</pub-id></mixed-citation></ref>
<ref id="ref26"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Morelli</surname><given-names>S. A.</given-names></name> <name><surname>Sacchet</surname><given-names>M. D.</given-names></name> <name><surname>Zaki</surname><given-names>J.</given-names></name></person-group> (<year>2015</year>). <article-title>Common and distinct neural correlates of personal and vicarious reward: a quantitative meta-analysis</article-title>. <source>NeuroImage</source> <volume>112</volume>, <fpage>244</fpage>&#x2013;<lpage>253</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.neuroimage.2014.12.056</pub-id></mixed-citation></ref>
<ref id="ref27"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Morgan</surname><given-names>T. J. H.</given-names></name> <name><surname>Rendell</surname><given-names>L. E.</given-names></name> <name><surname>Ehn</surname><given-names>M.</given-names></name> <name><surname>Hoppitt</surname><given-names>W.</given-names></name> <name><surname>Laland</surname><given-names>K. N.</given-names></name></person-group> (<year>2012</year>). <article-title>The evolutionary basis of human social learning</article-title>. <source>Proc. Biol. Sci.</source> <volume>279</volume>, <fpage>653</fpage>&#x2013;<lpage>662</lpage>. doi: <pub-id pub-id-type="doi">10.1098/rspb.2011.1172</pub-id>, <pub-id pub-id-type="pmid">21795267</pub-id></mixed-citation></ref>
<ref id="ref28"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Morin</surname><given-names>O.</given-names></name> <name><surname>Jacquet</surname><given-names>P. O.</given-names></name> <name><surname>Vaesen</surname><given-names>K.</given-names></name> <name><surname>Acerbi</surname><given-names>A.</given-names></name></person-group> (<year>2021</year>). <article-title>Social information use and social information waste</article-title>. <source>Philos. Trans. R. Soc. Lond. Ser. B Biol. Sci.</source> <volume>376</volume>:<fpage>20200052</fpage>. doi: <pub-id pub-id-type="doi">10.1098/rstb.2020.0052</pub-id>, <pub-id pub-id-type="pmid">33993762</pub-id></mixed-citation></ref>
<ref id="ref29"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Morishita</surname><given-names>G.</given-names></name> <name><surname>Murawski</surname><given-names>C.</given-names></name> <name><surname>Yadav</surname><given-names>N.</given-names></name> <name><surname>Suzuki</surname><given-names>S.</given-names></name></person-group> (<year>2025</year>). <article-title>Whom do we prefer to learn from in observational reinforcement learning?</article-title> <source>PLoS Comput. Biol.</source> <volume>21</volume>:<fpage>e1013143</fpage>. doi: <pub-id pub-id-type="doi">10.1371/journal.pcbi.1013143</pub-id>, <pub-id pub-id-type="pmid">41359675</pub-id></mixed-citation></ref>
<ref id="ref30"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Najar</surname><given-names>A.</given-names></name> <name><surname>Bonnet</surname><given-names>E.</given-names></name> <name><surname>Bahrami</surname><given-names>B.</given-names></name> <name><surname>Palminteri</surname><given-names>S.</given-names></name></person-group> (<year>2020</year>). <article-title>The actions of others act as a pseudo-reward to drive imitation in the context of social reinforcement learning</article-title>. <source>PLoS Biol.</source> <volume>18</volume>:<fpage>e3001028</fpage>. doi: <pub-id pub-id-type="doi">10.1371/journal.pbio.3001028</pub-id>, <pub-id pub-id-type="pmid">33290387</pub-id></mixed-citation></ref>
<ref id="ref31"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Nook</surname><given-names>E. C.</given-names></name> <name><surname>Zaki</surname><given-names>J.</given-names></name></person-group> (<year>2015</year>). <article-title>Social norms shift behavioral and neural responses to foods</article-title>. <source>J. Cogn. Neurosci.</source> <volume>27</volume>, <fpage>1412</fpage>&#x2013;<lpage>1426</lpage>. doi: <pub-id pub-id-type="doi">10.1162/jocn_a_00795</pub-id>, <pub-id pub-id-type="pmid">25671502</pub-id></mixed-citation></ref>
<ref id="ref32"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>O&#x2019;Doherty</surname><given-names>J.</given-names></name> <name><surname>Dayan</surname><given-names>P.</given-names></name> <name><surname>Schultz</surname><given-names>J.</given-names></name> <name><surname>Deichmann</surname><given-names>R.</given-names></name> <name><surname>Friston</surname><given-names>K.</given-names></name> <name><surname>Dolan</surname><given-names>R. J.</given-names></name></person-group> (<year>2004</year>). <article-title>Dissociable roles of ventral and dorsal striatum in instrumental conditioning</article-title>. <source>Science</source> <volume>304</volume>, <fpage>452</fpage>&#x2013;<lpage>454</lpage>. doi: <pub-id pub-id-type="doi">10.1126/science.1094285</pub-id></mixed-citation></ref>
<ref id="ref33"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Olsson</surname><given-names>A.</given-names></name> <name><surname>Knapska</surname><given-names>E.</given-names></name> <name><surname>Lindstr&#x00F6;m</surname><given-names>B.</given-names></name></person-group> (<year>2020</year>). <article-title>The neural and computational systems of social learning</article-title>. <source>Nat. Rev. Neurosci.</source> <volume>21</volume>, <fpage>197</fpage>&#x2013;<lpage>212</lpage>. doi: <pub-id pub-id-type="doi">10.1038/s41583-020-0276-4</pub-id>, <pub-id pub-id-type="pmid">32221497</pub-id></mixed-citation></ref>
<ref id="ref34"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Pescetelli</surname><given-names>N.</given-names></name> <name><surname>Hauperich</surname><given-names>A.-K.</given-names></name> <name><surname>Yeung</surname><given-names>N.</given-names></name></person-group> (<year>2021</year>). <article-title>Confidence, advice seeking and changes of mind in decision making</article-title>. <source>Cognition</source> <volume>215</volume>:<fpage>104810</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.cognition.2021.104810</pub-id></mixed-citation></ref>
<ref id="ref35"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Rangel</surname><given-names>A.</given-names></name> <name><surname>Camerer</surname><given-names>C.</given-names></name> <name><surname>Montague</surname><given-names>P. R.</given-names></name></person-group> (<year>2008</year>). <article-title>A framework for studying the neurobiology of value-based decision making</article-title>. <source>Nat. Rev. Neurosci.</source> <volume>9</volume>, <fpage>545</fpage>&#x2013;<lpage>556</lpage>. doi: <pub-id pub-id-type="doi">10.1038/nrn2357</pub-id>, <pub-id pub-id-type="pmid">18545266</pub-id></mixed-citation></ref>
<ref id="ref36"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Reiter</surname><given-names>A.</given-names></name> <name><surname>Suzuki</surname><given-names>S.</given-names></name> <name><surname>O&#x2019;Doherty</surname><given-names>J.</given-names></name> <name><surname>Li</surname><given-names>S.-C.</given-names></name> <name><surname>Eppinger</surname><given-names>B.</given-names></name></person-group> (<year>2019</year>). <article-title>Risk contagion by peers affects learning and decision-making in adolescents</article-title>. <source>J. Exp. Psychol. Gen.</source> <volume>148</volume>, <fpage>1494</fpage>&#x2013;<lpage>1504</lpage>. doi: <pub-id pub-id-type="doi">10.1037/xge0000512</pub-id></mixed-citation></ref>
<ref id="ref37"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Rogers</surname><given-names>A. R.</given-names></name></person-group> (<year>1988</year>). <article-title>Does biology constrain culture?</article-title> <source>Am. Anthropol.</source> <volume>90</volume>, <fpage>819</fpage>&#x2013;<lpage>831</lpage>. doi: <pub-id pub-id-type="doi">10.1525/aa.1988.90.4.02a00030</pub-id></mixed-citation></ref>
<ref id="ref38"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Rutledge</surname><given-names>R. B.</given-names></name> <name><surname>Dean</surname><given-names>M.</given-names></name> <name><surname>Caplin</surname><given-names>A.</given-names></name> <name><surname>Glimcher</surname><given-names>P. W.</given-names></name></person-group> (<year>2010</year>). <article-title>Testing the reward prediction error hypothesis with an axiomatic model</article-title>. <source>J. Neurosci.</source> <volume>30</volume>, <fpage>13525</fpage>&#x2013;<lpage>13536</lpage>. doi: <pub-id pub-id-type="doi">10.1523/JNEUROSCI.1747-10.2010</pub-id>, <pub-id pub-id-type="pmid">20926678</pub-id></mixed-citation></ref>
<ref id="ref39"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Schultner</surname><given-names>D.</given-names></name> <name><surname>Molleman</surname><given-names>L.</given-names></name> <name><surname>Lindstr&#x00F6;m</surname><given-names>B.</given-names></name></person-group> (<year>2025</year>). <article-title>Feature-based reward learning shapes human social learning strategies</article-title>. <source>Nat. Hum. Behav.</source> <volume>9</volume>, <fpage>2183</fpage>&#x2013;<lpage>2198</lpage>. doi: <pub-id pub-id-type="doi">10.1038/s41562-025-02269-4</pub-id>, <pub-id pub-id-type="pmid">40702301</pub-id></mixed-citation></ref>
<ref id="ref40"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Schultz</surname><given-names>W.</given-names></name> <name><surname>Dayan</surname><given-names>P.</given-names></name> <name><surname>Montague</surname><given-names>P. R.</given-names></name></person-group> (<year>1997</year>). <article-title>A neural substrate of prediction and reward</article-title>. <source>Science</source> <volume>275</volume>, <fpage>1593</fpage>&#x2013;<lpage>1599</lpage>. doi: <pub-id pub-id-type="doi">10.1126/science.275.5306.1593</pub-id></mixed-citation></ref>
<ref id="ref41"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Schulz</surname><given-names>E.</given-names></name> <name><surname>Gershman</surname><given-names>S. J.</given-names></name></person-group> (<year>2019</year>). <article-title>The algorithmic architecture of exploration in the human brain</article-title>. <source>Curr. Opin. Neurobiol.</source> <volume>55</volume>, <fpage>7</fpage>&#x2013;<lpage>14</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.conb.2018.11.003</pub-id></mixed-citation></ref>
<ref id="ref42"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Somerville</surname><given-names>L. H.</given-names></name> <name><surname>Sasse</surname><given-names>S. F.</given-names></name> <name><surname>Garrad</surname><given-names>M. C.</given-names></name> <name><surname>Drysdale</surname><given-names>A. T.</given-names></name> <name><surname>Abi Akar</surname><given-names>N.</given-names></name> <name><surname>Insel</surname><given-names>C.</given-names></name> <etal/></person-group>. (<year>2017</year>). <article-title>Charting the expansion of strategic exploratory behavior during adolescence</article-title>. <source>J. Exp. Psychol. Gen.</source> <volume>146</volume>, <fpage>155</fpage>&#x2013;<lpage>164</lpage>. doi: <pub-id pub-id-type="doi">10.1037/xge0000250</pub-id>, <pub-id pub-id-type="pmid">27977227</pub-id></mixed-citation></ref>
<ref id="ref43"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Suganuma</surname><given-names>H.</given-names></name> <name><surname>Katahira</surname><given-names>K.</given-names></name> <name><surname>Ohtsuki</surname><given-names>H.</given-names></name> <name><surname>Kameda</surname><given-names>T.</given-names></name></person-group> (<year>2025</year>). <article-title>How social learning enhances&#x2014;or undermines&#x2014;efficiency and flexibility in collective decision-making under uncertainty</article-title>. <source>Proc. Natl. Acad. Sci. USA</source> <volume>122</volume>:<fpage>e2516827122</fpage>. doi: <pub-id pub-id-type="doi">10.1073/pnas.2516827122</pub-id>, <pub-id pub-id-type="pmid">41284859</pub-id></mixed-citation></ref>
<ref id="ref44"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Sutton</surname><given-names>R. S.</given-names></name> <name><surname>Barto</surname><given-names>A. G.</given-names></name></person-group> (<year>1998</year>). <source>Reinforcement learning: an introduction</source>. <publisher-loc>Cambridge, MA</publisher-loc>: <publisher-name>MIT Press</publisher-name>.</mixed-citation></ref>
<ref id="ref45"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Suzuki</surname><given-names>S.</given-names></name> <name><surname>Adachi</surname><given-names>R.</given-names></name> <name><surname>Dunne</surname><given-names>S.</given-names></name> <name><surname>Bossaerts</surname><given-names>P.</given-names></name> <name><surname>O&#x2019;Doherty</surname><given-names>J. P.</given-names></name></person-group> (<year>2015</year>). <article-title>Neural mechanisms underlying human consensus decision-making</article-title>. <source>Neuron</source> <volume>86</volume>, <fpage>591</fpage>&#x2013;<lpage>602</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.neuron.2015.03.019</pub-id>, <pub-id pub-id-type="pmid">25864634</pub-id></mixed-citation></ref>
<ref id="ref46"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Suzuki</surname><given-names>S.</given-names></name> <name><surname>Harasawa</surname><given-names>N.</given-names></name> <name><surname>Ueno</surname><given-names>K.</given-names></name> <name><surname>Gardner</surname><given-names>J. L.</given-names></name> <name><surname>Ichinohe</surname><given-names>N.</given-names></name> <name><surname>Haruno</surname><given-names>M.</given-names></name> <etal/></person-group>. (<year>2012</year>). <article-title>Learning to simulate others&#x2019; decisions</article-title>. <source>Neuron</source> <volume>74</volume>, <fpage>1125</fpage>&#x2013;<lpage>1137</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.neuron.2012.04.030</pub-id>, <pub-id pub-id-type="pmid">22726841</pub-id></mixed-citation></ref>
<ref id="ref47"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Suzuki</surname><given-names>S.</given-names></name> <name><surname>Jensen</surname><given-names>E. L. S.</given-names></name> <name><surname>Bossaerts</surname><given-names>P.</given-names></name> <name><surname>O&#x2019;Doherty</surname><given-names>J. P.</given-names></name></person-group> (<year>2016</year>). <article-title>Behavioral contagion during learning about another agent&#x2019;s risk-preferences acts on the neural representation of decision-risk</article-title>. <source>Proc. Natl. Acad. Sci. USA</source> <volume>113</volume>, <fpage>3755</fpage>&#x2013;<lpage>3760</lpage>. doi: <pub-id pub-id-type="doi">10.1073/pnas.1600092113</pub-id>, <pub-id pub-id-type="pmid">27001826</pub-id></mixed-citation></ref>
<ref id="ref48"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Suzuki</surname><given-names>S.</given-names></name> <name><surname>O&#x2019;Doherty</surname><given-names>J. P.</given-names></name></person-group> (<year>2020</year>). <article-title>Breaking human social decision making into multiple components and then putting them together again</article-title>. <source>Cortex</source> <volume>127</volume>, <fpage>221</fpage>&#x2013;<lpage>230</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.cortex.2020.02.014</pub-id></mixed-citation></ref>
<ref id="ref49"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Tomov</surname><given-names>M. S.</given-names></name> <name><surname>Truong</surname><given-names>V. Q.</given-names></name> <name><surname>Hundia</surname><given-names>R. A.</given-names></name> <name><surname>Gershman</surname><given-names>S. J.</given-names></name></person-group> (<year>2020</year>). <article-title>Dissociable neural correlates of uncertainty underlie different exploration strategies</article-title>. <source>Nat. Commun.</source> <volume>11</volume>:<fpage>2371</fpage>. doi: <pub-id pub-id-type="doi">10.1038/s41467-020-15766-z</pub-id>, <pub-id pub-id-type="pmid">32398675</pub-id></mixed-citation></ref>
<ref id="ref50"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Toyokawa</surname><given-names>W.</given-names></name> <name><surname>Kim</surname><given-names>H.-R.</given-names></name> <name><surname>Kameda</surname><given-names>T.</given-names></name></person-group> (<year>2014</year>). <article-title>Human collective intelligence under dual exploration-exploitation dilemmas</article-title>. <source>PLoS One</source> <volume>9</volume>:<fpage>e95789</fpage>. doi: <pub-id pub-id-type="doi">10.1371/journal.pone.0095789</pub-id>, <pub-id pub-id-type="pmid">24755892</pub-id></mixed-citation></ref>
<ref id="ref51"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Toyokawa</surname><given-names>W.</given-names></name> <name><surname>Saito</surname><given-names>Y.</given-names></name> <name><surname>Kameda</surname><given-names>T.</given-names></name></person-group> (<year>2017</year>). <article-title>Individual differences in learning behaviours in humans: asocial exploration tendency does not predict reliance on social learning</article-title>. <source>Evol. Hum. Behav.</source> <volume>38</volume>, <fpage>325</fpage>&#x2013;<lpage>333</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.evolhumbehav.2016.11.001</pub-id></mixed-citation></ref>
<ref id="ref52"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wilson</surname><given-names>R. C.</given-names></name> <name><surname>Bonawitz</surname><given-names>E.</given-names></name> <name><surname>Costa</surname><given-names>V. D.</given-names></name> <name><surname>Ebitz</surname><given-names>R. B.</given-names></name></person-group> (<year>2021</year>). <article-title>Balancing exploration and exploitation with information and randomization</article-title>. <source>Curr. Opin. Behav. Sci.</source> <volume>38</volume>, <fpage>49</fpage>&#x2013;<lpage>56</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.cobeha.2020.10.001</pub-id></mixed-citation></ref>
<ref id="ref53"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wilson</surname><given-names>R. C.</given-names></name> <name><surname>Geana</surname><given-names>A.</given-names></name> <name><surname>White</surname><given-names>J. M.</given-names></name> <name><surname>Ludvig</surname><given-names>E. A.</given-names></name> <name><surname>Cohen</surname><given-names>J. D.</given-names></name></person-group> (<year>2014</year>). <article-title>Humans use directed and random exploration to solve the explore-exploit dilemma</article-title>. <source>J. Exp. Psychol. Gen.</source> <volume>143</volume>, <fpage>2074</fpage>&#x2013;<lpage>2081</lpage>. doi: <pub-id pub-id-type="doi">10.1037/a0038199</pub-id>, <pub-id pub-id-type="pmid">25347535</pub-id></mixed-citation></ref>
<ref id="ref54"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Witt</surname><given-names>A.</given-names></name> <name><surname>Toyokawa</surname><given-names>W.</given-names></name> <name><surname>Lala</surname><given-names>K. N.</given-names></name> <name><surname>Gaissmaier</surname><given-names>W.</given-names></name> <name><surname>Wu</surname><given-names>C. M.</given-names></name></person-group> (<year>2024</year>). <article-title>Humans flexibly integrate social information despite interindividual differences in reward</article-title>. <source>Proc. Natl. Acad. Sci. USA</source> <volume>121</volume>:<fpage>e2404928121</fpage>. doi: <pub-id pub-id-type="doi">10.1073/pnas.2404928121</pub-id></mixed-citation></ref>
<ref id="ref55"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wittmann</surname><given-names>M. K.</given-names></name> <name><surname>Kolling</surname><given-names>N.</given-names></name> <name><surname>Faber</surname><given-names>N. S.</given-names></name> <name><surname>Scholl</surname><given-names>J.</given-names></name> <name><surname>Nelissen</surname><given-names>N.</given-names></name> <name><surname>Rushworth</surname><given-names>M. F. S.</given-names></name></person-group> (<year>2016</year>). <article-title>Self-other mergence in the frontal cortex during cooperation and competition</article-title>. <source>Neuron</source> <volume>91</volume>, <fpage>482</fpage>&#x2013;<lpage>493</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.neuron.2016.06.022</pub-id>, <pub-id pub-id-type="pmid">27477020</pub-id></mixed-citation></ref>
<ref id="ref56"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wu</surname><given-names>Q.</given-names></name> <name><surname>Oh</surname><given-names>S.</given-names></name> <name><surname>Tadayonnejad</surname><given-names>R.</given-names></name> <name><surname>Feusner</surname><given-names>J. D.</given-names></name> <name><surname>Cockburn</surname><given-names>J.</given-names></name> <name><surname>O&#x2019;Doherty</surname><given-names>J. P.</given-names></name> <etal/></person-group>. (<year>2024</year>). <article-title>Individual differences in autism-like traits are associated with reduced goal emulation in a computational model of observational learning</article-title>. <source>Nat. Ment. Health</source> <volume>2</volume>, <fpage>1032</fpage>&#x2013;<lpage>1044</lpage>. doi: <pub-id pub-id-type="doi">10.1038/s44220-024-00287-1</pub-id>, <pub-id pub-id-type="pmid">39734327</pub-id></mixed-citation></ref>
<ref id="ref57"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zajkowski</surname><given-names>W. K.</given-names></name> <name><surname>Kossut</surname><given-names>M.</given-names></name> <name><surname>Wilson</surname><given-names>R. C.</given-names></name></person-group> (<year>2017</year>). <article-title>A causal role for right frontopolar cortex in directed, but not random, exploration</article-title>. <source>eLife</source> <volume>6</volume>:<fpage>e27430</fpage>. doi: <pub-id pub-id-type="doi">10.7554/eLife.27430</pub-id></mixed-citation></ref>
</ref-list>
<fn-group>
<fn fn-type="custom" custom-type="edited-by" id="fn0001">
<p>Edited by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1201166/overview">Atsushi Noritake</ext-link>, University of Toyama, Japan</p>
</fn>
<fn fn-type="custom" custom-type="reviewed-by" id="fn0002">
<p>Reviewed by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/160275/overview">Masamichi Sakagami</ext-link>, Tamagawa University, Japan</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3346419/overview">Hidezo Suganuma</ext-link>, The University of Tokyo, Japan</p>
</fn>
</fn-group>
</back>
</article>