<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" article-type="research-article" dtd-version="1.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Psychol.</journal-id>
<journal-title-group>
<journal-title>Frontiers in Psychology</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Psychol.</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">1664-1078</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fpsyg.2026.1752586</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Research</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Pupil size response within direct and random exploration and exploitation behaviors selectively reflects value of control</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Barkay</surname>
<given-names>Gili</given-names>
</name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x002A;</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/3282668"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Gabay</surname>
<given-names>Shai</given-names>
</name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/1887782"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Hertz</surname>
<given-names>Uri</given-names>
</name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
</contrib>
</contrib-group>
<aff id="aff1"><label>1</label><institution>School of Psychological Sciences, University of Haifa</institution>, <city>Haifa</city>, <country country="il">Israel</country></aff>
<aff id="aff2"><label>2</label><institution>The Institute of Information Processing and Decision Making, University of Haifa</institution>, <city>Haifa</city>, <country country="il">Israel</country></aff>
<aff id="aff3"><label>3</label><institution>Department of Cognitive Science, University of Haifa</institution>, <city>Haifa</city>, <country country="il">Israel</country></aff>
<author-notes>
<corresp id="c001"><label>&#x002A;</label>Correspondence: Gili Barkay, <email xlink:href="mailto:gilia.barkay@gmail.com">gilia.barkay@gmail.com</email></corresp>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-03-03">
<day>03</day>
<month>03</month>
<year>2026</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2026</year>
</pub-date>
<volume>17</volume>
<elocation-id>1752586</elocation-id>
<history>
<date date-type="received">
<day>23</day>
<month>11</month>
<year>2025</year>
</date>
<date date-type="rev-recd">
<day>24</day>
<month>01</month>
<year>2026</year>
</date>
<date date-type="accepted">
<day>09</day>
<month>02</month>
<year>2026</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x00A9; 2026 Barkay, Gabay and Hertz.</copyright-statement>
<copyright-year>2026</copyright-year>
<copyright-holder>Barkay, Gabay and Hertz</copyright-holder>
<license>
<ali:license_ref start_date="2026-03-03">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<sec id="sec1001">
<title>Background</title>
<p>Balancing exploration and exploitation is central to adaptive decision-making and is thought to depend on interactions between arousal-related neuromodulation and strategic control. The present study examined how pupil-indexed arousal corresponds to different aspects of exploration and exploitation decisions.</p>
</sec>
<sec id="sec2001">
<title>Method</title>
<p>We used the Horizon Task, which independently manipulated value of control through value uncertainty, information asymmetries, and choice horizon. Thirty-five participants completed 320 mini-games while pupil diameter was continuously recorded, with analyses focused on the first free-choice trial.</p>
</sec>
<sec id="sec3001">
<title>Results</title>
<p>Behaviorally, participants exploited more when value gaps were larger, preferentially sampled the option with fewer prior observations and showed increased exploration in long-horizon conditions, where additional choices enabled the use of newly acquired information. These patterns replicate established patterns of directed and random exploration. Pupillary responses, however, showed a selective profile. For exploitative choices, though not for exploratory choices, pupil size increased when horizons were short and when value differences were small, indicating greater arousal during decisions with higher immediate importance or increased discrimination demands, reflecting increased value of control. Trial-by-trial analyses revealed sustained pre-decision modulation rather than discrete phasic peaks.</p>
</sec>
<sec id="sec4001">
<title>Conclusion</title>
<p>Together, these findings allow integration of the value-of-control approach with exploitative and exploratory control modes, highlighting how strategic demands within each mode shape pupil-linked arousal.</p>
</sec>
</abstract>
<kwd-group>
<kwd>exploration-exploitation</kwd>
<kwd>pupillometry</kwd>
<kwd>cognitive control</kwd>
<kwd>arousal</kwd>
<kwd>value of control</kwd>
</kwd-group>
<funding-group>
<funding-statement>The author(s) declared that financial support was not received for this work and/or its publication.</funding-statement>
</funding-group>
<counts>
<fig-count count="3"/>
<table-count count="2"/>
<equation-count count="0"/>
<ref-count count="30"/>
<page-count count="10"/>
<word-count count="7213"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Cognition</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="sec1">
<title>Introduction</title>
<p>Decision-makers face a fundamental challenge: should they exploit familiar, rewarding options or explore uncertain alternatives that might yield better long-term outcomes? (<xref ref-type="bibr" rid="ref6">Cohen et al., 2007</xref>; <xref ref-type="bibr" rid="ref4">Cogliati Dezza et al., 2017</xref>). This exploration&#x2013;exploitation dilemma arises across species and contexts, from animal foraging to human strategic planning (<xref ref-type="bibr" rid="ref20">Monk et al., 2018</xref>; <xref ref-type="bibr" rid="ref3">Berger-Tal et al., 2014</xref>; <xref ref-type="bibr" rid="ref25">Rich and Gureckis, 2018</xref>), yet questions persist regarding the psychological and neurobiological mechanisms underlying these competing drives (<xref ref-type="bibr" rid="ref3">Berger-Tal et al., 2014</xref>; <xref ref-type="bibr" rid="ref19">Mehlhorn et al., 2015</xref>).</p>
<p>Adaptive gain theory (AGT) provides a neurobiological framework for understanding the explore&#x2013;exploit balance by linking behavioral modes to activity patterns in the locus coeruleus&#x2013;norepinephrine (LC-NE) system (<xref ref-type="bibr" rid="ref1">Aston-Jones and Cohen, 2005</xref>). According to AGT, the LC-NE system operates in two functionally distinct modes. In the phasic mode, the LC exhibits transient bursts of activity in response to task-relevant events, supporting focused attention and goal-directed behavior, conditions optimal for exploiting known reward sources. In the tonic mode, elevated baseline LC firing rates reflect a state of exploratory control&#x2014;heightened arousal and promotion of exploration of alternative options&#x2014;while lower tonic rates are associated with exploitative control, which is focused on choosing the known best option. Critically, these modes are not merely different levels of activation but correspond to qualitatively different patterns of neural activity that bias behavior toward exploitation or exploration, respectively.</p>
<p>Pupil diameter provides a noninvasive window into these LC-NE dynamics. Changes in pupil size closely track LC activity: baseline (average) pupil diameter reflects tonic LC firing, while transient pupil dilations index phasic LC responses (<xref ref-type="bibr" rid="ref16">Joshi et al., 2016</xref>; <xref ref-type="bibr" rid="ref21">Murphy et al., 2014</xref>). This correspondence allows pupillometry to test AGT&#x2019;s core prediction that exploration and exploitation reflect categorically distinct neuromodulatory states. Specifically, AGT predicts that exploration should be accompanied by larger tonic pupil size, reflecting sustained arousal in the tonic LC mode, whereas exploitation should be characterized by smaller baseline pupils with transient phasic dilations time-locked to decision-relevant events (<xref ref-type="bibr" rid="ref14">Jepma and Nieuwenhuis, 2011</xref>). Consistent with this view, recent work by <xref ref-type="bibr" rid="ref8">Fan et al. (2023)</xref> has demonstrated that baseline pupil size tracks total uncertainty in the environment and is associated with increased random exploration, suggesting that tonic arousal reflects a global state of uncertainty rather than specific decision demands (<xref ref-type="bibr" rid="ref8">Fan et al., 2023</xref>).</p>
<p>While the AGT framework links neuromodulatory activity to behavioral flexibility, previous findings revealed that exploration is not a unitary phenomenon. <xref ref-type="bibr" rid="ref29">Wilson et al. (2014)</xref> distinguished directed exploration&#x2014;deliberate information-seeking guided by uncertainty&#x2014;from random exploration, which reflects stochastic choice variability unrelated to specific information goals. This functional distinction challenges AGT&#x2019;s arousal-based account: directed exploration requires effortful goal maintenance and value integration, whereas random exploration may arise from diffuse arousal or decision noise. Yet these two forms of exploration exhibit fundamentally different behavioral patterns: directed exploration is modulated by information structure and planning opportunities, whereas random exploration scales with overall uncertainty regardless of whether the environment affords effective information use (<xref ref-type="bibr" rid="ref12">Gershman, 2019</xref>).</p>
<p>This dissociation indicates that exploration cannot be fully captured by arousal fluctuations alone but depends on strategic control processes selectively recruited according to decision context. The Expected Value of Control (EVC) framework provides a complementary account, proposing that cognitive control is allocated when anticipated benefits&#x2014;such as reducing uncertainty or optimizing future outcomes&#x2014;outweigh intrinsic effort costs (<xref ref-type="bibr" rid="ref27">Shenhav et al., 2013</xref>). Critically, EVC predicts that control deployment should scale with the prospective utility of investing effort in each decision rather than with exploration per se. This perspective suggests that exploration and exploitation behaviors may recruit control levels depending on strategic context, specifically, whether the decision environment affords sufficient opportunity to capitalize on acquired information (<xref ref-type="bibr" rid="ref26">Sadeghiyeh et al., 2020</xref>; <xref ref-type="bibr" rid="ref10">Fr&#x00F6;mer et al., 2021</xref>). Empirical evidence supports this prediction: <xref ref-type="bibr" rid="ref7">Dubois and Hauser (2022)</xref> demonstrated that the planning horizon, the number of choices remaining in a task, dictates exploration strategy. They found that participants strategically increased random exploration only when the horizon was long (e.g., six choices), affording an opportunity to capitalize on the information, but relied less on this strategy when the horizon was short. This cost&#x2013;benefit logic has been formalized computationally by <xref ref-type="bibr" rid="ref22">Musslick et al. (2015)</xref>, who demonstrated that the cognitive mechanisms governing goal-directed response selection determine when to deploy effortful control versus rely on stochastic processes.</p>
<p>Integrating AGT and EVC thus suggests that arousal and control jointly regulate adaptive decision-making: tonic arousal sets background readiness for behavioral flexibility, while strategic control is selectively recruited when decision context renders effortful processing worthwhile. Recent neurophysiological evidence supports this integrated view. <xref ref-type="bibr" rid="ref17">Kozunova et al. (2022)</xref>, using a continuous probabilistic learning task, provided direct support for this distinction. They operationalized exploitation as choosing the known high-payoff (HP) option after learning and directed exploration as choosing the known low-payoff (LP) option. They found that directed exploration elicited significantly larger phasic pupil dilations and slower response times compared to exploitation. This effect was absent during &#x201C;random exploration&#x201D; (choices made before learning), suggesting the pupil response tracked more than just uncertainty. Instead, the authors compellingly attribute the dilation to the cognitive conflict arising from intentionally overriding a strong, learned bias to exploit. However, both exploration and exploitation decisions happened within a continuous learning task, where exploitation reflects low expected value of control, while directed exploration reflects higher value of control. Shorter choice horizon, where the current choice cannot be used to inform future decisions, may reflect different values of control to exploration and exploitation.</p>
<p>This finding isolates a key component of strategic processing, linking pupil dilation to conflict-driven control. A key open question, however, is whether this pupillary response is specific to this form of internal conflict, or if it reflects a broader component of exploration, such as <italic>decision uncertainty</italic>. This distinction is central to integrating the EVC and AGT frameworks, particularly as other findings suggest pupil-linked arousal can be dissociated from effective control deployment. <xref ref-type="bibr" rid="ref10">Fr&#x00F6;mer et al. (2021)</xref>, for example, demonstrated that while neural markers of control (ERPs) scaled with reward and efficacy, pupillary responses showed a contrasting pattern: they were largest under low efficacy conditions. In that context, uncertainty was high, but strategic control was inefficacious. This suggests that pupil responses can track uncertainty-driven arousal even when strategic control is not effectively deployed.</p>
<p>The present study examines how pupil-indexed arousal relates to exploration and exploitation under varying conditions of uncertainty, information structure, and planning opportunity. We used the Horizon Task, a multi-trial decision task in which participants choose between two options with different reward distributions (<xref ref-type="fig" rid="fig1">Figure 1</xref>). In this task, participants complete four forced-choice trials that provide initial information about each option&#x2019;s payoff, followed by either one or six free-choice trials during which they can explore or exploit. We manipulate value differences, information asymmetries, and horizon length to differentially promote directed exploration, random exploration, and exploitation. AGT predicts that explorative control elicits elevated tonic arousal while exploitative control engages phasic responses. EVC predicts arousal scales with the strategic value of control deployment rather than choice type. In the Horizon Task, directed exploration is associated with the choice of the low-value option under long horizon conditions, while random exploration is associated with choosing the low-value option under short horizon conditions. Exploitation is related to choosing the high-value option, but in this task the importance of exploitation changes. Specifically, when value differences are small (high uncertainty), exploitative choices require greater discriminative effort and carry higher impact on outcomes, increasing their expected value of control. Similarly, when horizons are short, exploitative decisions become more critical as there are no future opportunities to correct errors, further elevating their value of control. 
In contrast, exploration choices under long horizons may carry lower control demands because errors can be compensated for in subsequent trials, while exploratory choices under short horizons (random exploration) may reflect low-cost stochastic sampling rather than effortful strategic processing. Thus, integrating EVC consideration provides novel predictions for pupil diameter in exploration and exploitation decisions, predicting that pupil size should be larger (reflecting higher arousal and control investment) when the expected value of control is high, specifically, during exploitative choices under small value gaps or short horizons, and smaller when the expected value of control is low, as in exploratory choices where strategic demands are minimal. By measuring pupil dynamics across these conditions, this study tests whether arousal tracks exploratory versus exploitative choices or the contextual factors determining when effortful processing is warranted.</p>
<fig position="float" id="fig1">
<label>Figure 1</label>
<caption>
<p>Experimental design: <bold>(A)</bold> Example trial sequence in the Horizon Task. The first four trials were forced-choice trials (left), where one choice was marked with X and the participants had to choose the other option and see its outcome. In free-choice trials a green square indicated that participants could freely choose between the two options. The selected option&#x2019;s reward was then displayed (right). <bold>(B)</bold> Illustration of task paradigm and variables: The paradigm included 320 mini-games that differed in three variables: value gap &#x2013; the absolute difference in expected reward between options, information gap &#x2013; imbalance in the number of samples from each option during the forced-choice session, and choice horizon &#x2013; the number of remaining free-choice trials after the forced-choice session. Green boxes indicate the first free choice, which was the focus of our analysis.</p>
</caption>
<graphic xlink:href="fpsyg-17-1752586-g001.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Panel A shows a sequence of grid-based gambling choices, progressing from a forced-choice with numbers and X symbols, through a free-choice signaled by green squares, to outcome revelation with a bolded number. Panel B illustrates three task variants: value gap, information gap, and choice horizon, each comparing grids with different numbers, X symbols, and green squares to indicate experimental conditions.</alt-text>
</graphic>
</fig>
</sec>
<sec sec-type="methods" id="sec2">
<title>Method</title>
<sec id="sec3">
<title>Participants</title>
<p>Thirty-five participants between ages 18&#x2013;41 (24 females, 13 males; <italic>M</italic>&#x202F;=&#x202F;25.9, <italic>SD</italic>&#x202F;=&#x202F;5.2) took part in the study. A sensitivity power analysis (<italic>&#x03B1;</italic>&#x202F;=&#x202F;0.05, two-tailed, 80% power; <xref ref-type="bibr" rid="ref9">Faul et al., 2009</xref>) indicated that with 35 participants the study was sufficiently powered to detect medium-sized effects (f&#x202F;&#x2248;&#x202F;0.25, &#x03B7;<sup>2</sup>p&#x202F;&#x2248;&#x202F;0.06) (<xref ref-type="bibr" rid="ref5">Cohen, 2013</xref>). All had normal or corrected-to-normal vision and no history of neurological or psychiatric disorders. Participants were recruited through volunteer sampling within the academic institution and provided written informed consent in accordance with institutional guidelines. The experimental task lasted approximately 45&#x202F;min, and participants received monetary compensation of $20 USD for their participation without any performance-based bonus. The study was approved by the institutional review board (IRB Approval code 252/22).</p>
</sec>
<sec id="sec4">
<title>Task and design</title>
<p>Participants completed an adapted version of the Horizon Task, designed to manipulate directed and random exploration. The task was programmed using MATLAB and Psychtoolbox 3.017. The experiment consisted of 320 games, divided into four blocks of 80 games each. Each game contained either 5 or 10 trials. On each trial, participants chose between two slot machine options, each delivering a reward between 1 and 100 points. Rewards were drawn from Gaussian distributions (SD&#x202F;=&#x202F;8), with the mean value of one option set to either 40 or 60 and the other differing by 4, 8, 12, 20, or 30 points. These differences manipulated the expected value gap, such that smaller differences introduced greater uncertainty and encouraged exploration, while larger differences promoted exploitation.</p>
<p>The two choice options were visually identical and presented symmetrically to the left and right of a central fixation point. Choice and outcome history for each option remained visible within the corresponding display throughout the game. Each game began with four forced-choice trials, during which only one option was available at a time (<xref ref-type="fig" rid="fig1">Figure 1A</xref>). The unavailable option was marked with an &#x201C;X&#x201D;, ensuring controlled exposure to outcome information while maintaining active task engagement. This phase controlled participants&#x2019; initial information exposure prior to free choice. After each choice, the reward outcome was displayed on screen, along with a history of choices and rewards for both options within the current game. We manipulated three task variables, which captured distinct exploration&#x2013;exploitation-related dynamics (<xref ref-type="fig" rid="fig1">Figure 1B</xref>):</p>
<list list-type="simple">
<list-item>
<p>(1) Value gap: the absolute difference in expected reward between the two options. Larger gaps are associated with low uncertainty, whereas smaller gaps increase uncertainty.</p>
</list-item>
<list-item>
<p>(2) Information gap: the sampling imbalance during forced trials, defined by the number of prior samples from the higher-value option (+2&#x202F;=&#x202F;more samples from the better option, &#x2212;2&#x202F;=&#x202F;more from the worse option, 0&#x202F;=&#x202F;equal). Lower exposure to the superior option was expected to promote information-seeking via directed exploration.</p>
</list-item>
<list-item>
<p>(3) Choice horizon: the number of remaining free-choice trials (1 vs. 6). A longer horizon was expected to promote directed exploration by increasing the prospective utility of information, while a shorter horizon was expected to increase the importance of exploitation due to limited opportunity to benefit from newly acquired information. This variable was dummy-coded with the <italic>long-horizon</italic> condition (6 trials) serving as the reference category (coded as 0), and the <italic>short-horizon</italic> condition (1 trial) coded as 1.</p>
</list-item>
</list>
</sec>
<sec id="sec5">
<title>Procedure</title>
<p>Participants were first provided with on-screen illustrated instructions explaining the task structure, the fixed reward distributions within each game, and the goal of maximizing total points. Similar to <xref ref-type="bibr" rid="ref29">Wilson et al. (2014)</xref>, the instructions clarified that reward distributions remained constant within each game but varied across games, and that one option was always superior on average. Participants were informed that the initial trials in each game were forced-choice trials designed to provide information about the options before free choice. No separate practice block was administered prior to the experimental task; participants proceeded directly to the task following the instructions. Each game began with a fixation cross presented at the center of the screen. During the forced-choice phase, successive forced-choice trials were separated by brief inter-step intervals of 100&#x202F;ms. Following the final forced-choice trial, the fixation display remained on screen for an additional 200&#x202F;ms before its color changed, signaling the transition to the free-choice phase. The onset of the first free-choice trial occurred after a further 800&#x202F;ms interval.</p>
<p>Following fixation, the choice display appeared and remained on screen until a response was made (self-paced, with a maximum allowed response time of 5,000&#x202F;ms). During the free-choice phase, successive free-choice trials were separated by 500&#x202F;ms intervals. After a choice, the selected option was highlighted and the numerical reward outcome was presented for a fixed interval, after which the next trial commenced. Between games, a blank screen was presented for 2,000&#x202F;ms before the onset of the next game. Throughout the task, pupil size was recorded continuously. To isolate anticipatory cognitive effort associated with exploratory behavior, pupillometry analyses focused exclusively on the first free-choice trial of each game. Pupil measures were extracted from the interval spanning choice onset until the motor response, allowing assessment of sustained decision-related arousal rather than stimulus-locked responses.</p>
</sec>
<sec id="sec6">
<title>Pupillometry recording and analysis</title>
<p>All behavioral and pupillometry data were processed and analyzed following a standardized pipeline. Pupil size data were recorded via the EyeLink 1000 system at a sampling rate of 1,000&#x202F;Hz. Preprocessing included blink detection, linear interpolation, and baseline correction. Trials with missing data were excluded. To ensure measurement accuracy and control for gaze position, the task was programmed to restrict responses unless the participant&#x2019;s gaze was fixed at the center of the screen at the time of choice.</p>
<p>Pupil data were processed and visualized using Data Viewer software (version 4.1.211; SR Research Ltd., Ontario, Canada), which enabled blink detection verification, baseline correction, segmentation of trials, and extraction of tonic and phasic pupil measures. The primary tonic measure was defined as the average pupil size across the entire decision interval (from choice onset until the motor response) on the first free-choice trial of each game. Phasic analyses involved segmenting each trial into 100-ms bins spanning the 600&#x202F;ms prior to the motor response (from Bin &#x2212;6 to Bin 0) to examine the temporal dynamics of pupil-linked arousal and to capture the decision-related phasic burst, which is known to be time-locked to the motor response<sup>36</sup>.</p>
<p>The experimental task was programmed in MATLAB (version 2023a, MathWorks, Natick, MA, USA) using Psychtoolbox-3. All statistical analyses, including mixed-effects regression models for behavioral and pupillometry data, were conducted in SPSS (version 28, IBM Corp., Armonk, NY, USA).</p>
<p>All models included random intercepts for participants. More complex random-effects structures including random slopes for the experimental predictors were evaluated but did not converge reliably. Therefore, the reported results are based on the most parsimonious models that yielded stable estimates. Logistic mixed-effects models were used for the behavioral data to predict choice behavior on the first free-choice trial. For the primary pupillometry analysis, general linear mixed-effects models were applied. Critically, to test our main hypothesis, these models were conducted separately for exploitation trials (defined as choosing the higher-value option) and exploration trials (defined as choosing the lower-value option). In this analysis, the dependent variable is the pupil size during the period immediately preceding the motor response in the first free-choice trial of each game, reflecting anticipatory arousal. Predictor coding is consistent with the behavioral model, except for Information gap, which reflects the symmetry of information between options (equal&#x202F;=&#x202F;0; unequal&#x202F;=&#x202F;1). These analyses were carried out in SPSS version 28.</p>
</sec>
</sec>
<sec sec-type="results" id="sec7">
<title>Results</title>
<p>All models included random intercepts for participants. More complex random-effects structures including random slopes for the experimental predictors were evaluated but did not converge reliably. Therefore, the reported results are based on the most parsimonious models that yielded stable estimates.</p>
<sec id="sec8">
<title>Behavioral model</title>
<p>To examine the factors influencing participants&#x2019; choices on the first free-choice trial of each game, we conducted a logistic mixed-effects regression model. The model predicted the probability of choosing the higher-value option as a function of three fixed-effect predictors: Value gap, Choice Horizon and Information gap (<xref ref-type="fig" rid="fig1">Figure 1B</xref>, see full description in methods).</p>
<p>The model revealed significant effects for all predictors. Participants were more likely to choose the higher-value option when the reward difference between options was larger (Value Gap: <italic>&#x03B2;</italic> =&#x202F;0.019, <italic>p</italic> &#x003C;&#x202F;0.001), indicating increased exploitation under conditions of low uncertainty. A negative effect of Information Gap (<italic>&#x03B2;</italic> =&#x202F;&#x2212;0.058, <italic>p</italic> =&#x202F;0.009) revealed that participants tended to choose the higher-value option more often when they had received more prior information about it, whereas they were more likely to explore the lower-value option when it had been sampled more frequently, demonstrating a directed exploration strategy aimed at reducing uncertainty. Choice horizon also showed a significant positive effect (<italic>&#x03B2;</italic> =&#x202F;0.167, <italic>p</italic> =&#x202F;0.007), indicating that participants exploited more in short-horizon conditions (1 free trial), where the immediate payoff was crucial. Since the short horizon was dummy-coded as 1, the positive coefficient reflects a greater tendency to choose the higher-value option in that condition, compared to the long-horizon condition (6 trials), where exploration could support future gains (<xref ref-type="table" rid="tab1">Table 1</xref>).</p>
<table-wrap position="float" id="tab1">
<label>Table 1</label>
<caption>
<p>Mixed-effects logistic regression results for first free-choice decisions.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">Predictor</th>
<th align="center" valign="top">Coefficient <italic>&#x03B2;</italic></th>
<th align="center" valign="top">Std. error</th>
<th align="center" valign="top">Exp (OR) (Coefficient)</th>
<th align="center" valign="top">CI for OR 95%</th>
<th align="center" valign="top"><italic>p</italic> value</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="top">Value gap (large)</td>
<td align="char" valign="top" char=".">0.019</td>
<td align="char" valign="top" char=".">0.0034</td>
<td align="char" valign="top" char=".">1.019</td>
<td align="center" valign="top">[1.012, 1.026]</td>
<td align="char" valign="top" char="."><bold>&#x003C;0.001</bold></td>
</tr>
<tr>
<td align="left" valign="top">Information gap (positive)</td>
<td align="char" valign="top" char=".">&#x2212;0.058</td>
<td align="char" valign="top" char=".">0.0223</td>
<td align="char" valign="top" char=".">0.943</td>
<td align="center" valign="top">[0.903, 0.985]</td>
<td align="char" valign="top" char="."><bold>0.009</bold></td>
</tr>
<tr>
<td align="left" valign="top">Choice horizon (short)</td>
<td align="char" valign="top" char=".">0.167</td>
<td align="char" valign="top" char=".">0.0623</td>
<td align="char" valign="top" char=".">1.182</td>
<td align="center" valign="top">[1.046, 1.335]</td>
<td align="char" valign="top" char="."><bold>0.007</bold></td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<p>Logistic mixed-effects models predicting choice behavior during the first free-choice trial, conducted across all games. The model predicted the probability of choosing the higher-value option as a function of value gap, information gap, and choice horizon. &#x002A;&#x002A;<italic>p</italic> &#x003C; 0.01, &#x002A;&#x002A;&#x002A;<italic>p</italic> &#x003C; 0.001. Models included random intercepts for participants.</p>
</table-wrap-foot>
</table-wrap>
</sec>
<sec id="sec9">
<title>Average (tonic) pupil size</title>
<p>For the tonic analysis, pupil size was averaged across the entire response time of the first free-choice trial in each game, from the onset of choice presentation until the motor response. This measure captures overall anticipatory arousal during the decision process.</p>
<p>First, we examined whether pupil size was different between exploratory and exploitative decisions across all trials, using mixed-effects linear regression. We found that exploration decisions were associated with larger pupil size, but the effect was only marginally significant [<italic>F</italic>(1, 5200)&#x202F;=&#x202F;2.95, <italic>&#x03B2;</italic> =&#x202F;19.62, <italic>p</italic> =&#x202F;0.086]. This finding indicates that exploratory decisions were associated with higher arousal to some extent, in line with the AGT prediction of exploratory and exploitative control.</p>
<p>We then proceeded to analyze the effects of value gap, information gap and choice horizon on pupil size. To differentiate between their effects on exploration and exploitation, we carried out two independent analyses, one including only exploratory choices, and the other including exploitative choices (<xref ref-type="fig" rid="fig2">Figure 2</xref>). <xref ref-type="table" rid="tab2">Table 2</xref> presents the results of these models. For exploratory choices, none of the predictors significantly modulated pupil size (all <italic>p</italic> &#x003E;&#x202F;0.36), indicating that average pupil-indexed arousal did not vary with factors hypothesized to promote directed and random exploration. In contrast, for exploitative choices both Choice Horizon (<italic>&#x03B2;</italic> =&#x202F;&#x2212;5.63, <italic>p</italic> =&#x202F;0.025) and Value Gap (<italic>&#x03B2;</italic> =&#x202F;&#x2212;1.58, <italic>p</italic> =&#x202F;0.021) significantly predicted pupil size. Specifically, pupil size was larger in short-horizon trials (where only one decision remained) compared to long-horizon trials, indicating that the importance of the choice played an important role, in line with the EVC account. In addition, pupil size increased when the value difference between options was small, indicating greater uncertainty, compared to when the difference was large and uncertainty diminished. This indicates that exploitation under low uncertainty is related to low arousal, but exploitation under high uncertainty is related to high arousal and task engagement, similarly to the directed exploration predicted by AGT. Finally, Information Gap did not significantly modulate pupil size in either exploration or exploitation contexts.</p>
<fig position="float" id="fig2">
<label>Figure 2</label>
<caption>
<p>Pupil size during the experimental conditions. To demonstrate the effects observed in our statistical model, we plotted the pupil size (after removal of individual baseline) during exploit (left) and explore (right) choices, for short horizon (blue) and long horizon (red) conditions, in different value-gap choices. Exploratory choices were marked with somewhat larger pupil size, though not significantly so. In exploit decisions, pupil size decreased when uncertainty decreased (high value difference between the options) (<italic>p</italic> &#x003C;&#x202F;0.05) and was overall higher in short horizon choices than in long horizon choices (<italic>p</italic> &#x003C;&#x202F;0.05). Points indicate mean values and error bars indicate 95% bootstrap confidence intervals of the mean.</p>
</caption>
<graphic xlink:href="fpsyg-17-1752586-g002.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Line graph with two panels compares centered pupil size by value difference for high value (exploit) and low value (explore) conditions. Two lines represent long (red) and short (blue) choice horizons, with error bars for each data point.</alt-text>
</graphic>
</fig>
<table-wrap position="float" id="tab2">
<label>Table 2</label>
<caption>
<p>Linear mixed-effects model results for pupil size by choice type.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">Predictor</th>
<th align="center" valign="top">Coefficient &#x03B2;</th>
<th align="center" valign="top">Std. Error</th>
<th align="center" valign="top">CI 95%</th>
<th align="center" valign="top"><italic>p</italic> value</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="top" colspan="5">Exploration (lower-value option)</td>
</tr>
<tr>
<td align="left" valign="top">Horizon</td>
<td align="center" valign="top">&#x2212;3.07</td>
<td align="center" valign="top">3.38</td>
<td align="center" valign="top">[&#x2212;9.69, 3.56]</td>
<td align="center" valign="top">0.364</td>
</tr>
<tr>
<td align="left" valign="top">Value gap</td>
<td align="center" valign="top">0.56</td>
<td align="center" valign="top">0.93</td>
<td align="center" valign="top">[&#x2212;1.25, 2.38]</td>
<td align="center" valign="top">0.542</td>
</tr>
<tr>
<td align="left" valign="top">Info gap</td>
<td align="center" valign="top">&#x2212;4.40</td>
<td align="center" valign="top">16.90</td>
<td align="center" valign="top">[&#x2212;37.56, 28.75]</td>
<td align="center" valign="top">0.794</td>
</tr>
<tr>
<td align="left" valign="top" colspan="5">Exploitation (higher-value option)</td>
</tr>
<tr>
<td align="left" valign="top">Horizon</td>
<td align="center" valign="top">&#x2212;5.63</td>
<td align="center" valign="top">2.51</td>
<td align="center" valign="top">[&#x2212;10.56, &#x2212;0.71]</td>
<td align="center" valign="top"><bold>0.025&#x002A;</bold></td>
</tr>
<tr>
<td align="left" valign="top">Value gap</td>
<td align="center" valign="top">&#x2212;1.58</td>
<td align="center" valign="top">0.68</td>
<td align="center" valign="top">[&#x2212;2.92, &#x2212;0.24]</td>
<td align="center" valign="top"><bold>0.021&#x002A;</bold></td>
</tr>
<tr>
<td align="left" valign="top">Info gap</td>
<td align="center" valign="top">8.31</td>
<td align="center" valign="top">12.56</td>
<td align="center" valign="top">[&#x2212;16.33, 32.94]</td>
<td align="center" valign="top">0.509</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<p>Linear mixed-effects models predicting mean pupil size during the decision period, conducted separately for exploration (choosing the lower-value option) and exploitation (choosing the higher-value option). Significant effects (&#x002A;<italic>p</italic>&#x202F;&#x003C;&#x202F;0.05) are shown in bold. Models included random intercepts for participants.</p>
</table-wrap-foot>
</table-wrap>
</sec>
<sec id="sec10">
<title>Trial-by-trial binned pupil size</title>
<p>To identify the precise timing of pupillary effects, we conducted a binned analysis of the pupil responses (i.e., phasic response) during the time leading to the decision point. The group-level mean response time was 415.19&#x202F;ms (SD&#x202F;=&#x202F;122.66&#x202F;ms). The majority of responses (87%) occurred within 700&#x202F;ms prior to the choice, supporting the use of a 700-ms pre-response window for the time-bin analysis. Because many responses were faster, the number of observations contributing to each time bin was not uniform and decreased for bins farther from the response onset (see <xref rid="SM1" ref-type="supplementary-material">Supplementary materials</xref> for the RT distribution).</p>
<p>Next, the 700&#x202F;ms period preceding each motor response was segmented into seven successive 100&#x202F;ms bins, aligned to the response onset (from Bin &#x2212;6 to Bin 0). A separate linear mixed-effects model, similar to the one conducted for the average pupil size in the previous section, was fitted to all participants&#x2019; pupil data within each time bin, with the same predictor structure as the tonic analysis (value gap, information gap, and choice horizon). This approach yielded a time course of beta (<italic>&#x03B2;</italic>) coefficients for each predictor, allowing us to identify when during the decision period each factor influenced pupil size. The full statistical tables for each bin are provided in the Supplementary Materials.</p>
<p>In addition to the beta coefficients, we examined model-based predicted pupil values (estimated marginal means) across time bins to facilitate interpretation of the direction and magnitude of pupil changes.</p>
<p>In <xref ref-type="fig" rid="fig3">Figure 3A</xref>, Value gap was negatively associated with pupil size across the decision window, with <italic>&#x03B2;</italic> coefficients consistently below zero. These negative values reached significance only in bin &#x2212;2 (<italic>&#x03B2;</italic>&#x202F;=&#x202F;&#x2212;1.20, <italic>p</italic>&#x202F;=&#x202F;0.05) and 0 (<italic>&#x03B2;</italic>&#x202F;=&#x202F;&#x2212;1.14, <italic>p</italic>&#x202F;=&#x202F;0.048), showing reduced pupil size when the absolute difference between option values was larger. These results do not support a phasic effect of value-gap on pupil size, but a weak and consistent negative effect, similar to the marginal effect observed in the average pupil size analysis.</p>
<fig position="float" id="fig3">
<label>Figure 3</label>
<caption>
<p><bold>(A)</bold> Trial-by-trial analysis of pupil size across pre-decision time bins. The beta coefficients for predictors of pupil size in the 700&#x202F;ms preceding the first free-choice decision, analyzed across seven consecutive 100&#x202F;ms bins (Bin &#x2212;6 to Bin 0) aligned to the motor response. Beta coefficients from linear mixed-effects models are shown in circles (connected with lines) for value gap (blue), choice horizon (red) and information gap (green). Shaded areas represent the standard error (SE), &#x002A;<italic>p</italic> &#x003C;&#x202F;0.05, ^ marginal significance <italic>p</italic> &#x003C;&#x202F;0.08. <bold>(B)</bold> Model-based predicted pupil size (estimated marginal means) across the pre-decision time bins, separated for each condition (value-gap, information gap and choice horizon).</p>
</caption>
<graphic xlink:href="fpsyg-17-1752586-g003.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Panel A shows three line graphs with shaded confidence intervals for value gap (red), information gap (green), and choice horizon (blue) displaying beta estimates over time bins; annotated markers indicate significant points. Panel B presents three corresponding line graphs showing predicted pupil size (EMM) by value gap (red, multiple levels), information gap (green, unequal and equal), and choice horizon (blue, short and long) over time bins, with changes and group differences annotated in respective colors.</alt-text>
</graphic>
</fig>
<p>Choice horizon was a significant positive predictor of pupil size at bin &#x2212;6 (<italic>&#x03B2;</italic>&#x202F;=&#x202F;51.32, <italic>p</italic>&#x202F;=&#x202F;0.016) and bin &#x2212;1 (<italic>&#x03B2;</italic>&#x202F;=&#x202F;21.80, <italic>p</italic>&#x202F;=&#x202F;0.039), with a marginally significant effect at bin 0 (<italic>&#x03B2;</italic>&#x202F;=&#x202F;18.25, <italic>p</italic>&#x202F;=&#x202F;0.08). These results indicate larger pupil size in the short-horizon condition (1 free choice) compared to the long-horizon condition (6 free choices). However, the effects did not manifest as a discrete phasic peak, but rather as a more sustained modulation across the decision window.</p>
<p>Information gap was not significantly associated with pupil size at any point in the decision window (all <italic>p</italic>&#x202F;&#x003E;&#x202F;0.05). Coefficients were consistently negative across bins (ranging from <italic>&#x03B2;</italic>&#x202F;=&#x202F;&#x2212;11.70 at bin &#x2212;6 to <italic>&#x03B2;</italic>&#x202F;=&#x202F;0.034 at bin 0), but none reached statistical significance. These findings indicate that sampling asymmetry (equal vs. unequal information) did not elicit reliable modulation of pupil-linked arousal, and no evidence for a phasic peak was observed.</p>
<p>The results from all time-bin analyses show that effects were somewhat more consistent immediately before the response, where they were similar in direction to the effects observed in the average pupil size analysis. This suggests that the observed pupil modulation reflects a sustained cognitive process leading up to the decision, rather than a discrete, transient response to an external stimulus. It also indicates that temporal dynamics along the time leading to decision may contribute to pupil size.</p>
<p>In <xref ref-type="fig" rid="fig3">Figure 3B</xref>, by using estimated marginal means, the predicted values clarify the direction and magnitude of pupil changes over time, demonstrating overall larger pupil size in the short-horizon condition relative to the long-horizon condition throughout the decision window, alongside a gradual decrease in pupil size toward response execution. Importantly, this pattern confirms the absence of a discrete phasic peak and supports the interpretation of a sustained, context-dependent modulation of pupil size leading up to the decision.</p>
</sec>
</sec>
<sec sec-type="discussion" id="sec11">
<title>Discussion</title>
<p>The present investigation examined the relationship between pupillary responses and different types of control demands, associated with exploration and exploitation behavior. We used a decision-making task where the value of control was manipulated in different ways, including the level of uncertainty about the best option, the future consequences of the decision and the importance of current outcome. These allowed us to track subtle differences within exploration and exploitation decisions, and to examine how they bear on common theories of task engagement. Behavioral analyses confirmed the importance of these factors in influencing participants&#x2019; choices. Participants adjusted their choices based on value uncertainty, consistently chose options with less prior information in asymmetrical-information conditions, and exhibited greater information-seeking behavior when more free choices remained (six vs. one). These patterns replicated the findings of <xref ref-type="bibr" rid="ref29">Wilson et al. (2014)</xref> and confirmed the effectiveness of the experimental manipulations in eliciting exploration behavior.</p>
<p>Pupillary responses, however, revealed a more selective pattern. In line with AGT predictions, pupil size was somewhat larger in exploratory choices, i.e., choices of lower value options, compared with exploitative choices. However, when examining patterns within each type of choice, we found that pupil size was affected by task parameters only during exploitative choices. Both choice horizon and value-gap significantly predicted average pupil size: pupils were larger when only one decision remained, reflecting how importance and value of control shape arousal, and when differences in values were small, reflecting directed exploitation. This dissociation reveals that pupil-indexed arousal selectively tracks the strategic demand for control, beyond differentiating simply between exploitative and exploratory decisions.</p>
<p>These findings prompt a refinement of AGT&#x2019;s predictions regarding exploration and exploitation. AGT proposes that exploration engages elevated tonic arousal while exploitation engages transient phasic bursts (<xref ref-type="bibr" rid="ref1">Aston-Jones and Cohen, 2005</xref>; <xref ref-type="bibr" rid="ref14">Jepma and Nieuwenhuis, 2011</xref>). Our findings partially support this framework: we observed modulation of average (tonic) pupil size in exploratory and exploitative choices, consistent with AGT&#x2019;s emphasis on sustained arousal states. However, further task-related modulations were observed during exploitation rather than exploration, and specifically during high uncertainty, where exploitative decisions are still uncertain and provide information, similar to directed exploration. This suggests that exploration and exploitation control strategies should be defined according to task demands and not just based on choice value. Moreover, we did not observe discrete phasic peaks time-locked to decisions but instead sustained elevation throughout the decision period. This pattern is consistent with the view that pupil signals reflect context-dependent integration of multiple neuromodulatory and cognitive processes, rather than a one-to-one mapping onto phasic versus tonic LC activity (<xref ref-type="bibr" rid="ref15">Joshi and Gold, 2020</xref>). Notably, this stable temporal profile contrasts with the discrete phasic bursts observed in visual-attentional tasks (<xref ref-type="bibr" rid="ref11">Gabay et al., 2011</xref>; <xref ref-type="bibr" rid="ref24">Preuschoff et al., 2011</xref>) and may reflect prolonged evaluative processing under strategic constraints, as was tested in an early study in the working memory domain (<xref ref-type="bibr" rid="ref13">Granholm et al., 1996</xref>).</p>
<p>The absence of pupil modulation during exploration warrants consideration alongside prior evidence. <xref ref-type="bibr" rid="ref17">Kozunova et al. (2022)</xref> found that directed exploration elicited larger pupil dilations compared to exploitation. Critically, their operationalization involved deliberately choosing a known low-payoff option after learning, thereby overriding an established exploitation bias &#x2013; a process likely engaging conflict-driven control (<xref ref-type="bibr" rid="ref27">Shenhav et al., 2013</xref>). In contrast, the Horizon Task manipulates environmental factors that promote exploration without imposing strong conflict or immediate costs. Participants can explore with relatively low cost, particularly when horizons are long and future opportunities remain available (<xref ref-type="bibr" rid="ref29">Wilson et al., 2014</xref>; <xref ref-type="bibr" rid="ref30">Zajkowski et al., 2017</xref>). This difference in the way exploration is elicited by the task may explain why we observed no pupil modulation during exploration despite systematic exploration strategies, as pupil-linked arousal in such contexts may primarily reflect uncertainty at the level of the environment rather than decision-specific control demands (<xref ref-type="bibr" rid="ref8">Fan et al., 2023</xref>).</p>
<p>This interpretation aligns with the Expected Value of Control (EVC) framework (<xref ref-type="bibr" rid="ref27">Shenhav et al., 2013</xref>), which proposes that control is allocated when anticipated benefits outweigh costs. Exploitation under short horizons represents high-importance decisions where errors cannot be corrected, and exploitation with small value differences requires enhanced processing for accurate discrimination. Both conditions increase the expected value of control, warranting greater resource investment. Importantly, pupillary responses have been shown to index the amount of cognitive effort actually invested, rather than objective task difficulty per se, highlighting the distinction between task demands and effortful control allocation (<xref ref-type="bibr" rid="ref28">van der Wel and van Steenbergen, 2018</xref>). This finding is also consistent with resource-rational principles (<xref ref-type="bibr" rid="ref18">Lieder and Griffiths, 2020</xref>), which argue that people allocate cognitive resources only when the computational costs are justified. Our current adapted paradigm is characterized by relatively low-cost exploration: initial uncertainty is reduced by forced trials and the stakes of any single choice are low. This creates a condition where exploration is not computationally demanding. Thus, the absence of pupil modulation during exploration may not indicate a failure of strategic processing, but rather an efficient adaptation where costly cognitive control was unnecessary. This account is also in line with recent work that has similarly shown that pupillary responses track the efficacy and reward value of control deployment (<xref ref-type="bibr" rid="ref10">Fr&#x00F6;mer et al., 2021</xref>) and vary systematically with task demands (<xref ref-type="bibr" rid="ref23">Nassar et al., 2012</xref>).</p>
<p>Our findings should be interpreted in light of several limitations. The Horizon Task structure provides initial outcome information via forced trials, reducing baseline uncertainty and potentially lowering the strategic demands of directed exploration relative to paradigms requiring information-seeking under complete uncertainty. This limits generalizability to contexts involving substantial opportunity costs or cognitive conflict. Additionally, pupillometry provides an indirect measure of arousal, and individual differences in exploration strategies and cognitive control capacity were not examined. Future work combining pupil measurements with direct neural recordings (e.g., fMRI) across diverse decision contexts, particularly those where exploration and exploitation vary in strategic importance beyond immediate outcomes, will clarify how pupil-linked arousal indexes context-dependent control allocation. Moreover, from a modeling perspective, although we evaluated more complex random-effects structures, models including random slopes did not converge reliably, likely reflecting overparameterization relative to the experimental design and the limited number of levels per predictor (<xref ref-type="bibr" rid="ref2">Bates et al., 2015</xref>). We therefore report parsimonious models with random intercepts for participants, highlighting the need for future studies with greater within-subject variability to more fully characterize individual differences in context-dependent control demands.</p>
<p>To conclude, our results provide novel insights into how different styles of control shape pupil-indexed arousal, integrating AGT and EVC theoretical frameworks. We demonstrate that arousal does not uniformly distinguish exploration from exploitation but instead tracks contextual factors determining when control is strategically warranted.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="sec12">
<title>Data availability statement</title>
<p>The datasets presented in this study can be found in online repositories. The names of the repository/repositories and accession number(s) can be found here: <ext-link xlink:href="https://osf.io/dq3ke/overview?view_only=a3c445dac8bd425fbd114f0047fb1d7a" ext-link-type="uri">https://osf.io/dq3ke/overview?view_only=a3c445dac8bd425fbd114f0047fb1d7a</ext-link>.</p>
</sec>
<sec sec-type="ethics-statement" id="sec13">
<title>Ethics statement</title>
<p>The studies involving humans were approved by Ethics Committee &#x2013; Institutional Review Board Haifa University. The studies were conducted in accordance with the local legislation and institutional requirements. The participants provided their written informed consent to participate in this study.</p>
</sec>
<sec sec-type="author-contributions" id="sec14">
<title>Author contributions</title>
<p>GB: Formal analysis, Writing &#x2013; original draft, Methodology, Investigation, Visualization, Conceptualization. SG: Visualization, Supervision, Formal analysis, Validation, Methodology, Writing &#x2013; review &#x0026; editing, Conceptualization. UH: Methodology, Visualization, Conceptualization, Writing &#x2013; review &#x0026; editing, Investigation, Validation, Supervision.</p>
</sec>
<sec sec-type="COI-statement" id="sec15">
<title>Conflict of interest</title>
<p>The author(s) declared that this work was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="ai-statement" id="sec16">
<title>Generative AI statement</title>
<p>The author(s) declared that Generative AI was not used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p>
</sec>
<sec sec-type="disclaimer" id="sec17">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec sec-type="supplementary-material" id="sec18">
<title>Supplementary material</title>
<p>The Supplementary material for this article can be found online at: <ext-link xlink:href="https://www.frontiersin.org/articles/10.3389/fpsyg.2026.1752586/full#supplementary-material" ext-link-type="uri">https://www.frontiersin.org/articles/10.3389/fpsyg.2026.1752586/full#supplementary-material</ext-link></p>
<supplementary-material xlink:href="Table_1.docx" id="SM1" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document" xmlns:xlink="http://www.w3.org/1999/xlink"/>
</sec>
<ref-list>
<title>References</title>
<ref id="ref1"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Aston-Jones</surname><given-names>G.</given-names></name> <name><surname>Cohen</surname><given-names>J. D.</given-names></name></person-group> (<year>2005</year>). <article-title>An integrative theory of locus coeruleus-norepinephrine function: adaptive gain and optimal performance</article-title>. <source>Annu. Rev. Neurosci.</source> <volume>28</volume>, <fpage>403</fpage>&#x2013;<lpage>450</lpage>. doi: <pub-id pub-id-type="doi">10.1146/annurev.neuro.28.061604.135709</pub-id></mixed-citation></ref>
<ref id="ref2"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Bates</surname><given-names>D.</given-names></name> <name><surname>Kliegl</surname><given-names>R.</given-names></name> <name><surname>Vasishth</surname><given-names>S.</given-names></name> <name><surname>Baayen</surname><given-names>H.</given-names></name></person-group> (<year>2015</year>). <article-title>Parsimonious mixed models</article-title>. <source>arXiv preprint arXiv</source>:<fpage>1506.04967</fpage>. doi: <pub-id pub-id-type="doi">10.48550/arXiv.1506.04967</pub-id></mixed-citation></ref>
<ref id="ref3"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Berger-Tal</surname><given-names>O.</given-names></name> <name><surname>Nathan</surname><given-names>J.</given-names></name> <name><surname>Meron</surname><given-names>E.</given-names></name> <name><surname>Saltz</surname><given-names>D.</given-names></name></person-group> (<year>2014</year>). <article-title>The exploration-exploitation dilemma: a multidisciplinary framework</article-title>. <source>PLoS One</source> <volume>9</volume>:<fpage>e95693</fpage>. doi: <pub-id pub-id-type="doi">10.1371/journal.pone.0095693</pub-id>, <pub-id pub-id-type="pmid">24756026</pub-id></mixed-citation></ref>
<ref id="ref4"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Cogliati Dezza</surname><given-names>I.</given-names></name> <name><surname>Yu</surname><given-names>A. J.</given-names></name> <name><surname>Cleeremans</surname><given-names>A.</given-names></name> <name><surname>Alexander</surname><given-names>W.</given-names></name></person-group> (<year>2017</year>). <article-title>Learning the value of information and reward over time when solving exploration-exploitation problems</article-title>. <source>Sci. Rep.</source> <volume>7</volume>:<fpage>16919</fpage>. doi: <pub-id pub-id-type="doi">10.1038/s41598-017-17237-w</pub-id>, <pub-id pub-id-type="pmid">29209058</pub-id></mixed-citation></ref>
<ref id="ref5"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Cohen</surname><given-names>J.</given-names></name></person-group> (<year>2013</year>). <source>Statistical power analysis for the behavioral sciences</source>. New York, NY: <publisher-name>Routledge</publisher-name>.</mixed-citation></ref>
<ref id="ref6"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Cohen</surname><given-names>J. D.</given-names></name> <name><surname>McClure</surname><given-names>S. M.</given-names></name> <name><surname>Yu</surname><given-names>A. J.</given-names></name></person-group> (<year>2007</year>). <article-title>Should I stay or should I go? How the human brain manages the trade-off between exploitation and exploration</article-title>. <source>Philos. Trans. R. Soc. Lond. Ser. B Biol. Sci.</source> <volume>362</volume>, <fpage>933</fpage>&#x2013;<lpage>942</lpage>. doi: <pub-id pub-id-type="doi">10.1098/rstb.2007.2098</pub-id>, <pub-id pub-id-type="pmid">17395573</pub-id></mixed-citation></ref>
<ref id="ref7"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Dubois</surname><given-names>M.</given-names></name> <name><surname>Hauser</surname><given-names>T. U.</given-names></name></person-group> (<year>2022</year>). <article-title>Value-free random exploration is linked to impulsivity</article-title>. <source>Nat. Commun.</source> <volume>13</volume>:<fpage>4542</fpage>. doi: <pub-id pub-id-type="doi">10.1038/s41467-022-31918-9</pub-id>, <pub-id pub-id-type="pmid">35927257</pub-id></mixed-citation></ref>
<ref id="ref8"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Fan</surname><given-names>H.</given-names></name> <name><surname>Burke</surname><given-names>T.</given-names></name> <name><surname>Sambrano</surname><given-names>D. C.</given-names></name> <name><surname>Dial</surname><given-names>E.</given-names></name> <name><surname>Phelps</surname><given-names>E. A.</given-names></name> <name><surname>Gershman</surname><given-names>S. J.</given-names></name></person-group> (<year>2023</year>). <article-title>Pupil size encodes uncertainty during exploration</article-title>. <source>J. Cognitive Neurosci.</source> <volume>35</volume>, <fpage>1508</fpage>&#x2013;<lpage>1520</lpage>. doi: <pub-id pub-id-type="doi">10.1162/jocn_a_02025</pub-id>, <pub-id pub-id-type="pmid">37382476</pub-id></mixed-citation></ref>
<ref id="ref9"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Faul</surname><given-names>F.</given-names></name> <name><surname>Erdfelder</surname><given-names>E.</given-names></name> <name><surname>Buchner</surname><given-names>A.</given-names></name> <name><surname>Lang</surname><given-names>A. G.</given-names></name></person-group> (<year>2009</year>). <article-title>Statistical power analyses using G&#x002A;Power 3.1: tests for correlation and regression analyses</article-title>. <source>Behav. Res. Methods</source> <volume>41</volume>, <fpage>1149</fpage>&#x2013;<lpage>1160</lpage>. doi: <pub-id pub-id-type="doi">10.3758/BRM.41.4.1149</pub-id>, <pub-id pub-id-type="pmid">19897823</pub-id></mixed-citation></ref>
<ref id="ref10"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Fr&#x00F6;mer</surname><given-names>R.</given-names></name> <name><surname>Lin</surname><given-names>H.</given-names></name> <name><surname>Dean Wolf</surname><given-names>C. K.</given-names></name> <name><surname>Inzlicht</surname><given-names>M.</given-names></name> <name><surname>Shenhav</surname><given-names>A.</given-names></name></person-group> (<year>2021</year>). <article-title>Expectations of reward and efficacy guide cognitive control allocation</article-title>. <source>Nat. Commun.</source> <volume>12</volume>:<fpage>1030</fpage>. doi: <pub-id pub-id-type="doi">10.1038/s41467-021-21315-z</pub-id>, <pub-id pub-id-type="pmid">33589626</pub-id></mixed-citation></ref>
<ref id="ref11"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Gabay</surname><given-names>S.</given-names></name> <name><surname>Pertzov</surname><given-names>Y.</given-names></name> <name><surname>Henik</surname><given-names>A.</given-names></name></person-group> (<year>2011</year>). <article-title>Orienting of attention, pupil size, and the norepinephrine system</article-title>. <source>Atten. Percept. Psychophys.</source> <volume>73</volume>, <fpage>123</fpage>&#x2013;<lpage>129</lpage>. doi: <pub-id pub-id-type="doi">10.3758/s13414-010-0015-4</pub-id>, <pub-id pub-id-type="pmid">21258914</pub-id></mixed-citation></ref>
<ref id="ref12"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Gershman</surname><given-names>S. J.</given-names></name></person-group> (<year>2019</year>). <article-title>Uncertainty and exploration</article-title>. <source>Decision</source> <volume>6</volume>, <fpage>277</fpage>&#x2013;<lpage>286</lpage>. doi: <pub-id pub-id-type="doi">10.1037/dec0000101</pub-id>, <pub-id pub-id-type="pmid">33768122</pub-id></mixed-citation></ref>
<ref id="ref13"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Granholm</surname><given-names>E.</given-names></name> <name><surname>Asarnow</surname><given-names>R. F.</given-names></name> <name><surname>Sarkin</surname><given-names>A. J.</given-names></name> <name><surname>Dykes</surname><given-names>K. L.</given-names></name></person-group> (<year>1996</year>). <article-title>Pupillary responses index cognitive resource limitations</article-title>. <source>Psychophysiology</source> <volume>33</volume>, <fpage>457</fpage>&#x2013;<lpage>461</lpage>. doi: <pub-id pub-id-type="doi">10.1111/j.1469-8986.1996.tb01071.x</pub-id>, <pub-id pub-id-type="pmid">8753946</pub-id></mixed-citation></ref>
<ref id="ref14"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Jepma</surname><given-names>M.</given-names></name> <name><surname>Nieuwenhuis</surname><given-names>S.</given-names></name></person-group> (<year>2011</year>). <article-title>Pupil diameter predicts changes in the exploration&#x2013;exploitation trade-off: evidence for the adaptive gain theory</article-title>. <source>J. Cognitive Neurosci.</source> <volume>23</volume>, <fpage>1587</fpage>&#x2013;<lpage>1596</lpage>. doi: <pub-id pub-id-type="doi">10.1162/jocn.2010.21548</pub-id>, <pub-id pub-id-type="pmid">20666595</pub-id></mixed-citation></ref>
<ref id="ref15"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Joshi</surname><given-names>S.</given-names></name> <name><surname>Gold</surname><given-names>J. I.</given-names></name></person-group> (<year>2020</year>). <article-title>Pupil size as a window on neural substrates of cognition</article-title>. <source>Trends Cogn. Sci.</source> <volume>24</volume>, <fpage>466</fpage>&#x2013;<lpage>480</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.tics.2020.03.005</pub-id>, <pub-id pub-id-type="pmid">32331857</pub-id></mixed-citation></ref>
<ref id="ref16"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Joshi</surname><given-names>S.</given-names></name> <name><surname>Li</surname><given-names>Y.</given-names></name> <name><surname>Kalwani</surname><given-names>R. M.</given-names></name> <name><surname>Gold</surname><given-names>J. I.</given-names></name></person-group> (<year>2016</year>). <article-title>Relationships between pupil diameter and neuronal activity in the locus coeruleus, colliculi, and cingulate cortex</article-title>. <source>Neuron</source> <volume>89</volume>, <fpage>221</fpage>&#x2013;<lpage>234</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.neuron.2015.11.028</pub-id>, <pub-id pub-id-type="pmid">26711118</pub-id></mixed-citation></ref>
<ref id="ref17"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kozunova</surname><given-names>G. L.</given-names></name> <name><surname>Sayfulina</surname><given-names>K. E.</given-names></name> <name><surname>Prokofyev</surname><given-names>A. O.</given-names></name> <name><surname>Medvedev</surname><given-names>V. A.</given-names></name> <name><surname>Rytikova</surname><given-names>A. M.</given-names></name> <name><surname>Stroganova</surname><given-names>T. A.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>Pupil dilation and response slowing distinguish deliberate explorative choices in the probabilistic learning task</article-title>. <source>Cognitive Affective Behav. Neurosci.</source> <volume>22</volume>, <fpage>1108</fpage>&#x2013;<lpage>1129</lpage>. doi: <pub-id pub-id-type="doi">10.3758/s13415-022-00996-z</pub-id>, <pub-id pub-id-type="pmid">35359274</pub-id></mixed-citation></ref>
<ref id="ref18"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Lieder</surname><given-names>F.</given-names></name> <name><surname>Griffiths</surname><given-names>T. L.</given-names></name></person-group> (<year>2020</year>). <article-title>Resource-rational analysis: understanding human cognition as the optimal use of limited computational resources</article-title>. <source>Behav. Brain Sci.</source> <volume>43</volume>:<fpage>e1</fpage>. doi: <pub-id pub-id-type="doi">10.1017/S0140525X1900061X</pub-id></mixed-citation></ref>
<ref id="ref19"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Mehlhorn</surname><given-names>K.</given-names></name> <name><surname>Newell</surname><given-names>B. R.</given-names></name> <name><surname>Todd</surname><given-names>P. M.</given-names></name> <name><surname>Lee</surname><given-names>M. D.</given-names></name> <name><surname>Morgan</surname><given-names>K.</given-names></name> <name><surname>Braithwaite</surname><given-names>V. A.</given-names></name> <etal/></person-group>. (<year>2015</year>). <article-title>Unpacking the exploration&#x2013;exploitation tradeoff: a synthesis of human and animal literatures</article-title>. <source>Decision</source> <volume>2</volume>:<fpage>191</fpage>. doi: <pub-id pub-id-type="doi">10.1037/dec0000033</pub-id></mixed-citation></ref>
<ref id="ref20"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Monk</surname><given-names>C. T.</given-names></name> <name><surname>Barbier</surname><given-names>M.</given-names></name> <name><surname>Romanczuk</surname><given-names>P.</given-names></name> <name><surname>Watson</surname><given-names>J. R.</given-names></name> <name><surname>Al&#x00F3;s</surname><given-names>J.</given-names></name> <name><surname>Nakayama</surname><given-names>S.</given-names></name> <etal/></person-group>. (<year>2018</year>). <article-title>How ecology shapes exploitation: a framework to predict the behavioural response of human and animal foragers along exploration-exploitation trade-offs</article-title>. <source>Ecol. Lett.</source> <volume>21</volume>, <fpage>779</fpage>&#x2013;<lpage>793</lpage>. doi: <pub-id pub-id-type="doi">10.1111/ele.12949</pub-id>, <pub-id pub-id-type="pmid">29611278</pub-id></mixed-citation></ref>
<ref id="ref21"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Murphy</surname><given-names>P. R.</given-names></name> <name><surname>O'Connell</surname><given-names>R. G.</given-names></name> <name><surname>O'Sullivan</surname><given-names>M.</given-names></name> <name><surname>Robertson</surname><given-names>I. H.</given-names></name> <name><surname>Balsters</surname><given-names>J. H.</given-names></name></person-group> (<year>2014</year>). <article-title>Pupil diameter covaries with BOLD activity in human locus coeruleus</article-title>. <source>Human Brain Mapping</source> <volume>35</volume>, <fpage>4140</fpage>&#x2013;<lpage>4154</lpage>. doi: <pub-id pub-id-type="doi">10.1002/hbm.22466</pub-id>, <pub-id pub-id-type="pmid">24510607</pub-id></mixed-citation></ref>
<ref id="ref22"><mixed-citation publication-type="other"><person-group person-group-type="author"><name><surname>Musslick</surname><given-names>S.</given-names></name> <name><surname>Shenhav</surname><given-names>A.</given-names></name> <name><surname>Botvinick</surname><given-names>M. M.</given-names></name> <name><surname>Cohen</surname><given-names>J. D.</given-names></name></person-group> (<year>2015</year>). <article-title>A computational model of control allocation based on the expected value of control</article-title>. In <source>Reinforcement Learning and Decision Making Conference</source>, (Edmonton, Canada) <fpage>7</fpage>&#x2013;<lpage>10</lpage>.</mixed-citation></ref>
<ref id="ref23"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Nassar</surname><given-names>M. R.</given-names></name> <name><surname>Rumsey</surname><given-names>K. M.</given-names></name> <name><surname>Wilson</surname><given-names>R. C.</given-names></name> <name><surname>Parikh</surname><given-names>K.</given-names></name> <name><surname>Heasly</surname><given-names>B.</given-names></name> <name><surname>Gold</surname><given-names>J. I.</given-names></name></person-group> (<year>2012</year>). <article-title>Rational regulation of learning dynamics by pupil-linked arousal systems</article-title>. <source>Nature Neurosci.</source> <volume>15</volume>, <fpage>1040</fpage>&#x2013;<lpage>1046</lpage>. doi: <pub-id pub-id-type="doi">10.1038/nn.3130</pub-id>, <pub-id pub-id-type="pmid">22660479</pub-id></mixed-citation></ref>
<ref id="ref24"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Preuschoff</surname><given-names>K.</given-names></name> <name><surname>&#x2019;t Hart</surname><given-names>B. M.</given-names></name> <name><surname>Einh&#x00E4;user</surname><given-names>W.</given-names></name></person-group> (<year>2011</year>). <article-title>Pupil dilation signals surprise: evidence for noradrenaline&#x2019;s role in decision making</article-title>. <source>Front. Neurosci.</source> <volume>5</volume>:<fpage>115</fpage>. doi: <pub-id pub-id-type="doi">10.3389/fnins.2011.00115</pub-id></mixed-citation></ref>
<ref id="ref25"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Rich</surname><given-names>A. S.</given-names></name> <name><surname>Gureckis</surname><given-names>T. M.</given-names></name></person-group> (<year>2018</year>). <article-title>The limits of learning: exploration, generalization, and the development of learning traps</article-title>. <source>J. Exp. Psychol. Gen.</source> <volume>147</volume>, <fpage>1553</fpage>&#x2013;<lpage>1570</lpage>. doi: <pub-id pub-id-type="doi">10.1037/xge0000466</pub-id>, <pub-id pub-id-type="pmid">30247058</pub-id></mixed-citation></ref>
<ref id="ref26"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Sadeghiyeh</surname><given-names>H.</given-names></name> <name><surname>Wang</surname><given-names>S.</given-names></name> <name><surname>Alberhasky</surname><given-names>M. R.</given-names></name> <name><surname>Kyllo</surname><given-names>H. M.</given-names></name> <name><surname>Shenhav</surname><given-names>A.</given-names></name> <name><surname>Wilson</surname><given-names>R. C.</given-names></name></person-group> (<year>2020</year>). <article-title>Temporal discounting correlates with directed exploration but not with random exploration</article-title>. <source>Sci. Rep.</source> <volume>10</volume>:<fpage>4020</fpage>. doi: <pub-id pub-id-type="doi">10.1038/s41598-020-60576-4</pub-id>, <pub-id pub-id-type="pmid">32132573</pub-id></mixed-citation></ref>
<ref id="ref27"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Shenhav</surname><given-names>A.</given-names></name> <name><surname>Botvinick</surname><given-names>M. M.</given-names></name> <name><surname>Cohen</surname><given-names>J. D.</given-names></name></person-group> (<year>2013</year>). <article-title>The expected value of control: an integrative theory of anterior cingulate cortex function</article-title>. <source>Neuron</source> <volume>79</volume>, <fpage>217</fpage>&#x2013;<lpage>240</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.neuron.2013.07.007</pub-id>, <pub-id pub-id-type="pmid">23889930</pub-id></mixed-citation></ref>
<ref id="ref28"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>van der Wel</surname><given-names>P.</given-names></name> <name><surname>van Steenbergen</surname><given-names>H.</given-names></name></person-group> (<year>2018</year>). <article-title>Pupil dilation as an index of effort in cognitive control tasks: a review</article-title>. <source>Psychon. Bull. Rev.</source> <volume>25</volume>, <fpage>2005</fpage>&#x2013;<lpage>2015</lpage>. doi: <pub-id pub-id-type="doi">10.3758/s13423-018-1432-y</pub-id>, <pub-id pub-id-type="pmid">29435963</pub-id></mixed-citation></ref>
<ref id="ref29"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wilson</surname><given-names>R. C.</given-names></name> <name><surname>Geana</surname><given-names>A.</given-names></name> <name><surname>White</surname><given-names>J. M.</given-names></name> <name><surname>Ludvig</surname><given-names>E. A.</given-names></name> <name><surname>Cohen</surname><given-names>J. D.</given-names></name></person-group> (<year>2014</year>). <article-title>Humans use directed and random exploration to solve the explore&#x2013;exploit dilemma</article-title>. <source>J. Exp. Psychol. Gen.</source> <volume>143</volume>, <fpage>2074</fpage>&#x2013;<lpage>2081</lpage>. doi: <pub-id pub-id-type="doi">10.1037/a0038199</pub-id>, <pub-id pub-id-type="pmid">25347535</pub-id></mixed-citation></ref>
<ref id="ref30"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zajkowski</surname><given-names>W. K.</given-names></name> <name><surname>Kossut</surname><given-names>M.</given-names></name> <name><surname>Wilson</surname><given-names>R. C.</given-names></name></person-group> (<year>2017</year>). <article-title>A causal role for right frontopolar cortex in directed, but not random, exploration</article-title>. <source>eLife</source> <volume>6</volume>:<fpage>e27430</fpage>. doi: <pub-id pub-id-type="doi">10.7554/eLife.27430</pub-id>, <pub-id pub-id-type="pmid">28914605</pub-id></mixed-citation></ref>
</ref-list>
<fn-group>
<fn fn-type="custom" custom-type="edited-by" id="fn0001">
<p>Edited by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/95138/overview">Chris Baber</ext-link>, University of Birmingham, United Kingdom</p>
</fn>
<fn fn-type="custom" custom-type="reviewed-by" id="fn0002">
<p>Reviewed by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/519518/overview">Qing-Wei Chen</ext-link>, South China Normal University, China</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/25125/overview">Joseph M. Orr</ext-link>, Texas A&#x0026;M University, United States</p>
</fn>
</fn-group>
</back>
</article>