<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.3" xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Robot. AI</journal-id>
<journal-title>Frontiers in Robotics and AI</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Robot. AI</abbrev-journal-title>
<issn pub-type="epub">2296-9144</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1621033</article-id>
<article-id pub-id-type="doi">10.3389/frobt.2025.1621033</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Robotics and AI</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Large language model-driven natural language interaction control framework for single-operator bimanual teleoperation</article-title>
<alt-title alt-title-type="left-running-head">Fei et al.</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/frobt.2025.1621033">10.3389/frobt.2025.1621033</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Fei</surname>
<given-names>Haolin</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2188253/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Xue</surname>
<given-names>Tao</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>He</surname>
<given-names>Yiyang</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Lin</surname>
<given-names>Sheng</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Du</surname>
<given-names>Guanglong</given-names>
</name>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/634072/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Guo</surname>
<given-names>Yao</given-names>
</name>
<xref ref-type="aff" rid="aff5">
<sup>5</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1647788/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Wang</surname>
<given-names>Ziwei</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1350326/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>School of Engineering</institution>, <institution>Lancaster University</institution>, <addr-line>Lancaster</addr-line>, <country>United Kingdom</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Department of Automation</institution>, <institution>Tsinghua University</institution>, <addr-line>Beijing</addr-line>, <country>China</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>School of Mechanical Engineering</institution>, <institution>Dalian Jiaotong University</institution>, <addr-line>Dalian</addr-line>, <country>China</country>
</aff>
<aff id="aff4">
<sup>4</sup>
<institution>School of Computer Science and Engineering</institution>, <institution>South China University of Technology</institution>, <addr-line>Guangzhou</addr-line>, <country>China</country>
</aff>
<aff id="aff5">
<sup>5</sup>
<institution>School of Biomedical Engineering</institution>, <institution>Shanghai Jiao Tong University</institution>, <addr-line>Shanghai</addr-line>, <country>China</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2802377/overview">Weiyong Si</ext-link>, University of Essex, United Kingdom</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1724926/overview">Mengchao Ma</ext-link>, Hefei University of Technology, China</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3053104/overview">Mng Chen</ext-link>, Zhejiang University of Technology, China</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Ziwei Wang, <email>z.wang82@lancaster.ac.uk</email>
</corresp>
</author-notes>
<pub-date pub-type="epub">
<day>17</day>
<month>07</month>
<year>2025</year>
</pub-date>
<pub-date pub-type="collection">
<year>2025</year>
</pub-date>
<volume>12</volume>
<elocation-id>1621033</elocation-id>
<history>
<date date-type="received">
<day>30</day>
<month>04</month>
<year>2025</year>
</date>
<date date-type="accepted">
<day>30</day>
<month>06</month>
<year>2025</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2025 Fei, Xue, He, Lin, Du, Guo and Wang.</copyright-statement>
<copyright-year>2025</copyright-year>
<copyright-holder>Fei, Xue, He, Lin, Du, Guo and Wang</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>Bimanual teleoperation imposes cognitive and coordination demands on a single human operator tasked with simultaneously controlling two robotic arms. Although assigning each arm to a separate operator can distribute workload, it often leads to ambiguities in decision authority and degrades overall efficiency. To overcome these challenges, we propose a novel bimanual teleoperation large language model assistant (BTLA) framework, an intelligent co-pilot that augments a single operator&#x2019;s motor control capabilities. In particular, BTLA enables operators to directly control one robotic arm through conventional teleoperation while directing a second assistive arm via simple voice commands, thereby commanding two robotic arms simultaneously. By integrating the GPT-3.5-turbo model, BTLA interprets contextual voice instructions and autonomously selects among six predefined manipulation skills, including real-time mirroring, trajectory following, and autonomous object grasping. Experimental evaluations in bimanual object manipulation tasks demonstrate that BTLA increased task coverage by 76.1<inline-formula id="inf1">
<mml:math id="m1">
<mml:mrow>
<mml:mi>%</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> and success rate by 240.8<inline-formula id="inf2">
<mml:math id="m2">
<mml:mrow>
<mml:mi>%</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> relative to solo teleoperation, and outperformed dyadic control with a 19.4<inline-formula id="inf3">
<mml:math id="m3">
<mml:mrow>
<mml:mi>%</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> gain in coverage and a 69.9<inline-formula id="inf4">
<mml:math id="m4">
<mml:mrow>
<mml:mi>%</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> gain in success. Furthermore, NASA Task Load Index (NASA-TLX) assessments revealed a 38&#x2013;52<inline-formula id="inf5">
<mml:math id="m5">
<mml:mrow>
<mml:mi>%</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> reduction in operator mental workload, and 85<inline-formula id="inf6">
<mml:math id="m6">
<mml:mrow>
<mml:mi>%</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> of participants rated the voice-based interaction as &#x201c;natural&#x201d; and &#x201c;highly effective.&#x201d;</p>
</abstract>
<kwd-group>
<kwd>human-robot collaboration</kwd>
<kwd>teleoperation</kwd>
<kwd>bimanual manipulation</kwd>
<kwd>embodied AI</kwd>
<kwd>large language model (LLM)</kwd>
</kwd-group>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Computational Intelligence in Robotics</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec id="s1">
<title>1 Introduction</title>
<p>Teleoperation has emerged as a pivotal technology for controlling robotic systems in hazardous or inaccessible environments while prioritizing human safety (<xref ref-type="bibr" rid="B29">Moniruzzaman et al., 2022</xref>; <xref ref-type="bibr" rid="B18">Huang et al., 2022</xref>). It has been widely applied in space rendezvous and docking (<xref ref-type="bibr" rid="B43">Zhang et al., 2017</xref>; <xref ref-type="bibr" rid="B40">Wang et al., 2021</xref>), underwater exploration (<xref ref-type="bibr" rid="B38">Sun et al., 2023</xref>), and remote surgery (<xref ref-type="bibr" rid="B2">Bacha et al., 2022</xref>; <xref ref-type="bibr" rid="B6">Boehm et al., 2021</xref>). To meet the demands of these scenarios, dual-arm robotic teleoperation has gained prominence as a robust solution for executing complex tasks that require enhanced dexterity (<xref ref-type="bibr" rid="B6">Boehm et al., 2021</xref>; <xref ref-type="bibr" rid="B4">Bai et al., 2021</xref>). Unlike single-arm systems, dual-arm configurations offer superior maneuverability, increased stability, and the ability to perform asymmetric operations (<xref ref-type="bibr" rid="B18">Huang et al., 2022</xref>; <xref ref-type="bibr" rid="B41">Wu et al., 2019</xref>).</p>
<p>Single-person bimanual (a single operator controlling dual robotic arms) and dyad teleoperation (two operators collaboratively controlling one arm each) represent the predominant paradigms for dual-arm robotic systems. In terms of single-person teleoperation, human control performance is sensitive to hardware design ergonomics, cognitive load, and task complexity (<xref ref-type="bibr" rid="B14">Guo et al., 2022</xref>). The operator needs to simultaneously manage the motion and coordination of two robotic arms, which can lead to increased mental workload and reduced performance (<xref ref-type="bibr" rid="B3">Bai et al., 2022</xref>). Regarding dyad teleoperation, human-human communication, synchronization, and control mechanism design remain challenging in ensuring intuitive collaboration and avoiding arbitration conflict among humans (<xref ref-type="bibr" rid="B13">Gowrishankar et al., 2014</xref>; <xref ref-type="bibr" rid="B20">Huang Z. et al., 2021</xref>; <xref ref-type="bibr" rid="B27">Li et al., 2022</xref>). Thus, dual-arm teleoperation performance can benefit from sensory feedback, motor control, and decision-making assistance as needed. For instance, with the shared mechanism, operators can focus on performing partial tasks while the assistive agent manages the remainder (<xref ref-type="bibr" rid="B17">Hu Z. J. et al., 2023</xref>; <xref ref-type="bibr" rid="B39">Wang et al., 2024</xref>). However, existing assistance systems tend to be task-dependent or rigidly structured with fixed autonomy levels. This limits their adaptability across different scenarios and operator preferences (<xref ref-type="bibr" rid="B10">Clark et al., 2019</xref>; <xref ref-type="bibr" rid="B19">Huang Y. et al., 2021</xref>; <xref ref-type="bibr" rid="B32">Sena et al., 2021</xref>). These systems may struggle to handle dynamic environments or adapt to new tasks without significantly modifying the control system. 
Additionally, the interface between the operator and the assistive system often requires specialized training or relies on pre-programmed commands that may not be natural to users.</p>
<p>To address these challenges, we incorporate a large language model (LLM) into a bimanual teleoperation framework (i.e., BTLA), which combines natural language interaction with variable autonomy to support single-operator dual-arm teleoperation. BTLA enables the command of the second assistive arm through natural voice instructions. The system utilizes LLMs to interpret operator intent and select the most suitable assistance mode from a set of core manipulation skills. The main contributions of our work are summarized as follows:<list list-type="simple">
<list-item>
<p>1. A flexible assistance system that enables natural language control of a secondary robotic arm during bimanual teleoperation, reducing operator cognitive load while maintaining task effectiveness.</p>
</list-item>
<list-item>
<p>2. Integration of LLMs for robust natural language understanding in robotic control, allowing operators to command complex manipulation skills through intuitive voice instructions.</p>
</list-item>
<list-item>
<p>3. Comprehensive experimental evaluation demonstrating significant improvements in task performance and reduction in operator workload compared to single-operator and dyadic teleoperation.</p>
</list-item>
</list>
</p>
</sec>
<sec id="s2">
<title>2 Related works</title>
<p>Dual-arm teleoperation architectures can generally be divided into two main categories: single-person bimanual (SPB) teleoperation and dual-human, dual-arm (dyadic) teleoperation. The SPB teleoperation often leads to a high mental workload for the operator, as they must manage the coordination and motion of two robotic arms in real time (<xref ref-type="bibr" rid="B33">Shao et al., 2020</xref>). For dyadic teleoperation, two operators collaboratively control the robotic arms, theoretically leveraging their combined expertise and cognitive capabilities. <xref ref-type="bibr" rid="B30">Noohi et al. (2016)</xref> developed a fundamental model for computing interaction forces during dyadic cooperative manipulation tasks. Interestingly, <xref ref-type="bibr" rid="B8">Che et al. (2016)</xref> found that dyadic collaboration doesn&#x2019;t necessarily improve performance over individual control in teleoperation environments, highlighting the complexities of human-human coordination in robotic control. <xref ref-type="bibr" rid="B21">Kropiv&#x161;ek Leskovar et al. (2021)</xref> further investigated these dynamics by examining leader-follower relationships in human dyads during collaborative tasks, providing valuable insights into role allocation strategies. To address the challenges of coordination between operators, <xref ref-type="bibr" rid="B23">Li et al. (2023a)</xref> proposed a flexible system capable of dynamically switching between different control architectures and controllers during operation. Two additional routes have been widely adopted to overcome the above obstacle: (i) developing more intuitive control interfaces, and (ii) designing control assistance algorithms. Intuitive human-machine interfaces aim to provide operators with natural sensations and user-friendly means of controlling multiple-arm robots (<xref ref-type="bibr" rid="B9">Cheng et al., 2023</xref>). 
Various interface technologies have been proposed, such as gesture-based interfaces (<xref ref-type="bibr" rid="B6">Boehm et al., 2021</xref>), virtual reality-based interfaces (<xref ref-type="bibr" rid="B12">Garc&#xed;a et al., 2022</xref>), and haptic devices (<xref ref-type="bibr" rid="B31">Rakita et al., 2019</xref>; <xref ref-type="bibr" rid="B24">Li et al., 2023b</xref>), reducing the cognitive burden associated with traditional control methods. Additionally, haptic feedback algorithms (<xref ref-type="bibr" rid="B36">Soyguder and Abut, 2016</xref>; <xref ref-type="bibr" rid="B7">Cavusoglu et al., 2002</xref>; <xref ref-type="bibr" rid="B46">Zhou et al., 2021</xref>) have been proposed to provide force feedback to the operator, enhancing their situational awareness and control precision. Control assistance algorithms, on the other hand, focus on developing intelligent strategies to assist the operator in managing the dual-arm system, including mapping strategies that translate human input into efficient and coordinated robot motions. Shared control approaches (<xref ref-type="bibr" rid="B44">Zheng H. et al., 2024</xref>; <xref ref-type="bibr" rid="B18">Huang et al., 2022</xref>; <xref ref-type="bibr" rid="B22">Laghi et al., 2018</xref>; <xref ref-type="bibr" rid="B37">Sun et al., 2020</xref>; <xref ref-type="bibr" rid="B20">Huang Z. et al., 2021</xref>; <xref ref-type="bibr" rid="B34">Shi et al., 2024</xref>) have been introduced to combine human input with autonomous robot behaviors, assisting the operator in dual-arm manipulation tasks. Recent taxonomies have provided valuable frameworks for understanding shared control in teleoperation. <xref ref-type="bibr" rid="B25">Li et al. (2023c)</xref> classified shared control strategies into semi-autonomous control (SAC), state-guidance shared control, and state-fusion shared control (SFSC) based on human-autonomy interaction patterns. 
While developed for single-arm systems, these concepts parallel our approach&#x2014;our system implements SAC-like behavior during autonomous operations and SFSC-like behavior during mirroring tasks, but extends these principles to address the unique coordination challenges of bimanual manipulation.</p>
<p>LLM-based methods have shown promising results in enhancing interactive capabilities of robotic systems (<xref ref-type="bibr" rid="B42">Zha et al., 2023</xref>; <xref ref-type="bibr" rid="B11">Cui et al., 2024</xref>; <xref ref-type="bibr" rid="B35">Singh et al., 2023</xref>). These methods leverage the strong understanding of the real world inherent in LLMs/VLMs to perform high-level planning using image cues. The planned tasks are then executed by calling upon lower-level knowledge bases for automation (<xref ref-type="bibr" rid="B16">Hu Y. et al., 2023</xref>; <xref ref-type="bibr" rid="B42">Zha et al., 2023</xref>; <xref ref-type="bibr" rid="B26">Li et al., 2024</xref>; <xref ref-type="bibr" rid="B45">Zheng Y. et al., 2024</xref>; <xref ref-type="bibr" rid="B28">Lin et al., 2024</xref>), allowing for more flexibility and adaptation to handle various tasks and environments. However, these LLM-based methods may not be ideal for multi-contact teleoperation and physical interaction. Object grasping and manipulation in complicated or dynamic environments may be more suitable for human operators due to their intuitive understanding of the task and the ability to adapt quickly to minor variations (<xref ref-type="bibr" rid="B1">Akinola et al., 2021</xref>; <xref ref-type="bibr" rid="B5">Balasubramanian et al., 2010</xref>). In such situations, the overhead of using an LLM for planning and automation may not justify the potential benefits. Instead of tasking the LLM with context understanding and decision-making, our approach leverages the human operator&#x2019;s expertise in these areas. We utilize the LLM as a human-robot interface, concentrating on its core strength of natural language processing to effectively convey human intentions.</p>
</sec>
<sec sec-type="methods" id="s3">
<title>3 Methodology</title>
<p>We first provide the formulation of the bimanual teleoperation problem in <xref ref-type="sec" rid="s3-1">Section 3.1</xref>. Subsequently, we present in <xref ref-type="sec" rid="s3-2">Section 3.2</xref> how BTLA utilizes an LLM to assist humans in bimanual teleoperation tasks.</p>
<sec id="s3-1">
<title>3.1 Problem formulation</title>
<p>BTLA addresses SPB teleoperation by enabling natural language control of an assistive robot arm while the operator directly manipulates the master arm. This approach allows operators to maintain precise control over critical manipulation tasks while delegating complementary actions to the assistant arm through intuitive voice commands. The assistant robot receives natural language voice instructions <inline-formula id="inf7">
<mml:math id="m7">
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> (e.g., help me push the green blob together) that specify the desired assistive behavior. These instructions can be long-horizon, context-aware, or ambiguously described (e.g., move a little bit upwards), requiring sophisticated contextual understanding. At any given time <inline-formula id="inf8">
<mml:math id="m8">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, BTLA processes multiple input streams to determine the resulting assistance behaviors. These inputs include natural language commands <inline-formula id="inf9">
<mml:math id="m9">
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> that specify desired assistive behaviors, proprioceptive information from both the master arm <inline-formula id="inf10">
<mml:math id="m10">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold">s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> and assistant arm <inline-formula id="inf11">
<mml:math id="m11">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold">s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>a</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>, environmental observations <inline-formula id="inf12">
<mml:math id="m12">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold">o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">env,t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>, direct human control inputs <inline-formula id="inf13">
<mml:math id="m13">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold">u</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>, and environmental sensing data <inline-formula id="inf14">
<mml:math id="m14">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold">z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
<p>Therefore, the problem formulation can be summarized as follows: given a natural language instruction <inline-formula id="inf15">
<mml:math id="m15">
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, the assistant robot&#x2019;s proprioceptive information <inline-formula id="inf16">
<mml:math id="m16">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold">s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>a</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, the master robot&#x2019;s proprioceptive information <inline-formula id="inf17">
<mml:math id="m17">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold">s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, human input <inline-formula id="inf18">
<mml:math id="m18">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold">u</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, environment sensing information <inline-formula id="inf19">
<mml:math id="m19">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold">z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> at time <inline-formula id="inf20">
<mml:math id="m20">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> and environmental observations <inline-formula id="inf21">
<mml:math id="m21">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold">o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">env,t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, the embodied AI system should generate a sequence of low-level skills from the skill base <inline-formula id="inf22">
<mml:math id="m22">
<mml:mrow>
<mml:mi mathvariant="script">S</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> and map them to a control policy <inline-formula id="inf23">
<mml:math id="m23">
<mml:mrow>
<mml:mi>&#x3c0;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> that enables the assistant robot to assist the human operator in performing the desired task effectively.</p>
</sec>
<sec id="s3-2">
<title>3.2 BTLA system implementation</title>
<p>To this end, the assistant robot must decompose the high-level instruction <inline-formula id="inf24">
<mml:math id="m24">
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> into a sequence of low-level skills selected from a predefined skill base <inline-formula id="inf25">
<mml:math id="m25">
<mml:mrow>
<mml:mi mathvariant="script">S</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>. The chosen skills and their corresponding parameters are then mapped to a control policy <inline-formula id="inf26">
<mml:math id="m26">
<mml:mrow>
<mml:mi>&#x3c0;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, represented by a skill function <inline-formula id="inf27">
<mml:math id="m27">
<mml:mrow>
<mml:mtext mathvariant="italic">BTLA</mml:mtext>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mo>&#x22c5;</mml:mo>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>. The skill knowledge in the skill base <inline-formula id="inf28">
<mml:math id="m28">
<mml:mrow>
<mml:mi mathvariant="script">S</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> can be adapted to accommodate different task requirements. Therefore, the focus of our work is not on the acquisition of these skills but rather on the effective utilization of the available skills to assist the human operator.</p>
<p>BTLA consists of three key components that collaborate to enable effective assistance: (1) the natural language interface uses OpenAI&#x2019;s Whisper model for speech-to-text conversion and LLM processing to interpret operator intentions; (2) a skill execution module manages the implementation of six core manipulation skills: Follow(), SymmetricalFollow(), Approach(), Move(), Handover(), and Fetch(); and (3) the control policy generator translates selected skills into robot control commands while maintaining safety constraints. Unlike a simple skill switcher, the LLM can interpret complex instructions, understand context, and provide feedback when needed. This flexibility enables the robot assistant to adapt to a wider range of scenarios and user needs, embodying the variable autonomy principle of BTLA. As shown in <xref ref-type="fig" rid="F1">Figure 1</xref>, BTLA can be divided into three main components: the human operator, the human-robot interface, and the teleoperation environment. The human operator can concentrate on the current task by observing the environment via visual feedback, manipulating one robot arm with teleoperation devices, and soliciting support from the AI-assisted robot arm for collaborative task execution. The AI-assisted robot arm receives human language commands as input and identifies the most relevant skill from its skill database <inline-formula id="inf29">
<mml:math id="m29">
<mml:mrow>
<mml:mi mathvariant="script">S</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, along with the necessary task parameters. The selected skill, combined with environmental data from sensors (such as visual information), proprioceptive data, and human input, forms the control policy that guides the actions of the AI-assisted robot arm. Within this configuration, the human operator collaborates with the AI-assisted robot arm within the teleoperation environment to achieve the desired task with optimal efficiency and effectiveness. The human operator provides high-level guidance and control, while the AI-assisted robot arm contributes its capabilities and understanding of the context to support the human operator in achieving their objectives. <xref ref-type="statement" rid="Algorithm_1">Algorithm 1</xref> outlines the core control loop of BTLA, showing how voice commands are processed through the LLM to select and execute appropriate skills. The algorithm handles both real-time skills that require continuous execution until stopped (like following behaviors) and autonomous skills that complete specific tasks (like object fetching).</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>Schematic diagram of the proposed BTLA method.</p>
</caption>
<graphic xlink:href="frobt-12-1621033-g001.tif">
<alt-text content-type="machine-generated">Diagram illustrating a human-robot interaction system. On the left, a human provides input through a haptic device with prompts to mirror robot arm movements and fetch objects. The &#x22;Human-Robot Bridge&#x22; in the center shows skill decomposition into real-time and autonomous skills, such as move, follow, mirror, fetch, grasp, and approach, communicated via JSON. Feedback informs control policy. On the right, the teleoperation environment shows master and assistant robots manipulating an object, transitioning from initial state to tasks like mirroring and fetching.</alt-text>
</graphic>
</fig>
<p>Each skill in the system is designed with clear activation conditions and completion criteria. Real-time skills like Follow() and SymmetricalFollow() maintain continuous adaptation to the master arm&#x2019;s movements, while autonomous skills like Fetch() and Handover() execute specific object manipulation sequences. The system monitors execution status and provides verbal feedback to the operator, ensuring transparent operation and easy error recovery. The processing of human intent occurs in real time while the system is executing actions. When a voice command is received, the system temporarily maintains its current action while processing the new instruction through the LLM pipeline to ensure smooth transitions between different assistance modes. The operator can issue new commands at any time, and the system will complete its current atomic action before transitioning to the new requested behavior. For safety reasons, certain commands (like &#x201c;stop&#x201d;) are processed with the highest priority without passing through the LLM pipeline and interrupt any ongoing action immediately.</p>
<p>
<statement content-type="algorithm" id="Algorithm_1">
<label>Algorithm 1</label>
<p>Embodied AI-Assisted Robot Arm Control.<list list-type="simple">
<list-item>
<p>
<bold>Require:</bold>Initial skills base <inline-formula id="inf30">
<mml:math id="m30">
<mml:mrow>
<mml:mi mathvariant="script">S</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> with predefined skills, LLM initial language description <inline-formula id="inf31">
<mml:math id="m31">
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>1:&#x2003;Initialize <inline-formula id="inf32">
<mml:math id="m32">
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2190;</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf33">
<mml:math id="m33">
<mml:mrow>
<mml:mtext mathvariant="italic">skill</mml:mtext>
<mml:mo>&#x2190;</mml:mo>
<mml:mtext mathvariant="italic">None</mml:mtext>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>2:&#x2003;<bold>while</bold> not finished <bold>do</bold>
</p>
</list-item>
<list-item>
<p>3:&#x2003;&#x2003;<bold>if</bold> voice_command received <bold>then</bold>
</p>
</list-item>
<list-item>
<p>4:&#x2003;&#x2003;&#x2003;<inline-formula id="inf34">
<mml:math id="m34">
<mml:mrow>
<mml:mtext mathvariant="italic">skill</mml:mtext>
<mml:mo>&#x2190;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> <bold>LLM</bold> (voice_command)</p>
</list-item>
<list-item>
<p>5:&#x2003;&#x2003;&#x2003;<inline-formula id="inf35">
<mml:math id="m35">
<mml:mrow>
<mml:mi>&#x3c0;</mml:mi>
<mml:mo>&#x2190;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> <bold>BTLA</bold> <inline-formula id="inf36">
<mml:math id="m36">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mtext mathvariant="italic">skill</mml:mtext>
<mml:mo>,</mml:mo>
<mml:mi>s</mml:mi>
<mml:mi>k</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>l</mml:mi>
<mml:mtext>_</mml:mtext>
<mml:mi>p</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>m</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>s</mml:mi>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold">u</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold">z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>6:&#x2003;&#x2003;&#x2003;<bold>if</bold> <inline-formula id="inf37">
<mml:math id="m37">
<mml:mrow>
<mml:mtext mathvariant="italic">skill</mml:mtext>
</mml:mrow>
</mml:math>
</inline-formula> is real-time <bold>then</bold>
</p>
</list-item>
<list-item>
<p>7:&#x2003;&#x2003;&#x2003;&#x2003;<bold>repeat</bold>
</p>
</list-item>
<list-item>
<p>8:&#x2003;&#x2003;&#x2003;&#x2003;&#x2003;Execute <inline-formula id="inf38">
<mml:math id="m38">
<mml:mrow>
<mml:mi>&#x3c0;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>9:&#x2003;&#x2003;&#x2003;&#x2003;&#x2003;<inline-formula id="inf39">
<mml:math id="m39">
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2190;</mml:mo>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>10:&#x2003;&#x2003;&#x2003;&#x2003;<bold>until</bold> voice_command to stop</p>
</list-item>
<list-item>
<p>11:&#x2003;&#x2003;&#x2003;<bold>else if</bold> <inline-formula id="inf40">
<mml:math id="m40">
<mml:mrow>
<mml:mtext mathvariant="italic">skill</mml:mtext>
</mml:mrow>
</mml:math>
</inline-formula> is autonomous <bold>then</bold>
</p>
</list-item>
<list-item>
<p>12:&#x2003;&#x2003;&#x2003;&#x2003;<bold>repeat</bold>
</p>
</list-item>
<list-item>
<p>13:&#x2003;&#x2003;&#x2003;&#x2003;&#x2003;Execute <inline-formula id="inf41">
<mml:math id="m41">
<mml:mrow>
<mml:mi>&#x3c0;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>14:&#x2003;&#x2003;&#x2003;&#x2003;&#x2003;<inline-formula id="inf42">
<mml:math id="m42">
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2190;</mml:mo>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>15:&#x2003;&#x2003;&#x2003;&#x2003;<bold>until</bold> <inline-formula id="inf43">
<mml:math id="m43">
<mml:mrow>
<mml:mtext mathvariant="italic">skill</mml:mtext>
</mml:mrow>
</mml:math>
</inline-formula> is done</p>
</list-item>
<list-item>
<p>16:&#x2003;&#x2003;&#x2003;<bold>end if</bold>
</p>
</list-item>
<list-item>
<p>17:&#x2003;&#x2003;<bold>end if</bold>
</p>
</p>
</list-item>
<list-item>
<p>18:&#x2003;<bold>end while</bold>
</p>
</list-item>
</list>
</p>
</statement>
</p>
<p>Building upon the existing skill base and task categorization framework, our proposed system explicitly addresses scenarios involving command misinterpretations or kinematic singularities through an integrated error-handling mechanism. To ensure operational safety and task efficacy, BTLA employs a multi-stage confirmation protocol before task execution. Upon receiving an instruction, the robotic agent initiates a semantic parsing phase to interpret the command, followed by the generation of a hierarchical execution plan. This plan is then presented to the human operator via an interface for explicit validation during the execution plan verification phase, enabling cross-verification of the robot&#x2019;s comprehension and providing a structured opportunity for the operator to implement necessary adjustments before deployment. Furthermore, BTLA incorporates real-time singularity detection algorithms and exception handling protocols. When kinematic singularities, operational anomalies, or unmodeled environmental constraints are detected during execution, the system initiates a suspension of operations and requests human intervention through prioritized status alerts.</p>
<p>Remark: This bidirectional communication framework establishes a closed-loop interaction protocol between the human operator and robotic system, enhancing system resilience through error recovery mechanisms and adaptive replanning capabilities. By integrating proactive validation checkpoints with reactive exception management, the architecture maintains optimal equilibrium between automated functionality and human supervisory control, thereby ensuring robust performance in dynamic, unstructured environments.</p>
</sec>
</sec>
<sec id="s4">
<title>4 Experiment</title>
<p>To evaluate the effectiveness of the BTLA system, we conducted experiments to move and manipulate large, heavy objects using a bimanual robotic system. The experimental procedure, from operator training to performance assessment, is illustrated in <xref ref-type="fig" rid="F2">Figure 2</xref>. The assessment metrics include task efficiency, operator workload, and user satisfaction in comparison to SPB and Dyadic teleoperation methods. Ten participants (7 male, 3 female, aged 22&#x2013;35) volunteered for this study, approved by Lancaster University&#x2019;s Ethics Committee (FST-2024-4525-RECR-4), with informed consent obtained beforehand. All underwent comprehensive system training before testing. Participants comprised graduate students and research staff recruited from engineering and computer science disciplines. Screening confirmed that all possessed fundamental robotics literacy (e.g., coursework in control systems or human-computer interaction) but had no prior experience with bimanual teleoperation systems.</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>Experimental procedure for evaluating bimanual teleoperation methods. The process begins with participant training in three tasks (reaching, pick-and-place, and pushing), followed by three experimental sessions (SPB, Dyadic, and BTLA teleoperation) conducted in randomized order for each participant.</p>
</caption>
<graphic xlink:href="frobt-12-1621033-g002.tif">
<alt-text content-type="machine-generated">Flowchart illustrating an experiment sequence: Training involves tasks like reaching, pick-and-place, and pushing within 3 minutes. Following training, three experiments (Single, Dyad, BTLA) occur, each separated by a 5-minute break. Each experiment requires performing a bimanual transfer task and completing a NASA-TLX questionnaire. The sequence of these experiments is randomized. The assessment phase involves a post-experiment questionnaire. Training sessions are conducted twice with a one-hour interval between sessions.</alt-text>
</graphic>
</fig>
<sec id="s4-1">
<title>4.1 Experimental setup</title>
<sec id="s4-1-1">
<title>4.1.1 Equipment and software</title>
<p>The experimental setup incorporated two 3D Systems Touch haptic interfaces (formerly Phantom Omni). The PyBullet physics engine API was employed to construct the virtual environment, orchestrate robotic arm actuation, and render object dynamics in real time. To enhance user interface intuitiveness and operational precision, we developed a haptic feedback-enabled control architecture incorporating a closed-loop velocity control scheme (<xref ref-type="disp-formula" rid="e1">Equation 1</xref>):<disp-formula id="e1">
<mml:math id="m44">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>V</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>r</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>b</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>v</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>h</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(1)</label>
</disp-formula>where <inline-formula id="inf44">
<mml:math id="m45">
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">{</mml:mo>
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>y</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>z</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">}</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf45">
<mml:math id="m46">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>V</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>r</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>b</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the velocity of the end effector of the robot arm, <inline-formula id="inf46">
<mml:math id="m47">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>h</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the displacement of the tip on the pen of the haptic device, and <inline-formula id="inf47">
<mml:math id="m48">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>v</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the hand controller-to-robot velocity gain. The feedback force is given by <xref ref-type="disp-formula" rid="e2">Equation 2</xref>.<disp-formula id="e2">
<mml:math id="m49">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>F</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>d</mml:mi>
<mml:mi>b</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>h</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>F</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">initial</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(2)</label>
</disp-formula>where <inline-formula id="inf48">
<mml:math id="m50">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>F</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">initial</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the initial force that allows the user to feel a sense of boundaries. <inline-formula id="inf49">
<mml:math id="m51">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>F</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>d</mml:mi>
<mml:mi>b</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the feedback force on the user, which is equal in magnitude but opposite in direction to the force applied by the human on the haptic device, i.e., <inline-formula id="inf50">
<mml:math id="m52">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>F</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">Feedback</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>F</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">Human</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>. This feedback force creates a sense of resistance when the user tries to move further, so that the user feels greater resistance when commanding a higher robot arm speed. To minimize uncontrolled variables that might influence the experiment results, we designed customized objects using Fusion 360 and converted them into URDF files.</p>
</sec>
<sec id="s4-1-2">
<title>4.1.2 LLM initial prompt</title>
<p>For realistic human voice interactions, we adopted the OpenAI Whisper model for speech-to-text and text-to-speech (TTS) tasks. We selected GPT-3.5-turbo as our primary LLM after comparative testing with GPT-4 and Mistral-7B-OpenOrca showed similar performance in command interpretation but faster response times with GPT-3.5-turbo. Our LLM prompt employs a structured three-component design: role definition, skill specification, and JSON response formatting. The prompt explicitly defines available skills (e.g., Follow(), Fetch(), SymmetricalFollow()) and requires standardized JSON responses such as &#x201c;Skill&#x201d;: &#x201c;Follow()&#x201d;, &#x201c;Description&#x201d;: &#x201c;I&#x2019;ll follow your arm movement to help push the object together.&#x201d; This ensures consistent command interpretation and seamless integration with our control pipeline. The complete prompt structure is detailed in the appendix (<xref ref-type="fig" rid="F9">Figure 9</xref>).</p>
<p>To optimize the robot assistant&#x2019;s understanding of its role and objectives, we implemented a set of predefined rules and instructions as an initial prompt for the LLM. The initial prompt configures the LLM as an AI assistant designed to aid a robot arm in task execution. It instructs the LLM to generate scripts based on the user&#x2019;s spoken commands, adhering to a specific JSON format: Script: &#x201c;Skill: Write the function here.&#x201d;, &#x201c;Description: Include a necessary description about this skill, as if you are talking to the user directly. Use &#x2018;you&#x2019; to address the user.&#x201d; The robot assistant is equipped with a comprehensive list of available skills from the skill database to enable matching of user commands with appropriate functions. The LLM is programmed to provide user feedback on its actions through the &#x201c;Description&#x201d; field in the JSON script. When a user&#x2019;s command corresponds to a known skill, the LLM generates the relevant script. In cases where no match is found, the assistant generates a script with an empty function and a description indicating that no action will be taken. This structured approach to the initial prompt ensures the LLM-aided robot assistant&#x2019;s ability to interpret user commands and provide meaningful feedback, which facilitates a more seamless and effective interaction between the human operator and the embodied AI system in bimanual handling tasks. Additionally, this safety check effectively addresses potential conflicts or misinterpretations between the LLM&#x2019;s voice command interpretation and the predefined skill base. The LLM is configured with a structured prompt (see <xref ref-type="fig" rid="F9">Figure 9</xref>) that defines available skills and expected response formats. This ensures consistent interpretation of operator commands and appropriate skill selection. 
The system provides immediate feedback through natural language responses, confirming command understanding before execution.</p>
</sec>
<sec id="s4-1-3">
<title>4.1.3 Skills</title>
<p>There are two types of skills: autonomous and real-time skills. Autonomous skills execute a sequence of actions and exit once the whole sequence is complete, such as Handover()&#x2014;hand over an object to the master arm; Approach()&#x2014;move the arm to approach an object (e.g., for lifting objects together); Fetch()&#x2014;grab an object and bring it to the master arm. Real-time skills perform continuous motions and exit when the user gives the stop command, such as Follow()&#x2014;follow the master robot arm (e.g., for pushing together); SymmetricalFollow()&#x2014;mirror the motion of the master robot arm; Move(distance, direction)&#x2014;move the arm (ask user for distance in meters and direction: &#x201c;&#x2b;x&#x201d;, &#x201c;-x&#x201d;, &#x201c;&#x2b;y&#x201d;, &#x201c;-y&#x201d;, &#x201c;&#x2b;z&#x201d;, &#x201c;-z&#x201d;). Each skill includes parameter validation and safety checks to ensure reliable operation.</p>
</sec>
</sec>
<sec id="s4-2">
<title>4.2 Training protocol</title>
<p>We developed a structured training protocol to ensure consistent operator proficiency across all experimental conditions. Each participant completed three increasingly complex tasks: target reaching, pick-and-place, and pushing operations (<xref ref-type="fig" rid="F3">Figure 3</xref>).</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>Single arm training tasks: <bold>(a)</bold> target reaching, <bold>(b)</bold> pick-and-place, and <bold>(c)</bold> pushing.</p>
</caption>
<graphic xlink:href="frobt-12-1621033-g003.tif">
<alt-text content-type="machine-generated">(a) Robotic arm above a wooden table with red and green markers labeled as unreached and reached waypoints. (b) Robotic arm near a green block with a target area indicated and coverage value of 0.99. (c) Robotic arm's gripper closed on a block with a coverage value of 0.54.</alt-text>
</graphic>
</fig>
<p>This progressive training approach helped participants develop fundamental skills before attempting more complex bimanual operations. In the target-reaching task, the goal was to navigate to the red waypoints. The pick-and-place task required participants to use the gripper to grasp a square block and transport it to a target area while avoiding a vertical barrier. The pushing task involved pushing an object into a designated target area. Participants were required to complete the tasks within 4 and 3 min, respectively.</p>
</sec>
<sec id="s4-3">
<title>4.3 Experimental procedure</title>
<p>The experimental task required coordinated bimanual manipulation to transport an object to a designated platform (<xref ref-type="fig" rid="F4">Figure 4</xref>). We evaluated three teleoperation patterns: SPB, Dyadic, and BTLA, with each participant experiencing all three modes. In the baseline SPB condition, participants controlled both robotic arms simultaneously using haptic devices, representing traditional teleoperation approaches. The Dyadic teleoperation condition paired participants with a trained operator, simulating collaborative control scenarios. The BTLA condition enabled participants to control the master arm directly while commanding the assistant arm through voice instructions. After each trial, participants completed the NASA-TLX questionnaire and provided feedback on their experience. The three teleoperation patterns were tested in randomized order for each participant to mitigate learning effects.</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>Experimental setup for bimanual teleoperation: <bold>(a)</bold> SPB and <bold>(b)</bold> Dyadic configurations.</p>
</caption>
<graphic xlink:href="frobt-12-1621033-g004.tif">
<alt-text content-type="machine-generated">Two-panel image showing a virtual scene with operators. In panel (a), a monitor displays the scene while Operator 1 interacts with Hand Controller 1. Hand Controller 2 is also visible. In panel (b), the same scene is displayed, with Operator 2 using Hand Controller 1 and another person using Hand Controller 2.</alt-text>
</graphic>
</fig>
<p>The experimental task involved coordinated manipulation of a large object, requiring precise control during grasping, transport, and placement phases. As illustrated in <xref ref-type="fig" rid="F5">Figure 5</xref>, successful completion demanded stable bimanual coordination to move the object to a designated target location while maintaining proper orientation and avoiding collisions. <xref ref-type="fig" rid="F5">Figures 5a&#x2013;d</xref> show the motion from the start position to the grasp position. <xref ref-type="fig" rid="F5">Figures 5e,f</xref> show the motion to the appointed platform.</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>Illustration of the execution of BTLA on object transferring tasks: <bold>(a)</bold> initial state <bold>(b)</bold> move the left arm only without following command <bold>(c)</bold> right arm is controlled by BTLA with symmetrical following behavior <bold>(d)</bold> go to pick up position simultaneously <bold>(e)</bold> grab the object <bold>(f)</bold> collaborate with BTLA moving the object to the specific place.</p>
</caption>
<graphic xlink:href="frobt-12-1621033-g005.tif">
<alt-text content-type="machine-generated">Six-panel image showing robotic arms positioning around an orange object with &#x22;Coverage&#x22; scores. Panels (a) to (e) display zero coverage with varying arm positions. Panel (f) shows full coverage with a score of one.</alt-text>
</graphic>
</fig>
</sec>
<sec id="s4-4">
<title>4.4 Assessment</title>
<p>We defined a successful trial using three criteria: successful simultaneous object grasping by both arms, stable object transport without drops or collisions, and accurate placement with at least 70% coverage of the target area. For each teleoperation pattern, we recorded multiple trials to assess the consistency and reliability of performance.</p>
<p>System usability and operator experience were assessed through two complementary questionnaires. The first evaluated the quality of human-robot interaction across multiple dimensions, including interface naturalness, operator satisfaction, perceived system intelligence, and overall usability. The second utilized the NASA-TLX to measure operator workload across six dimensions: mental demand, physical demand, temporal demand, performance, effort, and frustration (<xref ref-type="fig" rid="F8">Figure 8</xref>). This standardized assessment tool has been widely validated in human-machine interaction studies (<xref ref-type="bibr" rid="B15">Hart and Staveland, 1988</xref>) and provides robust metrics for comparing different teleoperation approaches.</p>
</sec>
</sec>
<sec sec-type="results|discussion" id="s5">
<title>5 Results and discussions</title>
<sec id="s5-1">
<title>5.1 Performance metrics</title>
<p>To evaluate the effectiveness of the BTLA, we compared its performance with the Dyadic and SPB scenarios using three metrics: coverage, success rate, and task completion time, as shown in <xref ref-type="fig" rid="F6">Figure 6</xref>. The BTLA scenario demonstrated the highest mean coverage (0.861) and success rate (0.627) among the three scenarios, suggesting that the BTLA system is more effective in completing tasks and covering a larger portion of the task space compared to the Dyadic and SPB scenarios. The Kruskal&#x2013;Wallis test was performed to assess the statistical significance of the differences in coverage <inline-formula id="inf54">
<mml:math id="m56">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0.003</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> and success rate <inline-formula id="inf55">
<mml:math id="m57">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0.004</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> among the patterns, and the results indicate that the differences in these metrics among the scenarios are statistically significant. Although the BTLA scenario exhibited faster task completion times compared to the other patterns, the differences were not statistically significant based on the Kruskal&#x2013;Wallis test, which yielded a p-value of 0.117 for the time metric. To identify specific group differences, we conducted post-hoc pairwise comparisons using the Dunn test with Bonferroni correction. For coverage, BTLA significantly outperformed both SPB <inline-formula id="inf56">
<mml:math id="m58">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>&#x3c;</mml:mo>
<mml:mn>0.001</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> and Dyadic <inline-formula id="inf57">
<mml:math id="m59">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0.032</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> conditions, while the difference between Dyadic and SPB was not significant <inline-formula id="inf58">
<mml:math id="m60">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0.089</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>. Similarly, for success rate, BTLA showed significant improvements over SPB <inline-formula id="inf59">
<mml:math id="m61">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>&#x3c;</mml:mo>
<mml:mn>0.001</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> and Dyadic <inline-formula id="inf60">
<mml:math id="m62">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0.045</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>, with no significant difference between Dyadic and SPB <inline-formula id="inf61">
<mml:math id="m63">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0.156</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>. These results confirm that BTLA provides the most substantial performance gains compared to traditional teleoperation approaches.</p>
<fig id="F6" position="float">
<label>FIGURE 6</label>
<caption>
<p>Box plots for performance <bold>(a)</bold> coverage <inline-formula id="inf51">
<mml:math id="m53">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>&#x3c;</mml:mo>
<mml:mn>0.05</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>, <bold>(b)</bold> success rate <inline-formula id="inf52">
<mml:math id="m54">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>&#x3c;</mml:mo>
<mml:mn>0.05</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>, <bold>(c)</bold> time <inline-formula id="inf53">
<mml:math id="m55">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0.117</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> among all subjects for three experimental scenarios SPB, Dyadic, and BTLA.</p>
</caption>
<graphic xlink:href="frobt-12-1621033-g006.tif">
<alt-text content-type="machine-generated">Three box plots compare Coverage, Success Rate, and Time across SPB, Dyadic, and BTLA scenarios. Coverage is highest in BTLA and lowest in SPB. Success Rate is highest in BTLA and lowest in SPB. Time is longest for SPB and shortest for BTLA. Outliers are present in the Success Rate and Time plots.</alt-text>
</graphic>
</fig>
<p>Furthermore, a correlation analysis was conducted to examine the relationship between coverage and success rate (see <xref ref-type="fig" rid="F7">Figure 7</xref>). The analysis revealed a strong positive correlation (0.71) between the two metrics, indicating that higher coverage is associated with higher success rates. This finding suggests that the BTLA system&#x2019;s ability to cover a larger portion of the task space contributes to its higher success rates in completing tasks compared to the Dyadic and SPB patterns.</p>
<fig id="F7" position="float">
<label>FIGURE 7</label>
<caption>
<p>
<bold>(a)</bold> Likert Scale Ratings. <bold>(b)</bold> Correlation matrix of performance metrics.</p>
</caption>
<graphic xlink:href="frobt-12-1621033-g007.tif">
<alt-text content-type="machine-generated">Bar chart labeled A shows ratings for four categories: Natural, Pleasant, Typical, and Likeable, with values around 7-8. Heatmap labeled B is a correlation matrix for performance metrics: Coverage, Success Rate, and Time, showing values 1, 0.71, -0.21, and -0.38 in a color gradient.</alt-text>
</graphic>
</fig>
</sec>
<sec id="s5-2">
<title>5.2 Subjective assessment</title>
<p>For all NASA-TLX metrics (mental demand (MD), physical demand (PD), temporal demand (TD), performance (P), effort (E), and frustration (F)), the BTLA pattern exhibited the most favorable ratings, with lower demands, effort, and frustration, as well as better perceived performance compared to the Dyadic and SPB patterns, as shown in <xref ref-type="fig" rid="F8">Figure 8</xref>. In contrast, the SPB pattern appeared to be the most challenging, with higher demands, effort, and frustration, and lower perceived performance. The Dyadic pattern fell between the BTLA and SPB patterns, indicating moderate levels of demand, effort, frustration, and performance.</p>
<fig id="F8" position="float">
<label>FIGURE 8</label>
<caption>
<p>Boxplots for NASA-TLX results among all subjects for three experimental patterns: SPB, Dyadic, and BTLA, respectively. Rated aspects from NASA-TLX: mental demand (MD), physical demand (PD), temporal demand (TD), performance (P), effort (E), and frustration (F). (all metrics: <inline-formula id="inf62">
<mml:math id="m64">
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>&#x3c;</mml:mo>
<mml:mn>0.05</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>).</p>
</caption>
<graphic xlink:href="frobt-12-1621033-g008.tif">
<alt-text content-type="machine-generated">Six box plots compare ratings for three scenarios (SPB, Dyadic, BTLA) across six categories labeled MD, PD, TD, P, E, and F. Ratings range from negative five to positive five. Each plot shows median, quartiles, and outliers for each scenario. SPB generally has higher ratings, while BTLA has the lowest.</alt-text>
</graphic>
</fig>
<p>The Kruskal&#x2013;Wallis test results revealed statistically significant differences among the three patterns for all metrics, with the test statistics being 17.974 for MD <inline-formula id="inf63">
<mml:math id="m65">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>&#x226a;</mml:mo>
<mml:mn>0.001</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>, 14.701 for PD <inline-formula id="inf64">
<mml:math id="m66">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0.001</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>, 12.276 for TD <inline-formula id="inf65">
<mml:math id="m67">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0.0002</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>, 15.723 for P <inline-formula id="inf66">
<mml:math id="m68">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>&#x226a;</mml:mo>
<mml:mn>0.001</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>, 14.228 for E <inline-formula id="inf67">
<mml:math id="m69">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0.0001</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>, and 11.018 for F <inline-formula id="inf68">
<mml:math id="m70">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>&#x226a;</mml:mo>
<mml:mn>0.001</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>. The p-values for all metrics were at or below 0.001, providing strong evidence against the null hypothesis of no difference among the patterns. Over 40% of participants reported that their performance was limited by the restricted 2D camera view. This limitation was due either to a loss of depth perception, which made it difficult to discern spatial relationships, or to partial obstruction of the images.</p>
<p>In summary, the experimental results showed marked improvements in task performance and reduced operator workload compared with conventional methods, with natural language interpretation and adaptive assistance proving critical for complex manipulations. However, the integrated voice processing pipeline&#x2014;comprising speech-to-text conversion via Whisper, intent interpretation through GPT-3.5-turbo, and skill dispatch&#x2014;introduces a measurable latency, which may impede real-time responsiveness during high-speed bimanual coordination tasks such as dynamic obstacle avoidance. Furthermore, validation remains confined to simulated environments using PyBullet; deployment on physical hardware necessitates addressing critical challenges, including sensor noise robustness and unmodeled dynamics (e.g., joint friction and cable effects). Future work includes three key directions: (1) broadening autonomous behaviors and refining real-time autonomy adaptation to boost flexibility; (2) exploring mutual adaptation between operators and the system during extended use to optimize collaboration; and (3) extending BTLA&#x2019;s application to diverse robotic platforms and real-world scenarios to strengthen practical relevance.</p>
</sec>
</sec>
<sec id="s6">
<title>6 Initial prompts</title>
<p>See <xref ref-type="fig" rid="F9">Figure 9</xref>.</p>
<fig id="F9" position="float">
<label>FIGURE 9</label>
<caption>
<p>An example of the initial LLM prompt: a textual description of the mission and skills.</p>
</caption>
<graphic xlink:href="frobt-12-1621033-g009.tif">
<alt-text content-type="machine-generated">Example prompt for generating a script to control a robot arm, including functions like Follow(), SymmetricalFollow(), Approach(), Handover(), Fetch(), Stop(), and NoFunction(). It advises responding with a JSON script and includes user feedback.</alt-text>
</graphic>
</fig>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s7">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/supplementary material; further inquiries can be directed to the corresponding author.</p>
</sec>
<sec sec-type="ethics-statement" id="s8">
<title>Ethics statement</title>
<p>The studies involving humans were approved by The University Research Ethics and Integrity Committee. The studies were conducted in accordance with the local legislation and institutional requirements. The participants provided their written informed consent to participate in this study. Written informed consent was obtained from the individual(s) for the publication of any potentially identifiable images or data included in this article.</p>
</sec>
<sec sec-type="author-contributions" id="s9">
<title>Author contributions</title>
<p>HF: Data curation, Formal Analysis, Methodology, Software, Validation, Visualization, Writing &#x2013; original draft, Writing &#x2013; review and editing. TX: Methodology, Resources, Writing &#x2013; original draft, Writing &#x2013; review and editing. YH: Data curation, Formal Analysis, Validation, Writing &#x2013; review and editing. SL: Data curation, Formal Analysis, Visualization, Writing &#x2013; original draft, Writing &#x2013; review and editing. GD: Formal Analysis, Supervision, Validation, Writing &#x2013; original draft, Writing &#x2013; review and editing. YG: Investigation, Methodology, Resources, Writing &#x2013; original draft, Writing &#x2013; review and editing. ZW: Conceptualization, Formal Analysis, Funding acquisition, Methodology, Project administration, Supervision, Writing &#x2013; original draft, Writing &#x2013; review and editing.</p>
</sec>
<sec sec-type="funding-information" id="s10">
<title>Funding</title>
<p>The author(s) declare that financial support was received for the research and/or publication of this article. This work was supported in part by The Royal Society under Grant IES/R2/232291, the UK-RAS Network&#x2b;, the Engineering and Physical Sciences Research Council (Grant number: EP/Y010523/1), The University of Manchester, and the European Commission grant Up-Skill (Horizon Europe RIA 101070666).</p>
</sec>
<ack>
<p>We thank the subjects for participating in the experiment.</p>
</ack>
<sec sec-type="COI-statement" id="s11">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="ai-statement" id="s12">
<title>Generative AI statement</title>
<p>The author(s) declare that no Generative AI was used in the creation of this manuscript.</p>
</sec>
<sec sec-type="disclaimer" id="s13">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Akinola</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Song</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Allen</surname>
<given-names>P. K.</given-names>
</name>
</person-group> (<year>2021</year>). &#x201c;<article-title>Dynamic grasping with reachability and motion awareness</article-title>,&#x201d; in <conf-name>2021 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)</conf-name>, <conf-loc>Prague, Czech Republic</conf-loc>, <conf-date>27 September 2021 - 01 October 2021</conf-date> (<publisher-name>IEEE</publisher-name>), <fpage>9422</fpage>&#x2013;<lpage>9429</lpage>.</citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bacha</surname>
<given-names>S. C.</given-names>
</name>
<name>
<surname>Bai</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Xiao</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Yeatman</surname>
<given-names>E. M.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Deep reinforcement learning-based control framework for multilateral telesurgery</article-title>. <source>IEEE Trans. Med. Robotics Bionics</source> <volume>4</volume>, <fpage>352</fpage>&#x2013;<lpage>355</lpage>. <pub-id pub-id-type="doi">10.1109/tmrb.2022.3170786</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bai</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Cao</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Yokoi</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Fujie</surname>
<given-names>M. G.</given-names>
</name>
<name>
<surname>Yeatman</surname>
<given-names>E. M.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>Anthropomorphic dual-arm coordinated control for a single-port surgical robot based on dual-step optimization</article-title>. <source>IEEE Trans. Med. Robotics Bionics</source> <volume>4</volume>, <fpage>72</fpage>&#x2013;<lpage>84</lpage>. <pub-id pub-id-type="doi">10.1109/tmrb.2022.3145673</pub-id>
</citation>
</ref>
<ref id="B4">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Bai</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Cursi</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Tsai</surname>
<given-names>Y.-Y.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). &#x201c;<article-title>Dual-arm coordinated manipulation for object twisting with human intelligence</article-title>,&#x201d; in <conf-name>2021 IEEE International Conference on Systems, Man, and Cybernetics (SMC)</conf-name>, <conf-loc>Melbourne, Australia</conf-loc>, <conf-date>17-20 October 2021</conf-date>, <fpage>902</fpage>&#x2013;<lpage>908</lpage>. <pub-id pub-id-type="doi">10.1109/smc52423.2021.9658594</pub-id>
</citation>
</ref>
<ref id="B5">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Balasubramanian</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Brook</surname>
<given-names>P. D.</given-names>
</name>
<name>
<surname>Smith</surname>
<given-names>J. R.</given-names>
</name>
<name>
<surname>Matsuoka</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2010</year>). &#x201c;<article-title>Human-guided grasp measures improve grasp robustness on physical robot</article-title>,&#x201d; in <conf-name>2010 IEEE International Conference on Robotics and Automation (ICRA)</conf-name>, <conf-loc>Anchorage, AK, USA</conf-loc>, <conf-date>03-07 May 2010</conf-date> (<publisher-name>IEEE</publisher-name>), <fpage>2294</fpage>&#x2013;<lpage>2301</lpage>.</citation>
</ref>
<ref id="B6">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Boehm</surname>
<given-names>J. R.</given-names>
</name>
<name>
<surname>Fey</surname>
<given-names>N. P.</given-names>
</name>
<name>
<surname>Fey</surname>
<given-names>A. M.</given-names>
</name>
</person-group> (<year>2021</year>). &#x201c;<article-title>Online recognition of bimanual coordination provides important context for movement data in bimanual teleoperated robots</article-title>,&#x201d; in <conf-name>2021 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)</conf-name>, <conf-loc>Prague, Czech Republic</conf-loc>, <conf-date>27 September 2021 - 01 October 2021</conf-date> (<publisher-name>IEEE</publisher-name>), <fpage>6248</fpage>&#x2013;<lpage>6255</lpage>.</citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cavusoglu</surname>
<given-names>M. C.</given-names>
</name>
<name>
<surname>Sherman</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Tendick</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2002</year>). <article-title>Design of bilateral teleoperation controllers for haptic exploration and telemanipulation of soft environments</article-title>. <source>IEEE Trans. Robotics Automation</source> <volume>18</volume>, <fpage>641</fpage>&#x2013;<lpage>647</lpage>. <pub-id pub-id-type="doi">10.1109/tra.2002.802199</pub-id>
</citation>
</ref>
<ref id="B8">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Che</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Haro</surname>
<given-names>G. M.</given-names>
</name>
<name>
<surname>Okamura</surname>
<given-names>A. M.</given-names>
</name>
</person-group> (<year>2016</year>). &#x201c;<article-title>Two is not always better than one: effects of teleoperation and haptic coupling</article-title>,&#x201d; in <conf-name>2016 6th IEEE International Conference on Biomedical Robotics and Biomechatronics (BioRob)</conf-name>, <conf-loc>Singapore</conf-loc>, <conf-date>26-29 June 2016</conf-date> (<publisher-name>IEEE</publisher-name>), <fpage>1290</fpage>&#x2013;<lpage>1295</lpage>.</citation>
</ref>
<ref id="B9">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Cheng</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Burdet</surname>
<given-names>E.</given-names>
</name>
</person-group> (<year>2023</year>). &#x201c;<article-title>Foot gestures to control the grasping of a surgical robot</article-title>,&#x201d; in <conf-name>2023 IEEE International Conference on Robotics and Automation (ICRA)</conf-name>, <conf-loc>London, United Kingdom</conf-loc>, <conf-date>29 May 2023 - 02 June 2023</conf-date>, <fpage>6844</fpage>&#x2013;<lpage>6850</lpage>. <pub-id pub-id-type="doi">10.1109/icra48891.2023.10160368</pub-id>
</citation>
</ref>
<ref id="B10">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Clark</surname>
<given-names>J. P.</given-names>
</name>
<name>
<surname>Lentini</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Barontini</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Catalano</surname>
<given-names>M. G.</given-names>
</name>
<name>
<surname>Bianchi</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>O&#x2019;Malley</surname>
<given-names>M. K.</given-names>
</name>
</person-group> (<year>2019</year>). &#x201c;<article-title>On the role of wearable haptics for force feedback in teleimpedance control for dual-arm robotic teleoperation</article-title>,&#x201d; in <conf-name>2019 International Conference on Robotics and Automation (ICRA)</conf-name>, <conf-loc>Montreal, QC, Canada</conf-loc>, <conf-date>20-24 May 2019</conf-date> (<publisher-name>IEEE</publisher-name>), <fpage>5187</fpage>&#x2013;<lpage>5193</lpage>.</citation>
</ref>
<ref id="B11">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Cui</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Peng</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Hu</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Lu</surname>
<given-names>H.</given-names>
</name>
<etal/>
</person-group> (<year>2024</year>). <source>Human demonstrations are generalizable knowledge for robots</source>.</citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Garc&#xed;a</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Solanes</surname>
<given-names>J. E.</given-names>
</name>
<name>
<surname>Mu&#xf1;oz</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Gracia</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Tornero</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Augmented reality-based interface for bimanual robot teleoperation</article-title>. <source>Appl. Sci.</source> <volume>12</volume>, <fpage>4379</fpage>. <pub-id pub-id-type="doi">10.3390/app12094379</pub-id>
</citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gowrishankar</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Takagi</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Osu</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Yoshioka</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Kawato</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Burdet</surname>
<given-names>E.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Two is better than one: physical interactions improve motor performance in humans</article-title>. <source>Sci. Rep.</source> <volume>4</volume>, <fpage>1</fpage>&#x2013;<lpage>7</lpage>. <pub-id pub-id-type="doi">10.1038/srep03824</pub-id>
</citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Guo</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Freer</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Deligianni</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>G.-Z.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Eye-tracking for performance evaluation and workload estimation in space telerobotic training</article-title>. <source>IEEE Trans. Human-Machine Syst.</source> <volume>52</volume>, <fpage>1</fpage>&#x2013;<lpage>11</lpage>. <pub-id pub-id-type="doi">10.1109/thms.2021.3107519</pub-id>
</citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hart</surname>
<given-names>S. G.</given-names>
</name>
<name>
<surname>Staveland</surname>
<given-names>L. E.</given-names>
</name>
</person-group> (<year>1988</year>). <article-title>Development of NASA-TLX (Task Load Index): results of empirical and theoretical research</article-title>. <source>Adv. Psychol.</source> <volume>52</volume>, <fpage>139</fpage>&#x2013;<lpage>183</lpage>. <pub-id pub-id-type="doi">10.1016/s0166-4115(08)62386-9</pub-id>
</citation>
</ref>
<ref id="B16">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Hu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Yi</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Gao</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2023</year>). <source>Look before you leap: unveiling the power of GPT-4V in robotic vision-language planning</source>.</citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hu</surname>
<given-names>Z. J.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Sena</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Rodriguez y Baena</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Burdet</surname>
<given-names>E.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Towards human-robot collaborative surgery: trajectory and strategy learning in bimanual peg transfer</article-title>. <source>IEEE Robotics Automation Lett.</source> <volume>8</volume>, <fpage>4553</fpage>&#x2013;<lpage>4560</lpage>. <pub-id pub-id-type="doi">10.1109/lra.2023.3285478</pub-id>
</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Huang</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Cooperative manipulation of deformable objects by single-leader&#x2013;dual-follower teleoperation</article-title>. <source>IEEE Trans. Industrial Electron.</source> <volume>69</volume>, <fpage>13162</fpage>&#x2013;<lpage>13170</lpage>. <pub-id pub-id-type="doi">10.1109/tie.2021.3139228</pub-id>
</citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Huang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Lai</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Cao</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Burdet</surname>
<given-names>E.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>A three-limb teleoperated robotic system with foot control for flexible endoscopic surgery</article-title>. <source>Ann. Biomed. Eng.</source> <volume>49</volume>, <fpage>2282</fpage>&#x2013;<lpage>2296</lpage>. <pub-id pub-id-type="doi">10.1007/s10439-021-02766-3</pub-id>
</citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Huang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Bai</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Xiao</surname>
<given-names>B.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>A novel training and collaboration integrated framework for human&#x2013;agent teleoperation</article-title>. <source>Sensors</source> <volume>21</volume>, <fpage>8341</fpage>. <pub-id pub-id-type="doi">10.3390/s21248341</pub-id>
</citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kropiv&#x161;ek Leskovar</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>&#x10c;amernik</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Petri&#x10d;</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Leader&#x2013;follower role allocation for physical collaboration in human dyads</article-title>. <source>Appl. Sci.</source> <volume>11</volume>, <fpage>8928</fpage>. <pub-id pub-id-type="doi">10.3390/app11198928</pub-id>
</citation>
</ref>
<ref id="B22">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Laghi</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Maimeri</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Marchand</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Leparoux</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Catalano</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Ajoudani</surname>
<given-names>A.</given-names>
</name>
<etal/>
</person-group> (<year>2018</year>). &#x201c;<article-title>Shared-autonomy control for intuitive bimanual tele-manipulation</article-title>,&#x201d; in <conf-name>2018 IEEE-RAS 18th International Conference on Humanoid Robots (Humanoids)</conf-name>, <conf-loc>Beijing, China</conf-loc>, <conf-date>06-09 November 2018</conf-date> (<publisher-name>IEEE</publisher-name>), <fpage>1</fpage>&#x2013;<lpage>9</lpage>.</citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Caponetto</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Katsageorgiou</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Tsagarakis</surname>
<given-names>N. G.</given-names>
</name>
<name>
<surname>Sarakoglou</surname>
<given-names>I.</given-names>
</name>
</person-group> (<year>2023a</year>). <article-title>A telerobotic system enabling online switching among various architectures and controllers</article-title>. <source>Robotics Aut. Syst.</source> <volume>166</volume>, <fpage>104402</fpage>. <pub-id pub-id-type="doi">10.1016/j.robot.2023.104402</pub-id>
</citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Caponetto</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Sarakoglou</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Tsagarakis</surname>
<given-names>N. G.</given-names>
</name>
</person-group> (<year>2023b</year>). <article-title>A haptic shared autonomy with partial orientation regulation for DoF deficiency in remote side</article-title>. <source>IEEE Trans. Haptics</source> <volume>16</volume>, <fpage>86</fpage>&#x2013;<lpage>95</lpage>. <pub-id pub-id-type="doi">10.1109/toh.2023.3239602</pub-id>
</citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Su</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Yuan</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>X.</given-names>
</name>
</person-group> (<year>2023c</year>). <article-title>The classification and new trends of shared control strategies in telerobotic systems: a survey</article-title>. <source>IEEE Trans. Haptics</source> <volume>16</volume>, <fpage>118</fpage>&#x2013;<lpage>133</lpage>. <pub-id pub-id-type="doi">10.1109/toh.2023.3253856</pub-id>
</citation>
</ref>
<ref id="B26">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Zhong</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>X.-P.</given-names>
</name>
<name>
<surname>Ding</surname>
<given-names>W.</given-names>
</name>
</person-group> (<year>2024</year>). <source>Growing from exploration: a self-exploring framework for robots based on foundation models</source>.</citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Sena</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Xing</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Babi&#x10d;</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>van Asseldonk</surname>
<given-names>E.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>A review on interaction control for contact robots through intent detection</article-title>. <source>Prog. Biomed. Eng.</source> <volume>4</volume>, <fpage>032004</fpage>. <pub-id pub-id-type="doi">10.1088/2516-1091/ac8193</pub-id>
</citation>
</ref>
<ref id="B28">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Lin</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Z.</given-names>
</name>
</person-group> (<year>2024</year>). <source>Advancing object goal navigation through LLM-enhanced object affinities transfer</source>.</citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Moniruzzaman</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Rassau</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Chai</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Islam</surname>
<given-names>S. M. S.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Teleoperation methods and enhancement techniques for mobile robots: a comprehensive survey</article-title>. <source>Robotics Aut. Syst.</source> <volume>150</volume>, <fpage>103973</fpage>. <pub-id pub-id-type="doi">10.1016/j.robot.2021.103973</pub-id>
</citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Noohi</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>&#x17d;efran</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Patton</surname>
<given-names>J. L.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>A model for human&#x2013;human collaborative object manipulation and its application to human&#x2013;robot interaction</article-title>. <source>IEEE Trans. Robotics</source> <volume>32</volume>, <fpage>880</fpage>&#x2013;<lpage>896</lpage>. <pub-id pub-id-type="doi">10.1109/tro.2016.2572698</pub-id>
</citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rakita</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Mutlu</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Gleicher</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Hiatt</surname>
<given-names>L. M.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Shared control&#x2013;based bimanual robot manipulation</article-title>. <source>Sci. Robotics</source> <volume>4</volume>, <fpage>eaaw0955</fpage>. <pub-id pub-id-type="doi">10.1126/scirobotics.aaw0955</pub-id>
</citation>
</ref>
<ref id="B32">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Sena</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Rouxel</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Ivanova</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Burdet</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>Z.</given-names>
</name>
</person-group> (<year>2021</year>). &#x201c;<article-title>Haptic bimanual system for teleoperation of time-delayed tasks</article-title>,&#x201d; in <conf-name>2021 IEEE International Conference on Robotics and Biomimetics (ROBIO)</conf-name>, <conf-loc>Sanya, China</conf-loc>, <conf-date>27-31 December 2021</conf-date>, <fpage>1234</fpage>&#x2013;<lpage>1239</lpage>.</citation>
</ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shao</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Su</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Song</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Yao</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Research of HRV as a measure of mental workload in human and dual-arm robot interaction</article-title>. <source>Electronics</source> <volume>9</volume>, <fpage>2174</fpage>. <pub-id pub-id-type="doi">10.3390/electronics9122174</pub-id>
</citation>
</ref>
<ref id="B34">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Shi</surname>
<given-names>L. X.</given-names>
</name>
<name>
<surname>Hu</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>T. Z.</given-names>
</name>
<name>
<surname>Sharma</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Pertsch</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Luo</surname>
<given-names>J.</given-names>
</name>
<etal/>
</person-group> (<year>2024</year>). <source>Yell at your robot: improving on-the-fly from language corrections</source>.</citation>
</ref>
<ref id="B35">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Singh</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Blukis</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Mousavian</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Goyal</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Tremblay</surname>
<given-names>J.</given-names>
</name>
<etal/>
</person-group> (<year>2023</year>). &#x201c;<article-title>ProgPrompt: generating situated robot task plans using large language models</article-title>,&#x201d; in <conf-name>2023 IEEE International Conference on Robotics and Automation (ICRA)</conf-name>, <conf-loc>London, United Kingdom</conf-loc>, <conf-date>29 May 2023 - 02 June 2023</conf-date>, <fpage>11523</fpage>&#x2013;<lpage>11530</lpage>. <pub-id pub-id-type="doi">10.1109/icra48891.2023.10161317</pub-id>
</citation>
</ref>
<ref id="B36">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Soyguder</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Abut</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Haptic industrial robot control with variable time delayed bilateral teleoperation</article-title>. <source>Industrial Robot Int. J.</source> <volume>43</volume>, <fpage>390</fpage>&#x2013;<lpage>402</lpage>. <pub-id pub-id-type="doi">10.1108/ir-12-2015-0213</pub-id>
</citation>
</ref>
<ref id="B37">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sun</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Liao</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Loutfi</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Single master bimanual teleoperation system with efficient regulation</article-title>. <source>IEEE Trans. Robotics</source> <volume>36</volume>, <fpage>1022</fpage>&#x2013;<lpage>1037</lpage>. <pub-id pub-id-type="doi">10.1109/tro.2020.2973099</pub-id>
</citation>
</ref>
<ref id="B38">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sun</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Qin</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Jing</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Adaptive interval type-2 fuzzy control for multi-legged underwater robot with input saturation and full-state constraints</article-title>. <source>Int. J. Syst. Sci.</source> <volume>54</volume>, <fpage>2859</fpage>&#x2013;<lpage>2874</lpage>. <pub-id pub-id-type="doi">10.1080/00207721.2020.1869346</pub-id>
</citation>
</ref>
<ref id="B39">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Fei</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Rouxel</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Xiao</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>Z.</given-names>
</name>
<etal/>
</person-group> (<year>2024</year>). <article-title>Learning to assist bimanual teleoperation using interval type-2 polynomial fuzzy inference</article-title>. <source>IEEE Trans. Cognitive Dev. Syst.</source> <volume>16</volume>, <fpage>416</fpage>&#x2013;<lpage>425</lpage>. <pub-id pub-id-type="doi">10.1109/tcds.2023.3272730</pub-id>
</citation>
</ref>
<ref id="B40">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Lam</surname>
<given-names>H.-K.</given-names>
</name>
<name>
<surname>Xiao</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Liang</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Event-triggered prescribed-time fuzzy control for space teleoperation systems subject to multiple constraints and uncertainties</article-title>. <source>IEEE Trans. Fuzzy Syst.</source> <volume>29</volume>, <fpage>2785</fpage>&#x2013;<lpage>2797</lpage>. <pub-id pub-id-type="doi">10.1109/tfuzz.2020.3007438</pub-id>
</citation>
</ref>
<ref id="B41">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wu</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Qi</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Hu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Coordinated control of a dual-arm robot for surgical instrument sorting tasks</article-title>. <source>Robotics Aut. Syst.</source> <volume>112</volume>, <fpage>1</fpage>&#x2013;<lpage>12</lpage>. <pub-id pub-id-type="doi">10.1016/j.robot.2018.10.007</pub-id>
</citation>
</ref>
<ref id="B42">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Zha</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Cui</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>L.-H.</given-names>
</name>
<name>
<surname>Kwon</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Arenas</surname>
<given-names>M. G.</given-names>
</name>
<name>
<surname>Zeng</surname>
<given-names>A.</given-names>
</name>
<etal/>
</person-group> (<year>2023</year>). <source>Distilling and retrieving generalizable knowledge for robot manipulation via language corrections</source>.</citation>
</ref>
<ref id="B43">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Liang</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Mi</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Z.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Coordinated stabilization for space robot after capturing a noncooperative target with large inertia</article-title>. <source>Acta Astronaut.</source> <volume>134</volume>, <fpage>75</fpage>&#x2013;<lpage>84</lpage>. <pub-id pub-id-type="doi">10.1016/j.actaastro.2017.01.041</pub-id>
</citation>
</ref>
<ref id="B44">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Zheng</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Hu</surname>
<given-names>Z. J.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Cheng</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Burdet</surname>
<given-names>E.</given-names>
</name>
</person-group> (<year>2024</year>). &#x201c;<article-title>A user-centered shared control scheme with learning from demonstration for robotic surgery</article-title>,&#x201d; in <conf-name>2024 IEEE International Conference on Robotics and Automation (ICRA)</conf-name>, <conf-loc>Yokohama, Japan</conf-loc>, <conf-date>13-17 May 2024</conf-date> (<publisher-name>IEEE</publisher-name>), <fpage>15195</fpage>&#x2013;<lpage>15201</lpage>.</citation>
</ref>
<ref id="B45">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Zheng</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Xing</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Jin</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Zheng</surname>
<given-names>Y.</given-names>
</name>
<etal/>
</person-group> (<year>2024</year>). <source>PlanAgent: a multi-modal large language agent for closed-loop vehicle motion planning</source>.</citation>
</ref>
<ref id="B46">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Zhou</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Zheng</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2021</year>). &#x201c;<article-title>A bilateral dual-arm teleoperation robot system with a unified control architecture</article-title>,&#x201d; in <conf-name>2021 30th IEEE International Conference on Robot &#x26; Human Interactive Communication (RO-MAN)</conf-name>, <conf-loc>Vancouver, BC, Canada</conf-loc>, <conf-date>08-12 August 2021</conf-date> (<publisher-name>IEEE</publisher-name>), <fpage>495</fpage>&#x2013;<lpage>502</lpage>.</citation>
</ref>
</ref-list>
</back>
</article>