<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article article-type="review-article" dtd-version="2.3" xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Robot. AI</journal-id>
<journal-title>Frontiers in Robotics and AI</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Robot. AI</abbrev-journal-title>
<issn pub-type="epub">2296-9144</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1347985</article-id>
<article-id pub-id-type="doi">10.3389/frobt.2024.1347985</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Robotics and AI</subject>
<subj-group>
<subject>Review</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>A review of visual SLAM for robotics: evolution, properties, and future applications</article-title>
<alt-title alt-title-type="left-running-head">Al-Tawil et al.</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/frobt.2024.1347985">10.3389/frobt.2024.1347985</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Al-Tawil</surname>
<given-names>Basheer</given-names>
</name>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2593196/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/Writing - review &#x26; editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Hempel</surname>
<given-names>Thorsten</given-names>
</name>
<role content-type="https://credit.niso.org/contributor-roles/data curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/Writing - review &#x26; editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Abdelrahman</surname>
<given-names>Ahmed</given-names>
</name>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/Writing - review &#x26; editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Al-Hamadi</surname>
<given-names>Ayoub</given-names>
</name>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/project administration/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/funding acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/Writing - review &#x26; editing/"/>
</contrib>
</contrib-group>
<aff>
<institution>Institute for Information Technology and Communications</institution>, <institution>Otto-von-Guericke-University</institution>, <addr-line>Magdeburg</addr-line>, <country>Germany</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1573240/overview">Patrick Sebastian</ext-link>, University of Technology Petronas, Malaysia</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1022165/overview">Chinmay Chakraborty</ext-link>, Birla Institute of Technology, Mesra, India</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2357894/overview">Kishore Bingi</ext-link>, University of Technology Petronas, Malaysia</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1887628/overview">Irraivan Elamvazuthi</ext-link>, University of Technology Petronas, Malaysia</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2417051/overview">Edmanuel Cruz</ext-link>, Technological University of Panama, Panama</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Basheer Al-Tawil, <email>basheer.al-tawil@ovgu.de</email>
</corresp>
</author-notes>
<pub-date pub-type="epub">
<day>10</day>
<month>04</month>
<year>2024</year>
</pub-date>
<pub-date pub-type="collection">
<year>2024</year>
</pub-date>
<volume>11</volume>
<elocation-id>1347985</elocation-id>
<history>
<date date-type="received">
<day>01</day>
<month>12</month>
<year>2023</year>
</date>
<date date-type="accepted">
<day>20</day>
<month>02</month>
<year>2024</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2024 Al-Tawil, Hempel, Abdelrahman and Al-Hamadi.</copyright-statement>
<copyright-year>2024</copyright-year>
<copyright-holder>Al-Tawil, Hempel, Abdelrahman and Al-Hamadi</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>Visual simultaneous localization and mapping (V-SLAM) plays a crucial role in the field of robotic systems, especially for interactive and collaborative mobile robots. The growing reliance on robotics has increased complexity in task execution in real-world applications. Consequently, several types of V-SLAM methods have been revealed to facilitate and streamline the functions of robots. This work aims to showcase the latest V-SLAM methodologies, offering clear selection criteria for researchers and developers to choose the right approach for their robotic applications. It chronologically presents the evolution of SLAM methods, highlighting key principles and providing comparative analyses between them. The paper focuses on the integration of the robotic ecosystem with a robot operating system (ROS) as Middleware, explores essential V-SLAM benchmark datasets, and presents demonstrative figures for each method&#x2019;s workflow.</p>
</abstract>
<kwd-group>
<kwd>V-SLAM</kwd>
<kwd>interactive mobile robots</kwd>
<kwd>ROS</kwd>
<kwd>benchmark</kwd>
<kwd>Middleware</kwd>
<kwd>workflow</kwd>
<kwd>robotic applications</kwd>
<kwd>robotic ecosystem</kwd>
</kwd-group>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Robot Vision and Artificial Perception</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="s1">
<title>1 Introduction</title>
<p>Robotics is an interdisciplinary field that involves the creation, design, and operation of tasks using algorithms and programming (<xref ref-type="bibr" rid="B13">Bongard, 2008</xref>; <xref ref-type="bibr" rid="B70">Joo et al., 2020</xref>; <xref ref-type="bibr" rid="B9">Awais and Henrich, 2010</xref>; <xref ref-type="bibr" rid="B47">Fong et al., 2003</xref>). Its impact extends to manufacturing, automation, optimization, transportation, medical applications, and even NASA&#x2019;s interplanetary exploration (<xref ref-type="bibr" rid="B88">Li et al., 2023b</xref>; <xref ref-type="bibr" rid="B63">Heyer, 2010</xref>; <xref ref-type="bibr" rid="B138">Sheridan, 2016</xref>; <xref ref-type="bibr" rid="B98">Mazumdar et al., 2023</xref>). Service robots, which interact with people, are becoming more common and useful in everyday life (<xref ref-type="bibr" rid="B60">Hempel et al., 2023</xref>; <xref ref-type="bibr" rid="B94">Lynch et al., 2023</xref>). The imperative of integrating automation with human cognitive abilities becomes evident in facilitating a successful collaboration between humans and robots. This helps service robots be more effective in different situations where they interact with people (<xref ref-type="bibr" rid="B121">Prati et al., 2021</xref>; <xref ref-type="bibr" rid="B146">Strazdas et al., 2020</xref>; <xref ref-type="bibr" rid="B175">Zheng et al., 2023</xref>). Furthermore, using multiple robots together can help them handle complex tasks better (<xref ref-type="bibr" rid="B174">Zheng et al., 2022</xref>; <xref ref-type="bibr" rid="B88">Li et al., 2023b</xref>; <xref ref-type="bibr" rid="B46">Fiedler et al., 2021</xref>). To manage and coordinate various processes, a robot operating system (ROS) plays a significant role (<xref ref-type="bibr" rid="B17">Buyval et al., 2017</xref>). It is an open-source framework that aids roboticists in implementing their research and projects with minimal complexity. 
ROS offers a multitude of features, including hardware integration, control mechanisms, and seamless device implementation into the system, thus facilitating the development and operation of robotic systems (<xref ref-type="bibr" rid="B6">Altawil and Can, 2023</xref>).</p>
<p>As shown in <xref ref-type="fig" rid="F1">Figure 1</xref>, the paper is divided into six sections. <xref ref-type="sec" rid="s1">Section 1</xref> gives a brief introduction to robotics and SLAM. <xref ref-type="sec" rid="s2">Section 2</xref> presents an overview of the V-SLAM paradigm that delves into its fundamental concepts.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>Article organizational chart.</p>
</caption>
<graphic xlink:href="frobt-11-1347985-g001.tif"/>
</fig>
<p>
<xref ref-type="sec" rid="s3">Section 3</xref> presents the state-of-the-art V-SLAM methods, offering insights into their latest advancements. Moving forward, <xref ref-type="sec" rid="s4">Section 4</xref> explores the evolution of V-SLAM and discusses the most commonly used datasets. <xref ref-type="sec" rid="s5">Section 5</xref> focuses on techniques for evaluating SLAM methods, aiding in the selection of appropriate methods. Finally, <xref ref-type="sec" rid="s6">Section 6</xref> provides the conclusion of the article, summarizing the key points we discovered while working on our review paper. </p> <p> Recently, there has been a growing demand for robots that can move around and work well in places they have never been before. In this regard, simultaneous localization and mapping (SLAM) emerges as a fundamental approach for these robots. The primary goal of SLAM is to autonomously explore and navigate unknown environments by simultaneously creating a map and determining their own position (<xref ref-type="bibr" rid="B34">Durrant-Whyte, 2012</xref>; <xref ref-type="bibr" rid="B102">Mohamed et al., 2008</xref>). Furthermore, it provides real-time capabilities, allowing robots to make decisions on-the-fly without relying on pre-existing maps. Its utility extends to the extraction, organization, and comprehension of information, thereby enhancing the robot&#x2019;s capacity to interpret and interact effectively with its environment (<xref ref-type="bibr" rid="B115">Pal et al., 2022</xref>; <xref ref-type="bibr" rid="B80">Lee et al., 2020</xref>; <xref ref-type="bibr" rid="B8">Aslan et al., 2021</xref>). It is crucial to enable these robots to autonomously navigate and interact in human environments, thus reducing human effort and enhancing overall productivity (<xref ref-type="bibr" rid="B7">Arfa, 2022</xref>). 
The construction of maps is based on the utilization of sensor data, such as visual data, laser scanning data, and data from the inertial measurement unit (IMU), followed by rapid processing (<xref ref-type="bibr" rid="B95">Macario Barros et al., 2022</xref>).</p>
<p>Historically, prior to the advent of SLAM technology, localization and mapping were treated as distinct entities. However, it was seen that there is a strong internal dependency between mapping and localization. Although accurate localization depends on the map, mapping depends on localization. Thus, the question is known as the &#x201c;Chicken and Egg&#x201d; question (<xref ref-type="bibr" rid="B149">Taheri and Xia, 2021</xref>). In robotics, there are different tools to help robots obtain information from surroundings and build their map. One way is to use sensors such as LiDAR, which uses light detection and ranging sensors to make a 3D map (<xref ref-type="bibr" rid="B67">Huang, 2021</xref>; <xref ref-type="bibr" rid="B158">Van Nam and Gon-Woo, 2021</xref>). Another way is to use cameras, such as monocular and stereo cameras, which are applied in visual SLAM (V-SLAM). In this method, the robot uses pictures to figure out where it is and creates the required map (<xref ref-type="bibr" rid="B31">Davison et al., 2007</xref>). Regarding the paper&#x2019;s intensive details, we provide <xref ref-type="table" rid="T1">Table 1</xref> that summarizes and includes the description of abbreviations used in the article based on SLAM principles and fundamentals.</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>List of abbreviations used in this article.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Abbreviation</th>
<th align="center">Explanation</th>
<th align="center">Abbreviation</th>
<th align="center">Explanation</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">V-SLAM</td>
<td align="center">Visual simultaneous localization and mapping</td>
<td align="center">LSD</td>
<td align="center">Large-scale direct</td>
</tr>
<tr>
<td align="left">ROS</td>
<td align="center">Robot Operating System</td>
<td align="center">OKVIS</td>
<td align="center">Open keyframe-based visual&#x2013;inertial</td>
</tr>
<tr>
<td align="left">LiDAR</td>
<td align="center">Light detection and ranging</td>
<td align="center">DVO</td>
<td align="center">Dense visual odometry</td>
</tr>
<tr>
<td align="left">BA</td>
<td align="center">Bundle adjustment</td>
<td align="center">RPGO</td>
<td align="center">Robust pose-graph optimization</td>
</tr>
<tr>
<td align="left">BoW</td>
<td align="center">Bag of words</td>
<td align="center">IMU</td>
<td align="center">Inertial measurement unit</td>
</tr>
<tr>
<td align="left">PTAM</td>
<td align="center">Parallel tracking and mapping</td>
<td align="center">GPS</td>
<td align="center">Global positioning system</td>
</tr>
<tr>
<td align="left">FAST</td>
<td align="center">Features from accelerated segment test</td>
<td align="center">MAV</td>
<td align="center">Micro air vehicle</td>
</tr>
<tr>
<td align="left">ROVIO</td>
<td align="center">Robust visual&#x2013;inertial odometry</td>
<td align="center">AGV</td>
<td align="center">Automated-guided vehicle</td>
</tr>
<tr>
<td align="left">HRI</td>
<td align="center">Human&#x2013;robot interaction</td>
<td align="center">UAV</td>
<td align="center">Unmanned aerial vehicle</td>
</tr>
<tr>
<td align="left">DTAM</td>
<td align="center">Dense tracking and mapping</td>
<td align="center">AR</td>
<td align="center">Augmented reality</td>
</tr>
<tr>
<td align="left">LCP</td>
<td align="center">Loop closure process</td>
<td align="center">VR</td>
<td align="center">Virtual reality</td>
</tr>
<tr>
<td align="left">SS</td>
<td align="center">Semantic segmentation</td>
<td align="center">RoLi</td>
<td align="center">Range of light intensity</td>
</tr>
<tr>
<td align="left">DSt</td>
<td align="center">Dense stereo</td>
<td align="center">ILR</td>
<td align="center">Illumination and light robustness</td>
</tr>
<tr>
<td align="left">DSe</td>
<td align="center">Dense semantics</td>
<td align="center">BRIEF</td>
<td align="center">Binary Robust Independent Elementary Features</td>
</tr>
<tr>
<td align="left">SCE</td>
<td align="center">Spatial coordinate errors</td>
<td align="left"/>
<td align="left"/>
</tr>
</tbody>
</table>
</table-wrap>
<p>Due to the significance of visual techniques in interactive robotic applications, our research focuses on V-SLAM methodologies and their evaluation. V-SLAM can be applied to mobile robots that utilize cameras to create a map of their surroundings and easily locate themselves within their work space (<xref ref-type="bibr" rid="B83">Li et al., 2020</xref>). It uses techniques such as computer vision to extract and match visual data for localization and mapping (<xref ref-type="bibr" rid="B172">Zhang et al., 2020</xref>; <xref ref-type="bibr" rid="B25">Chung et al., 2023</xref>). It allows robots to map complex environments while performing tasks such as navigation in dynamic fields (<xref ref-type="bibr" rid="B120">Placed et al., 2023</xref>; <xref ref-type="bibr" rid="B74">Khoyani and Amini, 2023</xref>). It places a strong emphasis on accurate tracking of camera poses and estimating past trajectories of the robot during its work (<xref ref-type="bibr" rid="B111">Nguyen et al., 2022</xref>; <xref ref-type="bibr" rid="B9">Awais and Henrich, 2010</xref>). <xref ref-type="fig" rid="F2">Figure 2</xref> provides a basic understanding of V-SLAM. It takes an image from the environment as an input, processes it, and produces a map as an output. In V-SLAM, various types of cameras are used to capture images or videos. A commonly used camera is the monocular camera, which has a single lens, providing 2D visual information (<xref ref-type="bibr" rid="B26">Civera et al., 2011</xref>). However, due to its limitation of lacking depth information, researchers often turn to stereo cameras, which are equipped with two lenses set at a specific distance to capture images from different perspectives, enabling depth details (<xref ref-type="bibr" rid="B48">Gao et al., 2020</xref>; <xref ref-type="bibr" rid="B99">Meng et al., 2018</xref>). 
Another valuable option in V-SLAM is the use of RGB-D cameras, which are capable of capturing both color information (RGB) and depth information (D) (<xref ref-type="bibr" rid="B99">Meng et al., 2018</xref>). Although monocular cameras are inexpensive and lightweight, they may require additional sensors in order to provide accurate data. In contrast, RGB-D and stereo cameras provide depth information. This makes RGB-D, such as Microsoft&#x2019;s Kinect and stereo cameras, suitable for robust and accurate SLAM systems (<xref ref-type="bibr" rid="B93">Luo et al., 2021</xref>).</p>
<p>Previous research demonstrated the effectiveness of V-SLAM methods, but they are often explained with very few details and separate figures (<xref ref-type="bibr" rid="B74">Khoyani and Amini, 2023</xref>; <xref ref-type="bibr" rid="B43">Fan et al., 2020</xref>), making it challenging to understand, compare, and make selections among them. As a result, our study focuses on simplifying the explanation of V-SLAM methodologies to enable readers to comprehend them easily. The main contributions of the study can be described as follows:<list list-type="simple">
<list-item>
<p>&#x2022; Investigation into V-SLAM techniques to determine the most appropriate tools for use in robotics.</p>
</list-item>
<list-item>
<p>&#x2022; Creation of a graphical and illustrative structural workflow for each method to enhance the comprehension of the operational processes involved in V-SLAM.</p>
</list-item>
<list-item>
<p>&#x2022; Presentation of significant factors for the evaluation and selection criteria among the V-SLAM methods.</p>
</list-item>
<list-item>
<p>&#x2022; Compilation of a comparative table that lists essential parameters and features for each V-SLAM method.</p>
</list-item>
<list-item>
<p>&#x2022; Presentation and discussion of relevant datasets employed within the domain of robotics applications.</p>
</list-item>
</list>
</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>Schematic representation of a robotic system&#x2019;s architecture, highlighting the incorporation of SLAM and its location within the system.</p>
</caption>
<graphic xlink:href="frobt-11-1347985-g002.tif"/>
</fig>
</sec>
<sec id="s2">
<title>2 Visual SLAM paradigm</title>
<p>As discussed in the Introduction, V-SLAM uses sensor data to provide valuable information to the system (<xref ref-type="bibr" rid="B74">Khoyani and Amini, 2023</xref>). Mobile robots and autonomous vehicles require the ability to understand their environment to complete their tasks and achieve their goals (<xref ref-type="bibr" rid="B3">Ai et al., 2021</xref>). This understanding is essential for them to be successful in their operations (<xref ref-type="bibr" rid="B13">Bongard, 2008</xref>).</p>
<p>The V-SLAM framework is composed of sequential steps that are organized to create the system and process its data; see <xref ref-type="fig" rid="F3">Figure 3</xref>, which explains the processes performed within V-SLAM in parallel with the demonstrated pictures. This includes the creation of a detailed map, a trajectory estimator, and the precise positioning and orientation of the cameras attached to that system (<xref ref-type="bibr" rid="B11">Beghdadi and Mallem, 2022</xref>; <xref ref-type="bibr" rid="B72">Kazerouni et al., 2022</xref>). Within this framework, various scenarios can be effectively implemented and operated, such as pixel-wise motion segmentation (<xref ref-type="bibr" rid="B59">Hempel and Al-Hamadi, 2020</xref>), semantic segmentation (<xref ref-type="bibr" rid="B91">Liu and Miura, 2021</xref>), and filtering techniques (<xref ref-type="bibr" rid="B160">Wang et al., 2023</xref>; <xref ref-type="bibr" rid="B53">Grisetti et al., 2007</xref>). These approaches aim to achieve a professional approach for a visual representation of the processes involved in V-SLAM. The operational framework has been systematically divided into four sections, which can be listed and explained herein.</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>Visual SLAM architecture: an overview of the four core components necessary for visual SLAM: data acquisition, system localization, system mapping, and system loop closure and process tuning, enabling mobile robots to perceive, navigate, and interact with their environment.</p>
</caption>
<graphic xlink:href="frobt-11-1347985-g003.tif"/>
</fig>
<sec id="s2-1">
<title>2.1 Data acquisition and system initialization</title>
<p>In this stage of V-SLAM, we systematically prepare input data using system hardware, which includes capturing and preparing images. It involves installing cameras such as RGB-D cameras, depth cameras, or infrared sensors for collecting data and initializing the system (<xref ref-type="bibr" rid="B11">Beghdadi and Mallem, 2022</xref>).</p>
<p>The system gathers data, with a particular emphasis on crucial filtering details aimed at effectively eliminating any noise present in the input data (<xref ref-type="bibr" rid="B96">Mane et al., 2016</xref>; <xref ref-type="bibr" rid="B53">Grisetti et al., 2007</xref>). The refined data are then sent to the next stage for further processing to extract features from the input information (<xref ref-type="bibr" rid="B3">Ai et al., 2021</xref>). As a result, progress in SLAM methods has resulted in the creation of numerous datasets accessible to researchers to evaluate V-SLAM algorithms (<xref ref-type="bibr" rid="B35">El Bouazzaoui et al., 2021</xref>).</p>
</sec>
<sec id="s2-2">
<title>2.2 System localization</title>
<p>In the second stage of V-SLAM, the system focuses on finding its location, which is an important part of the entire process (<xref ref-type="bibr" rid="B132">Scaradozzi et al., 2018</xref>). It involves the execution of various processes that are crucial for successfully determining where the robot is. Feature tracking plays a central role during this phase, with a primary focus on tasks such as feature extraction, matching, re-localization, and pose estimation (<xref ref-type="bibr" rid="B119">Picard et al., 2023</xref>). It aims to align and identify the frames that guide the estimation and creation of the initial keyframe for the input data (<xref ref-type="bibr" rid="B3">Ai et al., 2021</xref>). A keyframe is a set of video frames that includes a group of observed feature points and the camera&#x2019;s poses. It plays an important role for the tracking and localization process, helping in eliminating drift errors for camera poses attached to the robot (<xref ref-type="bibr" rid="B137">Sheng et al., 2019</xref>; <xref ref-type="bibr" rid="B66">Hsiao et al., 2017</xref>). Subsequently, this keyframe is sent for further processing in the next stage, where it will be shaped into a preliminary map, a crucial part for the third stage of the workflow (<xref ref-type="bibr" rid="B5">Aloui et al., 2022</xref>; <xref ref-type="bibr" rid="B172">Zhang et al., 2020</xref>).</p>
</sec>
<sec id="s2-3">
<title>2.3 System map formation</title>
<p>The third stage of the V-SLAM workflow focuses on the crucial task of building the map, an essential element in V-SLAM processes. Various types of maps can be generated using SLAM, including topological maps, volumetric (3D) maps, such as point cloud and occupancy grid maps, and feature-based or landmark maps. The choice of the map type is based on factors such as the sensors employed, application requirements, environmental assumptions, and the type of dataset used in robotic applications (<xref ref-type="bibr" rid="B149">Taheri and Xia, 2021</xref>; <xref ref-type="bibr" rid="B45">Fern&#xe1;ndez-Moral et al., 2013</xref>). In robotics, a grid map is a representation of a physical environment, with each cell representing a particular location and storing data comprising obstacles, topography, and occupancy. It functions as a fundamental data structure for several robotics navigation and localization techniques (<xref ref-type="bibr" rid="B53">Grisetti et al., 2007</xref>). A feature-based map is a representation which captures the features of the environment, such as landmarks or objects, to facilitate localization and navigation tasks (<xref ref-type="bibr" rid="B86">Li et al., 2022a</xref>). A point cloud map is a representation of a physical space or object made from lots of 3D dots, showing how things are arranged in a place. It is created using special cameras or sensors and helps robots and computers understand what is around them (<xref ref-type="bibr" rid="B24">Chu et al., 2018</xref>).</p>
<p>After setting up keyframes during the localization stage, the workflow progresses to field modeling. Then, key points and feature lines are identified and detected, which is crucial for generating a map (<xref ref-type="bibr" rid="B133">Schneider et al., 2018</xref>). It is a process that builds and updates the map of an unknown environment and is used to continuously track the robot&#x2019;s location (<xref ref-type="bibr" rid="B22">Chen et al., 2020</xref>). It is a two-way process that works together with the localization process, where they depend on each other to achieve SLAM processes. It gathers real-time data about the surroundings, creating both a geometric and a visual model r13 (accessed on 14 November 2023). In addition, the process includes the implementation of bundle adjustment (BA) to improve the precision of the generated map before it is moved to the final stage (<xref ref-type="bibr" rid="B2">Acosta-Amaya et al., 2023</xref>). BA is a tool that simultaneously refines the parameters essential for estimating and reconstructing the location of observed points in available images. It plays a crucial role in feature-based SLAM (<xref ref-type="bibr" rid="B16">Bustos et al., 2019</xref>; <xref ref-type="bibr" rid="B41">Eudes et al., 2010</xref>).</p>
</sec>
<sec id="s2-4">
<title>2.4 System loop closure and process tuning</title>
<p>The final stage in the V-SLAM workflow involves fine-tuning the process and closing loops, resulting in the optimization of the final map. In V-SLAM, the loop closure procedure examines and maintains previously visited places, fixing any errors that might have occurred during the robot&#x2019;s exploration within an unknown environment. These errors typically result from the estimation processes performed in earlier stages of the SLAM workflow (<xref ref-type="bibr" rid="B155">Tsintotas et al., 2022</xref>; <xref ref-type="bibr" rid="B62">Hess et al., 2016</xref>). Loop closure and process tuning can be done using different techniques, such as the extended Kalman filter SLAM (EKF-SLAM). EKF-SLAM combines loop closure and landmark observation data to adjust the map in the Kalman filter&#x2019;s state estimate. This tool helps address uncertainties in the surrounding world (map) and localize the robot within it (<xref ref-type="bibr" rid="B143">Song et al., 2021</xref>; <xref ref-type="bibr" rid="B157">Ullah et al., 2020</xref>).</p>
<p>The bag-of-words (BoW) approach is another technique used to enable robots to recognize and recall previously visited locations. This is similar to how humans remember places they have been to in the past, even after a long time, due to the activities that took place there. BoW works by taking the visual features of each image and converting them into a histogram of visual words. This histogram is then used to create a fixed-size vector representation of the BoW, which is stored for use in matching and loop-closing processes (<xref ref-type="bibr" rid="B27">Cui et al., 2022</xref>; <xref ref-type="bibr" rid="B155">Tsintotas et al., 2022</xref>).</p>
<p>Finally, graph optimization is used as a correction tool for loop closure processes. It refines the final map and robot&#x2019;s trajectory by optimizing the graph based on landmarks. This technique involves a graph-based representation of the SLAM issue, where vertices represent robot poses and map characteristics and edges represent constraints or measurements between the poses. It is commonly used as a correction tool in graph-based SLAM types (<xref ref-type="bibr" rid="B173">Zhang et al., 2017</xref>; <xref ref-type="bibr" rid="B23">Chou et al., 2019</xref>; <xref ref-type="bibr" rid="B100">Meng et al., 2022</xref>).</p>
<p>In conclusion, these comprehensive workflow processes outlined in <xref ref-type="sec" rid="s2-1">Sections 2.1</xref>, <xref ref-type="sec" rid="s2-2">2.2</xref>, <xref ref-type="sec" rid="s2-3">2.3</xref>, and <xref ref-type="sec" rid="s2-4">2.4</xref>, respectively, play an important role in V-SLAM for robotics as they facilitate the simultaneous creation of maps and real-time location tracking within the operational environment (<xref ref-type="bibr" rid="B87">Li et al., 2022b</xref>).</p>
</sec>
</sec>
<sec id="s3">
<title>3 State-of-the-art of visual SLAM methods</title>
<p>V-SLAM plays a significant role as a transformative topic within the robotics industry and research (<xref ref-type="bibr" rid="B74">Khoyani and Amini, 2023</xref>; <xref ref-type="bibr" rid="B2">Acosta-Amaya et al., 2023</xref>). The progress in this field can be attributed to tools such as machine learning, computer vision, deep learning, and state-of-the-art sensor technologies, which have collectively simplified and enhanced its strategy in real-life applications (<xref ref-type="bibr" rid="B11">Beghdadi and Mallem, 2022</xref>; <xref ref-type="bibr" rid="B33">Duan et al., 2019</xref>).</p>
<p>The landscape of V-SLAM is composed of a variety of methodologies, which can be divided into three categories, namely, only visual SLAM, visual-inertial SLAM, and RGB-D SLAM (<xref ref-type="bibr" rid="B95">Macario Barros et al., 2022</xref>; <xref ref-type="bibr" rid="B151">Theodorou et al., 2022</xref>), as shown in <xref ref-type="fig" rid="F4">Figure 4</xref>. In this section, we provide a brief overview of the current state-of-the-art V-SLAM algorithms and techniques, including their methodology, efficiency, time requirements, and processing capacity, as well as whether they are designed to run on-board or off-board computer systems (<xref ref-type="bibr" rid="B154">Tourani et al., 2022</xref>). Additionally, we combine various graphical representations to create a single and comprehensive visual representation of the method workflow, as shown in <xref ref-type="fig" rid="F5">Figure 5</xref>.</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>Illustration of visual SLAM types: only-visual SLAM, visual-inertial SLAM, and RGB-D SLAM.</p>
</caption>
<graphic xlink:href="frobt-11-1347985-g004.tif"/>
</fig>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>Visual SLAM methods, illustrating the state-of-the-art method and workflow for select notable SLAM methods featured in this study, presented in a simplified view.</p>
</caption>
<graphic xlink:href="frobt-11-1347985-g005.tif"/>
</fig>
<sec id="s3-1">
<title>3.1 Only visual SLAM</title>
<p>It is a SLAM system designed to map the environment around the sensors while simultaneously determining the precise location and orientation of those sensors within their surroundings. It relies entirely on visual data for estimating sensor motion and reconstructing environmental structures (<xref ref-type="bibr" rid="B150">Taketomi et al., 2017</xref>).</p>
<p>It uses monocular, RGB-D, and stereo cameras to scan the environment, helping robots map unfamiliar areas easily. This approach has attracted attention in the literature because it is cost-effective, easy to calibrate, and has low power consumption in monocular cameras while also allowing depth estimation and high accuracy in RGB-D and stereo cameras (<xref ref-type="bibr" rid="B95">Macario Barros et al., 2022</xref>; <xref ref-type="bibr" rid="B1">Abbad et al., 2023</xref>). The methods used in this part can be listed herein.</p>
<sec id="s3-1-1">
<title>3.1.1 PTAM-SLAM</title>
<p>PTAM-SLAM, which stands for parallel tracking and mapping (PTAM), is a monocular SLAM used for real-time tracking systems. It has 6-DoF camera tracking, which can be used in small scenes (<xref ref-type="bibr" rid="B75">Klien and Murray, 2007</xref>). This methodology demonstrates remarkable efficiency in dynamic operational settings, consistently providing high performance even in conditions of frequent and unstable lighting variations (<xref ref-type="bibr" rid="B140">Soliman et al., 2023</xref>); see <xref ref-type="table" rid="T2">Table 2</xref>.</p>
<table-wrap id="T2" position="float">
<label>TABLE 2</label>
<caption>
<p>Comparative scenarios for actively used visual SLAM methods.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th/>
<th colspan="4" align="center">Sensor</th>
<th colspan="7" align="center">ILR</th>
</tr>
<tr>
<th align="center">SLAM method</th>
<th align="center">M</th>
<th align="center">S</th>
<th align="center">I</th>
<th align="center">O</th>
<th align="center">W-S</th>
<th align="center">Output usage</th>
<th align="center">Application field</th>
<th align="center">RoLI</th>
<th align="center">T2D</th>
<th align="center">Hardware deployment</th>
<th align="center">S.M</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">
<bold>PTAM</bold> K. and Mu</td>
<td align="left">
<bold>
<italic>&#x2713;</italic>
</bold>
</td>
<td align="left">
<bold>&#xd7;</bold>
</td>
<td align="left">
<bold>&#xd7;</bold>
</td>
<td align="left">
<bold>&#xd7;</bold>
</td>
<td align="left">M-H</td>
<td align="left">Pose-estimation 3D mapping</td>
<td align="left">Robotics, AR, and VR</td>
<td align="left">
<bold>&#x2b;&#x2b;&#x2b;</bold>
</td>
<td align="left">
<bold>&#x2b;&#x2b;&#x2b;&#x2b;</bold>
</td>
<td align="left">ODROID-XU4, Intel Quad-Core</td>
<td align="left">
<xref ref-type="bibr" rid="B52">GPL (2023)</xref>
</td>
</tr>
<tr>
<td align="left">
<bold>DTAM</bold> Ne. et al</td>
<td align="left">
<bold>
<italic>&#x2713;</italic>
</bold>
</td>
<td align="left">
<bold>
<italic>&#x2713;</italic>
</bold>
</td>
<td align="left">
<bold>&#xd7;</bold>
</td>
<td align="left">RGBD</td>
<td align="left">S-I</td>
<td align="left">Textured depth map</td>
<td align="left">Robotics, AR, VR, AGV, and simulators</td>
<td align="left">
<bold>&#x2b;&#x2b;</bold>
</td>
<td align="left">
<bold>&#x2b;&#x2b;&#x2b;</bold>
</td>
<td align="left">nvidia.gtx.480.gpu, gpgpu-Processors</td>
<td align="left">
<xref ref-type="bibr" rid="B128">Rintar (2023)</xref>
</td>
</tr>
<tr>
<td align="left">
<bold>RTAB. M</bold> Labb&#xe9;</td>
<td align="left">
<bold>
<italic>&#x2713;</italic>
</bold>
</td>
<td align="left">
<bold>
<italic>&#x2713;</italic>
</bold>
</td>
<td align="left">
<bold>
<italic>&#x2713;</italic>
</bold>
</td>
<td align="left">Lidar</td>
<td align="left">L-H</td>
<td align="left">2D and 3D mapping</td>
<td align="left">Robotics, VR, AR, and 3D reconstruction</td>
<td align="left">
<bold>&#x2b;&#x2b;&#x2b;</bold>
</td>
<td align="left">
<bold>&#x2b;&#x2b;&#x2b;</bold>
</td>
<td align="left">Jetson Nano, Intel Core.i5.8th.gen</td>
<td align="left">
<xref ref-type="bibr" rid="B68">Introlab (2023)</xref>
</td>
</tr>
<tr>
<td align="left">
<bold>ORB.S</bold> Mur-A</td>
<td align="left">
<bold>
<italic>&#x2713;</italic>
</bold>
</td>
<td align="left">
<bold>&#xd7;</bold>
</td>
<td align="left">
<bold>&#xd7;</bold>
</td>
<td align="left">
<bold>&#xd7;</bold>
</td>
<td align="left">M-H</td>
<td align="left">Tree-spanning and pose-estimating</td>
<td align="left">Robotics mapping indoor navigation</td>
<td align="left">
<bold>&#x2b;&#x2b;&#x2b;&#x2b;</bold>
</td>
<td align="left">
<bold>&#x2b;&#x2b;&#x2b;</bold>
</td>
<td align="left">Intel Core.i7.4700MQ</td>
<td align="left">
<xref ref-type="bibr" rid="B125">raulmur (2023a)</xref>
</td>
</tr>
<tr>
<td align="left">
<bold>ORB.S2</bold> Mur-A and Tard</td>
<td align="left">
<bold>
<italic>&#x2713;</italic>
</bold>
</td>
<td align="left">
<bold>
<italic>&#x2713;</italic>
</bold>
</td>
<td align="left">
<bold>&#xd7;</bold>
</td>
<td align="left">RGBD</td>
<td align="left">M-H</td>
<td align="left">Point-mapping and keyframe selection</td>
<td align="left">Mobile mapping, robotics, VR, and UAVs</td>
<td align="left">
<bold>&#x2b;&#x2b;&#x2b;&#x2b;</bold>
</td>
<td align="left">
<bold>&#x2b;&#x2b;&#x2b;&#x2b;</bold>
</td>
<td align="left">Intel Core-i7.4790 and RealSense-D435</td>
<td align="left">
<xref ref-type="bibr" rid="B126">raulmur (2023b)</xref>
</td>
</tr>
<tr>
<td align="left">
<bold>ORB.S3</bold> Ca. et al</td>
<td align="left">
<bold>
<italic>&#x2713;</italic>
</bold>
</td>
<td align="left">
<bold>
<italic>&#x2713;</italic>
</bold>
</td>
<td align="left">
<bold>
<italic>&#x2713;</italic>
</bold>
</td>
<td align="left">fish.e</td>
<td align="left">L-H</td>
<td align="left">2D and 3D-Map and tree-spanning</td>
<td align="left">Robotics, security, and 3D reconstruction</td>
<td align="left">
<bold>&#x2b;&#x2b;&#x2b;&#x2b;&#x2b;</bold>
</td>
<td align="left">
<bold>&#x2b;&#x2b;&#x2b;&#x2b;&#x2b;</bold>
</td>
<td align="left">Jetson-tx2, pi.3B &#x2b; nvidia.geforce</td>
<td align="left">
<xref ref-type="bibr" rid="B30">uz.slaml (2023)</xref>
</td>
</tr>
<tr>
<td align="left">
<bold>RGBD.S</bold> End et al</td>
<td align="left">
<bold>&#xd7;</bold>
</td>
<td align="left">
<bold>&#xd7;</bold>
</td>
<td align="left">
<bold>
<italic>&#x2713;</italic>
</bold>
</td>
<td align="left">RGBD</td>
<td align="left">L-H</td>
<td align="left">Maps, trajectories and 3D point cloud</td>
<td align="left">3D-scanning, robotics and UAVs</td>
<td align="left">
<bold>&#x2b;&#x2b;&#x2b;</bold>
</td>
<td align="left">
<bold>&#x2b;&#x2b;&#x2b;&#x2b;</bold>
</td>
<td align="left">Intel Core.i9.9900k and Quad Core.cpu.8.GB</td>
<td align="left">
<xref ref-type="bibr" rid="B44">felix. (2023)</xref>
</td>
</tr>
<tr>
<td align="left">
<bold>SCE.S</bold> Son et al</td>
<td align="left">
<bold>&#xd7;</bold>
</td>
<td align="left">
<bold>
<italic>&#x2713;</italic>
</bold>
</td>
<td align="left">
<bold>&#xd7;</bold>
</td>
<td align="left">RGBD</td>
<td align="left">M-I</td>
<td align="left">Camera pose and Semantic Map</td>
<td align="left">Robotics, AR, and AGV</td>
<td align="left">
<bold>&#x2b;&#x2b;&#x2b;&#x2b;</bold>
</td>
<td align="left">
<bold>&#x2b;&#x2b;&#x2b;</bold>
</td>
<td align="left">nvidia.Jetson.AGX, 512.core.Volta.GPU</td>
<td align="left">None</td>
</tr>
<tr>
<td align="left">
<bold>OKVIS</bold> Leut et al</td>
<td align="left">
<bold>
<italic>&#x2713;</italic>
</bold>
</td>
<td align="left">
<bold>
<italic>&#x2713;</italic>
</bold>
</td>
<td align="left">
<bold>
<italic>&#x2713;</italic>
</bold>
</td>
<td align="left">
<bold>&#xd7;</bold>
</td>
<td align="left">M-H</td>
<td align="left">Graph estimation and feature tracking</td>
<td align="left">Robotics, UAVs, and VR</td>
<td align="left">
<bold>&#x2b;&#x2b;&#x2b;&#x2b;</bold>
</td>
<td align="left">
<bold>&#x2b;&#x2b;&#x2b;&#x2b;</bold>
</td>
<td align="left">Up-Board, ODROID.xu4, and Intel&#xae; Core&#x2122; i7</td>
<td align="left">
<xref ref-type="bibr" rid="B39">eth.a (2023a)</xref>
</td>
</tr>
<tr>
<td align="left">
<bold>ROVIO</bold> Blo. et al</td>
<td align="left">
<bold>
<italic>&#x2713;</italic>
</bold>
</td>
<td align="left">
<bold>
<italic>&#x2713;</italic>
</bold>
</td>
<td align="left">
<bold>
<italic>&#x2713;</italic>
</bold>
</td>
<td align="left">fish.e</td>
<td align="left">L-H</td>
<td align="left">Position and orientation depth map</td>
<td align="left">Robotics, AR, and self-driving cars</td>
<td align="left">
<bold>&#x2b;&#x2b;&#x2b;</bold>
</td>
<td align="left">
<bold>&#x2b;&#x2b;&#x2b;</bold>
</td>
<td align="left">ODROID-xu4 and Intel i7-2760QM</td>
<td align="left">
<xref ref-type="bibr" rid="B40">eth.a (2023b)</xref>
</td>
</tr>
<tr>
<td align="left">
<bold>VINS.M</bold> Qin et al</td>
<td align="left">
<bold>
<italic>&#x2713;</italic>
</bold>
</td>
<td align="left">
<bold>&#xd7;</bold>
</td>
<td align="left">
<bold>
<italic>&#x2713;</italic>
</bold>
</td>
<td align="left">
<bold>&#xd7;</bold>
</td>
<td align="left">L-H</td>
<td align="left">Keyframe database pose estimation</td>
<td align="left">Robotics, AR, and VR</td>
<td align="left">
<bold>&#x2b;&#x2b;&#x2b;</bold>
</td>
<td align="left">
<bold>&#x2b;&#x2b;&#x2b;</bold>
</td>
<td align="left">Intel Pentium, Intel Core i7-4790 CPU</td>
<td align="left">
<xref ref-type="bibr" rid="B64">hkust.a (2023)</xref>
</td>
</tr>
<tr>
<td align="left">
<bold>LSD.S</bold> Eng et al</td>
<td align="left">
<bold>
<italic>&#x2713;</italic>
</bold>
</td>
<td align="left">
<bold>
<italic>&#x2713;</italic>
</bold>
</td>
<td align="left">
<bold>&#xd7;</bold>
</td>
<td align="left">RGBD</td>
<td align="left">L-H</td>
<td align="left">Keyframe selection and 3D mapping</td>
<td align="left">Robotics and self-driving cars</td>
<td align="left">
<bold>&#x2b;&#x2b;&#x2b;&#x2b;</bold>
</td>
<td align="left">
<bold>&#x2b;&#x2b;&#x2b;&#x2b;&#x2b;</bold>
</td>
<td align="left">fpga.zynq.7020.soc Intel&#xae; NUC6i3SYH</td>
<td align="left">
<xref ref-type="bibr" rid="B28">CVG, T. U. o. M. (2023)</xref>
</td>
</tr>
<tr>
<td align="left">
<bold>DVO.S</bold> Kerl et al</td>
<td align="left">
<bold>&#xd7;</bold>
</td>
<td align="left">
<bold>
<italic>&#x2713;</italic>
</bold>
</td>
<td align="left">
<bold>&#xd7;</bold>
</td>
<td align="left">RGBD</td>
<td align="left">S-I</td>
<td align="left">3D mapping image alignment</td>
<td align="left">Robotics and AR Perception</td>
<td align="left">
<bold>&#x2b;&#x2b;&#x2b;</bold>
</td>
<td align="left">
<bold>&#x2b;&#x2b;&#x2b;</bold>
</td>
<td align="left">Sony Xperia.z1, Intel Xeon E5520</td>
<td align="left">
<xref ref-type="bibr" rid="B156">tum.v (2023)</xref>
</td>
</tr>
<tr>
<td align="left">
<bold>Kimera.S</bold> Ros. et al</td>
<td align="left">
<bold>
<italic>&#x2713;</italic>
</bold>
</td>
<td align="left">
<bold>
<italic>&#x2713;</italic>
</bold>
</td>
<td align="left">
<bold>
<italic>&#x2713;</italic>
</bold>
</td>
<td align="left">Lidar</td>
<td align="left">M-H</td>
<td align="left">Trajectory estimate semantic mesh</td>
<td align="left">Robotics, UAV, VR, and AGV</td>
<td align="left">
<bold>&#x2b;&#x2b;&#x2b;&#x2b;</bold>
</td>
<td align="left">
<bold>&#x2b;&#x2b;&#x2b;&#x2b;&#x2b;</bold>
</td>
<td align="left">Not mentioned</td>
<td align="left">
<xref ref-type="bibr" rid="B101">MIT.S (2023)</xref>
</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>-ILR, illumination and light robustness&#x2014;evaluates how well each SLAM method responds to varying environmental lighting.</p>
</fn>
<fn>
<p>-RoLI, range of light intensity&#x2014;measures the robot&#x2019;s ability to operate effectively across a broad spectrum of light intensities, from very dark to very bright.</p>
</fn>
<fn>
<p>-T2D, tolerance to directionality&#x2014;assesses the robot&#x2019;s capability to function in environments with strong directional light sources, such as spotlights and windows.</p>
</fn>
<fn>
<p>-W-S, defines the operational scale and application field of the robot (M, medium; L, large; S, small; H, hybrid; I, indoor).</p>
</fn>
<fn>
<p>-S.M, sources and materials&#x2014;provides links to the source codes used in the method.</p>
</fn>
<fn>
<p>-VINS.M, VINS-Mono SLAM; M, monocular camera; S, stereo camera; I, inertial measurement unit (IMU); O, other sensors; fish.e, fish-eye camera; RGBD, RGB-D camera.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<p>The system workflow consists of four sequential stages (<xref ref-type="bibr" rid="B75">Klien and Murray, 2007</xref>; <xref ref-type="bibr" rid="B45">Fern&#xe1;ndez-Moral et al., 2013</xref>). Input preparation and system initialization involve processes such as monocular camera translation and rotation to improve image efficiency and clarity (<xref ref-type="bibr" rid="B32">De Croce et al., 2019</xref>). The tracking process is then carried out, where tasks related to image and video processing are performed to prepare data for subsequent mapping procedures. Following that, the optimization and mapping processes are carried out to prepare the map and reveal the outputs, which include the camera pose and the 3D map used in SLAM operations (<xref ref-type="bibr" rid="B75">Klien and Murray, 2007</xref>; <xref ref-type="bibr" rid="B135">Servi&#xe8;res et al., 2021</xref>). All processes and steps are simplified and demonstrated in <xref ref-type="fig" rid="F5">Figure 5</xref>, part 1.</p>
</sec>
<sec id="s3-1-2">
<title>3.1.2 ORB-SLAM</title>
<p>ORB-SLAM stands for oriented FAST (features from accelerated segment test) and rotated BRIEF (binary robust independent elementary features) SLAM (<xref ref-type="bibr" rid="B154">Tourani et al., 2022</xref>). This feature-based detector is applicable in both small and large indoor or outdoor fields (<xref ref-type="bibr" rid="B154">Tourani et al., 2022</xref>). Due to its real-time capabilities and high-quality map reconstruction, it is widely used in applications such as the human&#x2013;robot interaction (HRI) (<xref ref-type="bibr" rid="B104">Mur-Artal et al., 2015</xref>), augmented reality, and autonomous navigation (<xref ref-type="bibr" rid="B177">Zhu et al., 2022</xref>; <xref ref-type="bibr" rid="B165">Yang et al., 2022</xref>). ORB-SLAM is designed to handle robust and unstable motion clutter, covering essential processes such as tracking, mapping, and loop closing (<xref ref-type="bibr" rid="B19">Campos et al., 2021</xref>). Compared to other advanced V-SLAM methods, ORB-SLAM outperforms by enhancing the dynamic, size, and traceability of the map. It achieves real-time global localization from wide baselines, performs camera re-localization from various viewpoints, and makes better selections for frames and points in the reconstruction process (<xref ref-type="bibr" rid="B123">Ragot et al., 2019</xref>; <xref ref-type="bibr" rid="B103">Mur-A and Tars, 2014</xref>); see <xref ref-type="table" rid="T2">Table 2</xref>.</p>
<p>ORB-SLAM1 is categorized as only-visual (<xref ref-type="bibr" rid="B104">Mur-Artal et al., 2015</xref>; <xref ref-type="bibr" rid="B103">Mur-A and Tars, 2014</xref>), while ORB-SLAM2 expands to both only-visual and RGB-D SLAM (<xref ref-type="bibr" rid="B123">Ragot et al., 2019</xref>; <xref ref-type="bibr" rid="B105">Mur-Artal and Tard&#xf3;s, 2017a</xref>). Furthermore, ORB-SLAM3 extends its classification to include all three categories: only-visual, visual-inertial, and RGB-D SLAM. This expansion underscores the adaptability and versatility of ORB-SLAM in real-life applications (<xref ref-type="bibr" rid="B167">Zang et al., 2023</xref>; <xref ref-type="bibr" rid="B18">Ca et al., 2021</xref>; <xref ref-type="bibr" rid="B19">Campos et al., 2021</xref>).</p>
<p>The ORB-SLAM methodology process goes through four sequential phases (<xref ref-type="bibr" rid="B104">Mur-Artal et al., 2015</xref>; <xref ref-type="bibr" rid="B105">Mur-Artal and Tard&#xf3;s 2017a</xref>; <xref ref-type="bibr" rid="B18">Ca et al., 2021</xref>). The initial phase involves the sensor input and the tracking process (<xref ref-type="bibr" rid="B70">Joo et al., 2020</xref>). Across all ORB-SLAM versions, this phase shares a common approach, focusing on pose preparation and frame generation to facilitate decision-making (<xref ref-type="bibr" rid="B148">Sun et al., 2017</xref>). However, the difference lies in input usage; for example, ORB-SLAM1 uses one input, ORB-SLAM2 uses three, and ORB-SLAM3 uses four (<xref ref-type="bibr" rid="B19">Campos et al., 2021</xref>). Therefore, the quality and efficiency of the next operation depend on the input in the first stage. In the next phase, local mapping is done by adding new keyframes and creating map points with the localization process simultaneously (<xref ref-type="bibr" rid="B18">Ca et al., 2021</xref>). This part remains consistent across all versions, but version 3 enhances its functionality by incorporating additional bundle adjustment for improved feature detection and matching (<xref ref-type="bibr" rid="B29">Dai et al., 2021</xref>). The subsequent phase involves loop closing, process optimization, and selecting similar candidate data in all versions. However, versions 2 and 3 include additional steps such as bundle adjustment welding and map merging (<xref ref-type="bibr" rid="B105">Mur-Artal and Tard&#xf3;s, 2017a</xref>; <xref ref-type="bibr" rid="B167">Zang et al., 2023</xref>). The last stage is preparing the output, focusing on creating the final map that includes essential information such as graphs, lines, point mapping, and 2D and 3D maps for use in the SLAM process (<xref ref-type="bibr" rid="B2">Acosta-Amaya et al., 2023</xref>). 
<xref ref-type="fig" rid="F5">Figure 5</xref> parts 4, 5, and 6 give a detailed observation about the methods of ORB-SLAM 1, 2, and 3 versions, respectively, showcasing their features and functionalities for a better understanding.</p>
</sec>
<sec id="s3-1-3">
<title>3.1.3 LSD-SLAM</title>
<p>LSD-SLAM, which stands for large-scale direct monocular SLAM, is an advanced technique made for real-time mapping and positioning. It can utilize various camera setups. It is designed for large-scale mapping jobs where it can create a very accurate and detailed map of the working fields. In addition, it stays accurate even with a lower image resolution (<xref ref-type="bibr" rid="B38">Engel et al., 2015</xref>; <xref ref-type="bibr" rid="B45">Fern&#xe1;ndez-Moral et al., 2013</xref>). This flexibility makes it a better choice for operating in complex, wide-ranging and dynamic environments and is used in various applications such as robotics and self-driving cars (<xref ref-type="bibr" rid="B104">Mur-Artal et al., 2015</xref>; <xref ref-type="bibr" rid="B37">Eng et al., 2014</xref>); see <xref ref-type="table" rid="T2">Table 2</xref>.</p>
<p>LSD-SLAM distinguishes itself from the DTAM-SLAM approach by focusing on areas with strong intensity changes, leaving out regions with little or no texture details. This choice comes from the challenge of figuring out how far things are in areas where there is not much texture inside images. As a result, LSD-SLAM goes beyond what DTAM can do by concentrating on places with strong changes in brightness and ignoring areas with very little texture (<xref ref-type="bibr" rid="B2">Acosta-Amaya et al., 2023</xref>; <xref ref-type="bibr" rid="B74">Khoyani and Amini, 2023</xref>).</p>
<p>LSD-SLAM and DVO-SLAM processes can function similarly, and their workflow is structured in five stages (<xref ref-type="bibr" rid="B95">Macario Barros et al., 2022</xref>; <xref ref-type="bibr" rid="B93">Luo et al., 2021</xref>; <xref ref-type="bibr" rid="B134">Sch&#xf6;ps et al., 2014</xref>; <xref ref-type="bibr" rid="B38">Engel et al., 2015</xref>). The first stage includes inputting mono- and stereo data and preparing them for the next processing step. The second stage is designed for tracking and estimating the initial pose by aligning images from both mono and stereo cameras. The third stage is dedicated to loop closure processes, involving keyframe preparation, regularization, and data updates to prepare frames for subsequent stages. The fourth stage carries out map optimization, including two critical phases, which are direct mapping and feature-based mapping. It also covers processes such as activation, marginalization, and direct bundle adjustment. These operations shape the necessary map and manage its points with semi-dense adjustments for use in the output stage. In the final stage, the estimated camera trajectory and pose with the dense 3D map are prepared for application in robotics&#x2019; SLAM functions; see <xref ref-type="fig" rid="F5">Figure 5</xref>, part 14 for a detailed workflow.</p>
</sec>
<sec id="s3-1-4">
<title>3.1.4 DVO-SLAM</title>
<p>DVO-SLAM, which stands for dense visual odometry SLAM, is designed to facilitate real-time motion estimation and map creation using depth-sensing devices, such as stereo and mono cameras (<xref ref-type="bibr" rid="B134">Sch&#xf6;ps et al., 2014</xref>). It stands out for its ability to generate detailed and accurate environment maps while tracking the position and orientation (<xref ref-type="bibr" rid="B93">Luo et al., 2021</xref>; <xref ref-type="bibr" rid="B177">Zhu et al., 2022</xref>). DVO-SLAM uses point-to-plane metrics in photometric bundle adjustment (PBA), enhancing the navigation of robotic systems, especially in situations with less textured points. The point-to-plane metric is a cost function and optimization tool that is used to optimize the depth sensor poses and plane parameters for 3D reconstruction (<xref ref-type="bibr" rid="B4">Alismail et al., 2017</xref>; <xref ref-type="bibr" rid="B176">Zhou et al., 2020</xref>; <xref ref-type="bibr" rid="B110">Newcombe et al., 2011</xref>). These features make DVO-SLAM suitable for more accurate applications such as in robotics and augmented reality (AR), and it is robust for operating in slightly unstable light sources (<xref ref-type="bibr" rid="B74">Khoyani and Amini, 2023</xref>; <xref ref-type="bibr" rid="B73">Kerl et al., 2013</xref>); see <xref ref-type="table" rid="T2">Table 2</xref>.</p>
</sec>
</sec>
<sec id="s3-2">
<title>3.2 Visual-inertial SLAM</title>
<p>VI-SLAM is a technique that combines the capabilities of visual sensors, such as stereo cameras, and inertial measurement units (IMUs) to achieve its SLAM objectives and operations (<xref ref-type="bibr" rid="B135">Servi&#xe8;res et al., 2021</xref>; <xref ref-type="bibr" rid="B81">Leut et al., 2015</xref>). This hybrid approach allows a comprehensive modeling of the environment in which robots operate (<xref ref-type="bibr" rid="B171">Zhang et al., 2023</xref>). It can be applied to various real-world applications, such as drones and mobile robotics (<xref ref-type="bibr" rid="B150">Taketomi et al., 2017</xref>). The integration of IMU data enhances and augments the information available for environment modeling, resulting in improved accuracy and reduced errors within the system&#x2019;s functioning (<xref ref-type="bibr" rid="B95">Macario Barros et al., 2022</xref>; <xref ref-type="bibr" rid="B106">Mur-Artal and Tard&#xf3;s, 2017b</xref>). The methods and algorithms used in this approach, while implemented in real-life applications, can be listed as shown in the following section.</p>
<sec id="s3-2-1">
<title>3.2.1 OKVIS-SLAM</title>
<p>OKVIS-SLAM, which stands for open keyframe-based visual-inertial SLAM, is designed for robotics and computer vision applications that require real-time 3D reconstruction, object tracking, and position estimation (<xref ref-type="bibr" rid="B71">Kasyanov et al., 2017</xref>). It combines visual and inertial measurements to accurately predict the position and orientation of a robot simultaneously (<xref ref-type="bibr" rid="B81">Leut et al., 2015</xref>).</p>
<p>It accurately tracks the camera&#x2019;s position and orientation in real-time control during a robot&#x2019;s motion (<xref ref-type="bibr" rid="B82">Leutenegger, 2022</xref>). It uses image retrieval to connect keyframes in the SLAM pose-graph, aided by the pose estimator for locations beyond the optimization window of visual&#x2013;inertial odometry (<xref ref-type="bibr" rid="B71">Kasyanov et al., 2017</xref>; <xref ref-type="bibr" rid="B160">Wang et al., 2023</xref>). For portability, a lightweight semantic segmentation CNN is used to remove dynamic objects during navigation (<xref ref-type="bibr" rid="B82">Leutenegger, 2022</xref>). OKVIS&#x2019;s real-time precision and resilience make it suitable for various applications, including robotics and unmanned aerial vehicles (UAVs). It can operate effectively in complex and unstable illumination environments (<xref ref-type="bibr" rid="B160">Wang et al., 2023</xref>); see <xref ref-type="table" rid="T2">Table 2</xref>.</p>
<p>We have structured the OKVIS-SLAM workflow into three key phases (<xref ref-type="bibr" rid="B82">Leutenegger, 2022</xref>; <xref ref-type="bibr" rid="B71">Kasyanov et al., 2017</xref>; <xref ref-type="bibr" rid="B160">Wang et al., 2023</xref>). The first phase focuses on receiving initial sensor inputs, including IMU and visual data. It initializes the system, conducts IMU integration, and employs tracking techniques to prepare the data for subsequent processing. The second phase is the real-time estimator and odometry filtering phase, covering various operations, such as landmark triangulation and status updating. The triangulation process is used to estimate the 3D position of visual landmarks to enhance SLAM operation (<xref ref-type="bibr" rid="B166">Yousif et al., 2015</xref>). In the last phase, optimization and full graph estimation are performed. This includes loop closure detection, window sliding, and marginalization. The phase selects relevant frames and optimizes the overall graph structure, ultimately providing essential outputs for the SLAM system; see <xref ref-type="fig" rid="F5">Figure 5</xref>, part 11.</p>
</sec>
<sec id="s3-2-2">
<title>3.2.2 ROVIO-SLAM</title>
<p>ROVIO-SLAM, which stands for robust visual-inertial odometry SLAM, is a cutting-edge sensor fusion method that smoothly combines visual and inertial data. This integration significantly enhances navigation accuracy, leading to improved work efficiency in robotics systems (<xref ref-type="bibr" rid="B12">Blo et al., 2015</xref>; <xref ref-type="bibr" rid="B160">Wang et al., 2023</xref>). It brings valuable attributes for robotics, excelling in robust performance in challenging environments, and presents a smooth interaction between the robot and its surroundings (<xref ref-type="bibr" rid="B84">Li et al., 2023a</xref>). It efficiently handles extensive mapping processes, making it suitable for large-scale applications (<xref ref-type="bibr" rid="B71">Kasyanov et al., 2017</xref>). Moreover, it operates with low computational demands and high robustness to light, making it ideal for cost-effective robotic platforms designed for sustained, long-term operations (<xref ref-type="bibr" rid="B82">Leutenegger, 2022</xref>).</p>
<p>ROVIO-SLAM workflow is divided into three stages (<xref ref-type="bibr" rid="B119">Picard et al., 2023</xref>; <xref ref-type="bibr" rid="B112">Nguyen et al., 2020</xref>; <xref ref-type="bibr" rid="B133">Schneider et al., 2018</xref>). First, data from visual cameras and IMU are obtained and prepared for processing. In the next stage, feature detection, tracking, and semantic segmentation are done for visual data, while IMU data are prepared for integration from the other side. The processing stage involves loop closure operations, new keyframes insertion, and state transition, along with data filtering. State transitions lead to the generation of the key output, which is then transferred to the final stage, providing estimated position, orientation, and 3D landmarks; see <xref ref-type="fig" rid="F5">Figure 5</xref>, part 8.</p>
</sec>
<sec id="s3-2-3">
<title>3.2.3 VINS Mono-SLAM</title>
<p>VINS Mono-SLAM, which stands for the visual-inertial navigation system, is an advanced sensor fusion technology that precisely tracks the motion and position of a robot or sensor in real-time. Utilizing only a single camera and an IMU, it combines visual and inertial data to enhance accuracy and ensure precise functionality of robot operations (<xref ref-type="bibr" rid="B106">Mur-Artal and Tard&#xf3;s, 2017b</xref>). Known for its efficiency in creating maps and minimizing drift errors, VINS-Mono excels in navigating challenging environments with dynamic obstacles (<xref ref-type="bibr" rid="B14">Bruno and Colombini, 2021</xref>). Its smooth performance in difficult lighting conditions highlights its reliability, ensuring optimal functionality for mobile robots operating in unstable lighting conditions (<xref ref-type="bibr" rid="B142">Song et al., 2022</xref>; <xref ref-type="bibr" rid="B76">Kuang et al., 2022</xref>). This power-efficient, real-time monocular VIO method is suitable for visual SLAM applications in robotics, virtual reality, and augmented reality (<xref ref-type="bibr" rid="B54">Gu et al., 2022</xref>); see <xref ref-type="table" rid="T2">Table 2</xref>.</p>
<p>The VINS-Mono SLAM workflow is organized into four stages (<xref ref-type="bibr" rid="B122">Qin et al., 2018</xref>; <xref ref-type="bibr" rid="B163">Xu et al., 2021</xref>). In the first stage, visual and inertial data are gathered and prepared for acquisition and measurement processing, including feature extraction, matching, and IMU data preparation, and are then sent for visual and inertial alignment. The second stage handles loop closure operations and re-localization to adjust old states with additional feature retrieval for the next step. The third stage focuses on process optimization, incorporating bundle adjustments and additional propagation for efficiency. The final stage outputs the system&#x2019;s estimated pose and a keyframe database, applicable to SLAM; see <xref ref-type="fig" rid="F5">Figure 5</xref>, part 13.</p>
</sec>
<sec id="s3-2-4">
<title>3.2.4 Kimera-SLAM</title>
<p>Kimera-SLAM is an open-source SLAM technique applied for real-time metric semantic purposes. Its framework is highly dependent on previous methodologies such as ORB-SLAM, VINS-Mono SLAM, OKVIS, and ROVIO-SLAM (<xref ref-type="bibr" rid="B130">Ros. et al., 2020</xref>). Exhibiting robustness in dynamic scenes, particularly in the presence of moving objects (<xref ref-type="bibr" rid="B159">Wang et al., 2022</xref>), Kimera-SLAM showcases resilience to variations in lighting conditions. It operates effectively in both indoor and outdoor settings, making it highly compatible with integration into interactive robotic systems (<xref ref-type="bibr" rid="B131">Rosinol et al., 2021</xref>). In summary, Kimera-SLAM provides a thorough and efficient solution for real-time metric-semantic SLAM, prioritizing accuracy, modality, and robustness in its operations (<xref ref-type="bibr" rid="B131">Rosinol et al., 2021</xref>); see <xref ref-type="table" rid="T2">Table 2</xref>.</p>
<p>The procedural workflow of this technique can be summarized in five stages (<xref ref-type="bibr" rid="B130">Rosinol et al., 2020</xref>). First, the input pre-processing includes dense 2D semantics, dense stereo, and Kimera-VIO. It also includes front-end and back-end operations such as tracking, feature extraction, and matching, which yield an accurate state estimation. The second stage involves robust pose graph optimization (Kimera-RPGO), tasked with optimization and the formulation of a global trajectory. Subsequently, the third stage features the per-frame and multi-frame 3D mesh generator (Kimera-Mesher), responsible for the execution and generation of 3D meshes representing the environment. The fourth stage introduces semantically annotated 3D meshes (Kimera-Semantics), dedicated to generating 3D meshes with semantic annotations. This stage sets the groundwork for the subsequent and final stage, where the generated 3D meshes are utilized for output visualization, ultimately serving SLAM purposes, as illustrated in <xref ref-type="fig" rid="F5">Figure 5</xref>, part 9.</p>
</sec>
</sec>
<sec id="s3-3">
<title>3.3 RGB-D SLAM</title>
<p>RGB-D is an innovative approach that integrates RGB-D cameras with depth sensors to estimate and build models of the environment (<xref ref-type="bibr" rid="B69">Ji et al., 2021</xref>; <xref ref-type="bibr" rid="B95">Macario Barros et al., 2022</xref>). This technique has found applications in various domains, including robotic navigation and perception (<xref ref-type="bibr" rid="B93">Luo et al., 2021</xref>). It demonstrates efficient performance, particularly in well-lit indoor environments, providing valuable insights into the spatial landscape (<xref ref-type="bibr" rid="B29">Dai et al., 2021</xref>).</p>
<p>The incorporation of RGB-D cameras and depth sensors enables the system to capture both color and depth information simultaneously. This capability is advantageous in indoor applications, addressing the challenge of dense reconstruction in areas with low-textured surfaces (<xref ref-type="bibr" rid="B170">Zhang et al., 2021b</xref>). The objective of RGB-D SLAM is to generate a precise 3D reconstruction for the system surroundings, with a focus on the acquisition of geometric data to build a comprehensive 3D model (<xref ref-type="bibr" rid="B21">Chang et al., 2023</xref>). The methods used in this section are listed as follows:</p>
</sec>
<sec id="s3-4">
<title>3.3.1 RTAB-Map SLAM</title>
<p>RTAB-Map SLAM, which stands for real-time appearance-based mapping, is a visual SLAM technique that works with RGB-D and stereo cameras (<xref ref-type="bibr" rid="B123">Ragot et al., 2019</xref>). It is a versatile algorithm that can handle 2D and 3D mapping tasks depending on the sensor and data that are given (<xref ref-type="bibr" rid="B118">Peter et al., 2023</xref>; <xref ref-type="bibr" rid="B2">Acosta-Amaya et al., 2023</xref>). It integrates RGB-D and stereo data for 3D mapping, enabling the detection of static and dynamic 3D objects in the robot&#x2019;s environment (<xref ref-type="bibr" rid="B123">Ragot et al., 2019</xref>). It is applicable in large outdoor environments where LiDAR rays cannot reflect and manage the field around the robot (<xref ref-type="bibr" rid="B55">Gurel, 2018</xref>). Variable lighting and environmental interactions can cause robotic localization and mapping errors. Therefore, RTAB-Map&#x2019;s robustness and adaptability to changing illumination and scenes enable accurate operation in challenging environments. It can handle large, complex environments and is quickly adaptable to work with multiple cameras or laser rangefinders (<xref ref-type="bibr" rid="B85">Li et al., 2018</xref>; <xref ref-type="bibr" rid="B118">Peter et al., 2023</xref>). Additionally, the integration of T265 (Intel RealSense Camera) and implementation of ultra-wideband (UWB) (<xref ref-type="bibr" rid="B90">Lin and Yeh, 2022</xref>) address robot wheel slippage with drifting error handling, enhancing system efficiency with precise tracking and 3D point cloud generation, as done in <xref ref-type="bibr" rid="B117">Persson et al. (2023)</xref>; see <xref ref-type="table" rid="T2">Table 2</xref>.</p>
<p>The RTAB-Map SLAM method involves a series of steps that enable it to function (<xref ref-type="bibr" rid="B55">Gurel, 2018</xref>; <xref ref-type="bibr" rid="B78">Labb&#xe9; and Michaud, 2019</xref>). Initially, the hardware and front-end stage is responsible for tasks such as obtaining data from stereo and RGB-D cameras, generating frames, and integrating sensors. This stage prepares the frames that will be used in the subsequent stage. After the frames have been processed simultaneously with the tracking process, the loop closure is activated to generate the necessary odometry. Subsequently, the keyframes equalization and optimization processes are initiated to improve the quality of the 2D and 3D maps generated for SLAM applications, as shown in <xref ref-type="fig" rid="F5">Figure 5</xref>, part 7.</p>
<sec id="s3-4-1">
<title>3.3.2 DTAM-SLAM</title>
<p>DTAM-SLAM, which stands for dense tracking and mapping, is a V-SLAM algorithm designed for real-time camera tracking. It provides robust six degrees of freedom (6 DoF) tracking and facilitates efficient environmental modeling for robotic systems (<xref ref-type="bibr" rid="B109">Newcombe et al., 2011</xref>; <xref ref-type="bibr" rid="B95">Macario Barros et al., 2022</xref>). This approach plays a fundamental role in advancing applications such as robotics, augmented reality, and autonomous navigation, delivering precise tracking and high-quality map reconstruction. Furthermore, it is slightly dynamic with light; thus, it is accurate to operate in high and strong illumination fields (<xref ref-type="bibr" rid="B177">Zhu et al., 2022</xref>; <xref ref-type="bibr" rid="B165">Yang et al., 2022</xref>); see <xref ref-type="table" rid="T2">Table 2</xref>.</p>
<p>The DTAM-SLAM workflow is divided into a series of steps, each with its own purpose (<xref ref-type="bibr" rid="B109">Newcombe et al., 2011</xref>; <xref ref-type="bibr" rid="B95">Macario Barros et al., 2022</xref>). It begins with the input, such as the RGB-D camera, which helps initialize the system. In the camera tracking and reconstruction stage, the system selects frames and estimates textures on the image. It then accurately tracks the 6 DoF camera motion, determining its exact position and orientation. Furthermore, the optimization framework is activated and uses techniques such as spatially regularized energy minimization to enhance data terms, thereby improving the image quality that is captured from video streaming. As a result, the advanced process tuning carries out operations that improve the method&#x2019;s performance and produce precise outputs such as dense models, surface patchwork, and texture depth maps (see <xref ref-type="fig" rid="F5">Figure 5</xref>, part 2).</p>
</sec>
<sec id="s3-4-2">
<title>3.3.3 RGBD-SLAM</title>
<p>RGBD-SLAM, which stands for simultaneous localization and mapping using red&#x2013;green&#x2013;blue and depth data, is an important method that creates a comprehensive 3D map containing both static and dynamic elements (<xref ref-type="bibr" rid="B69">Ji et al., 2021</xref>). This method involves the tracking of trajectories and mapping of points associated with moving objects (<xref ref-type="bibr" rid="B145">Steinbr&#xfc;cker et al., 2011</xref>; <xref ref-type="bibr" rid="B113">Niu et al., 2019</xref>). Using these data types enhances the precision of SLAM results (<xref ref-type="bibr" rid="B36">Endres et al., 2012</xref>; <xref ref-type="bibr" rid="B86">Li Q. et al., 2022a</xref>). It has the ability to create registered point clouds or OctoMaps that can be used in robotic systems (<xref ref-type="bibr" rid="B169">Zhang and Li, 2023</xref>; <xref ref-type="bibr" rid="B127">Ren et al., 2022</xref>). In robotics applications, RGB-D SLAM, specifically V-SLAM, excels in both robustness and accuracy. It effectively addresses challenges such as working in a dynamic environment (<xref ref-type="bibr" rid="B145">Steinbr&#xfc;cker et al., 2011</xref>; <xref ref-type="bibr" rid="B113">Niu et al., 2019</xref>). The implementation of RGB-D SLAM faced a challenge in balancing segmentation accuracy, system load, and the number of detected classes from images. This challenge was tackled using YOLOX, optimized with TensorRT, for high-precision real-time object recognition (<xref ref-type="bibr" rid="B21">Chang et al., 2023</xref>; <xref ref-type="bibr" rid="B97">Mart&#xed;nez-Otzeta et al., 2022</xref>). It has versatile applications in real-world robotics scenarios, including autonomous driving cars, mobile robotics, and augmented reality (<xref ref-type="bibr" rid="B169">Zhang and Li, 2023</xref>; <xref ref-type="bibr" rid="B10">Bahraini et al., 2018</xref>); see <xref ref-type="table" rid="T2">Table 2</xref>.</p>
<p>The RGB-D SLAM workflow can be organized into five essential stages, each playing a crucial role in the SLAM process (<xref ref-type="bibr" rid="B69">Ji et al., 2021</xref>; <xref ref-type="bibr" rid="B58">Hast&#xfc;rk and Erkmen, 2021</xref>; <xref ref-type="bibr" rid="B36">Endres et al., 2012</xref>). The initial stage involves data acquisition, where RGB and depth camera data are collected as the foundational input for subsequent stages. In the second stage, the processing of RGB-D details is activated. During this phase, tasks include feature extraction and pairwise matching while simultaneously addressing depth-related activities, such as storing point clouds and aligning lines or shapes. In the third stage, activities such as noise removal and semantic segmentation (SS), in addition to loop closure detection, are performed to lay the groundwork for map construction. The fourth stage is dedicated to pose estimation and optimization techniques, leading to improvement in the accuracy of the system output. The final stage involves generating trajectory estimation and maps, refining the outputs for use in SLAM applications in robotic systems; see <xref ref-type="fig" rid="F5">Figure 5</xref>, part 3.</p>
</sec>
<sec id="s3-4-3">
<title>3.3.4 SCE-SLAM</title>
<p>SCE-SLAM, which stands for spatial coordinate errors SLAM, represents an innovative real-time semantic RGB-D SLAM technique. It has been developed to tackle the constraints posed by traditional SLAM systems when operating in dynamic environments (<xref ref-type="bibr" rid="B83">Li et al., 2020</xref>). The method was developed to improve the performance of existing V-SLAM methods such as ORB-SLAM3, achieving greater accuracy and robustness in dynamic situations by merging semantic and geometric data and leveraging YOLOv7 for quick object recognition (<xref ref-type="bibr" rid="B161">Wu et al., 2022</xref>). Thanks to these improvements, SLAM algorithms become well-suited for dynamic scenarios, which allows greater adaptability and comprehension of the system surroundings. This enables robotic systems to operate in more complex circumstances with fewer mistakes or slippage errors (<xref ref-type="bibr" rid="B91">Liu and Miura, 2021</xref>). Moreover, robots equipped with SCE-SLAM are empowered to operate in a more flexible and error-reduced manner, and they can operate in challenging light environments (<xref ref-type="bibr" rid="B141">Son et al., 2023</xref>; <xref ref-type="bibr" rid="B127">Ren et al., 2022</xref>); see <xref ref-type="table" rid="T2">Table 2</xref>.</p>
<p>The SCE-SLAM workflow is divided into three key stages (<xref ref-type="bibr" rid="B141">Son et al., 2023</xref>). The first stage involves the semantic module. This module processes camera input data and employs YOLOv7 to remove noise from the input. The second stage is the geometry module, where depth image analysis and spatial coordinate recovery are performed, preparing the system for integration with ORB-SLAM3. The final stage is dedicated to the integration of ORB-SLAM3. This integration facilitates the execution of processes within ORB-SLAM3. The process works in parallel with the loop closure technique, which results in a more accurate and precise system output; see <xref ref-type="fig" rid="F5">Figure 5</xref>, part 12.</p>
</sec>
</sec>
</sec>
<sec id="s4">
<title>4 Visual SLAM evolution and datasets</title>
<p>The roots of SLAM can be traced back nearly three decades, to when it was first introduced by Smith et al. (<xref ref-type="bibr" rid="B119">Picard et al., 2023</xref>; <xref ref-type="bibr" rid="B74">Khoyani and Amini, 2023</xref>). Since then, visual SLAM has evolved considerably and has had a major impact on robotics and computer vision (<xref ref-type="bibr" rid="B74">Khoyani and Amini, 2023</xref>). Along this journey, different V-SLAM methods have been created to tackle specific challenges in robot navigation, mapping, and understanding the surroundings (<xref ref-type="bibr" rid="B5">Aloui et al., 2022</xref>; <xref ref-type="bibr" rid="B148">Sun et al., 2017</xref>). To verify and compare these V-SLAM methods, important datasets have been created, which have played a crucial role in the field (<xref ref-type="bibr" rid="B115">Pal et al., 2022</xref>; <xref ref-type="bibr" rid="B152">Tian et al., 2023a</xref>). In this section, we explore the evolution of V-SLAM methods over time and how they have advanced with the help of suitable datasets.</p>
<p>To offer a more comprehensible perspective, we provide an illustrative timeline depicting the evolution of the most well-known V-SLAM methods, as shown in <xref ref-type="fig" rid="F6">Figure 6</xref>. This graphical representation illustrates the development of the V-SLAM methodologies from 2007 to 2021. These methods have been applied in various fields, including agriculture, healthcare, and industrial sectors, with a specific focus on interactive mobile robots. Additionally, we highlight several significant and widely recognized benchmark datasets crucial to V-SLAM, as shown in the following section.</p>
<fig id="F6" position="float">
<label>FIGURE 6</label>
<caption>
<p>Timeline illustrating the evolutionary journey of SLAM techniques, accompanied by the datasets that have played a pivotal role in their development. It showcases the dynamic progression of SLAM technologies over time, reflecting the symbiotic relationship between innovative methods and the rich variety of datasets with which they have been tested and refined.</p>
</caption>
<graphic xlink:href="frobt-11-1347985-g006.tif"/>
</fig>
<sec id="s4-1">
<title>4.1 TUM RGB-D dataset</title>
<p>The TUM RGB-D dataset is a widely used resource in the field of V-SLAM, which helps demonstrate the effectiveness and practicality of V-SLAM techniques. This dataset provides both RGB images and depth maps, with the RGB images saved in a 640 &#xd7; 480 8-bit format and the depth maps in a 640 &#xd7; 480 16-bit monochrome format (<xref ref-type="bibr" rid="B24">Chu et al., 2018</xref>). It offers RGB-D data, making it appropriate for both depth-based and V-SLAM techniques. Its usefulness extends to essential tasks such as mapping and odometry, providing researchers with a considerable volume of data for testing SLAM algorithms across diverse robotic applications (<xref ref-type="bibr" rid="B69">Ji et al., 2021</xref>; <xref ref-type="bibr" rid="B36">Endres et al., 2012</xref>). The adaptability of these datasets is remarkable, as they find application in mobile robotics and handheld platforms, demonstrating effectiveness in both indoor and outdoor environments (<xref ref-type="bibr" rid="B97">Mart&#xed;nez-Otzeta et al., 2022</xref>; <xref ref-type="bibr" rid="B141">Son et al., 2023</xref>).</p>
<p>Some of the recent studies used TUM datasets, such as in <xref ref-type="bibr" rid="B89">Li et al. (2023c)</xref>. They have leveraged the TUM RGB-D dataset to establish benchmarks customized to their specific research objectives. The study initiated its investigations with RGB-D images and ground truth poses provided by the TUM datasets, utilizing them to construct 3D scenes characterized with real space features. The integrative role assumed by the TUM RGB-D dataset in this context attains profound significance as a fundamental resource within the domain of V-SLAM research. For more details, refer to the TUM RGB-D SLAM dataset.</p>
</sec>
<sec id="s4-2">
<title>4.2 EuRoC MAV benchmark dataset</title>
<p>The EuRoC MAV benchmark dataset is specifically designed for micro aerial vehicles (MAVs) and contributes a valuable resource in the domain of MAV-SLAM research since it includes sensor data such as IMU and visual data such as stereo images. This dataset, published in early 2016, is made accessible for research purposes and offers diverse usability in indoor and outdoor applications. Consequently, it serves as a relevant choice for evaluating MAV navigation and mapping algorithms, particularly in conjunction with various V-SLAM methodologies (<xref ref-type="bibr" rid="B136">Sharafutdinov et al., 2023</xref>; <xref ref-type="bibr" rid="B82">Leutenegger, 2022</xref>; <xref ref-type="bibr" rid="B15">Burri et al., 2016</xref>).</p>
<p>The EuRoC MAV benchmark dataset, of notable benefits to robotics, is particularly valuable for researchers working on visual-inertial localization algorithms like OpenVINS (<xref ref-type="bibr" rid="B51">Geneva et al., 2020</xref>; <xref ref-type="bibr" rid="B147">Sumikura et al., 2019</xref>) and ORB-SLAM2 (<xref ref-type="bibr" rid="B105">Mur-Artal and Tard&#xf3;s, 2017a</xref>). This dataset incorporates synchronized stereo images, IMU measurements, and precise ground truth data, providing comprehensive resources for algorithm development. Its comprehensive data structure makes it highly suitable for thoroughly testing and validating algorithms tailored for MAV purposes (<xref ref-type="bibr" rid="B15">Burri et al., 2016</xref>). For more details, refer to the EuRoC MAV dataset.</p>
</sec>
<sec id="s4-3">
<title>4.3 KITTI dataset</title>
<p>The KITTI dataset is a widely utilized resource in robotics navigation and SLAM, with a particular emphasis on V-SLAM. Designed for outdoor SLAM applications in urban environments, KITTI integrates data from multiple sensors, including depth cameras, lidar, GPS, and inertial measurement unit (IMU), contributing to the delivery of precise results for robotic applications (<xref ref-type="bibr" rid="B49">Geiger et al., 2013</xref>). Its versatility extends to supporting diverse research objectives such as 3D object detection, semantic segmentation, moving object detection, visual odometry, and road-detection algorithms (<xref ref-type="bibr" rid="B160">Wang et al., 2023</xref>; <xref ref-type="bibr" rid="B124">Raikwar et al., 2023</xref>).</p>
<p>Researchers routinely rely on the KITTI dataset as a valuable asset to evaluate the effectiveness of V-SLAM techniques in real-time tracking scenarios. In addition, it serves as an essential tool for researchers and developers engaged in the domains of self-driving cars and mobile robotics (<xref ref-type="bibr" rid="B50">Geiger et al., 2012</xref>; <xref ref-type="bibr" rid="B114">Ortega-Gomez et al., 2023</xref>). Furthermore, its adaptability facilitates the evaluation of sensor configurations, thereby contributing to the refinement and assessment of algorithms crucial to these fields (<xref ref-type="bibr" rid="B49">Geiger et al., 2013</xref>). For more details, refer to the KITTI Vision Benchmark Suite.</p>
</sec>
<sec id="s4-4">
<title>4.4 Bonn RGB-D dynamic dataset</title>
<p>The Bonn dataset is purposefully designed for RGB-D SLAM, containing dynamic sequences of objects. It showcases RGB-D data accompanied by a 3D point cloud representing the dynamic environment, which has the same format as TUM RGB-D datasets (<xref ref-type="bibr" rid="B116">Palazzolo et al., 2019</xref>). It covers both indoor and outdoor scenarios, extending beyond the boundaries of controlled environments. It proves valuable for developing and evaluating algorithms related to tasks such as robot navigation, object recognition, and scene understanding. Significantly, this dataset is versatile enough to address the complexities of applications used in light-challenging areas (<xref ref-type="bibr" rid="B139">Soares et al., 2021</xref>; <xref ref-type="bibr" rid="B69">Ji et al., 2021</xref>). In addition, it proves to be an important resource for evaluating V-SLAM techniques characterized by high dynamism and crowds where the robot might face the challenge of object detection and interaction with the surrounding environment (<xref ref-type="bibr" rid="B29">Dai et al., 2021</xref>; <xref ref-type="bibr" rid="B164">Yan et al., 2022</xref>). For more details, refer to the Bonn RGB-D dynamic dataset.</p>
</sec>
<sec id="s4-5">
<title>4.5 ICL-NUIM dataset</title>
<p>The ICL-NUIM dataset is a benchmark dataset designed for RGB-D applications, serving as a valuable tool for evaluating RGB-D, visual odometry, and V-SLAM algorithms, particularly in indoor situations (<xref ref-type="bibr" rid="B57">Handa et al., 2014</xref>). It includes 3D sensor data and ground truth poses, facilitating the benchmarking of techniques related to mapping, localization, and object detection in the domain of robotic systems. Its pre-rendered sequences, scripts for generating test data, and standardized data formats are beneficial for researchers in evaluating and improving their SLAM algorithms (<xref ref-type="bibr" rid="B22">Chen et al., 2020</xref>). A unique aspect of the ICL-NUIM dataset is its inclusion of a three-dimensional model. This feature empowers researchers to explore and devise new scenarios for robotic systems that operate in unknown environments. Moreover, it promotes improvements in V-SLAM, making it possible to generate semantic maps that improve robots&#x2019; flexibility and ease their integration into such environments (<xref ref-type="bibr" rid="B168">Zhang et al., 2021a</xref>). For more details, refer to the ICL-NUIM dataset.</p>
</sec>
</sec>
<sec id="s5">
<title>5 Guidelines for evaluating and selecting visual SLAM methods</title>
<p>Choosing the right visual SLAM algorithm is crucial for building an effective SLAM system. With the continuous advancements in V-SLAM methodologies responding to diverse challenges, it is essential to navigate structured criteria to deploy and implement precise solutions (<xref ref-type="bibr" rid="B120">Placed et al., 2023</xref>; <xref ref-type="bibr" rid="B144">Sousa et al., 2023</xref>). In the context of robotic systems, we provide important parameters. We outline them by offering concise explanations of the selection criteria that guide how to choose suitable SLAM methods for field applications. These parameters are listed below.</p>
<sec id="s5-1">
<title>5.1 Robustness and accuracy</title>
<p>When choosing among V-SLAM methods, a key consideration is the robustness and accuracy of the method (<xref ref-type="bibr" rid="B177">Zhu et al., 2022</xref>). In particular, a robust algorithm can handle sensor noise, obstacles, and changing environments to ensure continuous and reliable operation (<xref ref-type="bibr" rid="B13">Bongard, 2008</xref>). Additionally, accuracy is equally important for creating precise maps and localization, allowing the robot to make informed decisions and move through the environment without errors (<xref ref-type="bibr" rid="B77">Kucner et al., 2023</xref>; <xref ref-type="bibr" rid="B107">Nakamura et al., 2023</xref>). These qualities collectively enhance the algorithm&#x2019;s reliability in challenging real-world situations, making them crucial factors for successful mobile robotic applications.</p>
</sec>
<sec id="s5-2">
<title>5.2 Computational efficiency and real-time requirements</title>
<p>In the application of mobile robotics, the selection of the SLAM algorithm is extremely important, focusing on the efficiency of the process happening inside the robot&#x2019;s computational architecture (<xref ref-type="bibr" rid="B95">Macario Barros et al., 2022</xref>). Therefore, the chosen V-SLAM algorithm must be carefully tailored to meet the computational demands imposed by the real-time constraints of the robot. This entails a delicate balancing act as the selected algorithm should be seamlessly integrated with the available processing power and hardware resources, all while satisfying the stringent real-time requirements of the application. The critical consideration for this step is the quality of the sensors, the processors, and/or computers so that they can generate a quick response and accurate localization in a very limited time (<xref ref-type="bibr" rid="B61">Henein et al., 2020</xref>).</p>
</sec>
<sec id="s5-3">
<title>5.3 Flexible hardware integration</title>
<p>In robotic applications, it is important for researchers to choose a SLAM algorithm that works well with the robot&#x2019;s sensors. Integrating suitable hardware improves speed and performance in SLAM systems through accelerators, method optimization, and energy-efficient designs (<xref ref-type="bibr" rid="B42">Eyvazpour et al., 2023</xref>). Various V-SLAM algorithms are designed for specific sensor types such as RGB-D, lidar, and stereo cameras. This facilitates seamless integration into the SLAM system, enhancing the functionality of utilizing integrated hardware (<xref ref-type="bibr" rid="B159">Wang et al., 2022</xref>). Moreover, the availability of ROS packages and open-source software for sensors and cameras provides increased modality and flexibility during system installation. This, in turn, enhances adaptability and makes integration easy and free of challenges (<xref ref-type="bibr" rid="B136">Sharafutdinov et al., 2023</xref>; <xref ref-type="bibr" rid="B129">Roch et al., 2023</xref>). For example, the OAK-D Camera, also known as the OpenCV AI Kit, is a smart camera that is great for indoor use. It can automatically process data files and use neural reasoning right inside the camera, without needing extra computer power from the robot. This means it can run neural network models without making the robot&#x2019;s operating system work harder (<xref ref-type="bibr" rid="B56">Han et al., 2023</xref>).</p>
</sec>
<sec id="s5-4">
<title>5.4 System scalability</title>
<p>In SLAM algorithms for robotics, scalability is a vital factor to keep in mind during the design of the system Middleware architecture. It enables rapid situational awareness over large areas, supports flexible dense metric-semantic SLAM in multi-robot systems, and facilitates fast map learning in unknown environments (<xref ref-type="bibr" rid="B20">Castro, 2021</xref>). This parameter needs to evaluate the algorithm&#x2019;s capability to adjust to different mapping sizes and environmental conditions, particularly considering light emission, video, and/or image clarity. It should also provide versatility for various application needs, applicable to both indoor and outdoor scenarios (<xref ref-type="bibr" rid="B79">Laidlow et al., 2019</xref>; <xref ref-type="bibr" rid="B171">Zhang et al., 2023</xref>).</p>
</sec>
<sec id="s5-5">
<title>5.5 Adapting to dynamic environments</title>
<p>The ability of a SLAM algorithm to handle dynamic objects in the environment is an important consideration for robotics systems. This parameter assesses the algorithm&#x2019;s ability to detect, track, and incorporate dynamic objects and moving obstacles into the mapping process (<xref ref-type="bibr" rid="B92">Lopez et al., 2020</xref>). It focuses on the algorithm&#x2019;s capability to enable the robot to handle these objects effectively and respond quickly during the ongoing SLAM process (<xref ref-type="bibr" rid="B161">Wu et al., 2022</xref>). Robust handling of dynamic environments ensures that the algorithm can adapt and respond in real-time applications. This is crucial for systems operating in environments where changes occur instantaneously, such as in interactive robotics applications (<xref ref-type="bibr" rid="B85">Li et al., 2018</xref>).</p>
</sec>
<sec id="s5-6">
<title>5.6 Open-source availability and community support</title>
<p>When choosing a SLAM algorithm for a project, it is important to observe whether it is open-source and has a community of active users. This matters because it makes it easier to customize and adapt the system according to the project&#x2019;s needs, benefiting from the experiences of the user community (<xref ref-type="bibr" rid="B74">Khoyani and Amini, 2023</xref>; <xref ref-type="bibr" rid="B162">Xiao et al., 2019</xref>). Additionally, having community support ensures that the algorithm receives updates, bug fixes, and improvements. This enhances the reliability and longevity of the algorithm, making it better equipped to handle challenges during system implementation (<xref ref-type="bibr" rid="B117">Persson et al., 2023</xref>).</p>
</sec>
<sec id="s5-7">
<title>5.7 Map data representation and storage</title>
<p>This parameter focuses on how a SLAM algorithm represents and manages maps, allowing the researcher to determine its suitability for system hardware implementation. The evaluation includes the chosen method&#x2019;s map representation, whether it is grid-based, feature-based, or point cloud, helping in assessing the efficiency of storing map information in the robotic system without encountering challenges (<xref ref-type="bibr" rid="B117">Persson et al., 2023</xref>; <xref ref-type="bibr" rid="B2">Acosta-Amaya et al., 2023</xref>). The selection of map representation influences memory usage and computational demands. It is a critical factor for robotic applications, especially those based on CNN and deep learning approaches (<xref ref-type="bibr" rid="B33">Duan et al., 2019</xref>).</p>
<p>In conclusion, we have summarized the preceding details in <xref ref-type="table" rid="T2">Table 2</xref>, offering a comprehensive overview of various V-SLAM algorithms. This table serves as a valuable resource for informed algorithm selection with comparative details for each method. It offers insights into the sensor capabilities, examining the types of sensors most effectively used by each algorithm and their role in facilitating algorithmic functionality. Moreover, the table underscores the potential application domains of the methods, empowering researchers to align their research objectives with suitable V-SLAM methodologies. The table also classifies algorithms based on their mapping scale distinguishing between small-scale (up to 100 m), medium-scale (up to 500 m), and large-scale (1 km and beyond) mapping capabilities (<xref ref-type="bibr" rid="B153">Tian et al., 2023b</xref>; <xref ref-type="bibr" rid="B65">Hong et al., 2021</xref>).</p>
<p>It also assesses their performance under varying illumination conditions, classifying algorithms based on their robustness, with categories ranging from the lowest, represented by <bold>(&#x2b;)</bold>, to the highest, represented by <bold>(&#x2b;&#x2b;&#x2b;&#x2b;&#x2b;)</bold>. Additionally, the table categorizes the algorithms based on their range of light intensity (RoLI), which reflects the robot&#x2019;s ability to operate effectively in diverse lighting conditions, spanning from very dim to extremely bright. Moreover, the tolerance to directionality (T2D) category assesses the algorithm&#x2019;s ability to function in environments with strong directional light sources, such as spotlights and windows. Collectively, these criteria furnish a valuable resource for researchers seeking to pick the most fitting SLAM approach for their specific research endeavors.</p>
</sec>
</sec>
<sec sec-type="conclusion" id="s6">
<title>6 Conclusion</title>
<p>The study simplifies the evaluation of V-SLAM methods, making it easy to understand their behavior and suitability for robotics applications. It covers various active V-SLAM methods, each with unique strengths, limitations, specialized use cases, and special workflows. It has served as a solid foundation for the proposed research methodology for selection among V-SLAM methods. Throughout the research, it becomes evident that V-SLAM&#x2019;s evolution is importantly linked to the availability of benchmark datasets, serving as a ground base for method validation. Consequently, the work has laid a strong foundation for understanding the system behavior of the working V-SLAM methods. It explores SLAM techniques that operate in the ROS environment, offering flexibility in simplifying the architecture of robotic systems. The study includes the identification of suitable algorithms and sensor fusion approaches relevant to researchers&#x2019; work.</p>
<p>By examining previous studies, we identified the potential benefits of incorporating V-SLAM software tools into the system architecture. Additionally, the integration of hardware tools such as the T265 camera and OAK-D camera emerged as a valuable strategy. This integration has significant potential to reduce errors during robot navigation, thereby enhancing overall system robustness.</p>
</sec>
</body>
<back>
<sec id="s7">
<title>Author contributions</title>
<p>BA: investigation, software, supervision, and writing&#x2013;review and editing. TH: data curation, methodology, conceptualization, validation, investigation, resources, visualization, writing&#x2013;review and editing. AA: methodology, formal analysis, validation, investigation, visualization, software, writing&#x2013;review and editing. AA&#x2013;H: methodology, supervision, project administration, validation, funding acquisition, resources, writing&#x2013;review and editing.</p>
</sec>
<sec sec-type="funding-information" id="s8">
<title>Funding</title>
<p>The author(s) declare that financial support was received for the research, authorship, and/or publication of this article. This work is funded and supported by the Federal Ministry of Education and Research of Germany (BMBF) (AutoKoWAT-3DMAt under grant No. 13N16336) and the German Research Foundation (DFG) under grant Al 638/15-1.</p>
</sec>
<sec sec-type="COI-statement" id="s9">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s10">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors, and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Abbad</surname>
<given-names>A. M.</given-names>
</name>
<name>
<surname>Haouala</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Raisov</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Benkredda</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Low cost mobile navigation using 2d-slam in complex environments</article-title>
</citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Acosta-Amaya</surname>
<given-names>G. A.</given-names>
</name>
<name>
<surname>Cadavid-Jimenez</surname>
<given-names>J. M.</given-names>
</name>
<name>
<surname>Jimenez-Builes</surname>
<given-names>J. A.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Three-dimensional location and mapping analysis in mobile robotics based on visual slam methods</article-title>. <source>J. Robotics</source> <volume>2023</volume>, <fpage>1</fpage>&#x2013;<lpage>15</lpage>. <pub-id pub-id-type="doi">10.1155/2023/6630038</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ai</surname>
<given-names>Y.-b.</given-names>
</name>
<name>
<surname>Rui</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>X.-q.</given-names>
</name>
<name>
<surname>He</surname>
<given-names>J.-l.</given-names>
</name>
<name>
<surname>Fu</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>J.-b.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Visual slam in dynamic environments based on object detection</article-title>. <source>Def. Technol.</source> <volume>17</volume>, <fpage>1712</fpage>&#x2013;<lpage>1721</lpage>. <pub-id pub-id-type="doi">10.1016/j.dt.2020.09.012</pub-id>
</citation>
</ref>
<ref id="B4">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Alismail</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Browning</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Lucey</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2017</year>). &#x201c;<article-title>Photometric bundle adjustment for vision-based slam</article-title>,&#x201d; in <conf-name>Computer Vision&#x2013;ACCV 2016: 13th Asian Conference on Computer Vision</conf-name>, <conf-loc>Taipei, Taiwan</conf-loc>, <conf-date>November 20-24, 2016</conf-date> (<publisher-name>Springer</publisher-name>), <fpage>324</fpage>&#x2013;<lpage>341</lpage>. <comment>Revised Selected Papers, Part IV</comment>.</citation>
</ref>
<ref id="B5">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Aloui</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Guizani</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Hammadi</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Haddar</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Soriano</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2022</year>). &#x201c;<article-title>Systematic literature review of collaborative slam applied to autonomous mobile robots</article-title>,&#x201d; in <conf-name>2022 IEEE Information Technologies and Smart Industrial Systems (ITSIS)</conf-name>, <fpage>1</fpage>&#x2013;<lpage>5</lpage>.</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Altawil</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Can</surname>
<given-names>F. C.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Design and analysis of a four dof robotic arm with two grippers used in agricultural operations</article-title>. <source>Int. J. Appl. Math. Electron. Comput.</source> <volume>11</volume>, <fpage>79</fpage>&#x2013;<lpage>87</lpage>. <pub-id pub-id-type="doi">10.18100/ijamec.1217072</pub-id>
</citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Arfa</surname>
<given-names>E.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Study and implementation of LiDAR-based SLAM algorithm and map-based autonomous navigation for a telepresence robot to be used as a chaperon for smart laboratory requirements</article-title>. <source>Master&#x2019;s thesis</source>.</citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Aslan</surname>
<given-names>M. F.</given-names>
</name>
<name>
<surname>Durdu</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Yusefi</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Sabanci</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Sungur</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>A tutorial: mobile robotics, slam, bayesian filter, keyframe bundle adjustment and ros applications</article-title>. <source>Robot Operating Syst. (ROS) Complete Reference</source> <volume>6</volume>, <fpage>227</fpage>&#x2013;<lpage>269</lpage>.</citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Awais</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Henrich</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>Human-robot collaboration by intention recognition using probabilistic state machines</article-title>, <fpage>75</fpage>&#x2013;<lpage>80</lpage>.</citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bahraini</surname>
<given-names>M. S.</given-names>
</name>
<name>
<surname>Bozorg</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Rad</surname>
<given-names>A. B.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Slam in dynamic environments via ml-ransac</article-title>. <source>Mechatronics</source> <volume>49</volume>, <fpage>105</fpage>&#x2013;<lpage>118</lpage>. <pub-id pub-id-type="doi">10.1016/j.mechatronics.2017.12.002</pub-id>
</citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Beghdadi</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Mallem</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>A comprehensive overview of dynamic visual slam and deep learning: concepts, methods and challenges</article-title>. <source>Mach. Vis. Appl.</source> <volume>33</volume>, <fpage>54</fpage>. <pub-id pub-id-type="doi">10.1007/s00138-022-01306-w</pub-id>
</citation>
</ref>
<ref id="B12">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Bloesch</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Omari</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Hutter</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Siegwart</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2015</year>). &#x201c;<article-title>Robust visual inertial odometry using a direct ekf-based approach</article-title>,&#x201d; in <conf-name>2015 IEEE/RSJ international conference on intelligent robots and systems (IROS)</conf-name> (<publisher-name>IEEE</publisher-name>), <fpage>298</fpage>&#x2013;<lpage>304</lpage>.</citation>
</ref>
<ref id="B13">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Bongard</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2008</year>). <source>Probabilistic robotics. Sebastian Thrun, Wolfram Burgard, and Dieter Fox</source>. <publisher-loc>Cambridge, MA, United States</publisher-loc>: <publisher-name>MIT press</publisher-name>, <fpage>647</fpage>. <comment>2005</comment>.</citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bruno</surname>
<given-names>H. M. S.</given-names>
</name>
<name>
<surname>Colombini</surname>
<given-names>E. L.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Lift-slam: a deep-learning feature-based monocular visual slam method</article-title>. <source>Neurocomputing</source> <volume>455</volume>, <fpage>97</fpage>&#x2013;<lpage>110</lpage>. <pub-id pub-id-type="doi">10.1016/j.neucom.2021.05.027</pub-id>
</citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Burri</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Nikolic</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Gohl</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Schneider</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Rehder</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Omari</surname>
<given-names>S.</given-names>
</name>
<etal/>
</person-group> (<year>2016</year>). <article-title>The euroc micro aerial vehicle datasets</article-title>. <source>Int. J. Robotics Res.</source> <volume>35</volume>, <fpage>1157</fpage>&#x2013;<lpage>1163</lpage>. <pub-id pub-id-type="doi">10.1177/0278364915620033</pub-id>
</citation>
</ref>
<ref id="B16">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Bustos</surname>
<given-names>A. P.</given-names>
</name>
<name>
<surname>Chin</surname>
<given-names>T.-J.</given-names>
</name>
<name>
<surname>Eriksson</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Reid</surname>
<given-names>I.</given-names>
</name>
</person-group> (<year>2019</year>). &#x201c;<article-title>Visual slam: why bundle adjust?</article-title>,&#x201d; in <conf-name>2019 international conference on robotics and automation (ICRA)</conf-name> (<publisher-name>IEEE</publisher-name>), <fpage>2385</fpage>&#x2013;<lpage>2391</lpage>.</citation>
</ref>
<ref id="B17">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Buyval</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Afanasyev</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Magid</surname>
<given-names>E.</given-names>
</name>
</person-group> (<year>2017</year>). &#x201c;<article-title>Comparative analysis of ros-based monocular slam methods for indoor navigation</article-title>,&#x201d; in <conf-name>Ninth International Conference on Machine Vision (ICMV 2016)</conf-name> (<publisher-name>SPIE</publisher-name>), <fpage>305</fpage>&#x2013;<lpage>310</lpage>.</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Campos</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Elvira</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Rodr&#xed;guez</surname>
<given-names>J. J. G.</given-names>
</name>
<name>
<surname>Montiel</surname>
<given-names>J. M.</given-names>
</name>
<name>
<surname>Tard&#xf3;s</surname>
<given-names>J. D.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Orb-slam3: an accurate open-source library for visual, visual&#x2013;inertial, and multimap slam</article-title>. <source>IEEE Trans. Robotics</source> <volume>37</volume>, <fpage>1874</fpage>&#x2013;<lpage>1890</lpage>. <pub-id pub-id-type="doi">10.1109/tro.2021.3075644</pub-id>
</citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Campos</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Elvira</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Rodr&#xed;guez</surname>
<given-names>J. J. G.</given-names>
</name>
<name>
<surname>Montiel</surname>
<given-names>J. M.</given-names>
</name>
<name>
<surname>Tard&#xf3;s</surname>
<given-names>J. D.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Orb-slam3: an accurate open-source library for visual, visual&#x2013;inertial, and multimap slam</article-title>. <source>IEEE Trans. Robotics</source> <volume>37</volume>, <fpage>1874</fpage>&#x2013;<lpage>1890</lpage>. <pub-id pub-id-type="doi">10.1109/tro.2021.3075644</pub-id>
</citation>
</ref>
<ref id="B20">
<citation citation-type="thesis">
<person-group person-group-type="author">
<name>
<surname>Castro</surname>
<given-names>G. I.</given-names>
</name>
</person-group> (<year>2021</year>). <source>Scalability and consistency improvements in SLAM systems with applications in active multi-robot exploration</source>. <comment>Ph.D. thesis</comment> (<publisher-name>Faculty of Exact and Natural Sciences, Department of Computation</publisher-name>).</citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Yolov4-tiny-based robust rgb-d slam approach with point and surface feature fusion in complex indoor environments</article-title>. <source>J. Field Robotics</source> <volume>40</volume>, <fpage>521</fpage>&#x2013;<lpage>534</lpage>. <pub-id pub-id-type="doi">10.1002/rob.22145</pub-id>
</citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Weng</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Wan</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Luo</surname>
<given-names>J.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Advanced mapping robot and high-resolution dataset</article-title>. <source>Robotics Aut. Syst.</source> <volume>131</volume>, <fpage>103559</fpage>. <pub-id pub-id-type="doi">10.1016/j.robot.2020.103559</pub-id>
</citation>
</ref>
<ref id="B23">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Chou</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Song</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Davis</surname>
<given-names>T. A.</given-names>
</name>
</person-group> (<year>2019</year>). &#x201c;<article-title>On the tunable sparse graph solver for pose graph optimization in visual slam problems</article-title>,&#x201d; in <conf-name>2019 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)</conf-name> (<publisher-name>IEEE</publisher-name>), <fpage>1300</fpage>&#x2013;<lpage>1306</lpage>.</citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chu</surname>
<given-names>P. M.</given-names>
</name>
<name>
<surname>Sung</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Cho</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Generative adversarial network-based method for transforming single rgb image into 3d point cloud</article-title>. <source>IEEE Access</source> <volume>7</volume>, <fpage>1021</fpage>&#x2013;<lpage>1029</lpage>. <pub-id pub-id-type="doi">10.1109/access.2018.2886213</pub-id>
</citation>
</ref>
<ref id="B25">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Chung</surname>
<given-names>C.-M.</given-names>
</name>
<name>
<surname>Tseng</surname>
<given-names>Y.-C.</given-names>
</name>
<name>
<surname>Hsu</surname>
<given-names>Y.-C.</given-names>
</name>
<name>
<surname>Shi</surname>
<given-names>X.-Q.</given-names>
</name>
<name>
<surname>Hua</surname>
<given-names>Y.-H.</given-names>
</name>
<name>
<surname>Yeh</surname>
<given-names>J.-F.</given-names>
</name>
<etal/>
</person-group> (<year>2023</year>). &#x201c;<article-title>Orbeez-slam: a real-time monocular visual slam with orb features and nerf-realized mapping</article-title>,&#x201d; in <conf-name>2023 IEEE International Conference on Robotics and Automation (ICRA)</conf-name> (<publisher-name>IEEE</publisher-name>), <fpage>9400</fpage>&#x2013;<lpage>9406</lpage>.</citation>
</ref>
<ref id="B26">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Civera</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>G&#xe1;lvez-L&#xf3;pez</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Riazuelo</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Tard&#xf3;s</surname>
<given-names>J. D.</given-names>
</name>
<name>
<surname>Montiel</surname>
<given-names>J. M. M.</given-names>
</name>
</person-group> (<year>2011</year>). &#x201c;<article-title>Towards semantic slam using a monocular camera</article-title>,&#x201d; in <conf-name>2011 IEEE/RSJ international conference on intelligent robots and systems</conf-name> (<publisher-name>IEEE</publisher-name>), <fpage>1277</fpage>&#x2013;<lpage>1284</lpage>.</citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cui</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Dong</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Zhu</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Bow3d: bag of words for real-time loop closing in 3d lidar slam</article-title>. <source>IEEE Robotics Automation Lett.</source> <volume>8</volume>, <fpage>2828</fpage>&#x2013;<lpage>2835</lpage>. <pub-id pub-id-type="doi">10.1109/lra.2022.3221336</pub-id>
</citation>
</ref>
<ref id="B28">
<citation citation-type="web">
<collab>CVG, T. U. o. M.</collab> (<year>2023</year>). <article-title>LSD-SLAM: large-scale direct monocular SLAM</article-title>. <comment>Available at: <ext-link ext-link-type="uri" xlink:href="https://cvg.cit.tum.de/research/vslam/lsdslam?redirect">https://cvg.cit.tum.de/research/vslam/lsdslam?redirect</ext-link>.</comment>
</citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dai</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zheng</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Rgb-d slam with moving object tracking in dynamic environments</article-title>. <source>IET Cyber-Systems Robotics</source> <volume>3</volume>, <fpage>281</fpage>&#x2013;<lpage>291</lpage>. <pub-id pub-id-type="doi">10.1049/csy2.12019</pub-id>
</citation>
</ref>
<ref id="B30">
<citation citation-type="web">
<collab>[Dataset] uz.slaml</collab> (<year>2023</year>). <article-title>ORB-SLAM3</article-title>. <comment>Available at: <ext-link ext-link-type="uri" xlink:href="https://github.com/UZ-SLAMLab/ORB_SLAM3">https://github.com/UZ-SLAMLab/ORB_SLAM3</ext-link>.</comment>
</citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Davison</surname>
<given-names>A. J.</given-names>
</name>
<name>
<surname>Reid</surname>
<given-names>I. D.</given-names>
</name>
<name>
<surname>Molton</surname>
<given-names>N. D.</given-names>
</name>
<name>
<surname>Stasse</surname>
<given-names>O.</given-names>
</name>
</person-group> (<year>2007</year>). <article-title>Monoslam: real-time single camera slam</article-title>. <source>IEEE Trans. pattern analysis Mach. Intell.</source> <volume>29</volume>, <fpage>1052</fpage>&#x2013;<lpage>1067</lpage>. <pub-id pub-id-type="doi">10.1109/tpami.2007.1049</pub-id>
</citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>De Croce</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Pire</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Bergero</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Ds-ptam: distributed stereo parallel tracking and mapping slam system</article-title>. <source>J. Intelligent Robotic Syst.</source> <volume>95</volume>, <fpage>365</fpage>&#x2013;<lpage>377</lpage>. <pub-id pub-id-type="doi">10.1007/s10846-018-0913-6</pub-id>
</citation>
</ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Duan</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Junginger</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Jin</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Thurow</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Deep learning for visual slam in transportation robotics: a review</article-title>. <source>Transp. Saf. Environ.</source> <volume>1</volume>, <fpage>177</fpage>&#x2013;<lpage>184</lpage>. <pub-id pub-id-type="doi">10.1093/tse/tdz019</pub-id>
</citation>
</ref>
<ref id="B34">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Durrant-Whyte</surname>
<given-names>H. F.</given-names>
</name>
</person-group> (<year>2012</year>). <source>Integration, coordination and control of multi-sensor robot systems</source>, <volume>36</volume>. <publisher-name>Springer Science and Business Media</publisher-name>.</citation>
</ref>
<ref id="B35">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>El Bouazzaoui</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Rodriguez</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Vincke</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>El Ouardi</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Indoor visual slam dataset with various acquisition modalities</article-title>. <source>Data Brief</source> <volume>39</volume>, <fpage>107496</fpage>. <pub-id pub-id-type="doi">10.1016/j.dib.2021.107496</pub-id>
</citation>
</ref>
<ref id="B36">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Endres</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Hess</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Engelhard</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Sturm</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Cremers</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Burgard</surname>
<given-names>W.</given-names>
</name>
</person-group> (<year>2012</year>). &#x201c;<article-title>An evaluation of the rgb-d slam system</article-title>,&#x201d; in <conf-name>2012 IEEE international conference on robotics and automation</conf-name> (<publisher-name>IEEE</publisher-name>), <fpage>1691</fpage>&#x2013;<lpage>1696</lpage>.</citation>
</ref>
<ref id="B37">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Engel</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Sch&#xf6;ps</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Cremers</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2014</year>). &#x201c;<article-title>Lsd-slam: large-scale direct monocular slam</article-title>,&#x201d; in <conf-name>European conference on computer vision</conf-name> (<publisher-name>Springer</publisher-name>), <fpage>834</fpage>&#x2013;<lpage>849</lpage>.</citation>
</ref>
<ref id="B38">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Engel</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>St&#xfc;ckler</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Cremers</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2015</year>). &#x201c;<article-title>Large-scale direct slam with stereo cameras</article-title>,&#x201d; in <conf-name>2015 IEEE/RSJ international conference on intelligent robots and systems (IROS)</conf-name> (<publisher-name>IEEE</publisher-name>), <fpage>1935</fpage>&#x2013;<lpage>1942</lpage>.</citation>
</ref>
<ref id="B39">
<citation citation-type="web">
<collab>eth.a</collab> (<year>2023a</year>). <article-title>OKVIS: open keyframe-based visual-inertial SLAM</article-title>. <comment>Available at: <ext-link ext-link-type="uri" xlink:href="https://github.com/ethz-asl/okvis">https://github.com/ethz-asl/okvis</ext-link>.</comment>
</citation>
</ref>
<ref id="B40">
<citation citation-type="web">
<collab>eth.a</collab> (<year>2023b</year>). <article-title>Rovio: robust visual inertial odometry</article-title>. <comment>Available at: <ext-link ext-link-type="uri" xlink:href="https://github.com/ethz-asl/rovio">https://github.com/ethz-asl/rovio</ext-link>.</comment>
</citation>
</ref>
<ref id="B41">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Eudes</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Lhuillier</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Naudet-Collette</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Dhome</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2010</year>). &#x201c;<article-title>Fast odometry integration in local bundle adjustment-based visual slam</article-title>,&#x201d; in <conf-name>2010 20th International Conference on Pattern Recognition</conf-name> (<publisher-name>IEEE</publisher-name>), <fpage>290</fpage>&#x2013;<lpage>293</lpage>.</citation>
</ref>
<ref id="B42">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Eyvazpour</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Shoaran</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Karimian</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Hardware implementation of slam algorithms: a survey on implementation approaches and platforms</article-title>. <source>Artif. Intell. Rev.</source> <volume>56</volume>, <fpage>6187</fpage>&#x2013;<lpage>6239</lpage>. <pub-id pub-id-type="doi">10.1007/s10462-022-10310-5</pub-id>
</citation>
</ref>
<ref id="B43">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fan</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Rubenstein</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Murphey</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Cpl-slam: efficient and certifiably correct planar graph-based slam using the complex number representation</article-title>. <source>IEEE Trans. Robotics</source> <volume>36</volume>, <fpage>1719</fpage>&#x2013;<lpage>1737</lpage>. <pub-id pub-id-type="doi">10.1109/tro.2020.3006717</pub-id>
</citation>
</ref>
<ref id="B44">
<citation citation-type="web">
<collab>felix</collab> (<year>2023</year>). <article-title>RGB-D SLAM v2</article-title>. <comment>Available at: <ext-link ext-link-type="uri" xlink:href="https://github.com/felixendres/rgbdslam_v2">https://github.com/felixendres/rgbdslam_v2</ext-link>.</comment>
</citation>
</ref>
<ref id="B45">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fern&#xe1;ndez-Moral</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Jim&#xe9;nez</surname>
<given-names>J. G.</given-names>
</name>
<name>
<surname>Ar&#xe9;valo</surname>
<given-names>V.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Creating metric-topological maps for large-scale monocular slam</article-title>. <source>ICINCO</source> (<issue>2</issue>), <fpage>39</fpage>&#x2013;<lpage>47</lpage>.</citation>
</ref>
<ref id="B46">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fiedler</surname>
<given-names>M.-A.</given-names>
</name>
<name>
<surname>Werner</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Khalifa</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Al-Hamadi</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Sfpd: simultaneous face and person detection in real-time for human&#x2013;robot interaction</article-title>. <source>Sensors</source> <volume>21</volume>, <fpage>5918</fpage>. <pub-id pub-id-type="doi">10.3390/s21175918</pub-id>
</citation>
</ref>
<ref id="B47">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fong</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Nourbakhsh</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Dautenhahn</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2003</year>). <article-title>A survey of socially interactive robots</article-title>. <source>Robotics Aut. Syst.</source> <volume>42</volume>, <fpage>143</fpage>&#x2013;<lpage>166</lpage>. <pub-id pub-id-type="doi">10.1016/s0921-8890(02)00372-x</pub-id>
</citation>
</ref>
<ref id="B48">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Gao</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Lang</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Ren</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2020</year>). &#x201c;<article-title>Stereo visual slam for autonomous vehicles: a review</article-title>,&#x201d; in <conf-name>2020 IEEE International Conference on Systems, Man, and Cybernetics (SMC)</conf-name> (<publisher-name>IEEE</publisher-name>), <fpage>1316</fpage>&#x2013;<lpage>1322</lpage>.</citation>
</ref>
<ref id="B49">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Geiger</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Lenz</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Stiller</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Urtasun</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Vision meets robotics: the kitti dataset</article-title>. <source>Int. J. Robotics Res.</source> <volume>32</volume>, <fpage>1231</fpage>&#x2013;<lpage>1237</lpage>. <pub-id pub-id-type="doi">10.1177/0278364913491297</pub-id>
</citation>
</ref>
<ref id="B50">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Geiger</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Lenz</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Urtasun</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2012</year>). &#x201c;<article-title>Are we ready for autonomous driving? the kitti vision benchmark suite</article-title>,&#x201d; in <conf-name>2012 IEEE conference on computer vision and pattern recognition</conf-name> (<publisher-name>IEEE</publisher-name>), <fpage>3354</fpage>&#x2013;<lpage>3361</lpage>.</citation>
</ref>
<ref id="B51">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Geneva</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Eckenhoff</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Lee</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2020</year>). &#x201c;<article-title>OpenVINS: a research platform for visual-inertial estimation</article-title>,&#x201d; in <conf-name>Proc. of the IEEE International Conference on Robotics and Automation</conf-name>, <conf-loc>Paris, France</conf-loc>.</citation>
</ref>
<ref id="B52">
<citation citation-type="web">
<collab>GPL</collab> (<year>2023</year>). <comment>Available at: <ext-link ext-link-type="uri" xlink:href="https://github.com/Oxford-PTAM/PTAM-GPL">https://github.com/Oxford-PTAM/PTAM-GPL</ext-link>.</comment>
</citation>
</ref>
<ref id="B53">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Grisetti</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Stachniss</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Burgard</surname>
<given-names>W.</given-names>
</name>
</person-group> (<year>2007</year>). <article-title>Improved techniques for grid mapping with rao-blackwellized particle filters</article-title>. <source>IEEE Trans. Robotics</source> <volume>23</volume>, <fpage>34</fpage>&#x2013;<lpage>46</lpage>. <pub-id pub-id-type="doi">10.1109/tro.2006.889486</pub-id>
</citation>
</ref>
<ref id="B54">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gu</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Meng</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Real-time visual inertial odometry with a resource-efficient harris corner detection accelerator on fpga platform</article-title>, <fpage>10542</fpage>&#x2013;<lpage>10548</lpage>.</citation>
</ref>
<ref id="B55">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Gurel</surname>
<given-names>C. S.</given-names>
</name>
</person-group> (<year>2018</year>). <source>Real-time 2d and 3d slam using rtab-map, gmapping, and cartographer packages</source>. <publisher-name>University of Maryland</publisher-name>.</citation>
</ref>
<ref id="B56">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Han</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Mokhtarzadeh</surname>
<given-names>A. A.</given-names>
</name>
<name>
<surname>Xiao</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Novel cartographer using an oak-d smart camera for indoor robots location and navigation</article-title>. <source>J. Phys. Conf. Ser.</source> <volume>2467</volume>, <fpage>012029</fpage>. <pub-id pub-id-type="doi">10.1088/1742-6596/2467/1/012029</pub-id>
</citation>
</ref>
<ref id="B57">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Handa</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Whelan</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>McDonald</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Davison</surname>
<given-names>A. J.</given-names>
</name>
</person-group> (<year>2014</year>). &#x201c;<article-title>A benchmark for rgb-d visual odometry, 3d reconstruction and slam</article-title>,&#x201d; in <conf-name>2014 IEEE international conference on Robotics and automation (ICRA)</conf-name> (<publisher-name>IEEE</publisher-name>), <fpage>1524</fpage>&#x2013;<lpage>1531</lpage>.</citation>
</ref>
<ref id="B58">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hast&#xfc;rk</surname>
<given-names>&#xd6;.</given-names>
</name>
<name>
<surname>Erkmen</surname>
<given-names>A. M.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Dudmap: 3d rgb-d mapping for dense, unstructured, and dynamic environment</article-title>. <source>Int. J. Adv. Robotic Syst.</source> <volume>18</volume>, <fpage>172988142110161</fpage>. <pub-id pub-id-type="doi">10.1177/17298814211016178</pub-id>
</citation>
</ref>
<ref id="B59">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hempel</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Al-Hamadi</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Pixel-wise motion segmentation for slam in dynamic environments</article-title>. <source>IEEE Access</source> <volume>8</volume>, <fpage>164521</fpage>&#x2013;<lpage>164528</lpage>. <pub-id pub-id-type="doi">10.1109/access.2020.3022506</pub-id>
</citation>
</ref>
<ref id="B60">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Hempel</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Dinges</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Al-Hamadi</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2023</year>). &#x201c;<article-title>Sentiment-based engagement strategies for intuitive human-robot interaction</article-title>,&#x201d; in <conf-name>Proceedings of the 18th International Joint Conference on Computer Vision, Imaging and Computer Graphics Theory and Applications (VISIGRAPP 2023) - Volume 4: VISAPP</conf-name> (<publisher-name>INSTICC (SciTePress)</publisher-name>), <fpage>680</fpage>&#x2013;<lpage>686</lpage>. <pub-id pub-id-type="doi">10.5220/0011772900003417</pub-id>
</citation>
</ref>
<ref id="B61">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Henein</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Mahony</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Ila</surname>
<given-names>V.</given-names>
</name>
</person-group> (<year>2020</year>). <source>Dynamic slam: the need for speed</source>, <fpage>2123</fpage>&#x2013;<lpage>2129</lpage>.</citation>
</ref>
<ref id="B62">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Hess</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Kohler</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Rapp</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Andor</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2016</year>). &#x201c;<article-title>Real-time loop closure in 2d lidar slam</article-title>,&#x201d; in <conf-name>2016 IEEE international conference on robotics and automation (ICRA)</conf-name> (<publisher-name>IEEE</publisher-name>), <fpage>1271</fpage>&#x2013;<lpage>1278</lpage>.</citation>
</ref>
<ref id="B63">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Heyer</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2010</year>). &#x201c;<article-title>Human-robot interaction and future industrial robotics applications</article-title>,&#x201d; in <conf-name>2010 IEEE/RSJ international conference on intelligent robots and systems</conf-name> (<publisher-name>IEEE</publisher-name>), <fpage>4749</fpage>&#x2013;<lpage>4754</lpage>.</citation>
</ref>
<ref id="B64">
<citation citation-type="web">
<collab>hkust.a</collab> (<year>2023</year>). <article-title>VINS-Mono</article-title>. <comment>Available at: <ext-link ext-link-type="uri" xlink:href="https://github.com/HKUST-Aerial-Robotics/VINS-Mono">https://github.com/HKUST-Aerial-Robotics/VINS-Mono</ext-link>.</comment>
</citation>
</ref>
<ref id="B65">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hong</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Bangunharcana</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Park</surname>
<given-names>J.-M.</given-names>
</name>
<name>
<surname>Choi</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Shin</surname>
<given-names>H.-S.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Visual slam-based robotic mapping method for planetary construction</article-title>. <source>Sensors</source> <volume>21</volume>, <fpage>7715</fpage>. <pub-id pub-id-type="doi">10.3390/s21227715</pub-id>
</citation>
</ref>
<ref id="B66">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Hsiao</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Westman</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Kaess</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2017</year>). &#x201c;<article-title>Keyframe-based dense planar slam</article-title>,&#x201d; in <conf-name>2017 IEEE International Conference on Robotics and Automation (ICRA)</conf-name> (<publisher-name>IEEE</publisher-name>), <fpage>5110</fpage>&#x2013;<lpage>5117</lpage>.</citation>
</ref>
<ref id="B67">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Huang</surname>
<given-names>L.</given-names>
</name>
</person-group> (<year>2021</year>). &#x201c;<article-title>Review on lidar-based slam techniques</article-title>,&#x201d; in <conf-name>2021 International Conference on Signal Processing and Machine Learning (CONF-SPML)</conf-name> (<publisher-name>IEEE</publisher-name>), <fpage>163</fpage>&#x2013;<lpage>168</lpage>.</citation>
</ref>
<ref id="B68">
<citation citation-type="web">
<collab>Introlab</collab> (<year>2023</year>). <article-title>RTAB-Map</article-title>. <comment>Available at: <ext-link ext-link-type="uri" xlink:href="http://introlab.github.io/rtabmap/">http://introlab.github.io/rtabmap/</ext-link>.</comment>
</citation>
</ref>
<ref id="B69">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Ji</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Xie</surname>
<given-names>L.</given-names>
</name>
</person-group> (<year>2021</year>). &#x201c;<article-title>Towards real-time semantic rgb-d slam in dynamic environments</article-title>,&#x201d; in <conf-name>2021 IEEE International Conference on Robotics and Automation (ICRA)</conf-name> (<publisher-name>IEEE</publisher-name>), <fpage>11175</fpage>&#x2013;<lpage>11181</lpage>.</citation>
</ref>
<ref id="B70">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Joo</surname>
<given-names>S.-H.</given-names>
</name>
<name>
<surname>Manzoor</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Rocha</surname>
<given-names>Y. G.</given-names>
</name>
<name>
<surname>Bae</surname>
<given-names>S.-H.</given-names>
</name>
<name>
<surname>Lee</surname>
<given-names>K.-H.</given-names>
</name>
<name>
<surname>Kuc</surname>
<given-names>T.-Y.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Autonomous navigation framework for intelligent robots based on a semantic environment modeling</article-title>. <source>Appl. Sci.</source> <volume>10</volume>, <fpage>3219</fpage>. <pub-id pub-id-type="doi">10.3390/app10093219</pub-id>
</citation>
</ref>
<ref id="B71">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kasyanov</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Engelmann</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>St&#xfc;ckler</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Leibe</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Keyframe-based visual-inertial online slam with relocalization</article-title>, <fpage>6662</fpage>&#x2013;<lpage>6669</lpage>.</citation>
</ref>
<ref id="B72">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kazerouni</surname>
<given-names>I. A.</given-names>
</name>
<name>
<surname>Fitzgerald</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Dooly</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Toal</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>A survey of state-of-the-art on visual slam</article-title>. <source>Expert Syst. Appl.</source> <volume>205</volume>, <fpage>117734</fpage>. <pub-id pub-id-type="doi">10.1016/j.eswa.2022.117734</pub-id>
</citation>
</ref>
<ref id="B73">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Kerl</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Sturm</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Cremers</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2013</year>). &#x201c;<article-title>Dense visual slam for rgb-d cameras</article-title>,&#x201d; in <conf-name>2013 IEEE/RSJ International Conference on Intelligent Robots and Systems</conf-name> (<publisher-name>IEEE</publisher-name>), <fpage>2100</fpage>&#x2013;<lpage>2106</lpage>.</citation>
</ref>
<ref id="B74">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Khoyani</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Amini</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>A survey on visual slam algorithms compatible for 3d space reconstruction and navigation</article-title>, <fpage>01</fpage>&#x2013;<lpage>06</lpage>.</citation>
</ref>
<ref id="B75">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Klein</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Murray</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2007</year>). &#x201c;<article-title>Parallel tracking and mapping for small ar workspaces</article-title>,&#x201d; in <conf-name>2007 6th IEEE and ACM international symposium on mixed and augmented reality</conf-name> (<publisher-name>IEEE</publisher-name>), <fpage>225</fpage>&#x2013;<lpage>234</lpage>.</citation>
</ref>
<ref id="B76">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kuang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Wei</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Yan</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Lu</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Peng</surname>
<given-names>Y.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>A real-time and robust monocular visual inertial slam system based on point and line features for mobile robots of smart cities toward 6g</article-title>. <source>IEEE Open J. Commun. Soc.</source> <volume>3</volume>, <fpage>1950</fpage>&#x2013;<lpage>1962</lpage>. <pub-id pub-id-type="doi">10.1109/ojcoms.2022.3217147</pub-id>
</citation>
</ref>
<ref id="B77">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kucner</surname>
<given-names>T. P.</given-names>
</name>
<name>
<surname>Magnusson</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Mghames</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Palmieri</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Verdoja</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Swaminathan</surname>
<given-names>C. S.</given-names>
</name>
<etal/>
</person-group> (<year>2023</year>). <article-title>Survey of maps of dynamics for mobile robots</article-title>. <source>Int. J. Robotics Res.</source>, <fpage>02783649231190428</fpage>.</citation>
</ref>
<ref id="B78">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Labb&#xe9;</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Michaud</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Rtab-map as an open-source lidar and visual simultaneous localization and mapping library for large-scale and long-term online operation</article-title>. <source>J. Field Robotics</source> <volume>36</volume>, <fpage>416</fpage>&#x2013;<lpage>446</lpage>. <pub-id pub-id-type="doi">10.1002/rob.21831</pub-id>
</citation>
</ref>
<ref id="B79">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Laidlow</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Czarnowski</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Leutenegger</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Deepfusion: real-time dense 3d reconstruction for monocular slam using single-view depth and gradient predictions</article-title>, <fpage>4068</fpage>&#x2013;<lpage>4074</lpage>.</citation>
</ref>
<ref id="B80">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lee</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Moon</surname>
<given-names>B.-C.</given-names>
</name>
<name>
<surname>Lee</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Han</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Fusion of the slam with wi-fi-based positioning methods for mobile robot-based learning data collection, localization, and tracking in indoor spaces</article-title>. <source>Sensors</source> <volume>20</volume>, <fpage>5182</fpage>. <pub-id pub-id-type="doi">10.3390/s20185182</pub-id>
</citation>
</ref>
<ref id="B81">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Leutenegger</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Lynen</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Bosse</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Siegwart</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Furgale</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Keyframe-based visual&#x2013;inertial odometry using nonlinear optimization</article-title>. <source>Int. J. Robotics Res.</source> <volume>34</volume>, <fpage>314</fpage>&#x2013;<lpage>334</lpage>. <pub-id pub-id-type="doi">10.1177/0278364914554813</pub-id>
</citation>
</ref>
<ref id="B82">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Leutenegger</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2022</year>). <source>Okvis2: realtime scalable visual-inertial slam with loop closure</source>. <comment>
<italic>arXiv preprint arXiv:2202.09199</italic>
</comment>.</citation>
</ref>
<ref id="B83">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Shi</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Long</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>F.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). &#x201c;<article-title>Dxslam: a robust and efficient visual slam system with deep features</article-title>,&#x201d; in <conf-name>2020 IEEE/RSJ International conference on intelligent robots and systems (IROS)</conf-name> (<publisher-name>IEEE</publisher-name>), <fpage>4958</fpage>&#x2013;<lpage>4965</lpage>.</citation>
</ref>
<ref id="B84">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Hou</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Fei</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2023a</year>). <article-title>Robust stereo inertial odometry based on self-supervised feature points</article-title>. <source>Appl. Intell.</source> <volume>53</volume>, <fpage>7093</fpage>&#x2013;<lpage>7107</lpage>. <pub-id pub-id-type="doi">10.1007/s10489-022-03278-w</pub-id>
</citation>
</ref>
<ref id="B85">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Qin</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Shen</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2018</year>). &#x201c;<article-title>Stereo vision-based semantic 3d object and ego-motion tracking for autonomous driving</article-title>,&#x201d; in <conf-name>Proceedings of the European Conference on Computer Vision (ECCV)</conf-name>, <fpage>646</fpage>&#x2013;<lpage>661</lpage>.</citation>
</ref>
<ref id="B86">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2022a</year>). <article-title>Point-line feature fusion based field real-time rgb-d slam</article-title>. <source>Comput. Graph.</source> <volume>107</volume>, <fpage>10</fpage>&#x2013;<lpage>19</lpage>. <pub-id pub-id-type="doi">10.1016/j.cag.2022.06.013</pub-id>
</citation>
</ref>
<ref id="B87">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Xian</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Zhong</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2022b</year>). <article-title>Overview of deep learning application on visual slam</article-title>. <source>Displays</source>, <fpage>102298</fpage>.</citation>
</ref>
<ref id="B88">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Zheng</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>X. V.</given-names>
</name>
<name>
<surname>Zheng</surname>
<given-names>L.</given-names>
</name>
<etal/>
</person-group> (<year>2023b</year>). <article-title>Proactive human&#x2013;robot collaboration: mutual-cognitive, predictable, and self-organising perspectives</article-title>. <source>Robotics Computer-Integrated Manuf.</source> <volume>81</volume>, <fpage>102510</fpage>. <pub-id pub-id-type="doi">10.1016/j.rcim.2022.102510</pub-id>
</citation>
</ref>
<ref id="B89">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Guo</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Tombari</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2023c</year>). <source>Open-structure: a structural benchmark dataset for slam algorithms</source>. <comment>
<italic>arXiv preprint arXiv:2310.10931</italic>
</comment>.</citation>
</ref>
<ref id="B90">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lin</surname>
<given-names>H.-Y.</given-names>
</name>
<name>
<surname>Yeh</surname>
<given-names>M.-C.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Drift-free visual slam for mobile robot localization by integrating uwb technology</article-title>. <source>IEEE Access</source> <volume>10</volume>, <fpage>93636</fpage>&#x2013;<lpage>93645</lpage>. <pub-id pub-id-type="doi">10.1109/access.2022.3203438</pub-id>
</citation>
</ref>
<ref id="B91">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Miura</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Rds-slam: real-time dynamic slam using semantic segmentation methods</article-title>. <source>IEEE Access</source> <volume>9</volume>, <fpage>23772</fpage>&#x2013;<lpage>23785</lpage>. <pub-id pub-id-type="doi">10.1109/access.2021.3050617</pub-id>
</citation>
</ref>
<ref id="B92">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lopez</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Sanchez-Vilarino</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Cacho</surname>
<given-names>M. D.</given-names>
</name>
<name>
<surname>Guill&#xe9;n</surname>
<given-names>E. L.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Obstacle avoidance in dynamic environments based on velocity space optimization</article-title>. <source>Robotics Aut. Syst.</source> <volume>131</volume>, <fpage>103569</fpage>. <pub-id pub-id-type="doi">10.1016/j.robot.2020.103569</pub-id>
</citation>
</ref>
<ref id="B93">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Luo</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Pape</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Reithmeier</surname>
<given-names>E.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Robust rgbd visual odometry using windowed direct bundle adjustment and slanted support plane</article-title>. <source>IEEE Robotics Automation Lett.</source> <volume>7</volume>, <fpage>350</fpage>&#x2013;<lpage>357</lpage>. <pub-id pub-id-type="doi">10.1109/lra.2021.3126347</pub-id>
</citation>
</ref>
<ref id="B94">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lynch</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Wahid</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Tompson</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Ding</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Betker</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Baruch</surname>
<given-names>R.</given-names>
</name>
<etal/>
</person-group> (<year>2023</year>). <article-title>Interactive language: talking to robots in real time</article-title>. <source>IEEE Robotics Automation Lett.</source>, <fpage>1</fpage>&#x2013;<lpage>8</lpage>. <pub-id pub-id-type="doi">10.1109/lra.2023.3295255</pub-id>
</citation>
</ref>
<ref id="B95">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Macario Barros</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Michel</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Moline</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Corre</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Carrel</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>A comprehensive survey of visual slam algorithms</article-title>. <source>Robotics</source> <volume>11</volume>, <fpage>24</fpage>. <pub-id pub-id-type="doi">10.3390/robotics11010024</pub-id>
</citation>
</ref>
<ref id="B96">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Mane</surname>
<given-names>A. A.</given-names>
</name>
<name>
<surname>Parihar</surname>
<given-names>M. N.</given-names>
</name>
<name>
<surname>Jadhav</surname>
<given-names>S. P.</given-names>
</name>
<name>
<surname>Gadre</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2016</year>). &#x201c;<article-title>Data acquisition analysis in slam applications</article-title>,&#x201d; in <conf-name>2016 International Conference on Automatic Control and Dynamic Optimization Techniques (ICACDOT)</conf-name> (<publisher-name>IEEE</publisher-name>), <fpage>339</fpage>&#x2013;<lpage>343</lpage>.</citation>
</ref>
<ref id="B97">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mart&#xed;nez-Otzeta</surname>
<given-names>J. M.</given-names>
</name>
<name>
<surname>Rodr&#xed;guez-Moreno</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Mendialdua</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Sierra</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Ransac for robotic applications: a survey</article-title>. <source>Sensors</source> <volume>23</volume>, <fpage>327</fpage>. <pub-id pub-id-type="doi">10.3390/s23010327</pub-id>
</citation>
</ref>
<ref id="B98">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mazumdar</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Chakraborty</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Sathvik</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Jayakumar</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Kaushik</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Optimizing pix2pix gan with attention mechanisms for ai-driven polyp segmentation in iomt-enabled smart healthcare</article-title>. <source>IEEE J. Biomed. Health Inf.</source>, <fpage>1</fpage>&#x2013;<lpage>8</lpage>. <pub-id pub-id-type="doi">10.1109/jbhi.2023.3328962</pub-id>
</citation>
</ref>
<ref id="B99">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Meng</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Gao</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Hu</surname>
<given-names>Z.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Dense rgb-d slam with multiple cameras</article-title>. <source>Sensors</source> <volume>18</volume>, <fpage>2118</fpage>. <pub-id pub-id-type="doi">10.3390/s18072118</pub-id>
</citation>
</ref>
<ref id="B100">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Meng</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2022</year>). &#x201c;<article-title>Prob-slam: real-time visual slam based on probabilistic graph optimization</article-title>,&#x201d; in <conf-name>Proceedings of the 8th International Conference on Robotics and Artificial Intelligence</conf-name>, <fpage>39</fpage>&#x2013;<lpage>45</lpage>. <pub-id pub-id-type="doi">10.1145/3573910.3573920</pub-id>
</citation>
</ref>
<ref id="B101">
<citation citation-type="web">
<collab>MIT-SPARK</collab> (<year>2023</year>). <article-title>Kimera: an open-source library for real-time metric-semantic localization and mapping</article-title>. <comment>Available at: <ext-link ext-link-type="uri" xlink:href="https://github.com/MIT-SPARK/Kimera">https://github.com/MIT-SPARK/Kimera</ext-link>.</comment>
</citation>
</ref>
<ref id="B102">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Mohamed</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Al-Jaroodi</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Jawhar</surname>
<given-names>I.</given-names>
</name>
</person-group> (<year>2008</year>). &#x201c;<article-title>Middleware for robotics: a survey</article-title>,&#x201d; in <conf-name>2008 IEEE Conference on Robotics, Automation and Mechatronics</conf-name> (<publisher-name>IEEE</publisher-name>), <fpage>736</fpage>&#x2013;<lpage>742</lpage>.</citation>
</ref>
<ref id="B103">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Mur-Artal</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Tard&#xf3;s</surname>
<given-names>J. D.</given-names>
</name>
</person-group> (<year>2014</year>). &#x201c;<article-title>Orb-slam: tracking and mapping recognizable features</article-title>,&#x201d; in <conf-name>Proceedings of the Workshop on Multi View Geometry in Robotics (MVIGRO)-RSS</conf-name>.</citation>
</ref>
<ref id="B104">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mur-Artal</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Montiel</surname>
<given-names>J. M. M.</given-names>
</name>
<name>
<surname>Tard&#xf3;s</surname>
<given-names>J. D.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Orb-slam: a versatile and accurate monocular slam system</article-title>. <source>IEEE Trans. Robotics</source> <volume>31</volume>, <fpage>1147</fpage>&#x2013;<lpage>1163</lpage>. <pub-id pub-id-type="doi">10.1109/tro.2015.2463671</pub-id>
</citation>
</ref>
<ref id="B105">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mur-Artal</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Tard&#xf3;s</surname>
<given-names>J. D.</given-names>
</name>
</person-group> (<year>2017a</year>). <article-title>Orb-slam2: an open-source slam system for monocular, stereo, and rgb-d cameras</article-title>. <source>IEEE Trans. Robotics</source> <volume>33</volume>, <fpage>1255</fpage>&#x2013;<lpage>1262</lpage>. <pub-id pub-id-type="doi">10.1109/tro.2017.2705103</pub-id>
</citation>
</ref>
<ref id="B106">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mur-Artal</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Tard&#xf3;s</surname>
<given-names>J. D.</given-names>
</name>
</person-group> (<year>2017b</year>). <article-title>Visual-inertial monocular slam with map reuse</article-title>. <source>IEEE Robotics Automation Lett.</source> <volume>2</volume>, <fpage>796</fpage>&#x2013;<lpage>803</lpage>. <pub-id pub-id-type="doi">10.1109/lra.2017.2653359</pub-id>
</citation>
</ref>
<ref id="B107">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Nakamura</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Kobayashi</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Motoi</surname>
<given-names>N.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Path planning for mobile robot considering turnabouts on narrow road by deep q-network</article-title>. <source>IEEE Access</source> <volume>11</volume>, <fpage>19111</fpage>&#x2013;<lpage>19121</lpage>. <pub-id pub-id-type="doi">10.1109/access.2023.3247730</pub-id>
</citation>
</ref>
<ref id="B108">
<citation citation-type="web">
<collab>Navvis</collab> (<year>2023</year>). <article-title>Map forming</article-title>. <comment>Available at: <ext-link ext-link-type="uri" xlink:href="https://www.navvis.com/technology/slam">https://www.navvis.com/technology/slam</ext-link> (Accessed on November 14, 2023)</comment>.</citation>
</ref>
<ref id="B109">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Newcombe</surname>
<given-names>R. A.</given-names>
</name>
<name>
<surname>Lovegrove</surname>
<given-names>S. J.</given-names>
</name>
<name>
<surname>Davison</surname>
<given-names>A. J.</given-names>
</name>
</person-group> (<year>2011</year>). &#x201c;<article-title>Dtam: dense tracking and mapping in real-time</article-title>,&#x201d; in <conf-name>2011 international conference on computer vision</conf-name> (<publisher-name>IEEE</publisher-name>), <fpage>2320</fpage>&#x2013;<lpage>2327</lpage>.</citation>
</ref>
<ref id="B110">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Newcombe</surname>
<given-names>R. A.</given-names>
</name>
<name>
<surname>Izadi</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Hilliges</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Molyneaux</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Kim</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Davison</surname>
<given-names>A. J.</given-names>
</name>
<etal/>
</person-group> (<year>2011</year>). &#x201c;<article-title>Kinectfusion: real-time dense surface mapping and tracking</article-title>,&#x201d; in <conf-name>2011 10th IEEE international symposium on mixed and augmented reality</conf-name> (<publisher-name>IEEE</publisher-name>), <fpage>127</fpage>&#x2013;<lpage>136</lpage>.</citation>
</ref>
<ref id="B111">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Nguyen</surname>
<given-names>Q. H.</given-names>
</name>
<name>
<surname>Johnson</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Latham</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Performance evaluation of ros-based slam algorithms for handheld indoor mapping and tracking systems</article-title>. <source>IEEE Sensors J.</source> <volume>23</volume>, <fpage>706</fpage>&#x2013;<lpage>714</lpage>. <pub-id pub-id-type="doi">10.1109/jsen.2022.3224224</pub-id>
</citation>
</ref>
<ref id="B112">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Nguyen</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Mann</surname>
<given-names>G. K.</given-names>
</name>
<name>
<surname>Vardy</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Gosine</surname>
<given-names>R. G.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Ckf-based visual inertial odometry for long-term trajectory operations</article-title>. <source>J. Robotics</source> <volume>2020</volume>, <fpage>1</fpage>&#x2013;<lpage>14</lpage>. <pub-id pub-id-type="doi">10.1155/2020/7362952</pub-id>
</citation>
</ref>
<ref id="B113">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Niu</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Yuan</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2019</year>). &#x201c;<article-title>Rgb-d indoor simultaneous location and mapping based on inliers tracking statistics</article-title>,&#x201d; in <conf-name>Journal of Physics: Conference Series</conf-name> (<publisher-name>IOP Publishing</publisher-name>), <volume>1176</volume>, <fpage>062023</fpage>.</citation>
</ref>
<ref id="B114">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ortega-Gomez</surname>
<given-names>J. I.</given-names>
</name>
<name>
<surname>Morales-Hernandez</surname>
<given-names>L. A.</given-names>
</name>
<name>
<surname>Cruz-Albarran</surname>
<given-names>I. A.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>A specialized database for autonomous vehicles based on the kitti vision benchmark</article-title>. <source>Electronics</source> <volume>12</volume>, <fpage>3165</fpage>. <pub-id pub-id-type="doi">10.3390/electronics12143165</pub-id>
</citation>
</ref>
<ref id="B115">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pal</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Gupta</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Das</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Ghosh</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Evolution of simultaneous localization and mapping framework for autonomous robotics&#x2014;a comprehensive review</article-title>. <source>J. Aut. Veh. Syst.</source> <volume>2</volume>, <fpage>020801</fpage>. <pub-id pub-id-type="doi">10.1115/1.4055161</pub-id>
</citation>
</ref>
<ref id="B116">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Palazzolo</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Behley</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Lottes</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Giguere</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Stachniss</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2019</year>). &#x201c;<article-title>Refusion: 3d reconstruction in dynamic environments for rgb-d cameras exploiting residuals</article-title>,&#x201d; in <conf-name>2019 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)</conf-name> (<publisher-name>IEEE</publisher-name>), <fpage>7855</fpage>&#x2013;<lpage>7862</lpage>.</citation>
</ref>
<ref id="B117">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Persson</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Ekstr&#xf6;m</surname>
<given-names>M. C.</given-names>
</name>
<name>
<surname>Ekstr&#xf6;m</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Papadopoulos</surname>
<given-names>A. V.</given-names>
</name>
</person-group> (<year>2023</year>). &#x201c;<article-title>On the initialization problem for timed-elastic bands</article-title>,&#x201d; in <conf-name>Proceedings of the 22nd IFAC World Congress (IFAC WC)</conf-name>.</citation>
</ref>
<ref id="B118">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Peter</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Thomas</surname>
<given-names>M. J.</given-names>
</name>
<name>
<surname>Mohan</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Development of an autonomous ground robot using a real-time appearance based (rtab) algorithm for enhanced spatial mapping</article-title>
</citation>
</ref>
<ref id="B119">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Picard</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Chevobbe</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Darouich</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Didier</surname>
<given-names>J.-Y.</given-names>
</name>
</person-group> (<year>2023</year>). <source>A survey on real-time 3d scene reconstruction with slam methods in embedded systems</source>. <comment>
<italic>arXiv preprint arXiv:2309.05349</italic>
</comment>.</citation>
</ref>
<ref id="B120">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Placed</surname>
<given-names>J. A.</given-names>
</name>
<name>
<surname>Strader</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Carrillo</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Atanasov</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Indelman</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Carlone</surname>
<given-names>L.</given-names>
</name>
<etal/>
</person-group> (<year>2023</year>). <article-title>A survey on active simultaneous localization and mapping: state of the art and new frontiers</article-title>. <source>IEEE Trans. Robotics</source> <volume>39</volume>, <fpage>1686</fpage>&#x2013;<lpage>1705</lpage>. <pub-id pub-id-type="doi">10.1109/tro.2023.3248510</pub-id>
</citation>
</ref>
<ref id="B121">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Prati</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Villani</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Grandi</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Peruzzini</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Sabattini</surname>
<given-names>L.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Use of interaction design methodologies for human&#x2013;robot collaboration in industrial scenarios</article-title>. <source>IEEE Trans. Automation Sci. Eng.</source> <volume>19</volume>, <fpage>3126</fpage>&#x2013;<lpage>3138</lpage>. <pub-id pub-id-type="doi">10.1109/tase.2021.3107583</pub-id>
</citation>
</ref>
<ref id="B122">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Qin</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Shen</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Vins-mono: a robust and versatile monocular visual-inertial state estimator</article-title>. <source>IEEE Trans. Robotics</source> <volume>34</volume>, <fpage>1004</fpage>&#x2013;<lpage>1020</lpage>. <pub-id pub-id-type="doi">10.1109/tro.2018.2853729</pub-id>
</citation>
</ref>
<ref id="B123">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Ragot</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Khemmar</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Pokala</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Rossi</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Ertaud</surname>
<given-names>J.-Y.</given-names>
</name>
</person-group> (<year>2019</year>). &#x201c;<article-title>Benchmark of visual slam algorithms: orb-slam2 vs rtab-map</article-title>,&#x201d; in <conf-name>2019 Eighth International Conference on Emerging Security Technologies (EST)</conf-name> (<publisher-name>IEEE</publisher-name>), <fpage>1</fpage>&#x2013;<lpage>6</lpage>.</citation>
</ref>
<ref id="B124">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Raikwar</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Herlitzius</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>2d lidar slam localization system for a mobile robotic platform in gps denied environment</article-title>. <source>J. Biosyst. Eng.</source> <volume>48</volume>, <fpage>123</fpage>&#x2013;<lpage>135</lpage>. <pub-id pub-id-type="doi">10.1007/s42853-023-00176-y</pub-id>
</citation>
</ref>
<ref id="B125">
<citation citation-type="web">
<collab>raulmur</collab> (<year>2023a</year>). <article-title>ORB-SLAM</article-title>. <comment>Available at: <ext-link ext-link-type="uri" xlink:href="https://github.com/raulmur/ORB_SLAM">https://github.com/raulmur/ORB_SLAM</ext-link>.</comment>
</citation>
</ref>
<ref id="B126">
<citation citation-type="web">
<collab>raulmur</collab> (<year>2023b</year>). <article-title>ORB-SLAM2</article-title>. <comment>Available at: <ext-link ext-link-type="uri" xlink:href="https://github.com/raulmur/ORB_SLAM2">https://github.com/raulmur/ORB_SLAM2</ext-link>.</comment>
</citation>
</ref>
<ref id="B127">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ren</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Cao</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Tan</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Plj-slam: monocular visual slam with points, lines, and junctions of coplanar lines</article-title>. <source>IEEE Sensors J.</source> <volume>22</volume>, <fpage>15465</fpage>&#x2013;<lpage>15476</lpage>. <pub-id pub-id-type="doi">10.1109/jsen.2022.3185122</pub-id>
</citation>
</ref>
<ref id="B128">
<citation citation-type="web">
<collab>Rintarooo</collab> (<year>2023</year>). <article-title>dtam-1</article-title>. <comment>Available at: <ext-link ext-link-type="uri" xlink:href="https://github.com/Rintarooo/dtam-1">https://github.com/Rintarooo/dtam-1</ext-link>.</comment>
</citation>
</ref>
<ref id="B129">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Roch</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Fayyad</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Najjaran</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Dopeslam: high-precision ros-based semantic 3d slam in a dynamic environment</article-title>. <source>Sensors</source> <volume>23</volume>, <fpage>4364</fpage>. <pub-id pub-id-type="doi">10.3390/s23094364</pub-id>
</citation>
</ref>
<ref id="B130">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Rosinol</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Abate</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Chang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Carlone</surname>
<given-names>L.</given-names>
</name>
</person-group> (<year>2020</year>). &#x201c;<article-title>Kimera: an open-source library for real-time metric-semantic localization and mapping</article-title>,&#x201d; in <conf-name>2020 IEEE International Conference on Robotics and Automation (ICRA)</conf-name> (<publisher-name>IEEE</publisher-name>), <fpage>1689</fpage>&#x2013;<lpage>1696</lpage>.</citation>
</ref>
<ref id="B131">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rosinol</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Violette</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Abate</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Hughes</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Chang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Shi</surname>
<given-names>J.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Kimera: from slam to spatial perception with 3d dynamic scene graphs</article-title>. <source>Int. J. Robotics Res.</source> <volume>40</volume>, <fpage>1510</fpage>&#x2013;<lpage>1546</lpage>. <pub-id pub-id-type="doi">10.1177/02783649211056674</pub-id>
</citation>
</ref>
<ref id="B132">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Scaradozzi</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Zingaretti</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Ferrari</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Simultaneous localization and mapping (slam) robotics techniques: a possible application in surgery</article-title>. <source>Shanghai Chest</source> <volume>2</volume>, <fpage>5</fpage>. <pub-id pub-id-type="doi">10.21037/shc.2018.01.01</pub-id>
</citation>
</ref>
<ref id="B133">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Schneider</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Dymczyk</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Fehr</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Egger</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Lynen</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Gilitschenski</surname>
<given-names>I.</given-names>
</name>
<etal/>
</person-group> (<year>2018</year>). <article-title>maplab: an open framework for research in visual-inertial mapping and localization</article-title>. <source>IEEE Robotics Automation Lett.</source> <volume>3</volume>, <fpage>1418</fpage>&#x2013;<lpage>1425</lpage>. <pub-id pub-id-type="doi">10.1109/lra.2018.2800113</pub-id>
</citation>
</ref>
<ref id="B134">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Sch&#xf6;ps</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Engel</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Cremers</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2014</year>). &#x201c;<article-title>Semi-dense visual odometry for ar on a smartphone</article-title>,&#x201d; in <conf-name>2014 IEEE international symposium on mixed and augmented reality (ISMAR)</conf-name> (<publisher-name>IEEE</publisher-name>), <fpage>145</fpage>&#x2013;<lpage>150</lpage>.</citation>
</ref>
<ref id="B135">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Servi&#xe8;res</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Renaudin</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Dupuis</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Antigny</surname>
<given-names>N.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Visual and visual-inertial slam: state of the art, classification, and experimental benchmarking</article-title>. <source>J. Sensors</source> <volume>2021</volume>, <fpage>1</fpage>&#x2013;<lpage>26</lpage>. <pub-id pub-id-type="doi">10.1155/2021/2054828</pub-id>
</citation>
</ref>
<ref id="B136">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sharafutdinov</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Griguletskii</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Kopanev</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Kurenkov</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Ferrer</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Burkov</surname>
<given-names>A.</given-names>
</name>
<etal/>
</person-group> (<year>2023</year>). <article-title>Comparison of modern open-source visual slam approaches</article-title>. <source>J. Intelligent Robotic Syst.</source> <volume>107</volume>, <fpage>43</fpage>. <pub-id pub-id-type="doi">10.1007/s10846-023-01812-7</pub-id>
</citation>
</ref>
<ref id="B137">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Sheng</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Ouyang</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>X.</given-names>
</name>
</person-group> (<year>2019</year>). &#x201c;<article-title>Unsupervised collaborative learning of keyframe detection and visual odometry towards monocular deep slam</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE/CVF International Conference on Computer Vision</conf-name>, <fpage>4302</fpage>&#x2013;<lpage>4311</lpage>.</citation>
</ref>
<ref id="B138">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sheridan</surname>
<given-names>T. B.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Human&#x2013;robot interaction: status and challenges</article-title>. <source>Hum. Factors</source> <volume>58</volume>, <fpage>525</fpage>&#x2013;<lpage>532</lpage>. <pub-id pub-id-type="doi">10.1177/0018720816644364</pub-id>
</citation>
</ref>
<ref id="B139">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Soares</surname>
<given-names>J. C. V.</given-names>
</name>
<name>
<surname>Gattass</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Meggiolaro</surname>
<given-names>M. A.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Crowd-slam: visual slam towards crowded environments using object detection</article-title>. <source>J. Intelligent Robotic Syst.</source> <volume>102</volume>, <fpage>50</fpage>. <pub-id pub-id-type="doi">10.1007/s10846-021-01414-1</pub-id>
</citation>
</ref>
<ref id="B140">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Soliman</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Bonardi</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Sidib&#xe9;</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Bouchafa</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Dh-ptam: a deep hybrid stereo events-frames parallel tracking and mapping system</article-title>. <comment>
<italic>arXiv preprint arXiv:2306.01891</italic>
</comment>
</citation>
</ref>
<ref id="B141">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Song</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Zhong</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Hou</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>L.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Sce-slam: a real-time semantic rgbd slam system in dynamic scenes based on spatial coordinate error</article-title>. <source>Meas. Sci. Technol.</source> <volume>34</volume>, <fpage>125006</fpage>. <pub-id pub-id-type="doi">10.1088/1361-6501/aceb7e</pub-id>
</citation>
</ref>
<ref id="B142">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Song</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Qiu</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Monocular visual-inertial odometry for agricultural environments</article-title>. <source>IEEE Access</source> <volume>10</volume>, <fpage>103975</fpage>&#x2013;<lpage>103986</lpage>. <pub-id pub-id-type="doi">10.1109/access.2022.3209186</pub-id>
</citation>
</ref>
<ref id="B143">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Song</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>A right invariant extended kalman filter for object based slam</article-title>. <source>IEEE Robotics Automation Lett.</source> <volume>7</volume>, <fpage>1316</fpage>&#x2013;<lpage>1323</lpage>. <pub-id pub-id-type="doi">10.1109/lra.2021.3139370</pub-id>
</citation>
</ref>
<ref id="B144">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sousa</surname>
<given-names>R. B.</given-names>
</name>
<name>
<surname>Sobreira</surname>
<given-names>H. M.</given-names>
</name>
<name>
<surname>Moreira</surname>
<given-names>A. P.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>A systematic literature review on long-term localization and mapping for mobile robots</article-title>. <source>J. Field Robotics</source> <volume>40</volume>, <fpage>1245</fpage>&#x2013;<lpage>1322</lpage>. <pub-id pub-id-type="doi">10.1002/rob.22170</pub-id>
</citation>
</ref>
<ref id="B145">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Steinbr&#xfc;cker</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Sturm</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Cremers</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2011</year>). &#x201c;<article-title>Real-time visual odometry from dense rgb-d images</article-title>,&#x201d; in <conf-name>2011 IEEE international conference on computer vision workshops (ICCV Workshops)</conf-name> (<publisher-name>IEEE</publisher-name>), <fpage>719</fpage>&#x2013;<lpage>722</lpage>.</citation>
</ref>
<ref id="B146">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Strazdas</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Hintz</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Fel&#xdf;berg</surname>
<given-names>A.-M.</given-names>
</name>
<name>
<surname>Al-Hamadi</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Robots and wizards: an investigation into natural human&#x2013;robot interaction</article-title>. <source>IEEE Access</source> <volume>8</volume>, <fpage>207635</fpage>&#x2013;<lpage>207642</lpage>. <pub-id pub-id-type="doi">10.1109/access.2020.3037724</pub-id>
</citation>
</ref>
<ref id="B147">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Sumikura</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Shibuya</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Sakurada</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2019</year>). &#x201c;<article-title>Openvslam: a versatile visual slam framework</article-title>,&#x201d; in <conf-name>Proceedings of the 27th ACM International Conference on Multimedia</conf-name>, <fpage>2292</fpage>&#x2013;<lpage>2295</lpage>.</citation>
</ref>
<ref id="B148">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sun</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Meng</surname>
<given-names>M. Q.-H.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Improving rgb-d slam in dynamic environments: a motion removal approach</article-title>. <source>Robotics Aut. Syst.</source> <volume>89</volume>, <fpage>110</fpage>&#x2013;<lpage>122</lpage>. <pub-id pub-id-type="doi">10.1016/j.robot.2016.11.012</pub-id>
</citation>
</ref>
<ref id="B149">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Taheri</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Xia</surname>
<given-names>Z. C.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Slam; definition and evolution</article-title>. <source>Eng. Appl. Artif. Intell.</source> <volume>97</volume>, <fpage>104032</fpage>. <pub-id pub-id-type="doi">10.1016/j.engappai.2020.104032</pub-id>
</citation>
</ref>
<ref id="B150">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Taketomi</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Uchiyama</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Ikeda</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Visual slam algorithms: a survey from 2010 to 2016</article-title>. <source>IPSJ Trans. Comput. Vis. Appl.</source> <volume>9</volume>, <fpage>16</fpage>. <pub-id pub-id-type="doi">10.1186/s41074-017-0027-2</pub-id>
</citation>
</ref>
<ref id="B151">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Theodorou</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Velisavljevic</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Dyo</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Nonyelu</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Visual slam algorithms and their application for ar, mapping, localization and wayfinding</article-title>. <source>Array</source> <volume>15</volume>, <fpage>100222</fpage>. <pub-id pub-id-type="doi">10.1016/j.array.2022.100222</pub-id>
</citation>
</ref>
<ref id="B152">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Tian</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Chang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Quang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Schang</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Nieto-Granda</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>How</surname>
<given-names>J. P.</given-names>
</name>
<etal/>
</person-group> (<year>2023a</year>). <source>Resilient and distributed multi-robot visual slam: datasets, experiments, and lessons learned</source>. <comment>
<italic>arXiv preprint arXiv:2304.04362</italic>
</comment>.</citation>
</ref>
<ref id="B153">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Tian</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Chang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Quang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Schang</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Nieto-Granda</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>How</surname>
<given-names>J. P.</given-names>
</name>
<etal/>
</person-group> (<year>2023b</year>). <source>Resilient and distributed multi-robot visual slam: datasets, experiments, and lessons learned</source>. <comment>
<italic>arXiv preprint arXiv:2304.04362</italic>
</comment>.</citation>
</ref>
<ref id="B154">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tourani</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Bavle</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Sanchez-Lopez</surname>
<given-names>J. L.</given-names>
</name>
<name>
<surname>Voos</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Visual slam: what are the current trends and what to expect?</article-title> <source>Sensors</source> <volume>22</volume>, <fpage>9297</fpage>. <pub-id pub-id-type="doi">10.3390/s22239297</pub-id>
</citation>
</ref>
<ref id="B155">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tsintotas</surname>
<given-names>K. A.</given-names>
</name>
<name>
<surname>Bampis</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Gasteratos</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>The revisiting problem in simultaneous localization and mapping: a survey on visual loop closure detection</article-title>. <source>IEEE Trans. Intelligent Transp. Syst.</source> <volume>23</volume>, <fpage>19929</fpage>&#x2013;<lpage>19953</lpage>. <pub-id pub-id-type="doi">10.1109/tits.2022.3175656</pub-id>
</citation>
</ref>
<ref id="B156">
<citation citation-type="web">
<collab>tum.v</collab> (<year>2023</year>). <article-title>DVO-SLAM: direct visual odometry for monocular cameras</article-title>. <comment>Available at: <ext-link ext-link-type="uri" xlink:href="https://github.com/tum-vision/dvo_slam">https://github.com/tum-vision/dvo_slam</ext-link>.</comment>
</citation>
</ref>
<ref id="B157">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ullah</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Su</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Choi</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Simultaneous localization and mapping based on kalman filter and extended kalman filter</article-title>. <source>Wirel. Commun. Mob. Comput.</source> <volume>2020</volume>, <fpage>1</fpage>&#x2013;<lpage>12</lpage>. <pub-id pub-id-type="doi">10.1155/2020/2138643</pub-id>
</citation>
</ref>
<ref id="B158">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Van Nam</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Gon-Woo</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2021</year>). &#x201c;<article-title>Solid-state lidar based-slam: a concise review and application</article-title>,&#x201d; in <conf-name>2021 IEEE International Conference on Big Data and Smart Computing (BigComp)</conf-name> (<publisher-name>IEEE</publisher-name>), <fpage>302</fpage>&#x2013;<lpage>305</lpage>.</citation>
</ref>
<ref id="B159">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Ko</surname>
<given-names>J. Y.</given-names>
</name>
<name>
<surname>Xie</surname>
<given-names>L.</given-names>
</name>
</person-group> (<year>2022</year>). <source>Multi-modal semantic slam for complex dynamic environments</source>. <comment>
<italic>arXiv preprint arXiv:2205.04300</italic>
</comment>.</citation>
</ref>
<ref id="B160">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Pang</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Song</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Yuan</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Robust visual-inertial odometry based on a kalman filter and factor graph</article-title>. <source>IEEE Trans. Intelligent Transp. Syst.</source> <volume>24</volume>, <fpage>7048</fpage>&#x2013;<lpage>7060</lpage>. <pub-id pub-id-type="doi">10.1109/tits.2023.3258526</pub-id>
</citation>
</ref>
<ref id="B161">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wu</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Guo</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Gao</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>You</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Z.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Yolo-slam: a semantic slam system towards dynamic environment with geometric constraint</article-title>. <source>Neural Comput. Appl.</source> <volume>34</volume>, <fpage>6011</fpage>&#x2013;<lpage>6026</lpage>. <pub-id pub-id-type="doi">10.1007/s00521-021-06764-3</pub-id>
</citation>
</ref>
<ref id="B162">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xiao</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Qiu</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Rong</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Zou</surname>
<given-names>X.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Dynamic-slam: semantic monocular visual localization and mapping based on deep learning in dynamic environment</article-title>. <source>Robotics Aut. Syst.</source> <volume>117</volume>, <fpage>1</fpage>&#x2013;<lpage>16</lpage>. <pub-id pub-id-type="doi">10.1016/j.robot.2019.03.012</pub-id>
</citation>
</ref>
<ref id="B163">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xu</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>Z.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Robust visual-inertial navigation system for low precision sensors under indoor and outdoor environments</article-title>. <source>Remote Sens.</source> <volume>13</volume>, <fpage>772</fpage>. <pub-id pub-id-type="doi">10.3390/rs13040772</pub-id>
</citation>
</ref>
<ref id="B164">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yan</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Hu</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Wei</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Xie</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Dgs-slam: a fast and robust rgbd slam in dynamic environments combined by geometric and semantic information</article-title>. <source>Remote Sens.</source> <volume>14</volume>, <fpage>795</fpage>. <pub-id pub-id-type="doi">10.3390/rs14030795</pub-id>
</citation>
</ref>
<ref id="B165">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Yang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Zhai</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Ming</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2022</year>). &#x201c;<article-title>Vox-fusion: dense tracking and mapping with voxel-based neural implicit representation</article-title>,&#x201d; in <conf-name>2022 IEEE International Symposium on Mixed and Augmented Reality (ISMAR)</conf-name> (<publisher-name>IEEE</publisher-name>), <fpage>499</fpage>&#x2013;<lpage>507</lpage>.</citation>
</ref>
<ref id="B166">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yousif</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Bab-Hadiashar</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Hoseinnezhad</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>An overview to visual odometry and visual slam: applications to mobile robotics</article-title>. <source>Intell. Ind. Syst.</source> <volume>1</volume>, <fpage>289</fpage>&#x2013;<lpage>311</lpage>. <pub-id pub-id-type="doi">10.1007/s40903-015-0032-7</pub-id>
</citation>
</ref>
<ref id="B167">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zang</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>L.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>An adaptive orb-slam3 system for outdoor dynamic environments</article-title>. <source>Sensors</source> <volume>23</volume>, <fpage>1359</fpage>. <pub-id pub-id-type="doi">10.3390/s23031359</pub-id>
</citation>
</ref>
<ref id="B168">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Zhu</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Zheng</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2021a</year>). <article-title>Rosefusion: random optimization for online dense reconstruction under fast camera motion</article-title>. <source>ACM Trans. Graph. (TOG)</source> <volume>40</volume>, <fpage>1</fpage>&#x2013;<lpage>17</lpage>. <pub-id pub-id-type="doi">10.1145/3476576.3476604</pub-id>
</citation>
</ref>
<ref id="B169">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Semantic slam for mobile robots in dynamic environments based on visual camera sensors</article-title>. <source>Meas. Sci. Technol.</source> <volume>34</volume>, <fpage>085202</fpage>. <pub-id pub-id-type="doi">10.1088/1361-6501/acd1a4</pub-id>
</citation>
</ref>
<ref id="B170">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Zheng</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Tao</surname>
<given-names>W.</given-names>
</name>
</person-group> (<year>2021b</year>). <article-title>Survey and evaluation of rgb-d slam</article-title>. <source>IEEE Access</source> <volume>9</volume>, <fpage>21367</fpage>&#x2013;<lpage>21387</lpage>. <pub-id pub-id-type="doi">10.1109/access.2021.3053188</pub-id>
</citation>
</ref>
<ref id="B171">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Dong</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Guo</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Haala</surname>
<given-names>N.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Bamf-slam: bundle adjusted multi-fisheye visual-inertial slam using recurrent field transforms</article-title>. <comment>
<italic>arXiv preprint arXiv:2306.01173</italic>
</comment>.
</citation>
</ref>
<ref id="B172">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Zheng</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Q.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>A visual simultaneous localization and mapping approach based on scene segmentation and incremental optimization</article-title>. <source>Int. J. Adv. Robotic Syst.</source> <volume>17</volume>, <fpage>1729881420977669</fpage>. <pub-id pub-id-type="doi">10.1177/1729881420977669</pub-id>
</citation>
</ref>
<ref id="B173">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Su</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhu</surname>
<given-names>X.</given-names>
</name>
</person-group> (<year>2017</year>). &#x201c;<article-title>Loop closure detection for visual slam systems using convolutional neural network</article-title>,&#x201d; in <conf-name>2017 23rd International Conference on Automation and Computing (ICAC)</conf-name> (<publisher-name>IEEE</publisher-name>), <fpage>1</fpage>&#x2013;<lpage>6</lpage>.</citation>
</ref>
<ref id="B174">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zheng</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Xia</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Nassehi</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>A visual reasoning-based approach for mutual-cognitive human-robot collaboration</article-title>. <source>CIRP Ann.</source> <volume>71</volume>, <fpage>377</fpage>&#x2013;<lpage>380</lpage>. <pub-id pub-id-type="doi">10.1016/j.cirp.2022.04.016</pub-id>
</citation>
</ref>
<ref id="B175">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zheng</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Rizos</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Ding</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>El-Mowafy</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Simultaneous localization and mapping (slam) for autonomous driving: concept and analysis</article-title>. <source>Remote Sens.</source> <volume>15</volume>, <fpage>1156</fpage>. <pub-id pub-id-type="doi">10.3390/rs15041156</pub-id>
</citation>
</ref>
<ref id="B176">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Zhou</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Koppel</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Ju</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Steinbruecker</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Kaess</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2020</year>). &#x201c;<article-title>An efficient planar bundle adjustment algorithm</article-title>,&#x201d; in <conf-name>2020 IEEE International Symposium on Mixed and Augmented Reality (ISMAR)</conf-name> (<publisher-name>IEEE</publisher-name>), <fpage>136</fpage>&#x2013;<lpage>145</lpage>.</citation>
</ref>
<ref id="B177">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Zhu</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Peng</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Larsson</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Bao</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Cui</surname>
<given-names>Z.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). &#x201c;<article-title>Nice-slam: neural implicit scalable encoding for slam</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition</conf-name>, <fpage>12786</fpage>&#x2013;<lpage>12796</lpage>.</citation>
</ref>
</ref-list>
</back>
</article>