<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="1.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Virtual Real.</journal-id>
<journal-title-group>
<journal-title>Frontiers in Virtual Reality</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Virtual Real.</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">2673-4192</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1733259</article-id>
<article-id pub-id-type="doi">10.3389/frvir.2026.1733259</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Research</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>AIMERS: an AI-based MR scene design system with human-centric perception optimization</article-title>
<alt-title alt-title-type="left-running-head">Wei et al.</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/frvir.2026.1733259">10.3389/frvir.2026.1733259</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Wei</surname>
<given-names>Xiaokang</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/3237124"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal Analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing - original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Wang</surname>
<given-names>Yuqian</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x26; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/Writing - review &#x26; editing/">Writing - review and editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Zhu</surname>
<given-names>Ao</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/3268702"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x26; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/Writing - review &#x26; editing/">Writing - review and editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Luximon</surname>
<given-names>Yan</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1352805"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal Analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x26; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/Writing - review &#x26; editing/">Writing - review and editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
</contrib>
</contrib-group>
<aff id="aff1">
<label>1</label>
<institution>School of Design, The Hong Kong Polytechnic University</institution>, <city>Hong Kong</city>, <country country="CN">China</country>
</aff>
<aff id="aff2">
<label>2</label>
<institution>Laboratory for Artificial Intelligence in Design (AiDLab)</institution>, <city>Hong Kong</city>, <country country="CN">China</country>
</aff>
<author-notes>
<corresp id="c001">
<label>&#x2a;</label>Correspondence: Yan Luximon, <email xlink:href="mailto:yan.luximon@polyu.edu.hk">yan.luximon@polyu.edu.hk</email>
</corresp>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-02-19">
<day>19</day>
<month>02</month>
<year>2026</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2026</year>
</pub-date>
<volume>7</volume>
<elocation-id>1733259</elocation-id>
<history>
<date date-type="received">
<day>27</day>
<month>10</month>
<year>2025</year>
</date>
<date date-type="rev-recd">
<day>29</day>
<month>12</month>
<year>2025</year>
</date>
<date date-type="accepted">
<day>15</day>
<month>01</month>
<year>2026</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2026 Wei, Wang, Zhu and Luximon.</copyright-statement>
<copyright-year>2026</copyright-year>
<copyright-holder>Wei, Wang, Zhu and Luximon</copyright-holder>
<license>
<ali:license_ref start_date="2026-02-19">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<p>Visual realism is fundamental to convincing Mixed Reality (MR) experiences. However, current design workflows implicitly assume that physically-based rendering parameters naturally lead to perceptually realistic results. We begin from the opposite hypothesis: physically accurate parameters and users&#x2019; perceived realism are often misaligned, leading to inconsistent visual fusion and significant design overhead. To address this problem, we present AIMERS, an AI- and perception-guided MR scene design framework. First, AI-based neural inverse rendering is used to automatically estimate lighting-independent material, geometry, and illumination properties from multi-view RGB inputs, removing the need for manual material calibration. We then introduce an interactive MR perceptual interface that allows users to adjust key realism parameters during immersive viewing, enabling us to capture perception-aligned preferences across scenes. By jointly analyzing physically derived parameters and perceptual data, we derive optimal parameter intervals that best match perceived realism across different scene categories. Controlled user studies reveal a consistent mismatch between physical correctness and human perception, and demonstrate that combining AI estimation with perception-guided adjustment leads to more coherent and convincing MR visual fusion. Overall, this work establishes a perception-aligned paradigm for MR scene design, bridging the gap between physical accuracy and human perception and providing practical guidance for building realistic MR applications.</p>
</abstract>
<kwd-group>
<kwd>artificial intelligence</kwd>
<kwd>inverse rendering</kwd>
<kwd>mixed reality</kwd>
<kwd>perceptual enhancement system</kwd>
<kwd>research methods</kwd>
<kwd>visual fusion</kwd>
</kwd-group>
<funding-group>
<funding-statement>The author(s) declared that financial support was received for this work and/or its publication. This work was supported by the Laboratory for Artificial Intelligence in Design (Project 3.1), the Innovation and Technology Fund of the Hong Kong Special Administrative Region, and Project P0050655 from the Non-PAIR Research Centers of The Hong Kong Polytechnic University. Their financial support is gratefully acknowledged.</funding-statement>
</funding-group>
<counts>
<fig-count count="9"/>
<table-count count="5"/>
<equation-count count="6"/>
<ref-count count="41"/>
<page-count count="15"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Virtual Reality and Human Behaviour</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="s1">
<label>1</label>
<title>Introduction</title>
<p>Mixed Reality (MR) technology, encompassing both Augmented Reality (AR) and Virtual Reality (VR), enables seamless integration of physical and virtual elements &#x2013; from inserting 3D-scanned human avatars into virtual environments to embedding digital objects in real-world settings. This transformative capability has revolutionized industries ranging from education and healthcare to architectural visualization and virtual production <xref ref-type="bibr" rid="B22">Milgram and Kishino (1994)</xref>; <xref ref-type="bibr" rid="B29">Rokhsaritalemi et al. (2020)</xref>. However, for MR content designers, creating these immersive experiences remains a complex process requiring expertise in 3D scanning, material authoring, and visual calibration. In particular, there is still a lack of principled guidance on how to configure rendering parameters so that virtual and real elements appear maximally realistic to human observers.</p>
<p>The primary bottleneck lies in achieving visual fusion &#x2013; the perceptual coherence between real and virtual elements. Current designer workflows face two fundamental limitations: 1) Legacy 3D scanning pipelines preserve original scene lighting artifacts in scanned avatars, forcing designers to manually adjust material properties across different environments <xref ref-type="bibr" rid="B5">Fan et al. (2017)</xref>; 2) Existing rendering engines and development systems prioritize physical accuracy over human perception, leaving designers to empirically test countless parameter combinations (lighting intensity, shadow softness, material roughness, etc.) through trial-and-error <xref ref-type="bibr" rid="B35">Wei and Luximon (2024)</xref>. This disconnect between technical rendering outputs and human visual perception creates a &#x201c;designer&#x2019;s dilemma&#x201d; &#x2013; many MR artists face challenges in understanding which parameter configurations truly maximize perceived realism <xref ref-type="bibr" rid="B29">Rokhsaritalemi et al. (2020)</xref>; <xref ref-type="bibr" rid="B14">Kent et al. (2021)</xref>. Without an explicit model of how physical parameters relate to perceptual judgments, it is difficult to systematically reach an optimally realistic setting <xref ref-type="bibr" rid="B16">Kyrlitsias and Michael-Grigoriou (2022)</xref>.</p>
<p>Three critical technical barriers exacerbate these challenges. First, lighting and material inconsistencies between scanned assets and virtual environments persist due to baked-in illumination from source scans, making it unclear what the underlying, lighting-independent material properties should be. Second, the perceptual gap between physically accurate rendering and human visual preferences forces designers to develop heuristic adjustment strategies, which are often subjective and hard to generalize. Third, existing tools lack integrated solutions for scene-adaptive parameter analysis, making it difficult to compare physical parameters with user preferences and to reason about where perceptual optimality lies across different MR scenarios.</p>
<p>To address these issues and to better understand how optimal visual realism should be parameterized in MR, we present AIMERS, an AI-assisted MR scene design system that focuses on aligning physically grounded parameters with human perception. Our key idea is to first obtain reliable physical realism factors from real-world visual input, and then systematically capture how users adjust these parameters in immersive MR environments, so that we can derive parameter ranges that correspond to high perceived realism. To operationalize this idea, AIMERS consists of three tightly connected components.</p>
<p>AI-Based Neural Inverse Rendering: Leveraging a multi-stage neural network framework with diffusion priors, we automatically reconstruct 3D avatars from multi-view images and recover physically-based material properties (diffuse albedo, roughness, and metallic), while eliminating the influence of environmental lighting. This provides a set of lighting-independent, physically grounded realism factors that can be consistently compared across different MR scenes and environments.</p>
<p>Human-Centric Perceptual Parameter Capture: We design an MR user perception system to capture perception-aligned realism parameters directly within immersive experiences. Specifically, we map over 40 technical parameters into 8 intuitive controls across three categories: Global Perception (environment light blending intensity and type, shadow direction and intensity), Material Adaptation (part-aware material adjustments such as skin, clothing, hair, pants, and shoes), and Scene Context (e.g., classroom, studio). These controls allow users to adjust visual settings based on their subjective sense of realism with real-time feedback, enabling us to record the parameter configurations that users consider visually most convincing.</p>
<p>Optimal Realism Parameter Ranges: Based on iterative adjustments collected from 20 participants across multiple MR scenarios, we statistically derive optimal parameter ranges (e.g., preferred intervals for light intensity or skin roughness) that maximize perceived visual fusion. Rather than treating realism as a single fixed setting, these ranges characterize where perceived realism is consistently high and reveal systematic deviations between physically derived parameters and perceptual preferences. The resulting parameter intervals serve as perception-aligned guidance for configuring MR scenes, grounded in both physical measurements and user studies.</p>
<p>Rigorous evaluations demonstrate that the realism parameter ranges obtained through AIMERS lead to higher visual fusion scores compared with purely physically derived configurations. Our analysis further confirms that physically accurate parameters and perceptually optimal parameters are not always aligned, and that the derived ranges capture stable, perception-consistent regions in the parameter space. By unifying inverse rendering with perceptual measurement, AIMERS helps resolve the &#x201c;designer&#x2019;s dilemma&#x201d; from the perspective of realism quality: it clarifies how technical precision and human-centric visual fusion relate, and provides principled guidance for achieving convincing MR realism.</p>
<p>Among the three selected scenarios, two types of classroom scenes are adopted because education is a vital application of MR technology and classroom environments vary considerably <xref ref-type="bibr" rid="B32">Tang et al. (2020)</xref>; <xref ref-type="bibr" rid="B26">Patel et al. (2020)</xref>. The third, an MR studio scene, is chosen to cover virtual scenario diversity and to observe how real-person hosts are perceived in a virtual broadcasting setting; currently, some weather forecasts online or on television are presented by real hosts in MR studios <xref ref-type="bibr" rid="B34">Wang (2024)</xref>.</p>
<p>In summary, the main contributions of this work include:<list list-type="bullet">
<list-item>
<p>We utilize AI-driven methods to reliably extract physically grounded realism factors from real-world visual input.</p>
</list-item>
<list-item>
<p>We introduce a user-centered system for capturing perception-aligned realism parameters in immersive MR environments.</p>
</list-item>
<list-item>
<p>We formulate optimal visual realism parameter ranges that integrate physical accuracy with human perception.</p>
</list-item>
</list>
</p>
</sec>
<sec id="s2">
<label>2</label>
<title>Related work</title>
<sec id="s2-1">
<label>2.1</label>
<title>MR visualization development</title>
<p>Mixed Reality is a class of simulators that combines virtual and real objects to create a hybrid of the virtual and real worlds <xref ref-type="bibr" rid="B25">Ohta (1999)</xref>. MR visualization acts as a bridge between virtual content and the real world, forming the core of the entire MR system. In recent years, advances in graphics rendering algorithms and GPU hardware have made the blending of virtual objects with the real-world environment on MR devices increasingly natural and realistic. Enhancing light and shadow models requires incorporating temporal and dynamic effects, such as sudden changes in lighting and dynamic scenes, as well as classifying algorithms tailored to different light paths <xref ref-type="bibr" rid="B21">Marques et al. (2018)</xref>. <xref ref-type="bibr" rid="B10">Gierlinger et al. (2010)</xref> proposed a real-time rendering engine specifically tailored to the needs of MR visualization, which uses image-based techniques for lighting and material acquisition to integrate virtual objects consistently into real-world environments. Other work recommended using a fuzzy logic model to add soft shadows to a virtual object embedded in a real scene. <xref ref-type="bibr" rid="B24">Nasr Eddine and Junjun (2019)</xref> used a holographic approach in which georeferenced raster-based data are integrated into a virtual world to enhance geographic visualization and data observation, with experiments conducted on the HoloLens to improve geographic edutainment. <xref ref-type="bibr" rid="B42">Zhou and Zhou (2023)</xref> proposed a mixed reality (MR) video fusion framework that dynamically projects video images onto 3D models as textures, utilizing remote rendering and a browser-based implementation to overcome client limitations and reduce computational and bandwidth demands.</p>
</sec>
<sec id="s2-2">
<label>2.2</label>
<title>AI-driven inverse rendering</title>
<p>For inverse rendering, accurate lighting estimation, particularly in indoor scenes, is a complex and essential task. Most current illumination estimation methods operate on single images, with a primary emphasis on integrating virtual objects into real images rather than making substantial alterations to the scene&#x2019;s illumination <xref ref-type="bibr" rid="B13">Karsch et al. (2011)</xref>; <xref ref-type="bibr" rid="B8">Garon et al. (2019)</xref>; <xref ref-type="bibr" rid="B37">Zhan et al. (2021)</xref>. While traditional representations such as a single environment map <xref ref-type="bibr" rid="B7">Gardner et al. (2017)</xref>; <xref ref-type="bibr" rid="B17">LeGendre et al. (2019)</xref> and spherical lobes <xref ref-type="bibr" rid="B8">Garon et al. (2019)</xref> have been used, they often neglect spatial variations and high-frequency details in lighting. Recent innovations <xref ref-type="bibr" rid="B30">Song and Funkhouser (2019)</xref>; <xref ref-type="bibr" rid="B31">Srinivasan et al. (2020)</xref> have attempted to improve 3D lighting representation, but still grapple with challenges like spatial instability and the lack of HDR information. <xref ref-type="bibr" rid="B18">Li et al. (2020)</xref> propose a per-pixel spatially-varying spherical Gaussian (SVSG) lighting representation to capture high-frequency effects and demonstrate that SGs are superior to spherical harmonics (SH) for depicting lighting details in indoor scenes. Neural-PIL <xref ref-type="bibr" rid="B2">Boss et al. (2021b)</xref> proposes a pre-integrated lighting network based on image-based lighting (IBL), showing better performance in conveying global illumination than SGs and SH. Hence, we utilize a neural HDR-radiance field to represent the IBL at any spatial point, thereby ensuring a more accurate and detailed depiction of indoor lighting scenarios with a focus on physically accurate HDR lighting prediction.</p>
<p>Material estimation in inverse rendering can be categorized into two levels: object level and scene level. Object-level estimation <xref ref-type="bibr" rid="B38">Zhang et al. (2021)</xref>, <xref ref-type="bibr" rid="B39">Zhang et al. (2022)</xref>; <xref ref-type="bibr" rid="B23">Munkberg et al. (2022)</xref>; <xref ref-type="bibr" rid="B20">Liang et al. (2022)</xref>; <xref ref-type="bibr" rid="B1">Boss et al. (2021a)</xref> focuses on individual objects, often in controlled or simplified environments. Object-level approaches are generally less complex, as they deal with fewer variables and more straightforward lighting conditions. In contrast, scene-level material estimation <xref ref-type="bibr" rid="B3">Choi et al. (2023)</xref>; <xref ref-type="bibr" rid="B19">Li et al. (2023)</xref> is significantly more challenging due to the complexity and variability of entire scenes. This includes diverse lighting, multiple objects with different materials, and shadows.</p>
<p>The complexity of scene-level material estimation is further compounded by the choice between single-view and multi-view approaches. Single-view material estimation <xref ref-type="bibr" rid="B18">Li et al. (2020)</xref>; <xref ref-type="bibr" rid="B7">Gardner et al. (2017)</xref>, despite its simplicity and lower data requirements, often faces the ill-posed issue, where insufficient information leads to ambiguous or inaccurate estimations. This is particularly evident in complex scenes where a single viewpoint cannot capture the entirety of the scene&#x2019;s lighting and material properties. In contrast, multi-view material estimation <xref ref-type="bibr" rid="B3">Choi et al. (2023)</xref>; <xref ref-type="bibr" rid="B39">Zhang et al. (2022)</xref>, <xref ref-type="bibr" rid="B38">Zhang et al. (2021)</xref>; <xref ref-type="bibr" rid="B23">Munkberg et al. (2022)</xref> leverages images from multiple viewpoints, providing a more comprehensive understanding of the scene. It can significantly reduce the ambiguity associated with single-view estimations, allowing for more accurate and reliable material property extraction. Our work utilizes multi-view images for material estimation, specifically addressing the challenges at the scene level.</p>
</sec>
<sec id="s2-3">
<label>2.3</label>
<title>Visual perception in MR</title>
<p>Since vision is crucial for perception, achieving the best fusion requires considering the overall impact of visual integration. Consequently, visual perception in MR systems has consistently been a primary research topic. <xref ref-type="bibr" rid="B6">Fleming (2014)</xref> proposed that material perception plays a crucial role in the visual system by allowing us to effortlessly recognize and distinguish materials despite varying appearances. <xref ref-type="bibr" rid="B41">Zhdanov et al. (2019)</xref> explored virtual prototyping methods to assess and mitigate visual discomfort in AR, VR, and MR systems, addressing issues like vergence-accommodation conflicts and illumination differences; however, such methods may still fall short of fully capturing the complexities of real-world visual perception. <xref ref-type="bibr" rid="B27">Petikam et al. (2018)</xref> studied the relationship between real-world depth fidelity and visual quality in MR rendering, providing perceptual thresholds for various composition artifacts through user experiments. However, that work focuses solely on depth information, without analyzing other rendering factors, which may constrain its applicability in real-world scenarios. <xref ref-type="bibr" rid="B28">Potemin et al. (2018)</xref> proposed a virtual prototyping approach to analyze visual perception problems in augmented and mixed reality devices, comparing physically correct images with expected ones. However, it relies on physical results without further analyzing the impact on human visual perception. <xref ref-type="bibr" rid="B4">Du et al. (2024)</xref> demonstrated how the integration of subtle visual cues, such as shadows, lighting, textures, blur, and distortions, in MR interfaces can enhance user experience by making digital elements appear as natural components of the physical environment. However, that work focuses on how virtual objects blend into real-world settings, without further exploring how real objects can be seamlessly integrated into virtual environments to improve overall MR fusion.</p>
</sec>
</sec>
<sec sec-type="methods" id="s3">
<label>3</label>
<title>Methods</title>
<p>To address the challenge of achieving perceptually convincing visual realism in MR scenes, we develop a system that integrates AI-driven physical parameter extraction with perception-aligned user interaction. Our goal is to understand how rendering parameters should be configured so that virtual and real elements appear visually coherent to human observers. The overall pipeline of our mixed reality design system (AIMERS) is shown in <xref ref-type="fig" rid="F1">Figure 1</xref>.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>The overall pipeline of our mixed reality design system (AIMERS), combining an AI-based inverse rendering model and a human-centric visual perception adjustment system.</p>
</caption>
<graphic xlink:href="frvir-07-1733259-g001.tif">
<alt-text content-type="machine-generated">Diagram comparing traditional and AI-based mixed reality (MR) rendering pipelines. The top section shows traditional MR rendering, starting with 3D scanning to model a 3D avatar without PBR texture, using MR development software, and interacting with a user in an MR scene. The bottom section illustrates the AI-based rendering pipeline, using 2D multi-view images to model a 3D avatar with PBR texture, followed by a visual perception adjustment system, and user interaction in an MR scene. Annotations indicate iterative revisions, material adjustments, and immersive refinements.</alt-text>
</graphic>
</fig>
<p>We first observe that directly placing 3D-scanned real elements into MR environments often preserves residual lighting and shadows from the source scene, which conflicts with the new environment and degrades perceived realism. To isolate physically meaningful scene attributes, we employ a deep learning&#x2013;based neural inverse rendering approach to reconstruct the geometry and material properties of real elements while removing baked illumination effects (Section 3.1). This yields lighting-independent, physically grounded realism factors that serve as a baseline reference across MR scenes.</p>
<p>However, physically correct parameters alone do not necessarily correspond to users&#x2019; perceived realism. To explicitly capture perceptual preferences, we design a human-centered MR interaction system that allows users to adjust realism-related parameters directly inside immersive environments. Over 40 technical rendering controls are mapped into 8 intuitive parameters covering global lighting, material behavior, and scene context. Users adjust these parameters with real-time feedback, enabling us to record the configurations they perceive as most visually realistic (Section 3.2).</p>
<p>Based on the realism settings collected from 20 participants across multiple MR scenarios, we then analyze the relationship between physically derived parameters and perceptually preferred values. Instead of predicting a single &#x201c;optimal&#x201d; configuration, we derive perception-aligned parameter ranges that consistently correspond to high visual realism. These ranges reveal where perceptual realism deviates from purely physically defined settings and provide principled guidance for MR scene configuration.</p>
<p>Finally, we evaluate the derived parameter ranges in representative MR applications, assessing visual fusion quality and alignment with user perception. Our results confirm that the perception-aligned intervals lead to more convincing visual realism than physically derived parameters alone, demonstrating the practical value of combining AI-based inverse rendering with user-driven perceptual measurement.</p>
<sec id="s3-1">
<label>3.1</label>
<title>Neural inverse rendering for real-scanned avatar</title>
<p>When building MR systems, we typically embed 3D models of real-scanned elements into virtual 3D environments. However, since real-scanned objects often carry the lighting and shadow effects of the original scene, this residual light and shadow creates a visual disconnect in the new scene. We therefore consider how to eliminate the impact of the original lighting. Given that material properties are not affected by lighting, we can recover the material properties of the real elements to remove the interference of light and shadow. Nevertheless, compared to virtual objects (designed by artists), it is usually difficult to directly obtain explicit material parameter values for real objects. Leveraging the recently developed Ref-GS technique <xref ref-type="bibr" rid="B40">Zhang et al. (2025)</xref>, an AI-based inverse rendering approach built on differentiable neural rendering, we achieve efficient PBR material and geometry reconstruction of avatars directly from multi-view images. This established method enables rapid decomposition of visual appearance into intrinsic material properties and geometric attributes, providing a practical solution for high-quality avatar creation in mixed reality applications.</p>
<p>Given a set of posed RGB images of a real-world human avatar, our goal is to accurately decompose the geometry and materials (base color/roughness) under unknown illumination, which can then be used to re-render the avatar under new environment lighting. Following the Ref-GS technique <xref ref-type="bibr" rid="B40">Zhang et al. (2025)</xref>, we first use 3D-GS <xref ref-type="bibr" rid="B15">Kerbl et al. (2023)</xref> to represent the avatar&#x2019;s 3D radiance field, which encodes global illumination and geometry properties. We then use an additional MLP to represent a material field. Meanwhile, to eliminate residual shadows and efficiently improve material quality, we introduce a diffusion-based material estimation prior as a material regularization; the pipeline is shown in <xref ref-type="fig" rid="F2">Figure 2</xref>.</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>The inverse rendering pipeline consists of two phases: 1) geometry estimation: we input multi-view images of the scanned avatar and obtain surface normal information via 3DGS-based neural rendering; 2) material estimation: we jointly optimize base color and roughness with a diffusion prior.</p>
</caption>
<graphic xlink:href="frvir-07-1733259-g002.tif">
<alt-text content-type="machine-generated">Flowchart illustrating a photorealistic rendering process. Multi-view images feed into a Geometry Neural Network and a Material Neural Network, creating normal, base color, and roughness maps. There&#x27;s a PBR Diffusion Model Prior involved. Rendering is compared to a reference through image loss to refine outputs.</alt-text>
</graphic>
</fig>
<p>After obtaining precise geometry, we utilize differentiable rendering techniques to optimize the materials, including the base color <inline-formula id="inf1">
<mml:math id="m1">
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>A</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:math>
</inline-formula> and roughness <inline-formula id="inf2">
<mml:math id="m2">
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>R</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:math>
</inline-formula>. The rendering equation can be written as <xref ref-type="disp-formula" rid="e1">Equation 1</xref>:<disp-formula id="e1">
<mml:math id="m3">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="bold">x</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">&#x3c9;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mo>&#x222b;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a9;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2b;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="bold">x</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">&#x3c9;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>A</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3c0;</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="bold">x</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mo>,</mml:mo>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>R</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">&#x3c9;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">&#x3c9;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">&#x3c9;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x22c5;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="bold-italic">n</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mtext>d</mml:mtext>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">&#x3c9;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
<label>(1)</label>
</disp-formula>where <inline-formula id="inf3">
<mml:math id="m4">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="bold-italic">n</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the normal at surface point <inline-formula id="inf4">
<mml:math id="m5">
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="bold">x</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf5">
<mml:math id="m6">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">&#x3c9;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the incident light direction, and <inline-formula id="inf6">
<mml:math id="m7">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">&#x3c9;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the view direction.</p>
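<p>For intuition, the following minimal sketch estimates the outgoing radiance of <xref ref-type="disp-formula" rid="e1">Equation 1</xref> at a single surface point via Monte Carlo integration. It is not our rendering implementation: uniform hemisphere sampling and a Blinn&#x2013;Phong-style specular lobe are simplifying stand-ins for the full microfacet BRDF, and the constant environment term stands in for the neural HDR radiance field.</p>
<code language="python">import numpy as np

def sample_hemisphere(n_samples, rng):
    """Uniformly sample directions on the upper hemisphere (local frame, z = normal)."""
    u, v = rng.random(n_samples), rng.random(n_samples)
    z = u                                       # cos(theta), uniform in [0, 1]
    r = np.sqrt(np.maximum(0.0, 1.0 - z * z))
    phi = 2.0 * np.pi * v
    return np.stack([r * np.cos(phi), r * np.sin(phi), z], axis=-1)

def render_point(albedo, roughness, w_o, incident_radiance, n_samples=4096, seed=0):
    """Monte Carlo estimate of the outgoing radiance at one surface point,
    in a local frame where the normal is (0, 0, 1)."""
    rng = np.random.default_rng(seed)
    w_i = sample_hemisphere(n_samples, rng)
    cos_theta = w_i[:, 2]                       # (w_i . n), nonnegative by sampling
    L_i = incident_radiance(w_i)                # stands in for L_i(x, w_i)
    diffuse = albedo / np.pi                    # Lambertian term A / pi
    half = w_i + w_o                            # half vector for the specular lobe
    half /= np.linalg.norm(half, axis=-1, keepdims=True)
    shininess = 2.0 / np.maximum(roughness * roughness, 1e-4)
    specular = np.maximum(half[:, 2], 0.0) ** shininess
    brdf = diffuse + specular[:, None]
    # Uniform hemisphere pdf is 1 / (2 pi): average the integrand, divide by pdf.
    return (brdf * L_i * cos_theta[:, None]).mean(axis=0) * 2.0 * np.pi

# Example: a constant white environment light.
L_o = render_point(albedo=np.array([0.6, 0.4, 0.3]), roughness=0.5,
                   w_o=np.array([0.0, 0.0, 1.0]),
                   incident_radiance=lambda w: np.ones((w.shape[0], 3)))
print(L_o)
</code>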
</sec>
<sec id="s3-2">
<label>3.2</label>
<title>Human-centric perception adjustment system for MR</title>
<p>Visual realism in MR is often achieved by estimating physically based parameters such as lighting, materials, and geometry. Many MR design workflows assume that physically accurate rendering will naturally lead to realistic visual experiences. However, observations from realistic MR applications show that this assumption does not always hold. Even when physical parameters are correctly estimated, users may still perceive the scene as unrealistic.</p>
<p>This mismatch suggests that physical realism and human perceptual realism are not always aligned in interactive MR environments. Based on this observation, this study is guided by the following hypothesis:</p>
<p>
<statement content-type="h1" id="H1">
<label>H1</label>
<p>Physically derived visual realism parameters do not always align with users&#x2019; perceptual judgments of realism in realistic MR scenes.</p>
</statement>
</p>
<p>Testing this hypothesis requires access to the realism parameters as perceived by users during interaction. However, existing MR systems mainly support physically based rendering and rely on parameters predefined by designers. They do not provide mechanisms for users to directly adjust realism-related parameters or to record perception-aligned values.</p>
<p>To address this gap, this study introduces a user-centered MR system that allows users to freely adjust visual realism parameters within realistic MR scenes. Through interactive adjustment, the system captures parameter values that reflect users&#x2019; perceptual preferences rather than physical correctness alone. These perception-aligned parameters enable direct comparison between physical and perceptual realism. The system includes global and local variables and allows users to adjust them to reach the parameter combination they perceive as optimally immersive in MR scenes, as shown in <xref ref-type="fig" rid="F3">Figure 3</xref>. User studies are conducted to compare physically derived parameters with user-adjusted parameters across three realistic MR scenarios: a Classroom, a Computer Room, and a Studio. These scenes represent common and socially relevant uses of realistic MR. The experimental results confirm the proposed hypothesis and provide the foundation for building an optimal visual realism parameter model in later studies.</p>
<p>Notably, inspired by <xref ref-type="bibr" rid="B35">Wei and Luximon (2024)</xref> and Disney&#x2019;s PBR model, we select a group of significant variables for visual fusion adjustment.</p>
<p>
<inline-formula id="inf7">
<mml:math id="m8">
<mml:mrow>
<mml:mtext mathvariant="bold">Global Variables:</mml:mtext>
</mml:mrow>
</mml:math>
</inline-formula> To ensure the avatar and background blend seamlessly, we need to blend the lighting and shadows of the avatar with those of the background scene <xref ref-type="bibr" rid="B12">Hughes et al. (2004)</xref>. Therefore, we designed the lighting and shadow variables of the environment as global control variables for blending. However, we assume there might be a gap between the physical rendering results and human perception for some variables. For example, the direction of the shadows on the rendered avatar may not necessarily match the optimal blending perception from a human perspective.<list list-type="bullet">
<list-item>
<p>Light Types: This is a discrete variable. Here, we simplify the lighting to the general illumination types used in MR, including point light, directional light, and spot light.</p>
</list-item>
<list-item>
<p>Light Intensity: This is a continuous variable. Because different lighting intensities can affect the visual blending effect, we selected a moderate range of lighting intensities and normalized it to a scale from 0 to 1. As the intensity increases, the ambient light becomes brighter.</p>
</list-item>
<list-item>
<p>Shadow Intensity: This is a continuous variable. Although there is a linear relationship between shadow intensity and lighting intensity, our previous work has shown that human visual perception is quite sensitive to variations in shadow intensity under the same lighting conditions. Therefore, we designed shadow intensity as a separate variable to determine the range that best aligns with human perception of seamless blending.</p>
</list-item>
<list-item>
<p>Shadow Direction: This is a discrete variable. Here, based on the earlier assumption that the shadow direction obtained from physical rendering may not be optimal for seamless blending, we designed a test range for shadow direction derived from the directly rendered shadows.</p>
</list-item>
</list>
</p>
<p>
<inline-formula id="inf8">
<mml:math id="m9">
<mml:mrow>
<mml:mtext mathvariant="bold">Local Variables:</mml:mtext>
</mml:mrow>
</mml:math>
</inline-formula> Since the avatar is essentially a complex composite made up of multiple parts and different materials, and because users have varying levels of visual perception for different parts, we assume that the visual blending of each part affects the overall blending effect in an MR scene. We divided the avatar into five parts: hair, skin, top, pants, and shoes. For each part, we designed two local variables: roughness and base color saturation. As the pants and shoes are black, base color saturation is not adjusted for these two parts. The full control set is summarized in the sketch after this list.<list list-type="bullet">
<list-item>
<p>Roughness: This is a continuous variable. Roughness describes the degree of smoothness of the material for the adjusted part. In our MR environment, we consider that the roughness calculated through inverse rendering may not fully achieve optimal blending. Therefore, the default parameter value for roughness is set to the result from inverse rendering. We then adjust this parameter based on the physical results, ultimately aiming to find the optimal roughness range for each part that aligns with the majority of users&#x2019; perception of seamless blending.</p>
</list-item>
<list-item>
<p>Base-color Saturation: This is a continuous variable. Since determining the optimal color for each part of the avatar is very challenging, given the infinite possibilities for base colors like those of clothing, we opted to analyze the saturation of the base color to understand its impact on visual blending. This approach simplifies the analysis to some extent, while also providing valuable insights for designing the colors of avatars in the MR system.</p>
</list-item>
</list>
</p>
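<p>Taken together, these controls can be summarized as a simple configuration structure. The sketch below is an illustrative Python encoding of the global and local variables described above; the class and field names, default values, and the numeric shadow-direction encoding are our own assumptions rather than part of the deployed system.</p>
<code language="python">from dataclasses import dataclass, field
from enum import Enum
from typing import Optional

class LightType(Enum):
    POINT = "point"
    DIRECTIONAL = "directional"
    SPOT = "spot"

@dataclass
class PartMaterial:
    """Local variables for one avatar part; roughness defaults to the value
    estimated by the inverse rendering network."""
    roughness: float = 0.5
    base_color_saturation: Optional[float] = None   # None for black parts

@dataclass
class RealismControls:
    """Adjustable realism parameters exposed to users in the MR interface."""
    light_type: LightType = LightType.DIRECTIONAL   # discrete
    light_intensity: float = 0.5                    # continuous, normalized to [0, 1]
    shadow_intensity: float = 0.5                   # continuous, separate from light intensity
    shadow_direction_deg: float = 0.0               # offset around the rendered shadow direction
    parts: dict = field(default_factory=lambda: {
        "hair": PartMaterial(base_color_saturation=0.5),
        "skin": PartMaterial(base_color_saturation=0.5),
        "top": PartMaterial(base_color_saturation=0.5),
        "pants": PartMaterial(),                    # black: saturation not adjusted
        "shoes": PartMaterial(),                    # black: saturation not adjusted
    })

# Example: record one participant's adjustment in the studio scene.
setting = RealismControls(light_intensity=0.62, shadow_intensity=0.4)
setting.parts["skin"].roughness = 0.35
</code>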
<p>Furthermore, we use a participatory design method to obtain the optimal MR fusion settings for our research scenes. By allowing multiple users to adjust the parameters of all variables to achieve their perceived optimal fusion effect, we can continuously refine the optimal fusion parameter range. The interval densities used in this process are defined in <xref ref-type="disp-formula" rid="e2">Equation 2</xref>:<disp-formula id="e2">
<mml:math id="m10">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>D</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>in</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:mi>L</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>R</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msub>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>R</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>L</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfrac>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>D</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>out</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mo>&#x2209;</mml:mo>
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:mi>L</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>R</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msub>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>R</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>L</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfrac>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(2)</label>
</disp-formula>where <inline-formula id="inf9">
<mml:math id="m11">
<mml:mrow>
<mml:mi>L</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf10">
<mml:math id="m12">
<mml:mrow>
<mml:mi>R</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> denote the left and right boundaries of the test interval, respectively. The term <inline-formula id="inf11">
<mml:math id="m13">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:mrow>
<mml:mi>L</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>R</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:msub>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> represents the total number of data points within the interval <inline-formula id="inf12">
<mml:math id="m14">
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:mrow>
<mml:mi>L</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>R</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>. For each data point <inline-formula id="inf13">
<mml:math id="m15">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> inside this interval, 1 is added. The denominator <inline-formula id="inf14">
<mml:math id="m16">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>R</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>L</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> is the width of the interval <inline-formula id="inf15">
<mml:math id="m17">
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:mrow>
<mml:mi>L</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>R</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>. Therefore, <inline-formula id="inf16">
<mml:math id="m18">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>D</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>in</mml:mtext>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is defined as the number of data points inside the interval <inline-formula id="inf17">
<mml:math id="m19">
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:mrow>
<mml:mi>L</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>R</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> divided by the width of the interval, which is the density of data points inside the interval, and <inline-formula id="inf18">
<mml:math id="m20">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>D</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>out</mml:mtext>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the corresponding density of data points outside the interval.</p>
<p>We define the objective function <inline-formula id="inf19">
<mml:math id="m21">
<mml:mrow>
<mml:mi>f</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>L</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>R</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> which quantifies the difference in data point densities inside and outside the interval <inline-formula id="inf20">
<mml:math id="m22">
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:mrow>
<mml:mi>L</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>R</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>, as given in <xref ref-type="disp-formula" rid="e3">Equation 3</xref>; the optimization goal is then to find the optimal interval, as shown in <xref ref-type="disp-formula" rid="e4">Equation 4</xref>.<disp-formula id="e3">
<mml:math id="m24">
<mml:mrow>
<mml:mi>f</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>L</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>R</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:mfenced open="|" close="|">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>D</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>in</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>L</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>R</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>D</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>out</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>L</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>R</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
<label>(3)</label>
</disp-formula>
<disp-formula id="e4">
<mml:math id="m25">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>&#x3b4;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>arg</mml:mi>
<mml:munder>
<mml:mrow>
<mml:mi>max</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3b4;</mml:mi>
</mml:mrow>
</mml:munder>
<mml:mi>f</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>&#x3b4;</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>P</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>&#x3b4;</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
<label>(4)</label>
</disp-formula>where <inline-formula id="inf22">
<mml:math id="m26">
<mml:mrow>
<mml:mi>P</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the center of the densest region in the histogram, representing the peak of the distribution. <inline-formula id="inf23">
<mml:math id="m27">
<mml:mrow>
<mml:mi>&#x3b4;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is an offset used to explore intervals of different widths centered at <inline-formula id="inf24">
<mml:math id="m28">
<mml:mrow>
<mml:mi>P</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>. By optimizing this objective <inline-formula id="inf25">
<mml:math id="m29">
<mml:mrow>
<mml:mi>f</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>L</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>R</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, we can find the optimal offset that maximizes the density difference between inside and outside the interval <inline-formula id="inf26">
<mml:math id="m30">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>&#x3b4;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>, thereby determining the corresponding optimal interval <inline-formula id="inf27">
<mml:math id="m31">
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi>R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
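<p>A minimal sketch of this interval search, assuming parameter values normalized to [0, 1], is given below. The histogram bin count, the offset grid, and the synthetic data are illustrative choices rather than the exact analysis settings used in our studies, and the peak is assumed to lie in the interior of the range.</p>
<code language="python">import numpy as np

def density_gap(sorted_vals, left, right, lo=0.0, hi=1.0):
    """Equations 2-3 folded together: the density of user-adjusted values
    inside [left, right] minus the density outside, for a strict
    sub-interval of [lo, hi]."""
    n_in = (np.searchsorted(sorted_vals, right, side="right")
            - np.searchsorted(sorted_vals, left, side="left"))
    n_out = len(sorted_vals) - n_in
    width = right - left
    return abs(n_in / width - n_out / (hi - lo - width))

def optimal_interval(values, n_offsets=200, eps=1e-3):
    """Equation 4: center candidate intervals at the histogram peak P and
    pick the half-width delta that maximizes the density gap."""
    vals = np.sort(np.asarray(values, dtype=float))
    counts, edges = np.histogram(vals, bins=20, range=(0.0, 1.0))
    peak_bin = int(np.argmax(counts))
    P = 0.5 * (edges[peak_bin] + edges[peak_bin + 1])   # densest-region center
    deltas = np.linspace(eps, min(P, 1.0 - P) - eps, n_offsets)
    scores = [density_gap(vals, P - d, P + d) for d in deltas]
    best = deltas[int(np.argmax(scores))]
    return P - best, P + best

# Example with synthetic user adjustments clustered around 0.62.
rng = np.random.default_rng(0)
prefs = np.clip(rng.normal(0.62, 0.05, size=60), 0.0, 1.0)
L_star, R_star = optimal_interval(prefs)
print(round(L_star, 3), round(R_star, 3))
</code>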
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>The Human-centric MR perceptual adjustment system design for all test variables. We use the physical material parameters obtained from the inverse rendering network as default values, and further enable adjustable controls for five key avatar body parts. Human judgement of these parameters is then used to achieve optimal immersion in MR scenes.</p>
</caption>
<graphic xlink:href="frvir-07-1733259-g003.tif">
<alt-text content-type="machine-generated">Diagram illustrating the integration of a 3D avatar into various scenes using global and local variables, optimized by human judgment. Global variables include light and shadow properties; local variables involve hair, skin, cloth, pants, and shoes adjustments. An inverse rendering neural network refines the avatar&#x27;s appearance. Human assessment enhances visual fusion, leading to perceived optimal adjustment.</alt-text>
</graphic>
</fig>
</sec>
</sec>
<sec id="s4">
<label>4</label>
<title>Experiments</title>
<sec id="s4-1">
<label>4.1</label>
<title>Study 1: deriving physical visual realism factors for MR</title>
<sec id="s4-1-1">
<label>4.1.1</label>
<title>Experiment setting</title>
<p>We use a two-phase training strategy to implement our inverse rendering method, covering geometry and radiance field estimation followed by material estimation. Our input is multi-view images of the real-scanned avatar, and the output is the normal, base color, and roughness of the avatar. All experiments are conducted on a single NVIDIA GeForce RTX 3090 GPU. The full AI-based inverse rendering process requires approximately <inline-formula id="inf28">
<mml:math id="m32">
<mml:mrow>
<mml:mn mathvariant="bold">5</mml:mn>
<mml:mspace width="0.3333em"/>
<mml:mi mathvariant="bold-italic">h</mml:mi>
<mml:mi mathvariant="bold-italic">o</mml:mi>
<mml:mi mathvariant="bold-italic">u</mml:mi>
<mml:mi mathvariant="bold-italic">r</mml:mi>
<mml:mi mathvariant="bold-italic">s</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> to complete. The details are as follows:</p>
<p>Stage 1: Geometry Estimation. We jointly optimize the geometry network <inline-formula id="inf29">
<mml:math id="m33">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>F</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and radiance network <inline-formula id="inf30">
<mml:math id="m34">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>F</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> in this stage following VolSDF <xref ref-type="bibr" rid="B36">Yariv et al. (2021)</xref>. We use two separate MLPs for <inline-formula id="inf31">
<mml:math id="m35">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>F</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf32">
<mml:math id="m36">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>F</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, each consisting of 4 layers of 256 hidden units with a rectified linear unit (ReLU) activation function. In addition, we encode the input surface position <inline-formula id="inf33">
<mml:math id="m37">
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="bold">x</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:math>
</inline-formula> with 10 levels of periodic functions before feeding it into our network. We optimize our geometry and HDR-radiance networks for 250K iterations with a batch size of 1,024 in this stage, which takes about 3&#xa0;h for an object. We define the training loss <inline-formula id="inf34">
<mml:math id="m38">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> by <xref ref-type="disp-formula" rid="e5">Equation 5</xref>.<disp-formula id="e5">
<mml:math id="m39">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">render</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3bb;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">eik</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">eik</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(5)</label>
</disp-formula>where <inline-formula id="inf35">
<mml:math id="m40">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">render</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is <inline-formula id="inf36">
<mml:math id="m41">
<mml:mrow>
<mml:mo stretchy="false">&#x2016;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>L</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mo stretchy="false">&#x2016;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, and we set weights <inline-formula id="inf37">
<mml:math id="m42">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3bb;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">eik</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0.1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>. Here, <inline-formula id="inf38">
<mml:math id="m43">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the ground-truth RGB and <inline-formula id="inf39">
<mml:math id="m44">
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo>&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:math>
</inline-formula> is the predicted RGB.</p>
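<p>As a reference for Equation 5, the following PyTorch-style sketch computes the Stage-1 loss. The eikonal term is assumed to take the standard VolSDF form (the norm of the SDF gradient driven to one), which the text above does not spell out explicitly, and the tensor names are illustrative.</p>
<code language="python"><![CDATA[
import torch

def stage1_loss(pred_rgb, gt_rgb, sdf_grad, lambda_eik=0.1):
    """Equation 5 sketch: L_1 = L_render + lambda_eik * L_eik."""
    # L_render: L1 distance between predicted and ground-truth RGB
    l_render = (pred_rgb - gt_rgb).abs().mean()
    # L_eik: eikonal regularizer (||grad d|| - 1)^2, assumed to
    # follow VolSDF; the excerpt does not define it explicitly
    l_eik = (torch.linalg.norm(sdf_grad, dim=-1) - 1.0).pow(2).mean()
    return l_render + lambda_eik * l_eik
]]></code>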
<p>Stage 2: Material Estimation. The material MLP consists of four fully connected layers, each with 512 hidden units and ReLU activation. Following these four layers, the material feature network is divided into separate basecolor and roughness layers with 512 hidden units and Sigmoid activation. Specifically, the basecolor layer outputs the basecolor term <inline-formula id="inf40">
<mml:math id="m45">
<mml:mrow>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>A</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>3</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> and roughness layer outputs the roughness term <inline-formula id="inf41">
<mml:math id="m46">
<mml:mrow>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>R</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> at surface position <inline-formula id="inf42">
<mml:math id="m47">
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="bold">x</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:math>
</inline-formula>. In this stage, we optimize our material model for 25K iterations with a batch size of 256, which takes approximately 2&#xa0;h for a single object. The training loss is defined by <xref ref-type="disp-formula" rid="e6">Equation 6</xref>.<disp-formula id="e6">
<mml:math id="m48">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">render</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3bb;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">basecolor</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">basecolor</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3bb;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">rough</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">rough</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(6)</label>
</disp-formula>where <inline-formula id="inf43">
<mml:math id="m49">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">basecolor</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf44">
<mml:math id="m50">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">roughness</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> are the MSE losses between the predicted values and the diffusion prior values. We set the weights <inline-formula id="inf45">
<mml:math id="m51">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3bb;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">eik</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0.1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf46">
<mml:math id="m52">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3bb;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">normal</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0.001</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf47">
<mml:math id="m53">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3bb;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">basecolor</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0.0003</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf48">
<mml:math id="m54">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3bb;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">rough</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0.001</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> in our experiments.</p>
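<p>A corresponding sketch of the Stage-2 objective in Equation 6 follows, using the weights reported above. We assume the rendering term keeps the same L1 form as in Stage 1, and that the diffusion-prior targets are passed in as precomputed tensors; both are assumptions, not details stated in the text.</p>
<code language="python"><![CDATA[
import torch
import torch.nn.functional as F

def stage2_loss(pred_rgb, gt_rgb,
                pred_basecolor, prior_basecolor,
                pred_rough, prior_rough,
                lambda_basecolor=3e-4, lambda_rough=1e-3):
    """Equation 6 sketch with the weights reported in the text."""
    # Rendering term, assumed to keep the Stage-1 L1 form
    l_render = (pred_rgb - gt_rgb).abs().mean()
    # MSE losses against the diffusion-prior targets
    l_basecolor = F.mse_loss(pred_basecolor, prior_basecolor)
    l_rough = F.mse_loss(pred_rough, prior_rough)
    return l_render + lambda_basecolor * l_basecolor + lambda_rough * l_rough
]]></code>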
</sec>
<sec id="s4-1-2">
<label>4.1.2</label>
<title>Result</title>
<p>After optimizing the geometry and material neural networks, we obtain the rendering factors of the scene and avatar, which allow us to de-light the real-scanned avatar and remove the shadows baked in by the raw scene illumination. The clean basecolor and geometry can then be used for relighting in our MR system. We compare the raw RGB input with the recovered materials in <xref ref-type="fig" rid="F4">Figure 4</xref>.</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>The inverse rendering results for the computer room. We input RGB images, and then estimate the Albedo, Roughness, Metallic and Normal.</p>
</caption>
<graphic xlink:href="frvir-07-1733259-g004.tif">
<alt-text content-type="machine-generated">Input photo of a classroom transitions into four rendered images labeled Albedo, Roughness, Metallic, and Normal. Each rendering represents different surface properties, with variations in color and texture depiction.</alt-text>
</graphic>
</fig>
</sec>
</sec>
<sec id="s4-2">
<label>4.2</label>
<title>Study 2: human-centric perceptual optimization for MR scene design</title>
<sec id="s4-2-1">
<label>4.2.1</label>
<title>Experiment setting</title>
<p>To reduce reliance on designer experience and repeated testing in traditional MR scene design, we propose a human-centric system that helps designers efficiently create immersive experiences. Our approach further narrows the gap between physically rendered outputs and human perception in MR environments by introducing a parametric evaluation framework that systematically examines rendering factors through user-driven perceptual tuning. We define the variables of the adjustment system, comprising global and local variables, as shown in <xref ref-type="fig" rid="F5">Figure 5</xref>. The global variables are Light and Shadow, and the local variables are Roughness and Base-color Saturation for different avatar body parts. All continuous variables are normalized to the range 0&#x2013;1. Discrete variables include the type of lighting and the direction of the shadows. We selected three common types of lighting: Point Light, Directional Light, and Spot Light. Additionally, the default shadow direction can be offset by <inline-formula id="inf49">
<mml:math id="m55">
<mml:mrow>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>15</mml:mn>
<mml:mo>&#xb0;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf50">
<mml:math id="m56">
<mml:mrow>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>30</mml:mn>
<mml:mo>&#xb0;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>. Our system is developed in Unity, and for the different default light-source types we selected three scenarios as test environments: a classroom, a computer room, and a studio. The classroom typically features a combination of indoor and outdoor lighting; the computer room is characterized primarily by indoor point lighting (here, we treat area lighting as a collection of multiple point sources); and the main light source in the studio is usually indoor spotlights. Furthermore, we conducted a comparative time analysis between our optimized workflow and conventional development pipelines across three benchmark MR scenarios, which validates the efficiency of our system.</p>
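<p>As an illustration of this parameter space, the sketch below encodes the controls in the normalized 0&#x2013;1 form described above. The numeric encodings of the discrete light types and the example slider range are hypothetical, since the exact mappings are not stated in the text.</p>
<code language="python"><![CDATA[
# Hypothetical encodings for the discrete controls; the exact
# mapping used by the system is not stated in the paper.
LIGHT_TYPES = {"point": 0.0, "directional": 0.5, "spot": 1.0}
SHADOW_OFFSETS_DEG = (-30, -15, 0, 15, 30)   # default plus +/-15 and +/-30 degrees

def normalize(value, lo, hi):
    """Min-max normalize a continuous control into the 0-1 range."""
    return (value - lo) / (hi - lo)

# Example: an intensity of 540 on a hypothetical 0-1200 slider
print(normalize(540.0, 0.0, 1200.0))   # 0.45
]]></code>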
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>The MR interface of Human-centric perceptual adjustment system. Users adjust both global and local variables in the system to find the optimal parameter combination for immersive experience. Global variables include lighting and shadow settings, while local variables cover the roughness and base color of five avatar parts.</p>
</caption>
<graphic xlink:href="frvir-07-1733259-g005.tif">
<alt-text content-type="machine-generated">A digital rendering of a classroom setting shows a woman standing with a tablet, and a person seated wearing a virtual reality headset. The scene on the right displays control panels for adjusting lighting, shadows, and materials, indicating customization options in a virtual environment.</alt-text>
</graphic>
</fig>
</sec>
<sec id="s4-2-2">
<label>4.2.2</label>
<title>Questionnaire</title>
<p>The questionnaire is designed to collect users&#x2019; perceptual judgments of visual realism under different parameter settings and to validate <xref ref-type="statement" rid="H1">Hypothesis H1</xref>, which states that physically derived realism parameters do not always align with human perception in realistic VR scenes.</p>
<p>The questionnaire consists of three core items, explicitly defined as Q1&#x2013;Q3, which directly support the comparison between physically derived and perception-aligned realism settings. After completing the interaction for each scene, participants were asked to answer the following questions:<list list-type="bullet">
<list-item>
<p>Q1 (Physical Realism Rating): Participants rated the overall visual realism of the scene rendered under the Physical condition using a 7-point Likert scale, where 1 indicates very unrealistic and 7 indicates very realistic.</p>
</list-item>
<list-item>
<p>Q2 (User-Tuned Realism Rating): Participants rated the overall visual realism of the scene rendered under the User-tuned condition, reflecting their preferred parameter configuration, using the same 7-point Likert scale.</p>
</list-item>
<list-item>
<p>Q3 (Preference Judgment): Participants indicated which condition appeared more visually realistic by choosing between the Physical condition, the User-tuned condition, or no noticeable difference.</p>
</list-item>
</list>
</p>
<p>Questions Q1 and Q2 provide quantitative measures of perceived realism before and after user adjustment, while Q3 offers a direct qualitative comparison of perceptual preference between the two conditions. Together, these three items form the primary evidence used to evaluate the perceptual&#x2013;physical mismatch in visual realism.</p>
</sec>
<sec id="s4-2-3">
<label>4.2.3</label>
<title>Participants</title>
<p>We recruited 20 participants (13 male, 7 female, ages 18&#x2013;35) from our university. Recruitment was based on their knowledge of rendering and MR, screened using an eight-question quiz (provided in the <xref ref-type="sec" rid="s14">Supplementary Material</xref>). Regarding MR and rendering software experience, half of the participants had experience with professional rendering software, 17 had experience using MR-related equipment, and seven used MR weekly. All participants had normal or corrected-to-normal vision. Each participant was compensated with a participation fee for their time and contribution to the study.</p>
</sec>
<sec id="s4-2-4">
<label>4.2.4</label>
<title>Data collection and procedures</title>
<p>By placing real avatars into different virtual scenarios, we constructed a virtual reality experimental environment. Our user study followed a within-subjects design combining usability testing and questionnaires, and took less than 1&#xa0;h on average. Participants were first welcomed and reviewed a consent form, after which we briefly introduced the study&#x2019;s objectives and procedural steps. Participants wore a Quest 3 VR headset to observe the scene and decide how the variables should be adjusted. To simplify the process, the experimenters adjusted the variable values according to each participant&#x2019;s instructions until the participant found the optimal value. The details of the experiment are as follows:</p>
<p>Preparation and Training: Before the training session, we asked participants to adjust the chair height to a comfortable level and to fit the Quest 3 headset for the VR condition. We confirmed that each participant was comfortable and could clearly see the content in all display environments. We then explained the concepts of visual fusion level and the adjustable variables, and adjusted these variables within the system so that participants could perceive the corresponding feedback before proceeding with the experiment.</p>
<p>System Adjustment Process: During the experiment, we tested each scenario separately. Because the global variables affect the scene as a whole, we adjusted them first and then adjusted the local variables of each body part. Additionally, we randomized the sequence of scenarios and test variables for each participant. Default values were taken from our physics-based inverse rendering model, and all adjusted values were normalized to the range 0&#x2013;1. The physical variable values are shown in <xref ref-type="table" rid="T1">Table 1</xref>, and the user-tuned variable values in <xref ref-type="table" rid="T2">Table 2</xref>.</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>Descriptive statistics of the system&#x2019;s physical variable values for Scenes 1&#x2013;3.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Parameter</th>
<th align="center">Scene 1</th>
<th align="center">Scene 2</th>
<th align="center">Scene 3</th>
<th align="center">Avg</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">Light_intensity</td>
<td align="center">0.4</td>
<td align="center">0.4</td>
<td align="center">0.45</td>
<td align="center">0.42</td>
</tr>
<tr>
<td align="left">Light_type</td>
<td align="center">0.5</td>
<td align="center">0.5</td>
<td align="center">1.0</td>
<td align="center">0.67</td>
</tr>
<tr>
<td align="left">Shadow_direction</td>
<td align="center">0.0</td>
<td align="center">0.0</td>
<td align="center">0.0</td>
<td align="center">0.0</td>
</tr>
<tr>
<td align="left">Shadow_intensity</td>
<td align="center">0.45</td>
<td align="center">0.55</td>
<td align="center">0.45</td>
<td align="center">0.48</td>
</tr>
<tr>
<td align="left">Cloth_basecolor</td>
<td align="center">0.25</td>
<td align="center">0.55</td>
<td align="center">0.85</td>
<td align="center">0.55</td>
</tr>
<tr>
<td align="left">Cloth_roughness</td>
<td align="center">0.15</td>
<td align="center">0.30</td>
<td align="center">0.45</td>
<td align="center">0.30</td>
</tr>
<tr>
<td align="left">Hair_basecolor</td>
<td align="center">0.25</td>
<td align="center">0.45</td>
<td align="center">0.55</td>
<td align="center">0.42</td>
</tr>
<tr>
<td align="left">Hair_roughness</td>
<td align="center">0.15</td>
<td align="center">0.25</td>
<td align="center">0.25</td>
<td align="center">0.22</td>
</tr>
<tr>
<td align="left">Pants_roughness</td>
<td align="center">0.25</td>
<td align="center">0.25</td>
<td align="center">0.15</td>
<td align="center">0.22</td>
</tr>
<tr>
<td align="left">Shoes_roughness</td>
<td align="center">0.55</td>
<td align="center">0.55</td>
<td align="center">0.25</td>
<td align="center">0.45</td>
</tr>
<tr>
<td align="left">Skin_basecolor</td>
<td align="center">0.45</td>
<td align="center">0.45</td>
<td align="center">0.45</td>
<td align="center">0.45</td>
</tr>
<tr>
<td align="left">Skin_roughness</td>
<td align="center">0.25</td>
<td align="center">0.5</td>
<td align="center">0.25</td>
<td align="center">0.33</td>
</tr>
</tbody>
</table>
</table-wrap>
<table-wrap id="T2" position="float">
<label>TABLE 2</label>
<caption>
<p>Descriptive statistics of the system&#x2019;s user-tuned variable values for Scenes 1&#x2013;3.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Parameter</th>
<th align="center">Scene 1</th>
<th align="center">Scene 2</th>
<th align="center">Scene 3</th>
<th align="center">Avg</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">Light_intensity</td>
<td align="center">0.5</td>
<td align="center">0.5</td>
<td align="center">0.4</td>
<td align="center">0.47</td>
</tr>
<tr>
<td align="left">Light_type</td>
<td align="center">0.5</td>
<td align="center">1.0</td>
<td align="center">1.0</td>
<td align="center">0.83</td>
</tr>
<tr>
<td align="left">Shadow_direction</td>
<td align="center">0.25</td>
<td align="center">0.0</td>
<td align="center">0.25</td>
<td align="center">0.17</td>
</tr>
<tr>
<td align="left">Shadow_intensity</td>
<td align="center">0.45</td>
<td align="center">0.55</td>
<td align="center">0.55</td>
<td align="center">0.52</td>
</tr>
<tr>
<td align="left">Cloth_basecolor</td>
<td align="center">0.25</td>
<td align="center">0.65</td>
<td align="center">0.85</td>
<td align="center">0.58</td>
</tr>
<tr>
<td align="left">Cloth_roughness</td>
<td align="center">0.15</td>
<td align="center">0.30</td>
<td align="center">0.75</td>
<td align="center">0.40</td>
</tr>
<tr>
<td align="left">Hair_basecolor</td>
<td align="center">0.25</td>
<td align="center">0.40</td>
<td align="center">0.65</td>
<td align="center">0.43</td>
</tr>
<tr>
<td align="left">Hair_roughness</td>
<td align="center">0.15</td>
<td align="center">0.35</td>
<td align="center">0.25</td>
<td align="center">0.25</td>
</tr>
<tr>
<td align="left">Pants_roughness</td>
<td align="center">0.25</td>
<td align="center">0.25</td>
<td align="center">0.25</td>
<td align="center">0.25</td>
</tr>
<tr>
<td align="left">Shoes_roughness</td>
<td align="center">0.35</td>
<td align="center">0.25</td>
<td align="center">0.55</td>
<td align="center">0.38</td>
</tr>
<tr>
<td align="left">Skin_basecolor</td>
<td align="center">0.45</td>
<td align="center">0.65</td>
<td align="center">0.65</td>
<td align="center">0.58</td>
</tr>
<tr>
<td align="left">Skin_roughness</td>
<td align="center">0.25</td>
<td align="center">0.85</td>
<td align="center">0.25</td>
<td align="center">0.45</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s4-2-5">
<label>4.2.5</label>
<title>Data analysis and results</title>
<p>In this section, we present the statistical analysis of our collected data, outline the strategies participants employed during the adjustment process, and provide summarized qualitative feedback for each condition. We documented significance at levels of <inline-formula id="inf51">
<mml:math id="m57">
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>&#x3c;</mml:mo>
<mml:mn>0.05</mml:mn>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mo>&#x2217;</mml:mo>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>. All participants successfully completed the study tasks, resulting in no variance in accuracy metrics.</p>
<p>Preference Data Processing: <xref ref-type="table" rid="T3">Table 3</xref> summarizes participants&#x2019; preference judgments between the Physical and User-tuned conditions across the three VR scenes. For all scenes, a larger proportion of participants reported that the User-tuned condition appeared more visually realistic than the Physical condition.</p>
<table-wrap id="T3" position="float">
<label>TABLE 3</label>
<caption>
<p>Preference distributions between Physical and User-tuned conditions across VR scenes <inline-formula id="inf52">
<mml:math id="m58">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>20</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Scene</th>
<th align="center">Physical (%)</th>
<th align="center">User-tuned (%)</th>
<th align="center">No difference (%)</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">Computer room</td>
<td align="center">15</td>
<td align="center">77</td>
<td align="center">8</td>
</tr>
<tr>
<td align="left">Classroom</td>
<td align="center">10</td>
<td align="center">80</td>
<td align="center">10</td>
</tr>
<tr>
<td align="left">Studio</td>
<td align="center">30</td>
<td align="center">65</td>
<td align="center">5</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>As illustrated in <xref ref-type="fig" rid="F6">Figure 6</xref>, participants across all three scenes showed a clear tendency to favor the User-tuned condition over the Physical condition, while the proportion of &#x201c;No Difference&#x201d; responses remained relatively low. Specifically, in the Computer room scene, 77% of participants preferred the User-tuned condition, compared to 15% who favored the Physical condition, while 8% reported no noticeable difference. A similar pattern was observed in the Classroom scene, where 80% of participants preferred the User-tuned condition and 10% preferred the Physical condition. In the Studio scene, the preference for the User-tuned condition was least pronounced: 65% of participants selected it as more visually realistic, while 30% favored the Physical condition.</p>
<fig id="F6" position="float">
<label>FIGURE 6</label>
<caption>
<p>Distribution of participants&#x2019; visual realism preference judgments across Physical, User-tuned, and No Difference conditions in three realistic VR scenes <inline-formula id="inf53">
<mml:math id="m59">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>20</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
</caption>
<graphic xlink:href="frvir-07-1733259-g006.tif">
<alt-text content-type="machine-generated">Bar chart showing the percentage of participants preferring Physical, User-tuned, and No Difference in three VR scenes: Computer room, Classroom, and Studio. User-tuned preference is highest in all scenes, especially Classroom and Studio. Physical preference is higher in the Studio compared to other scenes.</alt-text>
</graphic>
</fig>
<p>Across all three scenes, the proportion of participants indicating no noticeable difference remained relatively low. These descriptive results suggest a consistent tendency for participants to favor perception-aligned parameter settings over physically derived parameters. To determine whether these observed preference distributions reflect statistically significant deviations from random choice, formal statistical analyses were conducted and are reported in the following section.</p>
<p>Participants&#x2019; preference judgments across the Physical, User-tuned, and No Difference conditions are summarized in <xref ref-type="table" rid="T3">Table 3</xref>. Across all three VR scenes, the User-tuned condition consistently received the highest proportion of selections, indicating a strong perceptual preference for user-adjusted realism parameters over physically derived values.</p>
<p>To examine whether these preference distributions deviated from random choice, chi-square goodness-of-fit tests were conducted for each scene. As reported in <xref ref-type="table" rid="T4">Table 4</xref>, the results show that preference distributions for Computer room, Classroom, and the Studio scene all significantly deviated from a uniform distribution across the three response options <inline-formula id="inf54">
<mml:math id="m60">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>&#x3c;</mml:mo>
<mml:mn>0.001</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>. The corresponding effect sizes, measured by Cram&#xe9;r&#x2019;s V, ranged from 0.55 to 0.70, indicating medium to strong effects. These results confirm that participants&#x2019; realism judgments are not randomly distributed.</p>
<table-wrap id="T4" position="float">
<label>TABLE 4</label>
<caption>
<p>Statistical analysis of participants&#x2019; preference judgments between Physical and User-tuned conditions across VR scenes <inline-formula id="inf55">
<mml:math id="m61">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>20</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Scene</th>
<th align="center">
<inline-formula id="inf56">
<mml:math id="m62">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">&#x3c7;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn mathvariant="bold-italic">2</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
<th align="center">df</th>
<th align="center">p-value</th>
<th align="center">Cram&#xe9;r&#x2019;s V</th>
<th align="center">Binomial p-value</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">Computer room</td>
<td align="center">25.80</td>
<td align="center">2</td>
<td align="center">
<inline-formula id="inf57">
<mml:math id="m63">
<mml:mrow>
<mml:mo>&#x3c;</mml:mo>
<mml:mn>0.001</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">0.66</td>
<td align="center">
<inline-formula id="inf58">
<mml:math id="m64">
<mml:mrow>
<mml:mo>&#x3c;</mml:mo>
<mml:mn>0.001</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
</tr>
<tr>
<td align="left">Classroom</td>
<td align="center">29.40</td>
<td align="center">2</td>
<td align="center">
<inline-formula id="inf59">
<mml:math id="m65">
<mml:mrow>
<mml:mo>&#x3c;</mml:mo>
<mml:mn>0.001</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">0.70</td>
<td align="center">
<inline-formula id="inf60">
<mml:math id="m66">
<mml:mrow>
<mml:mo>&#x3c;</mml:mo>
<mml:mn>0.001</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
</tr>
<tr>
<td align="left">Studio</td>
<td align="center">18.20</td>
<td align="center">2</td>
<td align="center">
<inline-formula id="inf61">
<mml:math id="m67">
<mml:mrow>
<mml:mo>&#x3c;</mml:mo>
<mml:mn>0.001</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="center">0.55</td>
<td align="center">0.02</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Chi-square goodness-of-fit tests examine whether preference distributions across three response options (Physical, User-tuned, No Difference) deviate from a uniform distribution. Binomial tests compare User-tuned and Physical conditions after excluding &#x201c;No Difference&#x201d; responses. Statistical significance was assessed at <inline-formula id="inf62">
<mml:math id="m68">
<mml:mrow>
<mml:mi>&#x3b1;</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0.05</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<p>To further assess whether participants with a clear preference systematically favored perception-aligned settings over physically derived parameters, binomial tests were performed after excluding responses indicating &#x201c;No Difference.&#x201d; The results demonstrate that, for all three scenes, participants selected the User-tuned condition significantly more often than the Physical condition. Strong preferences were observed in Computer room and Classroom <inline-formula id="inf63">
<mml:math id="m69">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>&#x3c;</mml:mo>
<mml:mn>0.001</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>, while a statistically significant but more moderate preference was found in the Studio scene <inline-formula id="inf64">
<mml:math id="m70">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0.02</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
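<p>These per-scene tests can be reproduced with standard SciPy routines, as in the sketch below. The counts shown are illustrative values approximating the percentages in <xref ref-type="table" rid="T3">Table 3</xref> rather than the exact study responses, and Cram&#xe9;r&#x2019;s V is computed with the goodness-of-fit formula.</p>
<code language="python"><![CDATA[
import math
from scipy import stats

# Illustrative counts approximating the Computer room row of Table 3
# (Physical, User-tuned, No Difference); not the exact study data.
counts = [3, 15, 2]
n = sum(counts)

# Chi-square goodness-of-fit against a uniform distribution over
# the three response options
chi2, p_chi2 = stats.chisquare(counts)
cramers_v = math.sqrt(chi2 / (n * (len(counts) - 1)))

# Binomial test of User-tuned vs. Physical after excluding
# "No Difference" responses
binom = stats.binomtest(counts[1], n=counts[0] + counts[1], p=0.5)
print(chi2, p_chi2, cramers_v, binom.pvalue)
]]></code>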
<p>Overall, these results provide consistent evidence that physically derived realism parameters do not fully align with human perceptual judgments in realistic VR scenes. Participants across different scenarios tended to prefer perception-aligned parameter configurations over physically computed values. These results support <xref ref-type="statement" rid="H1">Hypothesis H1</xref> and motivate the need to explicitly incorporate human perceptual factors in subsequent modeling and optimization of visual realism.</p>
<p>Statistical Analysis of the MR Visual Fusion Adjustment System: Our goal is to validate the effectiveness of our system by comparing conditions before and after adjustment. We therefore formulated a null hypothesis (H0), grounded in empirical findings from prior research and the testing conditions: there is no difference in the mean fusion values before and after adjustment. We analyzed the results for single scenarios and multiple scenarios separately, and show the qualitative results after adjustment in <xref ref-type="fig" rid="F7">Figure 7</xref>.<list list-type="bullet">
<list-item>
<p>Single scenarios: we conducted T-tests on the data of all participants to assess significance in each scenario. The P-value was 1.186e-70 for Scenario 1, 5.907e-75 for Scenario 2, and 1.380e-58 for Scenario 3. All P-values were far below 0.05, indicating significant differences between the before- and after-adjustment data in each scenario, as shown in <xref ref-type="fig" rid="F8">Figure 8</xref>.</p>
</list-item>
<list-item>
<p>Multiple scenarios: we also conducted a corresponding T-test on the default and adjusted fusion scores pooled across all three scenarios. The P-value was 2.504e-196, far below 0.05. This indicates that human adjustment produces a significant difference between the default parameters obtained through the physics-based inverse rendering method and the fusion values tuned according to user perception in our adjustment system, as shown in <xref ref-type="fig" rid="F9">Figure 9</xref>; a minimal sketch of this analysis is given after this list.</p>
</list-item>
</list>
</p>
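<p>The before/after comparison can be expressed as a paired test, as sketched below. We assume a paired T-test given the within-subjects design (the text does not state the exact variant), and the rating arrays are synthetic stand-ins for the collected fusion scores.</p>
<code language="python"><![CDATA[
import numpy as np
from scipy import stats

rng = np.random.default_rng(0)

# Synthetic stand-ins for per-trial fusion scores; the real study
# data are summarized in Figures 8 and 9, not reproduced here.
before = rng.normal(3.5, 1.0, size=60)           # physical defaults
after = before + rng.normal(1.5, 0.5, size=60)   # user-tuned

# Paired T-test matching the within-subjects before/after design
t_stat, p_value = stats.ttest_rel(before, after)
print(t_stat, p_value)
]]></code>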
<fig id="F7" position="float">
<label>FIGURE 7</label>
<caption>
<p>Qualitative results for each scene after adjusting from the default values to the optimal range, showing a significant improvement in fusion across the different scenarios.</p>
</caption>
<graphic xlink:href="frvir-07-1733259-g007.tif">
<alt-text content-type="machine-generated">A series of images showing a person walking in three different environments: a computer room, classroom, and studio. Each environment features two variations labeled as &#x22;Default&#x22; and &#x22;Optimal&#x22; with minimal visible differences. The person carries a book and walks toward the foreground. &#x2018;Default&#x2019; denotes the physical values; &#x2018;Optimal&#x2019; denotes the user-tuned values.</alt-text>
</graphic>
</fig>
<fig id="F8" position="float">
<label>FIGURE 8</label>
<caption>
<p>The quantitative results of the T-test for each scene. These results indicate that the fusion adjustment system significantly increased the visual fusion in every scenario.</p>
</caption>
<graphic xlink:href="frvir-07-1733259-g008.tif">
<alt-text content-type="machine-generated">Box plots comparing fusion levels before and after adjustment in three scenes. Scene 1 shows increased fusion levels after adjustment. Scene 2 shows similar results. Scene 3 also shows improvement after adjustment. All p-values are less than 0.05, indicating significant differences.</alt-text>
</graphic>
</fig>
<fig id="F9" position="float">
<label>FIGURE 9</label>
<caption>
<p>The quantitative results of the T-test for all scene variables. These results indicate that the fusion adjustment system is significant across all scene variables.</p>
</caption>
<graphic xlink:href="frvir-07-1733259-g009.tif">
<alt-text content-type="machine-generated">Box plot comparing fusion levels before and after adjustment. The &#x201c;Before Adjustment&#x201d; box shows lower fusion levels, while the &#x201c;After Adjustment&#x201d; box indicates higher levels, indicating significant differences.</alt-text>
</graphic>
</fig>
<p>Optimal Range Estimation: Because participants continuously adjusted the system variables to reach their optimal fusion, we can derive the optimal parameter combinations for the test environments from these data. We first analyzed each scene separately. For the global and local variables of each scene, we applied the method proposed in the previous section: by optimizing the density difference inside and outside the interval, we obtained the optimal parameter ranges. The ranges calculated for every scene are shown in <xref ref-type="table" rid="T5">Table 5</xref>, with all variable values normalized to 0&#x2013;1. We also analyzed the intersection of the ranges across all scenes and found that a shared range exists for a number of parameters (a small computational sketch follows <xref ref-type="table" rid="T5">Table 5</xref>). More details are given in the Appendix.</p>
<table-wrap id="T5" position="float">
<label>TABLE 5</label>
<caption>
<p>Descriptive statistics of system variable ranges for Scenes 1&#x2013;3, together with the intersection range across all scenes. &#x201c;None&#x201d; means no shared range exists and each scene is set independently.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Parameter</th>
<th align="center">Scene 1</th>
<th align="center">Scene 2</th>
<th align="center">Scene 3</th>
<th align="center">Intersection</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">Light_intensity</td>
<td align="center">(0.4, 0.6)</td>
<td align="center">(0.4, 0.6)</td>
<td align="center">(0.45, 0.55)</td>
<td align="center">(0.45, 0.55)</td>
</tr>
<tr>
<td align="left">Light_type</td>
<td align="center">0.5</td>
<td align="center">0.5</td>
<td align="center">1.0</td>
<td align="center">None</td>
</tr>
<tr>
<td align="left">Shadow_direction</td>
<td align="center">0.0</td>
<td align="center">0.0</td>
<td align="center">0.0</td>
<td align="center">0.0</td>
</tr>
<tr>
<td align="left">Shadow_intensity</td>
<td align="center">(0.45, 0.55)</td>
<td align="center">(0.45, 0.55)</td>
<td align="center">(0.45, 0.55)</td>
<td align="center">(0.45, 0.55)</td>
</tr>
<tr>
<td align="left">Cloth_basecolor</td>
<td align="center">(0.45, 0.55)</td>
<td align="center">(0.45, 0.55)</td>
<td align="center">(0.45, 0.55)</td>
<td align="center">(0.45, 0.55)</td>
</tr>
<tr>
<td align="left">Cloth_roughness</td>
<td align="center">(0.15, 0.45)</td>
<td align="center">(0, 0.30)</td>
<td align="center">(0.45, 0.55)</td>
<td align="center">None</td>
</tr>
<tr>
<td align="left">Hair_basecolor</td>
<td align="center">(0.25, 0.35)</td>
<td align="center">(0.45, 0.55)</td>
<td align="center">(0.45, 0.55)</td>
<td align="center">None</td>
</tr>
<tr>
<td align="left">Hair_roughness</td>
<td align="center">(0, 0.25)</td>
<td align="center">(0, 0.25)</td>
<td align="center">(0.25, 0.35)</td>
<td align="center">None</td>
</tr>
<tr>
<td align="left">Pants_roughness</td>
<td align="center">(0, 0.25)</td>
<td align="center">(0.25, 0.35)</td>
<td align="center">(0.15, 0.45)</td>
<td align="center">None</td>
</tr>
<tr>
<td align="left">Shoes_roughness</td>
<td align="center">(0.45, 0.55)</td>
<td align="center">(0.45, 0.55)</td>
<td align="center">(0.25, 0.75)</td>
<td align="center">None</td>
</tr>
<tr>
<td align="left">Skin_basecolor</td>
<td align="center">(0.45, 0.55)</td>
<td align="center">(0.45, 0.55)</td>
<td align="center">(0.45, 0.55)</td>
<td align="center">(0.45, 0.55)</td>
</tr>
<tr>
<td align="left">Skin_roughness</td>
<td align="center">(0.15, 0.45)</td>
<td align="center">(0.4, 0.6)</td>
<td align="center">(0.25, 0.35)</td>
<td align="center">None</td>
</tr>
</tbody>
</table>
</table-wrap>
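<p>The intersection column of <xref ref-type="table" rid="T5">Table 5</xref> can be computed directly from the per-scene ranges, as in this small sketch; an empty overlap yields None, matching the &#x201c;None&#x201d; entries in the table.</p>
<code language="python"><![CDATA[
def intersect_ranges(ranges):
    """Intersect per-scene (low, high) ranges; None if no shared range exists."""
    lo = max(r[0] for r in ranges)
    hi = min(r[1] for r in ranges)
    return (lo, hi) if lo < hi else None

# Light_intensity row of Table 5: shared range (0.45, 0.55)
print(intersect_ranges([(0.4, 0.6), (0.4, 0.6), (0.45, 0.55)]))
# Cloth_roughness row: no shared range, prints None
print(intersect_ranges([(0.15, 0.45), (0.0, 0.30), (0.45, 0.55)]))
]]></code>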
</sec>
</sec>
</sec>
<sec id="s5">
<label>5</label>
<title>Findings and discussions</title>
<p>Our primary objective is to validate the effectiveness of our physics-based inverse rendering model and user perception-based MR system, in order to identify the optimal system parameter combinations that achieve the best possible integration. Although physics-based inverse rendering has rapidly advanced in achieving photorealistic quality in the implementation of MR systems, the physics-based rendering model does not entirely align with human perception of optimal visual integration in MR environments. Therefore, our research aims to develop an adjustable rendering system for MR that combines physical models with user perception experiences. By adjusting parameters based on feedback from multiple users, we seek to identify the MR system parameters that best meet user preferences, thereby achieving optimal integration in test scenarios. This will provide valuable reference for the development of existing MR system pipelines. Our subsequent findings and discussions will focus on the interplay between physical rendering models and perceptual outcomes.</p>
<sec id="s5-1">
<label>5.1</label>
<title>Does physically accurate rendering correspond to what users perceive as visually real?</title>
<p>The experimental results provide clear empirical evidence that this consistency does not always hold. Across all three scenarios, participants consistently favored perception-aligned parameter settings over physically computed values, supporting <xref ref-type="statement" rid="H1">Hypothesis H1</xref>. These findings challenge the common assumption that physically accurate rendering alone is sufficient to achieve convincing visual realism in VR environments.</p>
<p>One possible explanation for this mismatch lies in the nature of human visual perception. While physically based rendering aims to simulate light transport and material interactions according to physical laws, human perception of realism is influenced by additional factors such as visual comfort, contextual expectation, and tolerance to physical inaccuracies. Users may therefore prefer parameter configurations that deviate from strict physical correctness but better match their subjective experience of what appears &#x201c;natural&#x201d; or &#x201c;believable&#x201d; in a given VR context.</p>
<p>The observed differences in preference strength across scenes further suggest that the perceptual&#x2013;physical mismatch is context-dependent. In the Computer room and Classroom scenarios, strong preferences for the User-tuned condition indicate that users are particularly sensitive to realism-related parameters in structured indoor environments, where lighting consistency and material appearance strongly affect plausibility. In contrast, although the Studio scene also showed a significant preference for perception-aligned settings, the effect was comparatively more moderate. This may be attributed to greater perceptual tolerance in broadcast-style environments, where stylization and controlled lighting are more common and therefore less likely to violate user expectations.</p>
<p>Importantly, these findings highlight a fundamental limitation of current VR design workflows that rely exclusively on physically derived parameters. Without mechanisms to capture and incorporate user perceptual preferences, designers risk producing visually accurate yet perceptually unconvincing scenes. The results of this chapter demonstrate the necessity of treating perceptual realism as a first-class component in VR scene design rather than as a byproduct of physical simulation.</p>
</sec>
<sec id="s5-2">
<label>5.2</label>
<title>Is MR scene adjustable fusion system beneficial?</title>
<p>Our study results reveal that an MR system combining both physical and perceptual aspects can optimize users&#x2019; visual fusion in MR environments. We conducted evaluations across different scenarios and found this approach effective. During user testing, participants observed the visual fusion between the inserted avatar and the environment, and regardless of whether the MR environment was a classroom, computer room, or studio, they experienced a noticeable improvement in visual fusion after adjusting the MR system. This subjective feedback aligns closely with the findings of <xref ref-type="bibr" rid="B35">Wei and Luximon (2024)</xref>, indicating that there is still a gap between users&#x2019; perception of visual fusion in MR systems and the values derived from physics-based calculations. In comparison, we conducted a more in-depth exploration within the MR system, allowing users to make intuitive judgments and thereby providing more credible experimental results.</p>
<p>Given the significant impact of ambient lighting on overall effectiveness, we selected test environments based on common lighting types. This allowed users to intuitively perceive the interaction effects between the inserted avatar and the MR environment, and to base their evaluations on their prior understanding of each scene and its typical lighting. During the testing process, we first had users observe the MR scene configured with the rendering factor values obtained from the physics-based inverse rendering model and asked them to judge the visual fusion in this state. Most users were dissatisfied with this default fusion, particularly noting that the global lighting variables and the clothing materials did not completely match the settings of the current scene, resulting in a noticeable sense of separation.</p>
<p>We also found that people tend to prefer point light sources in classroom settings, while in studio environments they lean towards spotlights. This reflects the differing lighting needs of various scenarios and highlights the sensitivity of human visual perception to these differences. It is understandable that classrooms typically require even lighting so that all students can clearly see the blackboard and other visual aids, whereas studios use spotlights to highlight specific objects or areas.</p>
</sec>
<sec id="s5-3">
<label>5.3</label>
<title>The optimal range of scenarios parameters</title>
<p>Another objective of our study is to identify the optimal parameter combination ranges for visual fusion in the MR test scenarios. We conducted an optimal range analysis of all adjustable variables for each scenario following <xref ref-type="sec" rid="s4-2">Section 4.2</xref>. Delving deeper into the underlying factors, we explored the issue from two aspects. First, by comparing the optimal adjustment ranges for each scenario with the default values obtained from the physics-based inverse rendering model, we found that some variables were distributed near the default values, while others deviated. This indicates that relying solely on physics-based models to design MR systems is still insufficient to fully meet users&#x2019; optimal perception of visual fusion. Specifically, most roughness-related variables, including Cloth_roughness, Hair_roughness, Pants_roughness, and Skin_roughness, have optimal ranges that tend to lie above the default values. This indicates that, from the users&#x2019; perceptual standpoint, the avatar&#x2019;s overall surface appearance in these test environments should be rougher rather than more specular.</p>
<p>Secondly, we observed that the optimal ranges for the same variables may vary between scenarios while also sharing some commonalities. Although the default lighting types differ between the computer room and the classroom, most of the optimal ranges are similar, since both scenarios are classroom-like spaces. The primary exceptions are the distributions of Pants_roughness and Skin_roughness, which show significant differences with almost no overlap. This indicates that in the computer room and classroom scenarios, the roughness of pants and skin requires special attention; in the computer room environment, users found that lower values of Pants_roughness and Skin_roughness enhance the visual fusion. Meanwhile, compared to the other two scenarios, the Studio scenario has fewer variables with overlapping optimal ranges due to its greater environmental differences. Only a few variables (Light_intensity, Shadow_direction, Shadow_intensity, Cloth_basecolor, and Skin_basecolor) have overlapping ranges, meaning they are less influenced by the environment and need only slight tuning for scene fusion.</p>
</sec>
<sec id="s5-4">
<label>5.4</label>
<title>Key factors influencing the degree of user fusion perception in MR</title>
<p>After participants completed the system adjustments, we also asked them to rank the importance of the variables. From the ranking data, we found that 77.78% of participants considered lighting the most important of the global variables (illumination and shadow). Regarding the impact of different body parts on the fusion of the avatar with the background environment, 77.78% of participants ranked skin as the most critical, while 50% considered shoes to have the least impact. Additionally, 44.44% ranked cloth as the second most important, with hair and pants carrying relatively similar weights in the middle rankings. Our findings are consistent with prior work by <xref ref-type="bibr" rid="B11">Gon&#xe7;alves et al. (2023)</xref>, both demonstrating that global lighting has the greatest impact on visual perception, followed by shadows. Meanwhile, users in MR environments perceive that the upper body of the avatar, including the skin and top garments, has a greater influence on their judgment of the fusion effect, which is also consistent with previous work by <xref ref-type="bibr" rid="B33">Van der Veer et al. (2018)</xref>.</p>
</sec>
</sec>
<sec sec-type="conclusion" id="s6">
<label>6</label>
<title>Conclusion</title>
<p>Traditional MR scene design has largely depended on designer experience and repeated perceptual adjustments, making it difficult to reason about how rendering parameters should be configured to achieve convincing visual realism. In this work, we propose AIMERS, a perception-aligned MR realism framework that integrates neural inverse rendering with immersive perceptual parameter capture, with the goal of understanding where perceptual optimality lies in MR visual fusion.</p>
<p>We first utilize AI-based neural inverse rendering with diffusion-based priors, allowing us to reconstruct geometry and physically-based material attributes from real scenes while eliminating baked lighting artifacts. This provides lighting-independent baseline parameters for MR assets when inserted into new environments. Building on these physically grounded factors, we design a parameter adjustment interface derived from the physically-based rendering model and tailored for complex real-world avatars. The interface exposes global and local variables in a perceptually meaningful way, enabling users to explore and select visually convincing realism settings.</p>
<p>Through systematic perceptual experiments across multiple MR scenarios, we analyze the distributions of user-preferred configurations and derive optimal parameter ranges for visual fusion rather than a single fixed solution. These ranges offer reliable design references and reveal systematic deviations between physically accurate and perceptually optimal settings. Our findings further show that global illumination and material properties of the upper body (especially roughness and base color) exert the strongest influence on perceived realism, while other variables have more limited contributions.</p>
<p>Overall, AIMERS reframes MR realism as a problem of aligning physical accuracy with human perception. By combining AI-based parameter extraction with perceptual measurement, our work provides principled guidance for configuring MR scenes and contributes empirical insight into how different rendering parameters shape the perception of visual fusion. We believe these results form a foundation for future research on perception-aware MR system design, inverse rendering, and visual realism modeling.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s7">
<title>Data availability statement</title>
<p>The raw data supporting the conclusions of this article will be made available by the authors, without undue reservation.</p>
</sec>
<sec sec-type="ethics-statement" id="s8">
<title>Ethics statement</title>
<p>The studies involving humans were approved by Faculty Research Committee (on behalf of PolyU Institutional Review Board). The studies were conducted in accordance with the local legislation and institutional requirements. The participants provided their written informed consent to participate in this study. Written informed consent was obtained from the individual(s) for the publication of any potentially identifiable images or data included in this article.</p>
</sec>
<sec sec-type="author-contributions" id="s9">
<title>Author contributions</title>
<p>XW: Software, Investigation, Formal Analysis, Validation, Methodology, Writing &#x2013; original draft, Data curation, Project administration, Visualization, Conceptualization. YW: Writing &#x2013; review and editing, Validation, Conceptualization, Visualization. AZ: Validation, Writing &#x2013; review and editing, Visualization, Data curation. YL: Formal Analysis, Resources, Visualization, Writing &#x2013; review and editing, Supervision.</p>
</sec>
<sec sec-type="COI-statement" id="s11">
<title>Conflict of interest</title>
<p>The author(s) declared that this work was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="ai-statement" id="s12">
<title>Generative AI statement</title>
<p>The author(s) declared that generative AI was not used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p>
</sec>
<sec sec-type="disclaimer" id="s13">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec sec-type="supplementary-material" id="s14">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/frvir.2026.1733259/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/frvir.2026.1733259/full&#x23;supplementary-material</ext-link>
</p>
<supplementary-material xlink:href="DataSheet1.pdf" id="SM1" mimetype="application/pdf" xmlns:xlink="http://www.w3.org/1999/xlink"/>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Boss</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Braun</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Jampani</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Barron</surname>
<given-names>J. T.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Lensch</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2021a</year>). &#x201c;<article-title>Nerd: neural reflectance decomposition from image collections</article-title>,&#x201d; in <source>ICCV</source>.</mixed-citation>
</ref>
<ref id="B2">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Boss</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Jampani</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Braun</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Barron</surname>
<given-names>J. T.</given-names>
</name>
<name>
<surname>Lensch</surname>
<given-names>H. P. A.</given-names>
</name>
</person-group> (<year>2021b</year>). &#x201C;<article-title>Neural-PIL: neural pre-integrated lighting for reflectance decomposition</article-title>,&#x201d; in <source>Proceedings of the 35th International Conference on Neural Information Processing Systems (NIPS &#x2019;21)</source> (<publisher-name>Red Hook, NY, USA: Curran Associates Inc</publisher-name>.) Article <volume>818</volume>, <fpage>10691</fpage>&#x2013;<lpage>10704</lpage>.</mixed-citation>
</ref>
<ref id="B3">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Choi</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Kim</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Kim</surname>
<given-names>Y. M.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>IBL-NeRF: image-based lighting formulation of neural radiance fields</article-title>. <source>Comput. Graph. Forum</source> <volume>42</volume> (<issue>7</issue>), <pub-id pub-id-type="doi">10.1111/cgf.14929</pub-id>
</mixed-citation>
</ref>
<ref id="B4">
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Du</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>El-Zanfaly</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2024</year>). &#x201c;<article-title>Subtle visual cues in mixed reality: influencing user perception and facilitating interaction</article-title>,&#x201d; in <conf-name>Proceedings of the 16th Conference on Creativity and Cognition</conf-name>, <fpage>556</fpage>&#x2013;<lpage>560</lpage>.</mixed-citation>
</ref>
<ref id="B5">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fan</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Ng</surname>
<given-names>T.-T.</given-names>
</name>
<name>
<surname>Koenig</surname>
<given-names>B. L.</given-names>
</name>
<name>
<surname>Herberg</surname>
<given-names>J. S.</given-names>
</name>
<name>
<surname>Jiang</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Shen</surname>
<given-names>Z.</given-names>
</name>
<etal/>
</person-group> (<year>2017</year>). <article-title>Image visual realism: from human perception to machine computation</article-title>. <source>IEEE Trans. Pattern Anal. Mach. Intell.</source> <volume>40</volume>, <fpage>2180</fpage>&#x2013;<lpage>2193</lpage>. <pub-id pub-id-type="doi">10.1109/TPAMI.2017.2747150</pub-id>
<pub-id pub-id-type="pmid">28866484</pub-id>
</mixed-citation>
</ref>
<ref id="B6">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fleming</surname>
<given-names>R. W.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Visual perception of materials and their properties</article-title>. <source>Vis. Res.</source> <volume>94</volume>, <fpage>62</fpage>&#x2013;<lpage>75</lpage>. <pub-id pub-id-type="doi">10.1016/j.visres.2013.11.004</pub-id>
<pub-id pub-id-type="pmid">24291494</pub-id>
</mixed-citation>
</ref>
<ref id="B7">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gardner</surname>
<given-names>M.-A.</given-names>
</name>
<name>
<surname>Sunkavalli</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Yumer</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Shen</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Gambaretto</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Gagn&#xe9;</surname>
<given-names>C.</given-names>
</name>
<etal/>
</person-group> (<year>2017</year>). <article-title>Learning to predict indoor illumination from a single image</article-title>. <source>ACM Trans. Graph.</source> <volume>36</volume> (<issue>6</issue>), <fpage>1</fpage>&#x2013;<lpage>14</lpage>. <pub-id pub-id-type="doi">10.1145/3130800.3130891</pub-id>
</mixed-citation>
</ref>
<ref id="B8">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Garon</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Sunkavalli</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Hadap</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Carr</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Lalonde</surname>
<given-names>J.-F.</given-names>
</name>
</person-group> (<year>2019</year>). &#x201C;<article-title>Fast spatially-varying indoor lighting estimation</article-title>,&#x201d; in <source>Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition</source>, <fpage>6908</fpage>&#x2013;<lpage>6917</lpage>.</mixed-citation>
</ref>
<ref id="B10">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gierlinger</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Danch</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Stork</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>Rendering techniques for mixed reality</article-title>. <source>J. Real-Time Image Process.</source> <volume>5</volume>, <fpage>109</fpage>&#x2013;<lpage>120</lpage>. <pub-id pub-id-type="doi">10.1007/s11554-009-0137-x</pub-id>
</mixed-citation>
</ref>
<ref id="B11">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gon&#xe7;alves</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Melo</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Monteiro</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Coelho</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Bessa</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>The role of different light settings on the perception of realism in virtual replicas in immersive virtual reality</article-title>. <source>Comput. and Graph.</source> <volume>117</volume>, <fpage>172</fpage>&#x2013;<lpage>182</lpage>. <pub-id pub-id-type="doi">10.1016/j.cag.2023.10.021</pub-id>
</mixed-citation>
</ref>
<ref id="B12">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hughes</surname>
<given-names>C. E.</given-names>
</name>
<name>
<surname>Konttinen</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Pattanaik</surname>
<given-names>S. N.</given-names>
</name>
</person-group> (<year>2004</year>). <article-title>The future of mixed reality: issues in illumination and shadows</article-title>. <fpage>6</fpage>&#x2013;<lpage>9</lpage>.</mixed-citation>
</ref>
<ref id="B13">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Karsch</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Hedau</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Forsyth</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Hoiem</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>Rendering synthetic objects into legacy photographs</article-title>. <source>ACM Trans. Graph.</source> <volume>30</volume> (<issue>6</issue>), <fpage>1</fpage>&#x2013;<lpage>12</lpage>. <pub-id pub-id-type="doi">10.1145/2070781.2024191</pub-id>
</mixed-citation>
</ref>
<ref id="B14">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kent</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Snider</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Gopsill</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Hicks</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Mixed reality in design prototyping: a systematic review</article-title>. <source>Des. Stud.</source> <volume>77</volume>, <fpage>101046</fpage>. <pub-id pub-id-type="doi">10.1016/j.destud.2021.101046</pub-id>
</mixed-citation>
</ref>
<ref id="B15">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kerbl</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Kopanas</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Leimk&#xfc;hler</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Drettakis</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>3D Gaussian splatting for real-time radiance field rendering</article-title>. <source>ACM Trans. Graph.</source> <volume>42</volume> (<issue>4</issue>), <fpage>1</fpage>&#x2013;<lpage>14</lpage>. <pub-id pub-id-type="doi">10.1145/3592433</pub-id>
</mixed-citation>
</ref>
<ref id="B16">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kyrlitsias</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Michael-Grigoriou</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Social interaction with agents and avatars in immersive virtual environments: a survey</article-title>. <source>Front. Virtual Real.</source> <volume>2</volume>, <fpage>786665</fpage>. <pub-id pub-id-type="doi">10.3389/frvir.2021.786665</pub-id>
</mixed-citation>
</ref>
<ref id="B17">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>LeGendre</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Ma</surname>
<given-names>W.-C.</given-names>
</name>
<name>
<surname>Fyffe</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Flynn</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Charbonnel</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Busch</surname>
<given-names>J.</given-names>
</name>
<etal/>
</person-group> (<year>2019</year>). &#x201c;<article-title>DeepLight: learning illumination for unconstrained mobile mixed reality</article-title>,&#x201d; in <source>Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition</source>, <fpage>5918</fpage>&#x2013;<lpage>5928</lpage>.</mixed-citation>
</ref>
<ref id="B18">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Shafiei</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Ramamoorthi</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Sunkavalli</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Chandraker</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2020</year>). &#x201c;<article-title>Inverse rendering for complex indoor scenes: shape, spatially-varying lighting and SVBRDF from a single image</article-title>,&#x201d; in <source>Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition</source>, <fpage>2475</fpage>&#x2013;<lpage>2484</lpage>.</mixed-citation>
</ref>
<ref id="B19">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Cheng</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Pan</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2023</year>). &#x201C;<article-title>Multi-view inverse rendering for large-scale real-world indoor scenes</article-title>,&#x201D; in <source>Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition</source>, <fpage>12499</fpage>&#x2013;<lpage>12509</lpage>.</mixed-citation>
</ref>
<ref id="B20">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liang</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Guan</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Vijaykumar</surname>
<given-names>N.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>SPIDR: SDF-based neural point fields for illumination and deformation</article-title>. <source>arXiv preprint arXiv:2210.08398</source>.</mixed-citation>
</ref>
<ref id="B21">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Marques</surname>
<given-names>B. A. D.</given-names>
</name>
<name>
<surname>Clua</surname>
<given-names>E. W. G.</given-names>
</name>
<name>
<surname>Vasconcelos</surname>
<given-names>C. N.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Deep spherical harmonics light probe estimator for mixed reality games</article-title>. <source>Comput. and Graph.</source> <volume>76</volume>, <fpage>96</fpage>&#x2013;<lpage>106</lpage>. <pub-id pub-id-type="doi">10.1016/j.cag.2018.09.003</pub-id>
</mixed-citation>
</ref>
<ref id="B22">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Milgram</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Kishino</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>1994</year>). <article-title>A taxonomy of mixed reality visual displays</article-title>. <source>IEICE Trans. Inf. Syst.</source> <volume>77</volume>, <fpage>1321</fpage>&#x2013;<lpage>1329</lpage>.</mixed-citation>
</ref>
<ref id="B23">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Munkberg</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Hasselgren</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Shen</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Gao</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Evans</surname>
<given-names>A.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). &#x201c;<article-title>Extracting triangular 3D models, materials, and lighting from images</article-title>,&#x201d; in <source>Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition</source>, <fpage>8280</fpage>&#x2013;<lpage>8290</lpage>.</mixed-citation>
</ref>
<ref id="B24">
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Nasr Eddine</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Junjun</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2019</year>). &#x201c;<article-title>Geospatial data holographic rendering using windows mixed reality</article-title>,&#x201d; in <conf-name>E-Learning and Games: 12th International Conference, Edutainment 2018</conf-name>, <conf-loc>Xi&#x2019;an, China</conf-loc>, <conf-date>June 28&#x2013;30, 2018</conf-date> (<publisher-name>Springer</publisher-name>), <fpage>21</fpage>&#x2013;<lpage>25</lpage>.</mixed-citation>
</ref>
<ref id="B25">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ohta</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>1999</year>). <article-title>Mixed reality: merging real and virtual worlds</article-title>.
</mixed-citation>
</ref>
<ref id="B26">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Patel</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Panchotiya</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Patel</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Budharani</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Ribadiya</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>A survey: virtual, augmented and mixed reality in education</article-title>. <source>Int. J. Eng. Res. and Technol. (IJERT)</source> <volume>9</volume> (<issue>5</issue>), <fpage>1067</fpage>&#x2013;<lpage>1072</lpage>. <pub-id pub-id-type="doi">10.17577/IJERTV9IS050652</pub-id>
</mixed-citation>
</ref>
<ref id="B27">
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Petikam</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Chalmers</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Rhee</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2018</year>). &#x201c;<article-title>Visual perception of real world depth map resolution for mixed reality rendering</article-title>,&#x201d; in <conf-name>2018 IEEE Conference on Virtual Reality and 3D User Interfaces (VR)</conf-name> (<publisher-name>IEEE</publisher-name>), <fpage>401</fpage>&#x2013;<lpage>408</lpage>.</mixed-citation>
</ref>
<ref id="B28">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Potemin</surname>
<given-names>I. S.</given-names>
</name>
<name>
<surname>Zhdanov</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Bogdanov</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Zhdanov</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Livshits</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Analysis of the visual perception conflicts in designing mixed reality systems</article-title>. <source>Opt. Des. and Test. VIII (SPIE)</source> <volume>10815</volume>, <fpage>181</fpage>&#x2013;<lpage>194</lpage>. <pub-id pub-id-type="doi">10.1117/12.2503397</pub-id>
</mixed-citation>
</ref>
<ref id="B29">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rokhsaritalemi</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Sadeghi-Niaraki</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Choi</surname>
<given-names>S.-M.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>A review on mixed reality: current trends, challenges and prospects</article-title>. <source>Appl. Sci.</source> <volume>10</volume>, <fpage>636</fpage>. <pub-id pub-id-type="doi">10.3390/app10020636</pub-id>
</mixed-citation>
</ref>
<ref id="B30">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Song</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Funkhouser</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2019</year>). &#x201c;<article-title>Neural illumination: lighting prediction for indoor environments</article-title>,&#x201d; in <source>Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition</source>, <fpage>6918</fpage>&#x2013;<lpage>6926</lpage>.</mixed-citation>
</ref>
<ref id="B31">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Srinivasan</surname>
<given-names>P. P.</given-names>
</name>
<name>
<surname>Mildenhall</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Tancik</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Barron</surname>
<given-names>J. T.</given-names>
</name>
<name>
<surname>Tucker</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Snavely</surname>
<given-names>N.</given-names>
</name>
</person-group> (<year>2020</year>). &#x201C;<article-title>Lighthouse: predicting lighting volumes for spatially-coherent illumination</article-title>,&#x201d; in <source>Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition</source>, <fpage>8080</fpage>&#x2013;<lpage>8089</lpage>.</mixed-citation>
</ref>
<ref id="B32">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tang</surname>
<given-names>Y.-M.</given-names>
</name>
<name>
<surname>Au</surname>
<given-names>K. M.</given-names>
</name>
<name>
<surname>Lau</surname>
<given-names>H. C.</given-names>
</name>
<name>
<surname>Ho</surname>
<given-names>G. T.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>C.-H.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Evaluating the effectiveness of learning design with mixed reality (MR) in higher education</article-title>. <source>Virtual Real.</source> <volume>24</volume>, <fpage>797</fpage>&#x2013;<lpage>807</lpage>. <pub-id pub-id-type="doi">10.1007/s10055-020-00427-9</pub-id>
</mixed-citation>
</ref>
<ref id="B33">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Van der Veer</surname>
<given-names>A. H.</given-names>
</name>
<name>
<surname>Alsmith</surname>
<given-names>A. J.</given-names>
</name>
<name>
<surname>Longo</surname>
<given-names>M. R.</given-names>
</name>
<name>
<surname>Wong</surname>
<given-names>H. Y.</given-names>
</name>
<name>
<surname>Mohler</surname>
<given-names>B. J.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Where am I in virtual reality?</article-title> <source>PLoS One</source> <volume>13</volume>, <fpage>e0204358</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pone.0204358</pub-id>
<pub-id pub-id-type="pmid">30304008</pub-id>
</mixed-citation>
</ref>
<ref id="B34">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Application of virtual reality technology in video news reporting</article-title>. <source>J. Electr. Syst.</source> <volume>20</volume>, <fpage>160</fpage>&#x2013;<lpage>166</lpage>.</mixed-citation>
</ref>
<ref id="B35">
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Wei</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Luximon</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2024</year>). &#x201c;<article-title>Exploring factors influencing visual realism in augmented reality user experience</article-title>,&#x201d; in <conf-name>International Conference on Human-Computer Interaction</conf-name> (<publisher-name>Springer</publisher-name>), <fpage>169</fpage>&#x2013;<lpage>182</lpage>.</mixed-citation>
</ref>
<ref id="B36">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Yariv</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Gu</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Kasten</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Lipman</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Volume rendering of neural implicit surfaces</article-title>. <source>Adv. Neural Inf. Process. Syst.</source> <volume>34</volume>, <fpage>4805</fpage>&#x2013;<lpage>4815</lpage>.</mixed-citation>
</ref>
<ref id="B37">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhan</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Chang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Lu</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Ma</surname>
<given-names>F.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). &#x201c;<article-title>EMLight: lighting estimation via spherical distribution approximation</article-title>,&#x201d; in <source>Proceedings of the AAAI Conference on Artificial Intelligence</source>, <comment>Vol. 35, No. 4</comment>, <fpage>3287</fpage>&#x2013;<lpage>3295</lpage>. <pub-id pub-id-type="doi">10.1609/aaai.v35i4.16440</pub-id>
</mixed-citation>
</ref>
<ref id="B38">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Luan</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Bala</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Snavely</surname>
<given-names>N.</given-names>
</name>
</person-group> (<year>2021</year>). &#x201c;<article-title>PhySG: inverse rendering with spherical Gaussians for physics-based material editing and relighting</article-title>,&#x201d; in <source>Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition</source>, <fpage>5453</fpage>&#x2013;<lpage>5462</lpage>.</mixed-citation>
</ref>
<ref id="B39">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>He</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Fu</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Jia</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>X.</given-names>
</name>
</person-group> (<year>2022</year>). &#x201c;<article-title>Modeling indirect illumination for inverse rendering</article-title>,&#x201d; in <source>CVPR</source>.</mixed-citation>
</ref>
<ref id="B40">
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Wan</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Song</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Luo</surname>
<given-names>Y.</given-names>
</name>
<etal/>
</person-group> (<year>2025</year>). &#x201c;<article-title>Directional factorization for 2D Gaussian splatting</article-title>,&#x201d; in <conf-name>Proceedings of the Computer Vision and Pattern Recognition Conference</conf-name>, <fpage>26483</fpage>&#x2013;<lpage>26492</lpage>.</mixed-citation>
</ref>
<ref id="B41">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhdanov</surname>
<given-names>A. D.</given-names>
</name>
<name>
<surname>Zhdanov</surname>
<given-names>D. D.</given-names>
</name>
<name>
<surname>Bogdanov</surname>
<given-names>N. N.</given-names>
</name>
<name>
<surname>Potemin</surname>
<given-names>I. S.</given-names>
</name>
<name>
<surname>Galaktionov</surname>
<given-names>V. A.</given-names>
</name>
<name>
<surname>Sorokin</surname>
<given-names>M. I.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Discomfort of visual perception in virtual and mixed reality systems</article-title>. <source>Program. Comput. Softw.</source> <volume>45</volume>, <fpage>147</fpage>&#x2013;<lpage>155</lpage>. <pub-id pub-id-type="doi">10.1134/s036176881904011x</pub-id>
</mixed-citation>
</ref>
<ref id="B42">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhou</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>Z.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Web-based mixed reality video fusion with remote rendering</article-title>. <source>Virtual Real. and Intelligent Hardw.</source> <volume>5</volume>, <fpage>188</fpage>&#x2013;<lpage>199</lpage>. <pub-id pub-id-type="doi">10.1016/j.vrih.2022.03.005</pub-id>
</mixed-citation>
</ref>
</ref-list>
<fn-group>
<fn fn-type="custom" custom-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3032222/overview">Koen Smit</ext-link>, HU University of Applied Sciences Utrecht, Netherlands</p>
</fn>
<fn fn-type="custom" custom-type="reviewed-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2308613/overview">Philippine Waisvisz</ext-link>, HU University of Applied Sciences Utrecht, Netherlands</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3287891/overview">Stan Van Ginkel</ext-link>, HU University of Applied Sciences Utrecht, Netherlands</p>
</fn>
</fn-group>
</back>
</article>