<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" article-type="research-article" dtd-version="1.3" xml:lang="en">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Neurorobot.</journal-id>
<journal-title-group>
<journal-title>Frontiers in Neurorobotics</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Neurorobot.</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">1662-5218</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fnbot.2026.1760494</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Research</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Multimodal sequence dynamics and convergence optimization in dual-stream LSTM networks for complex physiological state estimation</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Cao</surname>
<given-names>Xiaoxiao</given-names>
</name>
<xref ref-type="aff" rid="aff1"/>
<xref ref-type="corresp" rid="c001"><sup>&#x002A;</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/3304540"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role>
</contrib>
</contrib-group>
<aff id="aff1"><institution>Department of Public Physical Education, China Academy of Art</institution>, <city>Hangzhou</city>, <state>Zhejiang</state>, <country country="CN">China</country></aff>
<author-notes>
<corresp id="c001"><label>&#x002A;</label>Correspondence: Xiaoxiao Cao, <email xlink:href="mailto:caoxiaoxiao17@outlook.com">caoxiaoxiao17@outlook.com</email></corresp>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-02-06">
<day>06</day>
<month>02</month>
<year>2026</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2026</year>
</pub-date>
<volume>20</volume>
<elocation-id>1760494</elocation-id>
<history>
<date date-type="received">
<day>04</day>
<month>12</month>
<year>2025</year>
</date>
<date date-type="rev-recd">
<day>07</day>
<month>01</month>
<year>2026</year>
</date>
<date date-type="accepted">
<day>08</day>
<month>01</month>
<year>2026</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x00A9; 2026 Cao.</copyright-statement>
<copyright-year>2026</copyright-year>
<copyright-holder>Cao</copyright-holder>
<license>
<ali:license_ref start_date="2026-02-06">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<sec>
<title>Introduction</title>
<p>The integration of virtual simulation with intelligent modeling is crucial for advancing the scientization and personalization of volleyball physical training. This study aims to overcome the convergence instability and feature misalignment in modeling multimodal kinematic and physiological sequences.</p>
</sec>
<sec>
<title>Methods</title>
<p>A dynamical framework based on a Dual-Stream Long Short-Term Memory network integrated with a temporal attention mechanism is proposed. The framework decouples heterogeneous feature learning and optimizes temporal weight distribution.</p>
</sec>
<sec>
<title>Results</title>
<p>Experimental validation on complex motion state estimation demonstrates that the proposed model reduces load modeling error to 3.8% and achieves a motion classification accuracy of 93.1%. The velocity trajectory fitting coefficient of determination is 0.91 with a peak deviation of 0.05 m/s.</p>
</sec>
<sec>
<title>Discussion</title>
<p>These results confirm the effectiveness of the attention-based DS-LSTM in optimizing multimodal sequence modeling for training state estimation and feedback.</p>
</sec>
</abstract>
<kwd-group>
<kwd>attention mechanism</kwd>
<kwd>convergence analysis</kwd>
<kwd>multimodal dynamics</kwd>
<kwd>recurrent neural networks</kwd>
<kwd>sequence modeling</kwd>
<kwd>state estimation</kwd>
</kwd-group>
<funding-group>
<funding-statement>The author(s) declare that no financial support was received for the research and/or publication of this article.</funding-statement>
</funding-group>
<counts>
<fig-count count="7"/>
<table-count count="7"/>
<equation-count count="21"/>
<ref-count count="34"/>
<page-count count="15"/>
<word-count count="10203"/>
</counts>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="sec1">
<label>1</label>
<title>Introduction</title>
<p>As a sport that combines both competitive and entertaining elements, volleyball places high demands on athletes&#x2019; physical fitness (<xref ref-type="bibr" rid="ref1">Albaladejo-Saura et al., 2023</xref>; <xref ref-type="bibr" rid="ref15">Lin et al., 2024</xref>; <xref ref-type="bibr" rid="ref17">Pawlik and Mroczek, 2023</xref>). Physical training not only determines an athlete&#x2019;s explosive power, endurance, and agility during competition but also plays a crucial role in injury prevention and extending their athletic career (<xref ref-type="bibr" rid="ref7">Esposito et al., 2024</xref>; <xref ref-type="bibr" rid="ref19">Rebelo et al., 2022</xref>; <xref ref-type="bibr" rid="ref26">Tan et al., 2023</xref>). In recent years, the development of virtual simulation technology has brought new opportunities to sports training. Virtual reality and augmented reality platforms, combined with sensor acquisition and intelligent analysis, can provide athletes with immersive training environments and data-driven feedback (<xref ref-type="bibr" rid="ref14">Li et al., 2024</xref>; <xref ref-type="bibr" rid="ref23">Sousa et al., 2023</xref>). Research on how to combine virtual simulation with intelligent modeling for volleyball physical training not only promotes innovation in training models but also has practical value in improving the scientific nature and personalization of training (<xref ref-type="bibr" rid="ref6">Duan, 2021</xref>; <xref ref-type="bibr" rid="ref34">Yunchao et al., 2023</xref>).</p>
<p>Current research on volleyball physical training faces three key challenges. First, motion capture and muscle load monitoring data often exhibit high dimensionality and temporal dependencies, making it difficult for traditional statistical methods to accurately characterize their dynamic patterns (<xref ref-type="bibr" rid="ref3">Amin et al., 2023</xref>; <xref ref-type="bibr" rid="ref5">Bae et al., 2024</xref>; <xref ref-type="bibr" rid="ref25">Suo et al., 2024</xref>). Second, the data types collected by multimodal sensors vary widely, and asynchrony and noise exist between posture, muscle load, and velocity curves, impacting data fusion and modeling accuracy (<xref ref-type="bibr" rid="ref8">Hafer et al., 2023</xref>; <xref ref-type="bibr" rid="ref28">Tsanousa et al., 2022</xref>). Third, existing training feedback mechanisms largely rely on empirical judgment and lack real-time prediction and interpretable feedback based on neural network models, making personalized training optimization difficult to implement (<xref ref-type="bibr" rid="ref10">Hribernik et al., 2022</xref>; <xref ref-type="bibr" rid="ref30">Vec et al., 2024</xref>). These difficulties have hindered the practical application of virtual simulation technology in volleyball physical training.</p>
<p>To address these challenges, various technical approaches have been proposed (<xref ref-type="bibr" rid="ref18">Putranto et al., 2023</xref>; <xref ref-type="bibr" rid="ref20">Richlan et al., 2023</xref>). Convolutional neural networks have been used for motion recognition, but their structure is biased towards static feature extraction, making it difficult to capture the dynamic evolution of long time series (<xref ref-type="bibr" rid="ref11">Huang and Cai, 2023</xref>; <xref ref-type="bibr" rid="ref12">Le et al., 2022</xref>). Recurrent neural networks have demonstrated effectiveness in movement classification tasks (<xref ref-type="bibr" rid="ref4">Amin and Noh, 2024</xref>), and possess certain advantages in time series modeling. However, they are susceptible to vanishing gradients in multimodal data fusion, resulting in insufficient convergence speed and stability (<xref ref-type="bibr" rid="ref24">Su et al., 2025</xref>; <xref ref-type="bibr" rid="ref33">Waqas and Humphries, 2024</xref>). Multi-sensor fusion methods have attempted to use weighted averaging and Kalman filtering, but their ability to fit complex nonlinear relationships is limited, making it difficult to accurately reflect the coupling effects between different training features (<xref ref-type="bibr" rid="ref27">Tang et al., 2023</xref>; <xref ref-type="bibr" rid="ref29">Urrea and Agramonte, 2021</xref>). Attention mechanisms have been introduced in some studies to highlight key features, but they are mostly used in single-modal scenarios and lack cross-modal interaction (<xref ref-type="bibr" rid="ref16">Lu et al., 2023</xref>; <xref ref-type="bibr" rid="ref31">Wang and Liang, 2023</xref>). 
Overall, these methods still have shortcomings in accurately modeling multimodal time series data and optimizing personalized training, and have not yet addressed the need for scientific and real-time feedback in volleyball physical training (<xref ref-type="bibr" rid="ref21">Salim et al., 2024</xref>; <xref ref-type="bibr" rid="ref32">Wang et al., 2022</xref>).</p>
<p>To address the shortcomings of traditional methods in multimodal time-series data modeling and training feedback, this paper proposes a dynamic modeling approach based on DS-LSTM and a temporal attention mechanism. This framework uniquely applies temporal attention after separate kinematic and physiological temporal encoding, rather than before fusion. This structured decoupling and reweighting of modalities enhances convergence stability and mitigates temporal misalignment. This approach first synchronizes and suppresses the temporal noise of posture, electromyography, and acceleration data. Then, in a dual-stream architecture, kinematic and physiological features are fed into recursive units to capture their internal dynamic patterns. Interactive feature fusion is then used to integrate cross-modal information. Furthermore, a temporal attention mechanism is introduced to automatically identify key performance-affecting periods during training and weight these features, ultimately outputting training status predictions and optimization recommendations. This approach enables dynamic modeling and intelligent feedback for volleyball physical training in a virtual simulation environment, enhancing the interpretability and convergence stability of training data and providing athletes with a personalized, scientific training path.</p>
</sec>
<sec sec-type="methods" id="sec2">
<label>2</label>
<title>Methods</title>
<sec id="sec3">
<label>2.1</label>
<title>Data collection and preprocessing</title>
<p>In virtual simulation experiments of volleyball physical training, the acquisition of multimodal time-series data is a prerequisite for dynamic modeling. This paper constructs a dataset based on motion capture sensors, myoelectric sensors, and accelerometers. Motion capture sensors output the athlete&#x2019;s joint positions and angles in three-dimensional space, reflecting the temporal changes in posture and movement. Myoelectric sensors record the electrophysiological signals of major muscle groups during training, revealing the muscle load under high-intensity exercise. Accelerometers measure the velocity and acceleration curves of training movements to characterize the explosive power and rhythmic characteristics of the movement. Due to differences in sampling rate, data dimension, and duration settings among the three sensor types, the raw data must be standardized. The data acquisition parameters are shown in <xref ref-type="table" rid="tab1">Table 1</xref>.</p>
<table-wrap position="float" id="tab1">
<label>Table 1</label>
<caption>
<p>Data acquisition parameters.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">Sensor type</th>
<th align="left" valign="top">Collected features</th>
<th align="center" valign="top">Sampling rate (Hz)</th>
<th align="center" valign="top">Data dimension</th>
<th align="center" valign="top">Duration per trial (s)</th>
<th align="center" valign="top">Output format</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="middle">Motion Capture</td>
<td align="left" valign="middle">Joint positions, angles</td>
<td align="center" valign="middle">120</td>
<td align="center" valign="middle">18</td>
<td align="center" valign="middle">30</td>
<td align="left" valign="middle">3D coordinate matrix</td>
</tr>
<tr>
<td align="left" valign="middle">EMG Sensor</td>
<td align="left" valign="middle">Muscle electrical signals</td>
<td align="center" valign="middle">1,000</td>
<td align="center" valign="middle">8</td>
<td align="center" valign="middle">30</td>
<td align="left" valign="middle">Voltage time series</td>
</tr>
<tr>
<td align="left" valign="middle">Accelerometer</td>
<td align="left" valign="middle">Velocity, acceleration</td>
<td align="center" valign="middle">200</td>
<td align="center" valign="middle">6</td>
<td align="center" valign="middle">30</td>
<td align="left" valign="middle">Vector time series</td>
</tr>
<tr>
<td align="left" valign="middle">Gyroscope</td>
<td align="left" valign="middle">Angular velocity</td>
<td align="center" valign="middle">200</td>
<td align="center" valign="middle">3</td>
<td align="center" valign="middle">30</td>
<td align="left" valign="middle">Three-axis sequence</td>
</tr>
<tr>
<td align="left" valign="middle">Force Sensor</td>
<td align="left" valign="middle">Ground reaction force</td>
<td align="center" valign="middle">500</td>
<td align="center" valign="middle">3</td>
<td align="center" valign="middle">30</td>
<td align="left" valign="middle">Force time series</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>The raw data are affected by noise and drift during the acquisition process, and feeding them directly into the model would cause convergence difficulties. To this end, targeted preprocessing methods are used for different signals: the electromyographic signal is filtered by a Butterworth bandpass filter to remove low-frequency drift and high-frequency interference, the motion capture and acceleration signals are filtered by sliding average to eliminate jitter, and the mechanical data are filtered by low-pass filtering to reduce high-frequency noise. All signals are then normalized to eliminate dimensional differences so that data from different modalities are comparable within the same range. Normalization uses the standard scaler method, as shown in <xref ref-type="disp-formula" rid="E1">Equation 1</xref> (<xref ref-type="bibr" rid="ref22">Sinsomboonthong, 2022</xref>).</p>
<disp-formula id="E1">
<mml:math id="M1">
<mml:msup>
<mml:mi>x</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>x</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>&#x03BC;</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
</mml:mrow>
<mml:mi>&#x03C3;</mml:mi>
</mml:mfrac>
</mml:math>
<label>(1)</label>
</disp-formula>
<p>In <xref ref-type="disp-formula" rid="E1">Equation 1</xref>, <inline-formula>
<mml:math id="M2">
<mml:mi>x</mml:mi>
</mml:math>
</inline-formula> is the original data value, <inline-formula>
<mml:math id="M3">
<mml:mi>&#x03BC;</mml:mi>
</mml:math>
</inline-formula> represents the mean of the feature dimension, <inline-formula>
<mml:math id="M4">
<mml:mi>&#x03C3;</mml:mi>
</mml:math>
</inline-formula> represents its standard deviation, and <inline-formula>
<mml:math id="M5">
<mml:msup>
<mml:mi>x</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
</mml:math>
</inline-formula> is the normalized output. This process centers the data and scales it based on variance, which is more robust for handling potential out-of-range data during model application.</p>
<p>Because the sampling rates of various sensors vary, timing alignment is necessary on a unified time axis. This paper uses the electromyographic signal with the highest sampling rate as the time reference and employs linear interpolation to interpolate the lower-sampling-rate signals, synchronizing all data at the same time step. To verify the effectiveness of preprocessing, a comparative analysis of signal quality metrics before and after filtering and alignment was performed, including signal-to-noise ratio, timing alignment error, and stability index. The results are shown in <xref ref-type="table" rid="tab2">Table 2</xref>.</p>
<table-wrap position="float" id="tab2">
<label>Table 2</label>
<caption>
<p>Comparison of data preprocessing effects.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">Data type</th>
<th align="center" valign="top">SNR before filtering (dB)</th>
<th align="center" valign="top">SNR after filtering (dB)</th>
<th align="center" valign="top">Alignment error (ms)</th>
<th align="center" valign="top">Stability index (0&#x2013;1)</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="middle">Motion Capture</td>
<td align="char" valign="middle" char=".">18.2</td>
<td align="char" valign="middle" char=".">28.7</td>
<td align="char" valign="middle" char=".">4.6</td>
<td align="char" valign="middle" char=".">0.82</td>
</tr>
<tr>
<td align="left" valign="middle">EMG Data</td>
<td align="char" valign="middle" char=".">12.5</td>
<td align="char" valign="middle" char=".">24.3</td>
<td align="char" valign="middle" char=".">3.2</td>
<td align="char" valign="middle" char=".">0.88</td>
</tr>
<tr>
<td align="left" valign="middle">Velocity Data</td>
<td align="char" valign="middle" char=".">15.7</td>
<td align="char" valign="middle" char=".">26.8</td>
<td align="char" valign="middle" char=".">5.1</td>
<td align="char" valign="middle" char=".">0.85</td>
</tr>
<tr>
<td align="left" valign="middle">Angular Velocity</td>
<td align="char" valign="middle" char=".">14.3</td>
<td align="char" valign="middle" char=".">25.6</td>
<td align="char" valign="middle" char=".">4.9</td>
<td align="char" valign="middle" char=".">0.81</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="sec4">
<label>2.2</label>
<title>DS-LSTM structure modeling</title>
<p>In the dynamic modeling of multimodal time series data, kinematic and physiological signals differ significantly. Previous dual-stream LSTM models commonly perform early or mid-level fusion without explicit temporal reweighting, which limits their ability to suppress redundant segments in long training sequences. Attention-based recurrent architectures reported in related studies typically operate on concatenated multimodal representations, leading to interference between heterogeneous dynamics during attention computation. By contrast, the proposed DS-LSTM with temporal attention performs attention weighting only after independent temporal encoding, ensuring that the attention distribution reflects modality-consistent temporal patterns rather than mixed feature responses. Posture capture sequences are characterized by high dimensionality, strong temporal dependencies, and dynamic continuity, while electromyographic and acceleration signals reflect the changing patterns of muscle load and exercise intensity. To avoid the coupling interference of single-stream networks when processing cross-modal features, this paper employs a DS-LSTM architecture to perform temporal modeling of kinematic and physiological features in independent branches, and then implements interactive fusion of cross-modal features at a high level.</p>
<p>The kinematic flow takes the coordinate sequence of the posture key points <inline-formula>
<mml:math id="M6">
<mml:msup>
<mml:mi>X</mml:mi>
<mml:mi>k</mml:mi>
</mml:msup>
<mml:mo>=</mml:mo>
<mml:mo stretchy="true">{</mml:mo>
<mml:msubsup>
<mml:mi>x</mml:mi>
<mml:mn>1</mml:mn>
<mml:mi>k</mml:mi>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mi>x</mml:mi>
<mml:mn>2</mml:mn>
<mml:mi>k</mml:mi>
</mml:msubsup>
<mml:mo>&#x2026;</mml:mo>
<mml:msubsup>
<mml:mi>x</mml:mi>
<mml:mi>T</mml:mi>
<mml:mi>k</mml:mi>
</mml:msubsup>
<mml:mo stretchy="true">}</mml:mo>
</mml:math>
</inline-formula> as input and recursively updates the hidden state through the LSTM unit expanded in time steps to capture the dynamic dependency between spatial displacement and joint motion. In this sequence, <inline-formula>
<mml:math id="M7">
<mml:msubsup>
<mml:mi>x</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>k</mml:mi>
</mml:msubsup>
</mml:math>
</inline-formula> represents the key point feature vector at moment <inline-formula>
<mml:math id="M8">
<mml:mi>t</mml:mi>
</mml:math>
</inline-formula>, and the recursive process of LSTM can be expressed as <xref ref-type="bibr" rid="ref13">Lees et al. (2021)</xref>:</p>
<disp-formula id="E2">
<mml:math id="M9">
<mml:msubsup>
<mml:mi mathvariant="normal">h</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>k</mml:mi>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mi>c</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>k</mml:mi>
</mml:msubsup>
<mml:mo>=</mml:mo>
<mml:mtext>LSTM</mml:mtext>
<mml:mo stretchy="true">(</mml:mo>
<mml:msubsup>
<mml:mi>x</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>k</mml:mi>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mi mathvariant="normal">h</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>k</mml:mi>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mi>c</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>k</mml:mi>
</mml:msubsup>
<mml:mo stretchy="true">)</mml:mo>
</mml:math>
<label>(2)</label>
</disp-formula>
<p>In <xref ref-type="disp-formula" rid="E2">Equation 2</xref>, <inline-formula>
<mml:math id="M10">
<mml:msubsup>
<mml:mi mathvariant="normal">h</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>k</mml:mi>
</mml:msubsup>
</mml:math>
</inline-formula> represents the hidden state of the kinematic flow at time <inline-formula>
<mml:math id="M12">
<mml:mi>t</mml:mi>
</mml:math>
</inline-formula>, <inline-formula>
<mml:math id="M11">
<mml:msubsup>
<mml:mi>c</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>k</mml:mi>
</mml:msubsup>
</mml:math>
</inline-formula> is the unit state, and the recursive function includes nonlinear updates of the input gate, forget gate, and output gate. The output sequence of this branch <inline-formula>
<mml:math id="M13">
<mml:mo stretchy="true">{</mml:mo>
<mml:msubsup>
<mml:mi mathvariant="normal">h</mml:mi>
<mml:mn>1</mml:mn>
<mml:mi>k</mml:mi>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mi mathvariant="normal">h</mml:mi>
<mml:mn>2</mml:mn>
<mml:mi>k</mml:mi>
</mml:msubsup>
<mml:mo>&#x2026;</mml:mo>
<mml:msubsup>
<mml:mi mathvariant="normal">h</mml:mi>
<mml:mi>T</mml:mi>
<mml:mi>k</mml:mi>
</mml:msubsup>
<mml:mo stretchy="true">}</mml:mo>
</mml:math>
</inline-formula> describes the dynamic structure of the action sequence.</p>
<p>The physiological flow takes the fusion vector of myoelectric and acceleration signals as input and focuses on the activation intensity of muscle groups and the load changes during exercise. Assuming the input sequence is <inline-formula>
<mml:math id="M14">
<mml:msup>
<mml:mi>X</mml:mi>
<mml:mi>p</mml:mi>
</mml:msup>
<mml:mo>=</mml:mo>
<mml:mo stretchy="true">{</mml:mo>
<mml:msubsup>
<mml:mi>x</mml:mi>
<mml:mn>1</mml:mn>
<mml:mi>p</mml:mi>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mi>x</mml:mi>
<mml:mn>2</mml:mn>
<mml:mi>p</mml:mi>
</mml:msubsup>
<mml:mo>&#x2026;</mml:mo>
<mml:msubsup>
<mml:mi>x</mml:mi>
<mml:mi>T</mml:mi>
<mml:mi>p</mml:mi>
</mml:msubsup>
<mml:mo stretchy="true">}</mml:mo>
</mml:math>
</inline-formula>, its recursive process is consistent with <xref ref-type="disp-formula" rid="E2">Equation 2</xref>, but the hidden state and unit state are recorded as <inline-formula>
<mml:math id="M15">
<mml:msubsup>
<mml:mi mathvariant="normal">h</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>p</mml:mi>
</mml:msubsup>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math id="M16">
<mml:msubsup>
<mml:mi>c</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>p</mml:mi>
</mml:msubsup>
</mml:math>
</inline-formula>, respectively. Through feature modeling of the time-series expansion, this branch forms the sequence <inline-formula>
<mml:math id="M17">
<mml:mo stretchy="true">{</mml:mo>
<mml:msubsup>
<mml:mi mathvariant="normal">h</mml:mi>
<mml:mn>1</mml:mn>
<mml:mi>p</mml:mi>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mi mathvariant="normal">h</mml:mi>
<mml:mn>2</mml:mn>
<mml:mi>p</mml:mi>
</mml:msubsup>
<mml:mo>&#x2026;</mml:mo>
<mml:msubsup>
<mml:mi mathvariant="normal">h</mml:mi>
<mml:mi>T</mml:mi>
<mml:mi>p</mml:mi>
</mml:msubsup>
<mml:mo stretchy="true">}</mml:mo>
</mml:math>
</inline-formula> and is used to characterize the dynamic changes at the physiological level of exercise.</p>
<p>In the dual-stream architecture, the kinematic and physiological streams maintain the integrity of temporal feature representation within independent recursive units, thus avoiding feature aliasing in early stages. High-level feature fusion will subsequently incorporate attention mechanisms and fully connected mapping to achieve cross-modal interaction modeling and prediction optimization. The overall framework of the dual-stream architecture is shown in <xref ref-type="fig" rid="fig1">Figure 1</xref>.</p>
<fig position="float" id="fig1">
<label>Figure 1</label>
<caption>
<p>The proposed dual-stream LSTM (DS-LSTM) modeling framework. The architecture decouples kinematic (posture) and physiological (EMG/acceleration) data processing into independent streams. This ensures feature integrity before high-level fusion, allowing the model to capture distinct dynamic patterns for each modality.</p>
</caption>
<graphic xlink:href="fnbot-20-1760494-g001.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Flowchart diagram illustrating a multi-stream data processing pipeline for training state prediction and personalized feedback. Pose and physiological data are acquired, preprocessed, modeled using LSTM networks, fused via temporal attention weighting, then used for prediction and feedback generation.</alt-text>
</graphic>
</fig>
</sec>
<sec id="sec5">
<label>2.3</label>
<title>Temporal attention mechanism fusion</title>
<p>After the DS-LSTM architecture completes modeling of both kinematic and physiological features, a temporal attention mechanism is introduced to achieve weighted screening of key training segments. Multimodal time series data exhibits significant phase-by-phase differences during training. Features from some time periods contribute significantly to overall training state prediction, while others represent noise or redundant information. To address this issue, the hidden state sequences output by the DS-LSTM are set to be <inline-formula>
<mml:math id="M18">
<mml:msup>
<mml:mi>H</mml:mi>
<mml:mi>k</mml:mi>
</mml:msup>
<mml:mo>=</mml:mo>
<mml:mo stretchy="true">{</mml:mo>
<mml:msubsup>
<mml:mi mathvariant="normal">h</mml:mi>
<mml:mn>1</mml:mn>
<mml:mi>k</mml:mi>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mi mathvariant="normal">h</mml:mi>
<mml:mn>2</mml:mn>
<mml:mi>k</mml:mi>
</mml:msubsup>
<mml:mo>&#x2026;</mml:mo>
<mml:msubsup>
<mml:mi mathvariant="normal">h</mml:mi>
<mml:mi>T</mml:mi>
<mml:mi>k</mml:mi>
</mml:msubsup>
<mml:mo stretchy="true">}</mml:mo>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math id="M19">
<mml:msup>
<mml:mi>H</mml:mi>
<mml:mi>p</mml:mi>
</mml:msup>
<mml:mo>=</mml:mo>
<mml:mo stretchy="true">{</mml:mo>
<mml:msubsup>
<mml:mi mathvariant="normal">h</mml:mi>
<mml:mn>1</mml:mn>
<mml:mi>p</mml:mi>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mi mathvariant="normal">h</mml:mi>
<mml:mn>2</mml:mn>
<mml:mi>p</mml:mi>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mi mathvariant="normal">h</mml:mi>
<mml:mi>T</mml:mi>
<mml:mi>p</mml:mi>
</mml:msubsup>
<mml:mo stretchy="true">}</mml:mo>
</mml:math>
</inline-formula>, where <inline-formula>
<mml:math id="M20">
<mml:msubsup>
<mml:mi mathvariant="normal">h</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>k</mml:mi>
</mml:msubsup>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math id="M21">
<mml:msubsup>
<mml:mi mathvariant="normal">h</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>p</mml:mi>
</mml:msubsup>
</mml:math>
</inline-formula> represent the kinematic and physiological flow feature vectors at time <inline-formula>
<mml:math id="M23">
<mml:mi>t</mml:mi>
</mml:math>
</inline-formula>, respectively, and <inline-formula>
<mml:math id="M22">
<mml:mi>T</mml:mi>
</mml:math>
</inline-formula> is the total length of the sequence.</p>
<p>The calculation of attention weight is achieved through the interaction between the learnable parameter vector and the feature latent representation, and the weight distribution is defined as <xref ref-type="bibr" rid="ref9">Hern&#x00E1;ndez and Amig&#x00F3; (2021)</xref>:</p>
<disp-formula id="E3">
<mml:math id="M24">
<mml:msub>
<mml:mi>&#x03B1;</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mo>exp</mml:mo>
<mml:mo stretchy="true">(</mml:mo>
<mml:msup>
<mml:mi>u</mml:mi>
<mml:mo>&#x22A4;</mml:mo>
</mml:msup>
<mml:mo>tanh</mml:mo>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>W</mml:mi>
<mml:msub>
<mml:mi mathvariant="normal">h</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:mi>b</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo stretchy="true">)</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:munderover>
<mml:mo movablelimits="false">&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>T</mml:mi>
</mml:munderover>
<mml:mspace width="0.25em"/>
<mml:mo>exp</mml:mo>
<mml:mo stretchy="true">(</mml:mo>
<mml:msup>
<mml:mi>u</mml:mi>
<mml:mo>&#x22A4;</mml:mo>
</mml:msup>
<mml:mo>tanh</mml:mo>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>W</mml:mi>
<mml:msub>
<mml:mi mathvariant="normal">h</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:mi>b</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo stretchy="true">)</mml:mo>
</mml:mrow>
</mml:mfrac>
</mml:math>
<label>(3)</label>
</disp-formula>
<p>In <xref ref-type="disp-formula" rid="E3">Equation 3</xref>, <inline-formula>
<mml:math id="M25">
<mml:msub>
<mml:mi mathvariant="normal">h</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:math>
</inline-formula> represents the candidate feature vector after stacking and fusion of the dual-stream output, <inline-formula>
<mml:math id="M26">
<mml:mi>W</mml:mi>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math id="M27">
<mml:mi>b</mml:mi>
</mml:math>
</inline-formula> are the trainable linear transformation parameters, and <inline-formula>
<mml:math id="M28">
<mml:mi>u</mml:mi>
</mml:math>
</inline-formula> is the global context vector. In <xref ref-type="disp-formula" rid="E3">Equation 3</xref>, the numerator is used to measure the importance of the features at moment <inline-formula>
<mml:math id="M29">
<mml:mi>t</mml:mi>
</mml:math>
</inline-formula>. The denominator ensures that the weights are normalized at all times to satisfy the constraints of the probability distribution.</p>
<p>The weights obtained <inline-formula>
<mml:math id="M30">
<mml:msub>
<mml:mi>&#x03B1;</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:math>
</inline-formula> based on <xref ref-type="disp-formula" rid="E3">Equation 3</xref> are used to weight the feature sequence to obtain the context representation after time series fusion:</p>
<disp-formula id="E4">
<mml:math id="M31">
<mml:mtable displaystyle="true">
<mml:mtr>
<mml:mtd>
<mml:mi>c</mml:mi>
<mml:mo>=</mml:mo>
<mml:munderover>
<mml:mo movablelimits="false">&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>T</mml:mi>
</mml:munderover>
<mml:mspace width="0.25em"/>
<mml:msub>
<mml:mi>&#x03B1;</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:msub>
<mml:mi mathvariant="normal">h</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:math>
<label>(4)</label>
</disp-formula>
<p>In <xref ref-type="disp-formula" rid="E4">Equation 4</xref>, <inline-formula>
<mml:math id="M32">
<mml:mi>c</mml:mi>
</mml:math>
</inline-formula> is the temporal context vector, which integrates the dynamic contributions of key segments across modalities, eliminating interference from redundant features. In this process, training phases corresponding to moments with larger weights have a higher proportion in the final representation, ensuring that the model prioritizes high-value segments during prediction and feedback.</p>
<p>To further improve the effectiveness of cross-modal interaction, this paper introduces a feature fusion operation after the attention layer. The context vectors of kinematic and physiological features filtered by attention are spliced and nonlinearly mapped in the feature space:</p>
<disp-formula id="E5">
<mml:math id="M33">
<mml:mtable displaystyle="true">
<mml:mtr>
<mml:mtd>
<mml:mi>z</mml:mi>
<mml:mo>=</mml:mo>
<mml:mi>&#x03C3;</mml:mi>
<mml:mo stretchy="true">(</mml:mo>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mi>f</mml:mi>
</mml:msub>
<mml:mo stretchy="true">[</mml:mo>
<mml:msup>
<mml:mi>c</mml:mi>
<mml:mi>k</mml:mi>
</mml:msup>
<mml:mo>;</mml:mo>
<mml:msup>
<mml:mi>c</mml:mi>
<mml:mi>p</mml:mi>
</mml:msup>
<mml:mo stretchy="true">]</mml:mo>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mi>f</mml:mi>
</mml:msub>
<mml:mo stretchy="true">)</mml:mo>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:math>
<label>(5)</label>
</disp-formula>
<p>Where <inline-formula>
<mml:math id="M34">
<mml:msup>
<mml:mi>c</mml:mi>
<mml:mi>k</mml:mi>
</mml:msup>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math id="M35">
<mml:msup>
<mml:mi>c</mml:mi>
<mml:mi>p</mml:mi>
</mml:msup>
</mml:math>
</inline-formula> are the attention-weighted contextual representations of the kinematic and physiological flows, respectively, <inline-formula>
<mml:math id="M36">
<mml:mo stretchy="true">[</mml:mo>
<mml:msup>
<mml:mi>c</mml:mi>
<mml:mi>k</mml:mi>
</mml:msup>
<mml:mo>;</mml:mo>
<mml:msup>
<mml:mi>c</mml:mi>
<mml:mi>p</mml:mi>
</mml:msup>
<mml:mo stretchy="true">]</mml:mo>
</mml:math>
</inline-formula> represents the vector concatenation operation, <inline-formula>
<mml:math id="M37">
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mi>f</mml:mi>
</mml:msub>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math id="M38">
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mi>f</mml:mi>
</mml:msub>
</mml:math>
</inline-formula> are the fusion layer parameters, and <inline-formula>
<mml:math id="M39">
<mml:mi>&#x03C3;</mml:mi>
<mml:mo stretchy="true">(</mml:mo>
<mml:mo>&#x00B7;</mml:mo>
<mml:mo stretchy="true">)</mml:mo>
</mml:math>
</inline-formula> is a nonlinear activation function. <xref ref-type="disp-formula" rid="E5">Equation 5</xref> implements cross-modal interactive expression, establishing an association between kinematic and physiological features in a shared space, ensuring that the final training state prediction not only relies on information from a single modality, but also integrates the interactive features of multimodal key time series segments.</p>
<p>While ensuring information selectivity, this structure also provides a more compact and discriminative input for the subsequent fully connected prediction layer, effectively improving the accuracy and stability of training optimization feedback. The overall process is shown in <xref ref-type="fig" rid="fig2">Figure 2</xref>.</p>
<fig position="float" id="fig2">
<label>Figure 2</label>
<caption>
<p>Schematic diagram of the temporal attention mechanism fusion structure. This layer calculates weight distributions for different time segments to filter key motion phases. It effectively integrates kinematic and physiological context vectors to generate a robust fused feature representation for state prediction.</p>
</caption>
<graphic xlink:href="fnbot-20-1760494-g002.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Flowchart diagram showing a deep learning model architecture with four main layers: Input Layer (Kinematic and Physiological LSTM output sequences), Temporal Attention Mechanism Layer, Fusion Layer, and Output Layer generating fused interaction feature representation.</alt-text>
</graphic>
</fig>
</sec>
<sec id="sec6">
<label>2.4</label>
<title>Training optimization and feedback generation</title>
<p>The fused time series feature set <inline-formula>
<mml:math id="M40">
<mml:mi>Z</mml:mi>
<mml:mo>=</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mo stretchy="true">{</mml:mo>
<mml:msub>
<mml:mi>z</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo stretchy="true">}</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>T</mml:mi>
</mml:msubsup>
</mml:math>
</inline-formula> is mapped, through a multi-layer feedforward network and task-specific output heads, into posture prediction <inline-formula>
<mml:math id="M41">
<mml:msup>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo stretchy="true">&#x0302;</mml:mo>
</mml:mover>
<mml:mtext mathvariant="italic">pose</mml:mtext>
</mml:msup>
</mml:math>
</inline-formula>, electromyography prediction <inline-formula>
<mml:math id="M42">
<mml:msup>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo stretchy="true">&#x0302;</mml:mo>
</mml:mover>
<mml:mi mathvariant="italic">emg</mml:mi>
</mml:msup>
</mml:math>
</inline-formula>, velocity prediction <inline-formula>
<mml:math id="M43">
<mml:msup>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo stretchy="true">&#x0302;</mml:mo>
</mml:mover>
<mml:mi mathvariant="italic">vel</mml:mi>
</mml:msup>
</mml:math>
</inline-formula>, and attention weight sequences <inline-formula>
<mml:math id="M44">
<mml:mi>A</mml:mi>
<mml:mo>=</mml:mo>
<mml:mo stretchy="true">{</mml:mo>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mrow>
<mml:mi mathvariant="normal">h</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="true">}</mml:mo>
</mml:math>
</inline-formula>. The training objectives are uniformly expressed as a multi-task joint loss function, which is expressed as shown in <xref ref-type="disp-formula" rid="E6">Equation 6</xref>:</p>
<disp-formula id="E6">
<mml:math id="M45">
<mml:mtable columnalign="left" displaystyle="true">
<mml:mtr>
<mml:mtd>
<mml:msub>
<mml:mi>L</mml:mi>
<mml:mtext>total</mml:mtext>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mi>p</mml:mi>
</mml:msub>
<mml:msub>
<mml:mi>L</mml:mi>
<mml:mtext mathvariant="italic">pose</mml:mtext>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mi>e</mml:mi>
</mml:msub>
<mml:msub>
<mml:mi>L</mml:mi>
<mml:mi mathvariant="italic">emg</mml:mi>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mi>v</mml:mi>
</mml:msub>
<mml:msub>
<mml:mi>L</mml:mi>
<mml:mi mathvariant="italic">vel</mml:mi>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>&#x03BB;</mml:mi>
<mml:mi mathvariant="italic">att</mml:mi>
</mml:msub>
<mml:msub>
<mml:mi>L</mml:mi>
<mml:mi mathvariant="italic">att</mml:mi>
</mml:msub>
<mml:mo>+</mml:mo>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:msub>
<mml:mi>&#x03BB;</mml:mi>
<mml:mrow>
<mml:mtext mathvariant="italic">smoot</mml:mtext>
<mml:mi mathvariant="normal">h</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mi>L</mml:mi>
<mml:mrow>
<mml:mtext mathvariant="italic">smoot</mml:mtext>
<mml:mi mathvariant="normal">h</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>&#x03BB;</mml:mi>
<mml:mi mathvariant="italic">reg</mml:mi>
</mml:msub>
<mml:mo>&#x2225;</mml:mo>
<mml:mi>&#x03B8;</mml:mi>
<mml:msubsup>
<mml:mo>&#x2225;</mml:mo>
<mml:mn>2</mml:mn>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:math>
<label>(6)</label>
</disp-formula>
<p>Among them, <inline-formula>
<mml:math id="M46">
<mml:msub>
<mml:mi>L</mml:mi>
<mml:mtext mathvariant="italic">pose</mml:mtext>
</mml:msub>
</mml:math>
</inline-formula> is the posture prediction error, <inline-formula>
<mml:math id="M47">
<mml:msub>
<mml:mi>L</mml:mi>
<mml:mi mathvariant="italic">emg</mml:mi>
</mml:msub>
</mml:math>
</inline-formula> is the electromyography regression error, <inline-formula>
<mml:math id="M48">
<mml:msub>
<mml:mi>L</mml:mi>
<mml:mi mathvariant="italic">vel</mml:mi>
</mml:msub>
</mml:math>
</inline-formula>is the velocity curve fitting error, <inline-formula>
<mml:math id="M49">
<mml:msub>
<mml:mi>L</mml:mi>
<mml:mi mathvariant="italic">att</mml:mi>
</mml:msub>
</mml:math>
</inline-formula> is the attention regularization term, <inline-formula>
<mml:math id="M50">
<mml:msub>
<mml:mi>L</mml:mi>
<mml:mrow>
<mml:mtext mathvariant="italic">smoot</mml:mtext>
<mml:mi mathvariant="normal">h</mml:mi>
</mml:mrow>
</mml:msub>
</mml:math>
</inline-formula> is the time series smoothing term, <inline-formula>
<mml:math id="M51">
<mml:mo>&#x2225;</mml:mo>
<mml:mi>&#x03B8;</mml:mi>
<mml:msubsup>
<mml:mo>&#x2225;</mml:mo>
<mml:mn>2</mml:mn>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:math>
</inline-formula> is the L2 regularization term of the model parameters; <inline-formula>
<mml:math id="M52">
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mi>p</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mi>e</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mi>v</mml:mi>
</mml:msub>
</mml:math>
</inline-formula> are the task weights, <inline-formula>
<mml:math id="M53">
<mml:msub>
<mml:mi>&#x03BB;</mml:mi>
<mml:mi mathvariant="italic">att</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>&#x03BB;</mml:mi>
<mml:mrow>
<mml:mtext mathvariant="italic">smoot</mml:mtext>
<mml:mi mathvariant="normal">h</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>&#x03BB;</mml:mi>
<mml:mi mathvariant="italic">reg</mml:mi>
</mml:msub>
</mml:math>
</inline-formula> are the regularization weights, and <inline-formula>
<mml:math id="M54">
<mml:mi>&#x03B8;</mml:mi>
</mml:math>
</inline-formula> is the model parameter vector.</p>
<p>The posture loss is in the form of mean square error, which is defined as <xref ref-type="disp-formula" rid="E7">Equation 7</xref>.</p>
<disp-formula id="E7">
<mml:math id="M55">
<mml:msub>
<mml:mi>L</mml:mi>
<mml:mtext mathvariant="italic">pose</mml:mtext>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mrow>
<mml:msub>
<mml:mi>N</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:munderover>
<mml:mo movablelimits="false">&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>T</mml:mi>
</mml:munderover>
<mml:munderover>
<mml:mo movablelimits="false">&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:msub>
<mml:mi>N</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:munderover>
<mml:msubsup>
<mml:mrow>
<mml:mo stretchy="true">&#x2016;</mml:mo>
<mml:msubsup>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo stretchy="true">&#x0302;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mtext mathvariant="italic">pose</mml:mtext>
</mml:msubsup>
<mml:mo>&#x2212;</mml:mo>
<mml:msubsup>
<mml:mi>y</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mtext mathvariant="italic">pose</mml:mtext>
</mml:msubsup>
<mml:mo stretchy="true">&#x2016;</mml:mo>
</mml:mrow>
<mml:mn>2</mml:mn>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:math>
<label>(7)</label>
</disp-formula>
<p>In <xref ref-type="disp-formula" rid="E7">Equation 7</xref>, <inline-formula>
<mml:math id="M56">
<mml:msub>
<mml:mi>N</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:math>
</inline-formula> is the number of joints, <inline-formula>
<mml:math id="M57">
<mml:mi>T</mml:mi>
</mml:math>
</inline-formula> is the number of time steps, <inline-formula>
<mml:math id="M58">
<mml:msubsup>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo stretchy="true">&#x0302;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mtext mathvariant="italic">pose</mml:mtext>
</mml:msubsup>
</mml:math>
</inline-formula> is the model&#x2019;s prediction for joint <inline-formula>
<mml:math id="M60">
<mml:mi>j</mml:mi>
</mml:math>
</inline-formula> at step <inline-formula>
<mml:math id="M59">
<mml:mi>t</mml:mi>
</mml:math>
</inline-formula>, and <inline-formula>
<mml:math id="M61">
<mml:msubsup>
<mml:mi>y</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mtext mathvariant="italic">pose</mml:mtext>
</mml:msubsup>
</mml:math>
</inline-formula> is the corresponding annotation.</p>
<p>The EMG regression loss is defined as <xref ref-type="disp-formula" rid="E8">Equation 8</xref>.</p>
<disp-formula id="E8">
<mml:math id="M62">
<mml:msub>
<mml:mi>L</mml:mi>
<mml:mi mathvariant="italic">emg</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mrow>
<mml:msub>
<mml:mi>N</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:munderover>
<mml:mo movablelimits="false">&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>T</mml:mi>
</mml:munderover>
<mml:munderover>
<mml:mo movablelimits="false">&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:msub>
<mml:mi>N</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
</mml:munderover>
<mml:msubsup>
<mml:mrow>
<mml:mo stretchy="true">&#x2016;</mml:mo>
<mml:msubsup>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo stretchy="true">&#x0302;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>m</mml:mi>
</mml:mrow>
<mml:mi mathvariant="italic">emg</mml:mi>
</mml:msubsup>
<mml:mo>&#x2212;</mml:mo>
<mml:msubsup>
<mml:mi>y</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>m</mml:mi>
</mml:mrow>
<mml:mi mathvariant="italic">emg</mml:mi>
</mml:msubsup>
<mml:mo stretchy="true">&#x2016;</mml:mo>
</mml:mrow>
<mml:mn>2</mml:mn>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:math>
<label>(8)</label>
</disp-formula>
<p>In <xref ref-type="disp-formula" rid="E8">Equation 8</xref>, <inline-formula>
<mml:math id="M63">
<mml:msub>
<mml:mi>N</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
</mml:math>
</inline-formula> is the number of myoelectric channels, <inline-formula>
<mml:math id="M64">
<mml:msubsup>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo stretchy="true">&#x0302;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>m</mml:mi>
</mml:mrow>
<mml:mi mathvariant="italic">emg</mml:mi>
</mml:msubsup>
</mml:math>
</inline-formula> is the prediction for channel <inline-formula>
<mml:math id="M66">
<mml:mi>m</mml:mi>
</mml:math>
</inline-formula> at step <inline-formula>
<mml:math id="M65">
<mml:mi>t</mml:mi>
</mml:math>
</inline-formula>, and <inline-formula>
<mml:math id="M67">
<mml:msubsup>
<mml:mi>y</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>m</mml:mi>
</mml:mrow>
<mml:mi mathvariant="italic">emg</mml:mi>
</mml:msubsup>
</mml:math>
</inline-formula> is the labeled value.</p>
<p>The velocity fitting loss is defined as <xref ref-type="disp-formula" rid="E9">Equation 9</xref>:</p>
<disp-formula id="E9">
<mml:math id="M68">
<mml:msub>
<mml:mi>L</mml:mi>
<mml:mi mathvariant="italic">vel</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mi mathvariant="italic">DT</mml:mi>
</mml:mfrac>
<mml:munderover>
<mml:mo movablelimits="false">&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>T</mml:mi>
</mml:munderover>
<mml:msubsup>
<mml:mrow>
<mml:mo stretchy="true">&#x2016;</mml:mo>
<mml:msubsup>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo stretchy="true">&#x0302;</mml:mo>
</mml:mover>
<mml:mi>t</mml:mi>
<mml:mi mathvariant="italic">vel</mml:mi>
</mml:msubsup>
<mml:mo>&#x2212;</mml:mo>
<mml:msubsup>
<mml:mi>y</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi mathvariant="italic">vel</mml:mi>
</mml:msubsup>
<mml:mo stretchy="true">&#x2016;</mml:mo>
</mml:mrow>
<mml:mn>2</mml:mn>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:math>
<label>(9)</label>
</disp-formula>
<p>In <xref ref-type="disp-formula" rid="E9">Equation 9</xref>, <inline-formula>
<mml:math id="M69">
<mml:mi>D</mml:mi>
</mml:math>
</inline-formula> is the velocity vector dimension, <inline-formula>
<mml:math id="M70">
<mml:msubsup>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo stretchy="true">&#x0302;</mml:mo>
</mml:mover>
<mml:mi>t</mml:mi>
<mml:mi mathvariant="italic">vel</mml:mi>
</mml:msubsup>
</mml:math>
</inline-formula> is the model&#x2019;s speed prediction at step <inline-formula>
<mml:math id="M71">
<mml:mi>t</mml:mi>
</mml:math>
</inline-formula>, and <inline-formula>
<mml:math id="M72">
<mml:msubsup>
<mml:mi>y</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi mathvariant="italic">vel</mml:mi>
</mml:msubsup>
</mml:math>
</inline-formula> is the true speed vector.</p>
<p>The attention regularization uses entropy terms to constrain the sparsity and interpretability of attention distribution, which is defined as follows:</p>
<disp-formula id="E10">
<mml:math id="M73">
<mml:msub>
<mml:mi>L</mml:mi>
<mml:mi mathvariant="italic">att</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mi>H</mml:mi>
</mml:mfrac>
<mml:munderover>
<mml:mo movablelimits="false">&#x2211;</mml:mo>
<mml:mrow>
<mml:mi mathvariant="normal">h</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>H</mml:mi>
</mml:munderover>
<mml:munderover>
<mml:mo movablelimits="false">&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>T</mml:mi>
</mml:munderover>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mrow>
<mml:mi mathvariant="normal">h</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>log</mml:mo>
<mml:mspace width="0.25em"/>
<mml:mo stretchy="true">(</mml:mo>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mrow>
<mml:mi mathvariant="normal">h</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:mi>&#x03B5;</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
</mml:math>
<label>(10)</label>
</disp-formula>
<p>In <xref ref-type="disp-formula" rid="E10">Equation 10</xref>, <inline-formula>
<mml:math id="M74">
<mml:mi>H</mml:mi>
</mml:math>
</inline-formula> is the number of attention heads or channels, <inline-formula>
<mml:math id="M75">
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mrow>
<mml:mi mathvariant="normal">h</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:math>
</inline-formula> is the normalized attention weight (satisfying <inline-formula>
<mml:math id="M76">
<mml:munder>
<mml:mo movablelimits="false">&#x2211;</mml:mo>
<mml:mi>t</mml:mi>
</mml:munder>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mrow>
<mml:mi mathvariant="normal">h</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:math>
</inline-formula> for each head), and <inline-formula>
<mml:math id="M77">
<mml:mi>&#x03B5;</mml:mi>
</mml:math>
</inline-formula> is a numerical stabilization term to prevent logarithmic divergence.</p>
<p>In order to ensure the smoothness of the forecast time series, the first-order difference smoothing term is introduced, which is defined as follows:</p>
<disp-formula id="E11">
<mml:math id="M78">
<mml:msub>
<mml:mi>L</mml:mi>
<mml:mrow>
<mml:mtext mathvariant="italic">smoot</mml:mtext>
<mml:mi mathvariant="normal">h</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfrac>
<mml:munderover>
<mml:mo movablelimits="false">&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mi>T</mml:mi>
</mml:munderover>
<mml:msubsup>
<mml:mrow>
<mml:mo stretchy="true">&#x2016;</mml:mo>
<mml:msub>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo stretchy="true">&#x0302;</mml:mo>
</mml:mover>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo stretchy="true">&#x0302;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="true">&#x2016;</mml:mo>
</mml:mrow>
<mml:mn>2</mml:mn>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:math>
<label>(11)</label>
</disp-formula>
<p>In <xref ref-type="disp-formula" rid="E11">Equation 11</xref>, <inline-formula>
<mml:math id="M79">
<mml:msub>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo stretchy="true">&#x0302;</mml:mo>
</mml:mover>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:math>
</inline-formula> is the joint prediction vector obtained by splicing on each task, which is used to measure the continuity of predictions at adjacent moments.</p>
<p>In the mini-batch training framework, the batch loss is the average of samples within the batch, expressed as <xref ref-type="disp-formula" rid="E12">Equation 12</xref>:</p>
<disp-formula id="E12">
<mml:math id="M80">
<mml:msub>
<mml:mi>L</mml:mi>
<mml:mrow>
<mml:mtext mathvariant="italic">batc</mml:mtext>
<mml:mi mathvariant="normal">h</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mi>B</mml:mi>
</mml:mfrac>
<mml:munderover>
<mml:mo movablelimits="false">&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>B</mml:mi>
</mml:munderover>
<mml:msub>
<mml:mi>L</mml:mi>
<mml:mtext>total</mml:mtext>
</mml:msub>
<mml:mo stretchy="true">(</mml:mo>
<mml:msup>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>i</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo stretchy="true">)</mml:mo>
</mml:math>
<label>(12)</label>
</disp-formula>
<p>In <xref ref-type="disp-formula" rid="E12">Equation 12</xref>, <inline-formula>
<mml:math id="M81">
<mml:mi>B</mml:mi>
</mml:math>
</inline-formula> is the batch size, <inline-formula>
<mml:math id="M82">
<mml:msup>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>i</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
</mml:mrow>
</mml:msup>
</mml:math>
</inline-formula> is the multimodal sequence and label of the <inline-formula>
<mml:math id="M83">
<mml:mi>i</mml:mi>
</mml:math>
</inline-formula>-th sample in the batch.</p>
<p>Backpropagation uses truncated backpropagation through time (BPTT), unrolling in the time dimension to a length of <inline-formula>
<mml:math id="M84">
<mml:mi>&#x03C4;</mml:mi>
</mml:math>
</inline-formula>. To suppress gradient explosion, the norm of the gradient is clipped. The clipping rule is as shown in <xref ref-type="disp-formula" rid="E13">Equation 13</xref>:</p>
<disp-formula id="E13">
<mml:math id="M85">
<mml:mover accent="true">
<mml:mi>g</mml:mi>
<mml:mo stretchy="true">&#x02DC;</mml:mo>
</mml:mover>
<mml:mo>=</mml:mo>
<mml:mi>g</mml:mi>
<mml:mo>&#x00B7;</mml:mo>
<mml:mo>min</mml:mo>
<mml:mo stretchy="true">(</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mfrac>
<mml:mi>C</mml:mi>
<mml:mrow>
<mml:mo>&#x2225;</mml:mo>
<mml:mi>g</mml:mi>
<mml:msub>
<mml:mo>&#x2225;</mml:mo>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
</mml:mfrac>
<mml:mo stretchy="true">)</mml:mo>
</mml:math>
<label>(13)</label>
</disp-formula>
<p>In <xref ref-type="disp-formula" rid="E13">Equation 13</xref>, <inline-formula>
<mml:math id="M86">
<mml:mi>g</mml:mi>
</mml:math>
</inline-formula> is the unclipped gradient vector, <inline-formula>
<mml:math id="M87">
<mml:mo>&#x2225;</mml:mo>
<mml:mi>g</mml:mi>
<mml:msub>
<mml:mo>&#x2225;</mml:mo>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:math>
</inline-formula> is its two-norm, <inline-formula>
<mml:math id="M88">
<mml:mi>C</mml:mi>
</mml:math>
</inline-formula> is the clipping threshold, and <inline-formula>
<mml:math id="M89">
<mml:mover accent="true">
<mml:mi>g</mml:mi>
<mml:mo stretchy="true">&#x02DC;</mml:mo>
</mml:mover>
</mml:math>
</inline-formula> is the clipped gradient.</p>
<p>The parameter update uses the Adam optimizer with bias correction, and the update process is based on <xref ref-type="disp-formula" rid="E14">Equation 14</xref>:</p>
<disp-formula id="E14">
<mml:math id="M90">
<mml:mtable columnalign="right" displaystyle="true">
<mml:mtr>
<mml:mtd>
<mml:msub>
<mml:mi>m</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mtd>
<mml:mtd>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mi>&#x03B2;</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:msub>
<mml:mi>m</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:mo stretchy="true">(</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>&#x03B2;</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo stretchy="true">)</mml:mo>
<mml:msub>
<mml:mi>g</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:msub>
<mml:mi>v</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mtd>
<mml:mtd>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mi>&#x03B2;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:msub>
<mml:mi>v</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:mo stretchy="true">(</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>&#x03B2;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo stretchy="true">)</mml:mo>
<mml:msub>
<mml:mi>g</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x2299;</mml:mo>
<mml:msub>
<mml:mi>g</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:msub>
<mml:mover accent="true">
<mml:mi>m</mml:mi>
<mml:mo stretchy="true">&#x0302;</mml:mo>
</mml:mover>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mtd>
<mml:mtd>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:msub>
<mml:mi>m</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:msubsup>
<mml:mi>&#x03B2;</mml:mi>
<mml:mn>1</mml:mn>
<mml:mi>t</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:mfrac>
<mml:mo>,</mml:mo>
<mml:mspace width="1em"/>
<mml:mspace width="0.25em"/>
<mml:msub>
<mml:mover accent="true">
<mml:mi>v</mml:mi>
<mml:mo stretchy="true">&#x0302;</mml:mo>
</mml:mover>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:msub>
<mml:mi>v</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:msubsup>
<mml:mi>&#x03B2;</mml:mi>
<mml:mn>2</mml:mn>
<mml:mi>t</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:mfrac>
<mml:mo>,</mml:mo>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:msub>
<mml:mi>&#x03B8;</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>+</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mtd>
<mml:mtd>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mi>&#x03B8;</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>&#x03B7;</mml:mi>
<mml:mfrac>
<mml:msub>
<mml:mover accent="true">
<mml:mi>m</mml:mi>
<mml:mo stretchy="true">&#x0302;</mml:mo>
</mml:mover>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mrow>
<mml:msqrt>
<mml:msub>
<mml:mover accent="true">
<mml:mi>v</mml:mi>
<mml:mo stretchy="true">&#x0302;</mml:mo>
</mml:mover>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:msqrt>
<mml:mo>+</mml:mo>
<mml:mi>&#x03B5;</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mo>.</mml:mo>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:math>
<label>(14)</label>
</disp-formula>
<p>In <xref ref-type="disp-formula" rid="E14">Equation 14</xref>, <inline-formula>
<mml:math id="M91">
<mml:msub>
<mml:mi>g</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:math>
</inline-formula> is the clipped gradient, <inline-formula>
<mml:math id="M92">
<mml:msub>
<mml:mi>m</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math id="M93">
<mml:msub>
<mml:mi>v</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:math>
</inline-formula> are the first-order and second-order moment estimates respectively, <inline-formula>
<mml:math id="M94">
<mml:msub>
<mml:mi>&#x03B2;</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>&#x03B2;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:math>
</inline-formula> are the momentum coefficients, <inline-formula>
<mml:math id="M95">
<mml:msub>
<mml:mover accent="true">
<mml:mi>m</mml:mi>
<mml:mo stretchy="true">&#x0302;</mml:mo>
</mml:mover>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mover accent="true">
<mml:mi>v</mml:mi>
<mml:mo stretchy="true">&#x0302;</mml:mo>
</mml:mover>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:math>
</inline-formula> are the bias-corrected estimates, <inline-formula>
<mml:math id="M96">
<mml:mi>&#x03B7;</mml:mi>
</mml:math>
</inline-formula> is the learning rate, <inline-formula>
<mml:math id="M97">
<mml:mi>&#x03B5;</mml:mi>
</mml:math>
</inline-formula> is the numerical stability term.</p>
<p>Cosine annealing learning rate scheduling is used, and the learning rate at training step <inline-formula>
<mml:math id="M98">
<mml:mi>t</mml:mi>
</mml:math>
</inline-formula> is calculated according to <xref ref-type="disp-formula" rid="E15">Equation 15</xref>:</p>
<disp-formula id="E15">
<mml:math id="M99">
<mml:msub>
<mml:mi>&#x03B7;</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mi>&#x03B7;</mml:mi>
<mml:mi>min</mml:mi>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mn>2</mml:mn>
</mml:mfrac>
<mml:mo stretchy="true">(</mml:mo>
<mml:msub>
<mml:mi>&#x03B7;</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>&#x03B7;</mml:mi>
<mml:mi>min</mml:mi>
</mml:msub>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo stretchy="true">(</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>+</mml:mo>
<mml:mo>cos</mml:mo>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi mathvariant="italic">&#x03C0;t</mml:mi>
<mml:mo>/</mml:mo>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mi>max</mml:mi>
</mml:msub>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo stretchy="true">)</mml:mo>
</mml:math>
<label>(15)</label>
</disp-formula>
<p>In <xref ref-type="disp-formula" rid="E15">Equation 15</xref>, <inline-formula>
<mml:math id="M100">
<mml:msub>
<mml:mi>&#x03B7;</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
</mml:math>
</inline-formula> is the initial learning rate, <inline-formula>
<mml:math id="M101">
<mml:msub>
<mml:mi>&#x03B7;</mml:mi>
<mml:mi>min</mml:mi>
</mml:msub>
</mml:math>
</inline-formula> is the minimum learning rate, <inline-formula>
<mml:math id="M102">
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mi>max</mml:mi>
</mml:msub>
</mml:math>
</inline-formula> is the maximum number of training steps; to be consistent with the convergence observation, take <inline-formula>
<mml:math id="M103">
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mi>max</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mn>500</mml:mn>
</mml:math>
</inline-formula>.</p>
<p>When the validation set loss does not decrease in consecutive <inline-formula>
<mml:math id="M104">
<mml:mi>p</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>20</mml:mn>
</mml:math>
</inline-formula> epochs, the training is terminated and the model parameters are rolled back to the lowest point of the validation set to retain the optimal performance.</p>
<p>The fused features are fed into the fully connected layer to generate personalized training feedback. Feedback generation forms a closed loop with four steps: feature mapping, deviation measurement, intensity mapping, and output mapping. The feedback feature vector is given by <xref ref-type="disp-formula" rid="E16">Equation 16</xref>:</p>
<disp-formula id="E16">
<mml:math id="M105">
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mi>&#x03C3;</mml:mi>
<mml:mo stretchy="true">(</mml:mo>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mi>f</mml:mi>
</mml:msub>
<mml:msub>
<mml:mi>z</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mi>f</mml:mi>
</mml:msub>
<mml:mo stretchy="true">)</mml:mo>
</mml:math>
<label>(16)</label>
</disp-formula>
<p>In <xref ref-type="disp-formula" rid="E16">Equation 16</xref>, <inline-formula>
<mml:math id="M106">
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mi>f</mml:mi>
</mml:msub>
</mml:math>
</inline-formula> is the weight matrix of size <inline-formula>
<mml:math id="M107">
<mml:msub>
<mml:mi>R</mml:mi>
<mml:mi>f</mml:mi>
</mml:msub>
<mml:mo>&#x00D7;</mml:mo>
<mml:msub>
<mml:mi>D</mml:mi>
<mml:mi>z</mml:mi>
</mml:msub>
</mml:math>
</inline-formula>, <inline-formula>
<mml:math id="M108">
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mi>f</mml:mi>
</mml:msub>
</mml:math>
</inline-formula> is the bias vector, <inline-formula>
<mml:math id="M109">
<mml:msub>
<mml:mi>D</mml:mi>
<mml:mi>z</mml:mi>
</mml:msub>
</mml:math>
</inline-formula> is the fusion feature dimension, <inline-formula>
<mml:math id="M110">
<mml:mi>&#x03C3;</mml:mi>
</mml:math>
</inline-formula> is the component-wise Sigmoid activation function used to limit the output to the interval <inline-formula>
<mml:math id="M111">
<mml:mo stretchy="true">(</mml:mo>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo stretchy="true">)</mml:mo>
</mml:math>
</inline-formula>, and <inline-formula>
<mml:math id="M112">
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:math>
</inline-formula> is the feedback feature vector.</p>
<p>The deviation metric uses the Euclidean deviation from the target baseline and is normalized within the window, defined as follows:</p>
<disp-formula id="E17">
<mml:math id="M113">
<mml:msub>
<mml:mi>&#x0394;</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:msubsup>
<mml:mi>y</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi mathvariant="italic">ref</mml:mi>
</mml:msubsup>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo stretchy="true">&#x0302;</mml:mo>
</mml:mover>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mspace width="1em"/>
<mml:mspace width="0.25em"/>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:msub>
<mml:mrow>
<mml:mo stretchy="true">&#x2016;</mml:mo>
<mml:msub>
<mml:mi>&#x0394;</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo stretchy="true">&#x2016;</mml:mo>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mrow>
<mml:munder>
<mml:mo>max</mml:mo>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>'</mml:mo>
<mml:mo>&#x2208;</mml:mo>
<mml:mi>W</mml:mi>
</mml:mrow>
</mml:munder>
<mml:msub>
<mml:mrow>
<mml:mo stretchy="true">&#x2016;</mml:mo>
<mml:msub>
<mml:mi>&#x0394;</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>'</mml:mo>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="true">&#x2016;</mml:mo>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:mi>&#x03B4;</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:math>
<label>(17)</label>
</disp-formula>
<p>In <xref ref-type="disp-formula" rid="E17">Equation 17</xref>, <inline-formula>
<mml:math id="M114">
<mml:msubsup>
<mml:mi>y</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi mathvariant="italic">ref</mml:mi>
</mml:msubsup>
</mml:math>
</inline-formula> is the target baseline vector, <inline-formula>
<mml:math id="M115">
<mml:msub>
<mml:mi>&#x0394;</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:math>
</inline-formula> is the deviation vector, <inline-formula>
<mml:math id="M116">
<mml:msub>
<mml:mrow>
<mml:mo stretchy="true">&#x2016;</mml:mo>
<mml:mo>&#x22C5;</mml:mo>
<mml:mo stretchy="true">&#x2016;</mml:mo>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:math>
</inline-formula> is the Euclidean norm, the window <inline-formula>
<mml:math id="M117">
<mml:mi>W</mml:mi>
</mml:math>
</inline-formula> is used to calculate the normalized reference value, <inline-formula>
<mml:math id="M118">
<mml:mi>&#x03B4;</mml:mi>
</mml:math>
</inline-formula> is a small constant to prevent division by zero, and <inline-formula>
<mml:math id="M119">
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:math>
</inline-formula> is the normalized deviation scalar.</p>
<p>The deviation scalar is nonlinearly mapped to obtain the feedback strength <inline-formula>
<mml:math id="M120">
<mml:msub>
<mml:mi>I</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:math>
</inline-formula>, and the mapping is performed according to <xref ref-type="disp-formula" rid="E18">Equation 18</xref>.</p>
<disp-formula id="E18">
<mml:math id="M121">
<mml:msub>
<mml:mi>I</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mtext>sigmoid</mml:mtext>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>&#x03B3;</mml:mi>
<mml:mspace width="0.1em"/>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo stretchy="true">)</mml:mo>
</mml:math>
<label>(18)</label>
</disp-formula>
<p>In <xref ref-type="disp-formula" rid="E18">Equation 18</xref>, <inline-formula>
<mml:math id="M122">
<mml:mi>&#x03B3;</mml:mi>
</mml:math>
</inline-formula> is the gain coefficient used to adjust the sensitivity of the deviation to intensity, <inline-formula>
<mml:math id="M123">
<mml:mtext>sigmoid</mml:mtext>
<mml:mo stretchy="true">(</mml:mo>
<mml:mo>&#x00B7;</mml:mo>
<mml:mo stretchy="true">)</mml:mo>
</mml:math>
</inline-formula> is the standard S-shaped mapping, <inline-formula>
<mml:math id="M124">
<mml:msub>
<mml:mi>I</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:math>
</inline-formula> is the scalar intensity.</p>
<p>The continuous control vector actually sent to the virtual simulation/teaching interface is obtained by linear mapping and clipping after splicing the feedback characteristics and intensity, and is expressed as follows:</p>
<disp-formula id="E19">
<mml:math id="M125">
<mml:msub>
<mml:mi>u</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mtext>clip</mml:mtext>
<mml:mo stretchy="true">(</mml:mo>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mi>u</mml:mi>
</mml:msub>
<mml:mo stretchy="true">[</mml:mo>
<mml:mtable displaystyle="true">
<mml:mtr>
<mml:mtd>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:msub>
<mml:mi>I</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mtd>
</mml:mtr>
</mml:mtable>
<mml:mo stretchy="true">]</mml:mo>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mi>u</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>u</mml:mi>
<mml:mi>min</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>u</mml:mi>
<mml:mi>max</mml:mi>
</mml:msub>
<mml:mo stretchy="true">)</mml:mo>
</mml:math>
<label>(19)</label>
</disp-formula>
<p>In <xref ref-type="disp-formula" rid="E19">Equation 19</xref>, <inline-formula>
<mml:math id="M126">
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mi>u</mml:mi>
</mml:msub>
</mml:math>
</inline-formula> is the output mapping matrix, <inline-formula>
<mml:math id="M127">
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mi>u</mml:mi>
</mml:msub>
</mml:math>
</inline-formula> is the bias, <inline-formula>
<mml:math id="M128">
<mml:mtext>clip</mml:mtext>
<mml:mo stretchy="true">(</mml:mo>
<mml:mo>&#x00B7;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>u</mml:mi>
<mml:mi>min</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>u</mml:mi>
<mml:mi>max</mml:mi>
</mml:msub>
<mml:mo stretchy="true">)</mml:mo>
</mml:math>
</inline-formula> represents component-by-component clipping of the vector to the interval <inline-formula>
<mml:math id="M129">
<mml:mo stretchy="true">[</mml:mo>
<mml:msub>
<mml:mi>u</mml:mi>
<mml:mi>min</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>u</mml:mi>
<mml:mi>max</mml:mi>
</mml:msub>
<mml:mo stretchy="true">]</mml:mo>
</mml:math>
</inline-formula>, and <inline-formula>
<mml:math id="M130">
<mml:msub>
<mml:mi>u</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:math>
</inline-formula> is the continuous feedback signal finally sent to the virtual simulation engine or the coaching end.</p>
<p>In order to make the generated feedback show a positive effect in the posterior evaluation, a closed-loop evaluation of the feedback utility is introduced and the evaluation signal is converted into an auxiliary loss for joint training, which can be rewritten as follows:</p>
<disp-formula id="E20">
<mml:math id="M131">
<mml:msub>
<mml:mi>R</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2192;</mml:mo>
<mml:mi>t</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi>K</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mi>K</mml:mi>
</mml:mfrac>
<mml:munderover>
<mml:mo movablelimits="false">&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>K</mml:mi>
</mml:munderover>
<mml:mspace width="0.25em"/>
<mml:mo stretchy="true">(</mml:mo>
<mml:msub>
<mml:mi>m</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>m</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo stretchy="true">)</mml:mo>
</mml:math>
<label>(20)</label>
</disp-formula>
<p>In <xref ref-type="disp-formula" rid="E20">Equation 20</xref>, <inline-formula>
<mml:math id="M132">
<mml:msub>
<mml:mi>m</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:math>
</inline-formula> is the scalar performance metric used to evaluate the training effect, <inline-formula>
<mml:math id="M133">
<mml:mi>K</mml:mi>
</mml:math>
</inline-formula> is the evaluation window length, <inline-formula>
<mml:math id="M134">
<mml:msub>
<mml:mi>R</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2192;</mml:mo>
<mml:mi>t</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi>K</mml:mi>
</mml:mrow>
</mml:msub>
</mml:math>
</inline-formula> is the average improvement within a given window.</p>
<disp-formula id="E21">
<mml:math id="M135">
<mml:msub>
<mml:mi>L</mml:mi>
<mml:mi mathvariant="italic">fb</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>R</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2192;</mml:mo>
<mml:mi>t</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi>K</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mspace width="1em"/>
<mml:mspace width="0.25em"/>
<mml:msubsup>
<mml:mi>L</mml:mi>
<mml:mtext>total</mml:mtext>
<mml:mo>&#x2032;</mml:mo>
</mml:msubsup>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mi>L</mml:mi>
<mml:mtext>total</mml:mtext>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>&#x03BB;</mml:mi>
<mml:mi mathvariant="italic">fb</mml:mi>
</mml:msub>
<mml:msub>
<mml:mi>L</mml:mi>
<mml:mi mathvariant="italic">fb</mml:mi>
</mml:msub>
</mml:math>
<label>(21)</label>
</disp-formula>
<p>In <xref ref-type="disp-formula" rid="E21">Equation 21</xref>, <inline-formula>
<mml:math id="M136">
<mml:msub>
<mml:mi>&#x03BB;</mml:mi>
<mml:mi mathvariant="italic">fb</mml:mi>
</mml:msub>
</mml:math>
</inline-formula> is the feedback utility weight, <inline-formula>
<mml:math id="M137">
<mml:msubsup>
<mml:mi>L</mml:mi>
<mml:mtext>total</mml:mtext>
<mml:mo>&#x2032;</mml:mo>
</mml:msubsup>
</mml:math>
</inline-formula> is the joint loss with feedback utility constraint, and is used to guide the model to generate feedback signals that reflect positive improvements in the posterior evaluation.</p>
<p>The training hyperparameters in this implementation are: batch size <inline-formula>
<mml:math id="M138">
<mml:mi>B</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>32</mml:mn>
</mml:math>
</inline-formula>, truncation length <inline-formula>
<mml:math id="M139">
<mml:mi>&#x03C4;</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>100</mml:mn>
</mml:math>
</inline-formula>, gradient clipping threshold <inline-formula>
<mml:math id="M140">
<mml:mi>C</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>5.0</mml:mn>
</mml:math>
</inline-formula>, Adam momentum coefficient <inline-formula>
<mml:math id="M141">
<mml:msub>
<mml:mi>&#x03B2;</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mn>0.9</mml:mn>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>&#x03B2;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mn>0.999</mml:mn>
</mml:math>
</inline-formula>, numerical stability term <inline-formula>
<mml:math id="M142">
<mml:mi>&#x03B5;</mml:mi>
<mml:mo>=</mml:mo>
<mml:msup>
<mml:mn>10</mml:mn>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>8</mml:mn>
</mml:mrow>
</mml:msup>
</mml:math>
</inline-formula>, initial learning rate <inline-formula>
<mml:math id="M143">
<mml:msub>
<mml:mi>&#x03B7;</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:msup>
<mml:mn>10</mml:mn>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>3</mml:mn>
</mml:mrow>
</mml:msup>
</mml:math>
</inline-formula>, minimum learning rate <inline-formula>
<mml:math id="M144">
<mml:msub>
<mml:mi>&#x03B7;</mml:mi>
<mml:mi>min</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:msup>
<mml:mn>10</mml:mn>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>6</mml:mn>
</mml:mrow>
</mml:msup>
</mml:math>
</inline-formula>, maximum number of training steps <inline-formula>
<mml:math id="M145">
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mi>max</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mn>500</mml:mn>
</mml:math>
</inline-formula>, early stopping window <inline-formula>
<mml:math id="M146">
<mml:mi>p</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>20</mml:mn>
</mml:math>
</inline-formula>, attention regularization weight <inline-formula>
<mml:math id="M147">
<mml:msub>
<mml:mi>&#x03BB;</mml:mi>
<mml:mi mathvariant="italic">att</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mn>0.01</mml:mn>
</mml:math>
</inline-formula>, time series smoothing weight <inline-formula>
<mml:math id="M148">
<mml:msub>
<mml:mi>&#x03BB;</mml:mi>
<mml:mtext mathvariant="italic">smooth</mml:mtext>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mn>0.1</mml:mn>
</mml:math>
</inline-formula>, feedback utility weight <inline-formula>
<mml:math id="M149">
<mml:msub>
<mml:mi>&#x03BB;</mml:mi>
<mml:mi mathvariant="italic">fb</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mn>0.5</mml:mn>
</mml:math>
</inline-formula>, task weight is set to <inline-formula>
<mml:math id="M150">
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mi>p</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mn>1.0</mml:mn>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mi>e</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mn>0.8</mml:mn>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mi>v</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mn>0.8</mml:mn>
</mml:math>
</inline-formula>, feedback gain <inline-formula>
<mml:math id="M151">
<mml:mi>&#x03B3;</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>10.0</mml:mn>
</mml:math>
</inline-formula>, output clipping interval is taken <inline-formula>
<mml:math id="M152">
<mml:msub>
<mml:mi>u</mml:mi>
<mml:mi>min</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>u</mml:mi>
<mml:mi>max</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:math>
</inline-formula>. During the training process, batch normalization and layer normalization are used to maintain numerical stability of the hidden layer after each parameter update, and the training is rolled back at the lowest point of the validation set to retain the optimal weights.</p>
<p>The above training optimization and feedback generation process transforms cross-modal fusion time series predictions into executable personalized feedback signals through multi-task loss construction, constraints based on entropy and time series smoothing, a stable numerical update mechanism, and closed-loop feedback utility constraints. It also uses closed-loop evaluation to drive the adaptive fine-tuning of the model during training.</p>
</sec>
</sec>
<sec id="sec7">
<label>3</label>
<title>Experimental design and implementation</title>
<sec id="sec8">
<label>3.1</label>
<title>Experimental dataset construction</title>
<p>The study recruited 45 participants with varying levels of volleyball proficiency, including young professionals, university athletes, and amateur enthusiasts (see <xref ref-type="table" rid="tab3">Table 3</xref>). The participants were aged between 15 and 28&#x202F;years (Mean&#x202F;=&#x202F;21.4, SD&#x202F;=&#x202F;3.2), with heights ranging from 165&#x202F;cm to 195&#x202F;cm. Prior to the experiment, all participants (and legal guardians for those under 18) provided written informed consent. The experimental procedures regarding human subjects were strictly adhered to the Declaration of Helsinki and were approved by the institutional review board. During the experiment, the research team constructed a multi-layered dataset targeting individuals at different training levels to ensure sufficient motion diversity and sample coverage for the multimodal time series modeling process. The experimental subjects included young professionals, university teams, amateur clubs, middle school interest classes, and students from physical education colleges. Gesture capture, electromyography sensors, and accelerometers were used throughout the data collection process to continuously sample core volleyball training movements such as serving, spiking, blocking, passing, padding, and run-ups, and the cumulative training duration was recorded. To ensure the reliability of the subsequent model in motion recognition and load modeling, this section summarizes the experimental subjects, motion categories, and acquisition duration. The results are shown in <xref ref-type="table" rid="tab3">Table 3</xref>.</p>
<table-wrap position="float" id="tab3">
<label>Table 3</label>
<caption>
<p>Distribution of experimental subjects and actions.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">Group ID</th>
<th align="center" valign="top">Number of participants</th>
<th align="left" valign="top">Action categories</th>
<th align="center" valign="top">Total duration (minutes)</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="middle">G1 (University Volleyball Team)</td>
<td align="center" valign="middle">10</td>
<td align="left" valign="middle">Spiking, Blocking, Approach Run</td>
<td align="center" valign="middle">320</td>
</tr>
<tr>
<td align="left" valign="middle">G2 (Amateur Volleyball Club)</td>
<td align="center" valign="middle">8</td>
<td align="left" valign="middle">Serving, Serve Reception, Setting</td>
<td align="center" valign="middle">260</td>
</tr>
<tr>
<td align="left" valign="middle">G3 (Physical Education Students)</td>
<td align="center" valign="middle">12</td>
<td align="left" valign="middle">Approach Run, Jumping, Spiking</td>
<td align="center" valign="middle">340</td>
</tr>
<tr>
<td align="left" valign="middle">G4 (Middle School Volleyball Class)</td>
<td align="center" valign="middle">6</td>
<td align="left" valign="middle">Setting, Passing, Blocking</td>
<td align="center" valign="middle">210</td>
</tr>
<tr>
<td align="left" valign="middle">G5 (Professional Youth Group)</td>
<td align="center" valign="middle">9</td>
<td align="left" valign="middle">Serving, Spiking, Blocking, Serve Reception</td>
<td align="center" valign="middle">370</td>
</tr>
<tr>
<td align="left" valign="middle">Total</td>
<td align="center" valign="middle">45</td>
<td align="left" valign="middle">6 Core Actions</td>
<td align="center" valign="middle">1,500</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>All participants were within the age range of 15 to 28&#x202F;years and their height ranged from 165 to 195&#x202F;cm. Surface EMG sensors were placed on the skin overlying the bellies of the following major muscle groups involved in volleyball actions: biceps brachii, triceps brachii, anterior deltoid, pectoralis major, rectus femoris, and gastrocnemius. The specific combination of muscles monitored was adjusted per the focus of the training movement for each experimental group, ensuring coverage of primary movers for spiking, blocking, serving, and jumping actions. The distribution of experimental subjects showed that the professional youth group and physical education college students had the highest proportion of action collection time, reaching 370 and 340&#x202F;min, respectively. This was primarily due to the stability and continuity of these groups&#x2019; movements during training, which reduced the number of invalid segments removed, effectively extending data retention. The university volleyball team&#x2019;s training time was 320&#x202F;min, indicating that their training intensity and frequency approached professional standards, but the movement continuity was slightly lower, resulting in incomplete data retention. The amateur club group&#x2019;s collection time was 260&#x202F;min, lower than that of the university and professional groups. This was due to insufficient standardization of movement execution, resulting in the rejection of some sequences due to substandard quality. The middle school interest class&#x2019;s total time was 210&#x202F;min, the lowest of all groups. This was primarily due to their lack of sports experience, resulting in frequent interruptions and unstable segments between movements, which reduced the proportion of valid data. Overall, differences in training proficiency and technical stability across different groups directly influenced the distribution of data collection time. 
For each subject and action sequence, the multimodal time-series data were sorted chronologically and split into training, validation, and test sets with a ratio of 70%/15%/15%. The earliest segment was used for training, the subsequent segment for validation, and the final segment for testing, ensuring that all test samples occurred strictly after the training data.</p>
</sec>
<sec id="sec9">
<label>3.2</label>
<title>Experimental platform and implementation environment</title>
<p>During the experiment, the research team established a complete training environment based on a virtual simulation platform. They combined posture capture, electromyography, and accelerometers to collect multimodal data, ensuring the simultaneous acquisition of kinematic and physiological signals on the same platform. After data collection, a unified hardware and software environment was used for storage, modeling, and training, achieving a closed-loop support from virtual training scenarios to algorithm implementation. To clarify the experimental conditions and environmental configuration, the relevant parameters are shown in <xref ref-type="table" rid="tab4">Table 4</xref>.</p>
<table-wrap position="float" id="tab4">
<label>Table 4</label>
<caption>
<p>Experimental environment parameters.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">Platform/category</th>
<th align="left" valign="top">Configuration item</th>
<th align="left" valign="top">Parameter description</th>
<th align="left" valign="top">Version/specification</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="middle">Virtual Simulation Platform</td>
<td align="left" valign="middle">Engine Type</td>
<td align="left" valign="middle">Unity 3D Engine</td>
<td align="left" valign="middle">2021.3 LTS</td>
</tr>
<tr>
<td align="left" valign="middle">Data Acquisition Device</td>
<td align="left" valign="middle">Motion Capture System</td>
<td align="left" valign="middle">Optical motion capture, 12 channels</td>
<td align="left" valign="middle">100&#x202F;Hz</td>
</tr>
<tr>
<td align="left" valign="middle">Data Acquisition Device</td>
<td align="left" valign="middle">EMG Sensor</td>
<td align="left" valign="middle">Surface EMG acquisition, 8 channels</td>
<td align="left" valign="middle">1,000&#x202F;Hz</td>
</tr>
<tr>
<td align="left" valign="middle">Data Acquisition Device</td>
<td align="left" valign="middle">Accelerometer</td>
<td align="left" valign="middle">Three-axis acceleration, range &#x00B1;16&#x202F;g</td>
<td align="left" valign="middle">200&#x202F;Hz</td>
</tr>
<tr>
<td align="left" valign="middle">Hardware Environment</td>
<td align="left" valign="middle">GPU</td>
<td align="left" valign="middle">NVIDIA RTX 3080</td>
<td align="left" valign="middle">10&#x202F;GB VRAM</td>
</tr>
<tr>
<td align="left" valign="middle">Hardware Environment</td>
<td align="left" valign="middle">CPU</td>
<td align="left" valign="middle">Intel Core i7-12700K</td>
<td align="left" valign="middle">12 cores, 3.6&#x202F;GHz</td>
</tr>
<tr>
<td align="left" valign="middle">Hardware Environment</td>
<td align="left" valign="middle">Memory</td>
<td align="left" valign="middle">DDR4</td>
<td align="left" valign="middle">32&#x202F;GB</td>
</tr>
<tr>
<td align="left" valign="middle">Software Environment</td>
<td align="left" valign="middle">Deep Learning Framework</td>
<td align="left" valign="middle">PyTorch</td>
<td align="left" valign="middle">2.0.1</td>
</tr>
<tr>
<td align="left" valign="middle">Software Environment</td>
<td align="left" valign="middle">Programming Language and Toolchain</td>
<td align="left" valign="middle">Python</td>
<td align="left" valign="middle">3.9</td>
</tr>
<tr>
<td align="left" valign="middle">Software Environment</td>
<td align="left" valign="middle">Operating System</td>
<td align="left" valign="middle">Ubuntu</td>
<td align="left" valign="middle">20.04 LTS</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>This table summarizes the construction conditions of the virtual simulation platform, the sensor acquisition accuracy and number of channels, and the core hardware and software parameters, ensuring that experimental data is processed in an efficient and stable environment. The GPU and CPU configurations meet the training requirements of deep learning models, while the matching memory and operating system support the storage and access of large-scale time series data. The deep learning framework and programming environment lay the foundation for the implementation of DS-LSTM and temporal attention mechanisms.</p>
</sec>
<sec id="sec10">
<label>3.3</label>
<title>Ablation study of model architecture components</title>
<p>To examine the roles of two-stream structures and temporal attention mechanisms in multimodal temporal modeling, this study conducted comparative modeling of different structural configurations while maintaining consistency in data partitioning, preprocessing procedures, network depth, hidden dimensions, and training strategies. The single-stream LSTM and the proposed DS-LSTM were implemented and trained under identical experimental conditions, including the same input features, data splits, network depth, hidden dimensions, optimizer, learning rate schedule, batch size, and stopping criteria, so that the comparison directly reflects architectural differences. Experimental setups included single-stream temporal modeling, modeling with only two-stream structures, modeling with only temporal attention, and a complete model employing both two-stream structures and temporal attention mechanisms. All models converged within the same number of training epochs, and action classification accuracy, electromyography load modeling error, and velocity curve fitting performance were evaluated on the same test set to observe the impact of structural changes on multimodal modeling behavior. The relevant results are summarized in <xref ref-type="table" rid="tab5">Table 5</xref>.</p>
<table-wrap position="float" id="tab5">
<label>Table 5</label>
<caption>
<p>Ablation results of model components.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">Model configuration</th>
<th align="center" valign="top">Motion accuracy (%)</th>
<th align="center" valign="top">EMG error (%)</th>
<th align="center" valign="top">Velocity R<sup>2</sup></th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="middle">Single-stream LSTM</td>
<td align="char" valign="middle" char=".">86.4</td>
<td align="char" valign="middle" char=".">8.7</td>
<td align="char" valign="middle" char=".">0.83</td>
</tr>
<tr>
<td align="left" valign="middle">Dual-stream LSTM without attention</td>
<td align="char" valign="middle" char=".">90.2</td>
<td align="char" valign="middle" char=".">6.0</td>
<td align="char" valign="middle" char=".">0.87</td>
</tr>
<tr>
<td align="left" valign="middle">Attention-based single-stream LSTM</td>
<td align="char" valign="middle" char=".">88.1</td>
<td align="char" valign="middle" char=".">7.4</td>
<td align="char" valign="middle" char=".">0.85</td>
</tr>
<tr>
<td align="left" valign="middle">Dual-stream LSTM with temporal attention</td>
<td align="char" valign="middle" char=".">93.1</td>
<td align="char" valign="middle" char=".">3.8</td>
<td align="char" valign="middle" char=".">0.91</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>The results in <xref ref-type="table" rid="tab5">Table 5</xref> show that the single-stream LSTM exhibits high error levels in action recognition and EMG load modeling, with an action classification accuracy of 86.4% and an EMG error of 8.7%. This indicates that heterogeneous modalities generate significant information interference when modeled in a unified temporal space. When a two-stream structure is introduced without using an attention mechanism, the action accuracy improves to 90.2%, the EMG error decreases to 6.0%, and the velocity fitting coefficient increases to 0.87, indicating that temporal decoupling of kinematic and physiological signals makes the model&#x2019;s representation of their respective dynamic modes more stable. The single-stream model using only temporal attention achieves an action accuracy of 88.1% and an EMG error of 7.4%, showing that temporal weight allocation has a suppressive effect on redundant segments, but it is still limited by the modeling inconsistencies caused by feature mixing. When the dual-stream structure and temporal attention are introduced simultaneously, the motion accuracy is improved to 93.1%, the electromyographic error is reduced to 3.8%, and the velocity fitting coefficient reaches 0.91. This indicates that performing time weighting after completing intramodal temporal modeling effectively coordinates cross-modal dynamic information, thereby forming a stable and consistent modeling result.</p>
</sec>
</sec>
<sec id="sec11">
<label>4</label>
<title>Results analysis</title>
<sec id="sec12">
<label>4.1</label>
<title>Motion capture accuracy analysis</title>
<p>In the motion capture accuracy experiment, by constructing prediction results under different action categories, the performance of the method proposed in this paper is compared with other typical methods in the posture recognition task. The stability of the model in different actions is tested by combining multiple repeated experiments. At the same time, the statistical distribution analysis of the key point coordinate prediction error is carried out to fully reflect the dynamic performance of the model and the differences in data characteristics. The results are shown in <xref ref-type="fig" rid="fig3">Figure 3</xref>.</p>
<fig position="float" id="fig3">
<label>Figure 3</label>
<caption>
<p>Motion capture accuracy and stability analysis comparing the proposed DS-LSTM against baseline methods. <bold>(a)</bold> Action-wise accuracy comparison showing the proposed method (blue) achieving higher accuracy in spike and block actions. <bold>(b)</bold> Prediction stability error bars indicating consistent performance. <bold>(c)</bold> Histogram of prediction error distribution, highlighting that 77% of errors fall within the minimal 0&#x2013;2&#x202F;cm range.</p>
</caption>
<graphic xlink:href="fnbot-20-1760494-g003.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Three charts are displayed: (a) Line graph comparing action-wise accuracy for DS-LSTM+Attention, Single-stream LSTM, CNN-LSTM, and Traditional methods across spike, block, serve, dig, and average categories. (b) Line graph depicting prediction stability for accuracy with mean and standard deviation for spike, block, serve, and dig. (c) Bar graph illustrating prediction error distribution in centimeters, with frequencies decreasing from 0-1 cm to 4-5 cm.</alt-text>
</graphic>
</fig>
<p>The data in the figure show that our method achieves 93.1% accuracy for the spike action, while traditional methods achieve only 78.6% for the same action. This difference stems from the dual-stream architecture&#x2019;s superior ability to capture temporal dependencies in high-speed movements, thereby reducing transient loss. For the block action, the accuracy is 91.8%, compared to 85.2% for the single-stream LSTM. This higher accuracy is primarily due to the interaction of cross-modal features, which enables more complete temporal modeling of upper and lower limb coordination. In the stability analysis, the standard deviation of the serve action is only 1.1, less than the 1.4 for the block action. This is because the serve action pattern is relatively fixed and has high temporal repeatability, which makes the model converge more stably during learning. Error distribution statistics show that 77 cases, representing 77% of the total data, have prediction errors less than 2&#x202F;cm. This concentration is related to the attention mechanism&#x2019;s ability to filter critical time periods, effectively reducing the impact of non-critical movements on prediction accuracy. Overall, this method demonstrates consistent advantages in both accuracy and stability (<xref ref-type="fig" rid="fig3">Figure 3</xref>).</p>
</sec>
<sec id="sec13">
<label>4.2</label>
<title>Muscle load modeling error analysis</title>
<p>In the EMG load modeling experiment, multimodal time-series data from volleyball-specific physical training was collected simultaneously using posture capture, EMG, and accelerometers. A dual-stream architecture was used to model kinematic and physiological characteristics separately, and a temporal attention mechanism was incorporated to achieve cross-modal fusion. During training, 500 iterations were set to monitor model error convergence. Comparative experiments were further conducted to verify the differences between the DS-LSTM and single-stream LSTM models. The error distributions of five typical volleyball movements under repeated experiments were statistically analyzed to assess movement-level stability and volatility. The results obtained based on this experimental process are shown in <xref ref-type="fig" rid="fig4">Figure 4</xref>.</p>
<fig position="float" id="fig4">
<label>Figure 4</label>
<caption>
<p>Analysis of EMG load modeling error convergence and motion-specific distribution. <bold>(a)</bold> The loss curve demonstrates rapid convergence to a 3.8% error rate after 500 iterations. <bold>(b)</bold> Model comparison showing the DS-LSTM (blue) reduces error more effectively than the single-stream LSTM (orange). <bold>(c)</bold> Boxplot of error distribution across varying volleyball actions, reflecting higher variability in dynamic movements like Digging.</p>
</caption>
<graphic xlink:href="fnbot-20-1760494-g004.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Three-panel graph showing: (a) Convergence of Muscle Load Error, with a decreasing trend in modeling error percentage over iterations; (b) Model Comparison, displaying error trends for DS-LSTM and Single LSTM models, both declining over iterations; (c) Error Distribution Across Actions, using box plots for spike, block, serve, dig, and set actions, indicating varying error percentages.</alt-text>
</graphic>
</fig>
<p>Experimental results show that the overall convergence curve error dropped to 3.8 percent after 500 iterations, indicating that the model gradually overcomes high-frequency noise interference with continued training. This is because time alignment and filtering preprocessing suppress redundant signals, enabling the LSTM memory units to effectively capture the long-term dependencies of the EMG signals. In a model comparison, the DS-LSTM achieved an error of 6.0 percent after 200 iterations, while the single-stream LSTM achieved an error of 8.7 percent during the same period. This difference is primarily due to the dual-stream architecture&#x2019;s separation of kinematic and physiological features, avoiding gradient convergence instability caused by feature aliasing. A boxplot of the action category errors shows that the median errors for Block and Set are 3.6 percent and 3.5 percent, respectively, while the median error for Dig reaches 4.3 percent, indicating that high-speed directional changes introduce EMG fluctuations, making accurate predictions in this category more difficult for the model. Overall, the multimodal fusion strategy offers advantages in reducing error levels and improving training data stability. The observed EMG load error patterns align with established EMG feature analysis practices, where lower errors correspond to consistent muscle activation timing and amplitude across trials, reflecting stable neuromuscular coordination during volleyball-specific movements.</p>
</sec>
<sec id="sec14">
<label>4.3</label>
<title>Speed curve fitting analysis</title>
<p>In this experiment, we used multimodal sensors to collect posture and velocity sequences during volleyball physical training. We then used the DS-LSTM model to fit and model the movement velocity to examine the discrepancies between the predicted results and the actual measurements. After normalization and time-series alignment, the raw data was input into the model to produce the predicted output. We then systematically evaluated the model&#x2019;s fitting performance and error distribution characteristics by comparing the actual velocity curves with the predicted curves, analyzing the residual distribution, and performing regression scatter point fitting. The results are shown in <xref ref-type="fig" rid="fig5">Figure 5</xref>.</p>
<fig position="float" id="fig5">
<label>Figure 5</label>
<caption>
<p>Velocity curve fitting analysis verifying model precision. <bold>(a)</bold> Comparison of Real (black) vs. Predicted (red) speed curves showing tight alignment with a peak deviation of only 0.05 m/s. <bold>(b)</bold> Residual curve highlighting minor deviations primarily during peak velocity phases. <bold>(c)</bold> Regression scatter plot (R<sup>2</sup>&#x202F;=&#x202F;0.91) confirming a strong linear correlation between predicted and actual velocities.</p>
</caption>
<graphic xlink:href="fnbot-20-1760494-g005.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Graph set showing speed analysis. Panel (a) displays a line graph comparing real speed and predicted speed over time, with closely matched curves. Panel (b) presents a residual curve indicating error variation over time, peaking at around 0.08 meters per second. Panel (c) features a scatter plot of real versus predicted speed, with a regression line (R&#x00B2; = 0.91) closely following the ideal y=x line.</alt-text>
</graphic>
</fig>
<p>The results in the figure show that the true and predicted velocity curves closely match each other over most of the time course, with the velocity peak occurring between 1.0 and 1.5&#x202F;s. The true maximum value is 2.85 meters per second, while the predicted value is 2.80 meters per second, a difference of 0.05 meters per second. This minor error is primarily due to transient fluctuations in the accelerometer during high-speed motion. The residual curve shows a positive deviation of 0.05 to 0.08 meters per second between 0.4 and 1.0&#x202F;s, which is related to phase differences caused by synchronization errors in the electromyographic signals. In the scatter regression plot, the data points are distributed close to the diagonal, and the coefficient of determination calculated by the regression is 0.91, indicating that the model has strong fitting stability across different velocity ranges. In summary, this method can effectively characterize the dynamic characteristics of velocity during volleyball physical training and maintain high prediction accuracy.</p>
</sec>
<sec id="sec15">
<label>4.4</label>
<title>Explanatory analysis of attention weight distribution</title>
<p>In a virtual simulation experiment of volleyball physical training, to explore the model&#x2019;s dynamic nature and cross-modal explanatory power in temporal feature selection, we conducted comparative modeling of the attention distribution of kinematic and physiological streams across different training time segments. This was further extended to a two-dimensional interactive analysis of movement categories and time segments. Based on multimodal time series data collected by posture, electromyography, and accelerometer sensors, the experiment extracted weight information for key time segments after two-stream LSTM encoding and attention weighting. The weight differences were then compared across kinematic and physiological features. Finally, multi-view diagrams of the model&#x2019;s attention patterns across temporal segments and movement components were generated, including histograms, broken line trends, and heat matrix diagrams, as shown in <xref ref-type="fig" rid="fig6">Figure 6</xref>.</p>
<fig position="float" id="fig6">
<label>Figure 6</label>
<caption>
<p>Spatiotemporal visualization of attention weight distribution. <bold>(a)</bold> Bar chart contrasting kinematic vs. physiological attention weights across time segments. <bold>(b)</bold> Line chart illustrating the dynamic shift in attention intensity during movement execution. <bold>(c)</bold> Heatmap showing how the model prioritizes specific time windows (e.g., 4&#x2013;6&#x202F;s) for different action types such as blocking and spiking.</p>
</caption>
<graphic xlink:href="fnbot-20-1760494-g006.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Three-part data visualization showing attention distribution over time. (a) Bar chart compares motion and physiological flow attention weights across four time segments. (b) Line chart shows attention trends over time for both flows. (c) Heatmap displays attention allocation for different actions, with colors indicating weight intensity.</alt-text>
</graphic>
</fig>
<p>The results show that the kinematic stream&#x2019;s attention weight for the 4&#x2013;6&#x202F;s period is 0.28, higher than the physiological stream&#x2019;s 0.22. This phenomenon stems from the dominance of postural changes caused by the rapid jump during this period over the motion characteristics. The physiological stream&#x2019;s weight for the 2&#x2013;4&#x202F;s period reaches 0.25, while the kinematic stream&#x2019;s is only 0.20. This is primarily due to the increased muscle load early in training, which makes the EMG signal intensity more pronounced in driving the model weights. In the movement comparison, the Block&#x2019;s weight for the 4&#x2013;6&#x202F;s period is 0.30, higher than the Spike&#x2019;s 0.28 and the Jump&#x2019;s 0.25. This result is due to the aggregation of physiological and kinematic signal characteristics generated by the combined upper limb force and core stability during the blocking process. Overall, the differences in weights over time and movement type indicate that the model&#x2019;s attention to kinematic and physiological characteristics switches periodically during training.</p>
</sec>
<sec id="sec16">
<label>4.5</label>
<title>Reliability analysis across trials and across subjects</title>
<p>To systematically evaluate the predictive consistency of the model across different trials and subjects, this experiment selected three key indicators: motion recognition accuracy, electromyographic load modeling error, and velocity fitting error. Based on data from all 45 subjects in multiple repeated trials, the intraclass correlation coefficient (ICC) and coefficient of variation (CV%) were calculated to quantify the model&#x2019;s repeatability and stability. The relevant reliability statistics are shown in <xref ref-type="table" rid="tab6">Table 6</xref>.</p>
<table-wrap position="float" id="tab6">
<label>Table 6</label>
<caption>
<p>Statistics of model prediction reliability indicators.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">Evaluation metric</th>
<th align="left" valign="top">Action category</th>
<th align="left" valign="top">Subject group</th>
<th align="center" valign="top">ICC</th>
<th align="center" valign="top">CV (%)</th>
<th align="center" valign="top"><italic>p</italic>-value</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="middle">Action Recognition Accuracy</td>
<td align="left" valign="middle">Spike</td>
<td align="left" valign="middle">All participants</td>
<td align="char" valign="middle" char=".">0.94</td>
<td align="char" valign="middle" char=".">3.2</td>
<td align="char" valign="middle" char=".">&#x003E;0.05</td>
</tr>
<tr>
<td align="left" valign="middle">Action Recognition Accuracy</td>
<td align="left" valign="middle">Block</td>
<td align="left" valign="middle">All participants</td>
<td align="char" valign="middle" char=".">0.92</td>
<td align="char" valign="middle" char=".">3.8</td>
<td align="char" valign="middle" char=".">&#x003E;0.05</td>
</tr>
<tr>
<td align="left" valign="middle">EMG Load Modeling Error</td>
<td align="left" valign="middle">Serve</td>
<td align="left" valign="middle">Professional group</td>
<td align="char" valign="middle" char=".">0.91</td>
<td align="char" valign="middle" char=".">4.1</td>
<td align="char" valign="middle" char=".">&#x003E;0.05</td>
</tr>
<tr>
<td align="left" valign="middle">EMG Load Modeling Error</td>
<td align="left" valign="middle">Dig</td>
<td align="left" valign="middle">Amateur group</td>
<td align="char" valign="middle" char=".">0.88</td>
<td align="char" valign="middle" char=".">5.3</td>
<td align="char" valign="middle" char=".">&#x003E;0.05</td>
</tr>
<tr>
<td align="left" valign="middle">Velocity Fitting Error</td>
<td align="left" valign="middle">Approach Jump</td>
<td align="left" valign="middle">University team</td>
<td align="char" valign="middle" char=".">0.93</td>
<td align="char" valign="middle" char=".">4.5</td>
<td align="char" valign="middle" char=".">&#x003E;0.05</td>
</tr>
<tr>
<td align="left" valign="middle">Attention Weight Consistency</td>
<td align="left" valign="middle">Spike</td>
<td align="left" valign="middle">All participants</td>
<td align="char" valign="middle" char=".">0.89</td>
<td align="char" valign="middle" char=".">6.1</td>
<td align="char" valign="middle" char=".">&#x003E;0.05</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>The model exhibits high consistency in action recognition tasks, with ICC values of 0.94 and 0.92 for spiking and blocking actions, respectively, and CV% values below 4.0%, indicating that the dual-stream structure effectively extracts common temporal features of motion across different subjects. These results indicate that the proposed model maintains consistent prediction behavior at the individual subject level across repeated trials, demonstrating subject-specific stability rather than reliance on averaged population performance. In electromyography (EMG) load modeling, the professional group achieved an ICC of 0.91 and a CV% of 4.1% for serving, superior to the amateur group&#x2019;s 0.88 and 5.3% for digging. This reflects the impact of technical standardization on EMG signal stability, with greater variability in action execution among amateur subjects leading to increased predictive volatility. Velocity fitting showed an ICC of 0.93 and a CV% of 4.5% for the approach jump, indicating strong generalization ability of the model to dynamic velocity curves. The consistency ICC of attention weights across trials was 0.89, demonstrating that the temporal attention mechanism stably captures key motion phases. Overall, the model demonstrates good cross-trial and cross-subject reliability across multiple tasks, making it suitable for training and analysis scenarios with diverse populations.</p>
<p>To facilitate a direct quantitative comparison between data-driven temporal modeling and handcrafted signal analysis, reliability metrics reported in this study were contrasted with representative sEMG and IMU feature-based results reported in prior musculoskeletal monitoring research. The comparison focuses on intraclass correlation coefficients and coefficients of variation, which were used in both studies to assess reproducibility and precision across repeated trials. The handcrafted feature results were drawn from a validated wearable-based rehabilitation study that evaluated time-domain and frequency-domain sEMG features together with IMU acceleration characteristics under controlled exercise protocols, while the DS-LSTM results were obtained from the multimodal volleyball training dataset analyzed in this work. The comparison is summarized in <xref ref-type="table" rid="tab7">Table 7</xref>.</p>
<table-wrap position="float" id="tab7">
<label>Table 7</label>
<caption>
<p>Reliability comparison between DS-LSTM outputs and handcrafted sEMG&#x2013;IMU features.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">Method</th>
<th align="left" valign="top">Data type</th>
<th align="center" valign="top">ICC range</th>
<th align="center" valign="top">CV range (%)</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="middle">Handcrafted sEMG&#x2013;IMU features (<xref ref-type="bibr" rid="ref2">Amin et al., 2025</xref>)</td>
<td align="left" valign="middle">sEMG time and frequency features, IMU acceleration</td>
<td align="char" valign="middle" char="&#x2013;">0.81&#x2013;0.98</td>
<td align="char" valign="middle" char="&#x2013;">5.7&#x2013;14.4</td>
</tr>
<tr>
<td align="left" valign="middle">DS-LSTM (this study)</td>
<td align="left" valign="middle">Multimodal temporal outputs</td>
<td align="char" valign="middle" char="&#x2013;">0.88&#x2013;0.94</td>
<td align="char" valign="middle" char="&#x2013;">3.2&#x2013;6.1</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>The reliability metrics indicate that the DS-LSTM outputs exhibit a narrower variability range and higher consistency across trials compared with handcrafted sEMG and IMU features. The handcrafted feature-based analysis shows ICC values spanning from 0.81 to 0.98, accompanied by coefficients of variation extending up to 14.4 percent, reflecting sensitivity to signal amplitude fluctuations and feature extraction stability across repetitions. In contrast, the DS-LSTM results demonstrate ICC values concentrated between 0.88 and 0.94, with coefficients of variation remaining below 6.1 percent, indicating reduced dispersion in repeated measurements. This difference arises from the end-to-end temporal modeling of multimodal sequences, which suppresses trial-level signal variability through recurrent state propagation rather than relying on isolated signal descriptors. These results confirm that DS-LSTM-based modeling provides more stable reliability characteristics when evaluated using the same statistical validation criteria.</p>
</sec>
<sec id="sec17">
<label>4.6</label>
<title>Analysis of training optimization feedback effects</title>
<p>In this experiment, using a virtual simulation platform as support, we conducted a multi-dimensional dynamic comparison of traditional training and personalized feedback training. We focused on collecting four core metrics: endurance, explosive power, agility, and recovery efficiency. We tracked the temporal changes in athletic performance during training and, combined with the weight distribution of the attention mechanism output, revealed differences in the model&#x2019;s focus on different features and training phases. This approach not only compared the final performance differences between the two training models, but also provided in-depth analysis of the data across both the temporal and feature dimensions, ultimately yielding the results shown in <xref ref-type="fig" rid="fig7">Figure 7</xref>.</p>
<fig position="float" id="fig7">
<label>Figure 7</label>
<caption>
<p>Evaluation of training optimization feedback effects. <bold>(a)</bold> Comparison of physical metrics (Endurance, Explosiveness, Agility, Recovery) between traditional and feedback-based training. <bold>(b)</bold> Endurance progress curve over 6&#x202F;weeks showing steady improvement. <bold>(c)</bold> Heatmap of feature-specific attention weights. <bold>(d)</bold> Percentage improvement across all metrics, demonstrating the efficacy of the personalized feedback loop.</p>
</caption>
<graphic xlink:href="fnbot-20-1760494-g007.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Four charts compare training methods in performance metrics. (a) Bar chart shows higher scores for "With Feedback" in endurance, explosiveness, agility, and recovery. (b) Line chart indicates improved endurance over six weeks for "With Feedback" versus "Traditional." (c) Heatmap displays varying attention weights across metrics and segments, with values between 0.16 and 0.34. (d) Line graph depicts percentage improvement across endurance, explosiveness, agility, and recovery, peaking at 19.1% for endurance and 15.8% for recovery.</alt-text>
</graphic>
</fig>
<p>As shown in the results, endurance reached 38.7&#x202F;min with personalized feedback training, compared to 32.5&#x202F;min with traditional training. This improvement is primarily attributed to the dynamic adjustment of load distribution during training, which reduces the rate of lactate accumulation and delays the onset of fatigue. Explosive power reached 52.8&#x202F;cm with feedback training, a significant improvement compared to 46.2&#x202F;cm with traditional training. This result is related to modifications in joint angle control and muscle activation patterns. Agility time decreased to 10.9&#x202F;s with feedback training, compared to 12.4&#x202F;s with traditional training. This is due to the model&#x2019;s increased focus on acceleration features during high-intensity transitions, which improves the latency of movement responses. Recovery efficiency increased to 82.6% with feedback training, compared to 71.3% with traditional training. This is due to more effective control of heart rate fluctuations, which enhances the recovery capacity of the autonomic nervous system. Overall, the personalized feedback mechanism, through optimized load distribution and feature focus strategies, leads to comprehensive improvements in athletic performance.</p>
</sec>
</sec>
<sec sec-type="conclusions" id="sec18">
<label>5</label>
<title>Conclusion</title>
<p>This paper proposes a dynamic modeling method for volleyball physical training based on the DS-LSTM and temporal attention mechanism. By collecting multimodal time series data using posture capture, electromyography, and accelerometers, and performing time alignment and feature fusion, this method achieves intelligent prediction of training status and personalized feedback. Experimental results show that this method achieves 93.1% accuracy in spike motion recognition, significantly exceeding the 78.6% of traditional methods, demonstrating the advantages of cross-modal interaction in high-speed motion modeling. In muscle load prediction, the error drops to 3.8% after 500 iterations, surpassing the 8.7% of a single-stream LSTM. This demonstrates the significant effectiveness of the dual-stream architecture in stabilizing convergence and reducing noise. In velocity curve fitting experiments, the predicted peak value of 2.80 meters per second differs only 0.05 meters per second from the true value of 2.85 meters per second, with a coefficient of determination of 0.91, demonstrating the model&#x2019;s high fitting accuracy during dynamic motion. Research shows that combined with the feedback mechanism of the virtual simulation platform, the trainees have achieved significant improvements in endurance, explosive power, and agility. This shows that this method not only breaks through the limitations of traditional training that relies on experience and judgment, but also shows strong interpretability and scientific rigor in multimodal feature interaction and key period screening, providing reliable support for the personalized optimization of volleyball physical training and the promotion and application of virtual simulation teaching.</p>
<p>Despite the promising results, this study has certain limitations that outline directions for future research. First, the current dataset focuses primarily on specific age groups and skill levels; future work will expand the sample diversity to include a wider range of demographics to enhance model generalization. Second, while the current system operates effectively in a simulation environment, future iterations will focus on optimizing the algorithm for edge computing devices to minimize latency in real-world, on-court training scenarios. Finally, we plan to extend this dual-stream attention framework to other complex team sports, such as basketball and football, to validate its cross-disciplinary applicability.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="sec19">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/supplementary material, further inquiries can be directed to the corresponding author.</p>
</sec>
<sec sec-type="ethics-statement" id="sec20">
<title>Ethics statement</title>
<p>The studies involving humans were approved by the Ethics Committee of China Academy of Art (Approval Number: CAA-REC-2023-11-058). The studies were conducted in accordance with the local legislation and institutional requirements. Written informed consent for participation in this study was provided by the participants&#x2019; legal guardians/next of kin where applicable.</p>
</sec>
<sec sec-type="author-contributions" id="sec21">
<title>Author contributions</title>
<p>XC: Conceptualization, Data curation, Formal analysis, Funding acquisition, Investigation, Methodology, Project administration, Resources, Software, Supervision, Validation, Visualization, Writing &#x2013; original draft, Writing &#x2013; review &#x0026; editing.</p>
</sec>
<sec sec-type="COI-statement" id="sec22">
<title>Conflict of interest</title>
<p>The author(s) declared that this work was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="ai-statement" id="sec23">
<title>Generative AI statement</title>
<p>The author(s) declared that Generative AI was not used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p>
</sec>
<sec sec-type="disclaimer" id="sec24">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="ref1"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Albaladejo-Saura</surname><given-names>M.</given-names></name> <name><surname>Vaquero-Crist&#x00F3;bal</surname><given-names>R.</given-names></name> <name><surname>Garc&#x00ED;a-Roca</surname><given-names>J. A.</given-names></name> <name><surname>Esparza-Ros</surname><given-names>F.</given-names></name></person-group> (<year>2023</year>). <article-title>What variables allow the differentiation between more and less successful adolescent volleyball players?</article-title> <source>J. Hum. Kinet.</source> <volume>88</volume>:<fpage>229</fpage>. doi: <pub-id pub-id-type="doi">10.2478/hukin-2023-0022</pub-id></mixed-citation></ref>
<ref id="ref2"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Amin</surname><given-names>A. B.</given-names></name> <name><surname>Asabre</surname><given-names>E.</given-names></name> <name><surname>Razaghi</surname><given-names>S.</given-names></name> <name><surname>Noh</surname><given-names>Y.</given-names></name></person-group> (<year>2025</year>). <article-title>Quantitative musculoskeletal monitoring and analysis in aquatic rehabilitation</article-title>. <source>Front. Electron.</source> <volume>6</volume>:<fpage>1566899</fpage>. doi: <pub-id pub-id-type="doi">10.3389/felec.2025.1566899</pub-id></mixed-citation></ref>
<ref id="ref3"><mixed-citation publication-type="other"><person-group person-group-type="author"><name><surname>Amin</surname><given-names>A. B.</given-names></name> <name><surname>Asabre</surname><given-names>E.</given-names></name> <name><surname>Sahay</surname><given-names>A.</given-names></name> <name><surname>Razaghi</surname><given-names>S.</given-names></name> <name><surname>Noh</surname><given-names>Y.</given-names></name></person-group> (<year>2023</year>). "Feasibility testing of wearable device for musculoskeletal monitoring during aquatic therapy and rehabilitation," 2023 45th annual international conference of the IEEE engineering in Medicine &#x0026; Biology Society (EMBC), Sydney, Australia. pp. 1&#x2013;4.</mixed-citation></ref>
<ref id="ref4"><mixed-citation publication-type="other"><person-group person-group-type="author"><name><surname>Amin</surname><given-names>A. B.</given-names></name> <name><surname>Noh</surname><given-names>Y.</given-names></name></person-group>, (<year>2024</year>). "Classification of upper limb movements based on a LSTM model in aquatic rehabilitation," 2024 46th annual international conference of the IEEE engineering in medicine and biology society (EMBC), Orlando, FL, USA. pp. 1&#x2013;4.</mixed-citation></ref>
<ref id="ref5"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Bae</surname><given-names>K.</given-names></name> <name><surname>Lee</surname><given-names>S.</given-names></name> <name><surname>Bak</surname><given-names>S. Y.</given-names></name> <name><surname>Kim</surname><given-names>H. S.</given-names></name> <name><surname>Ha</surname><given-names>Y.</given-names></name> <name><surname>You</surname><given-names>J. H.</given-names></name></person-group> (<year>2024</year>). <article-title>Concurrent validity and test reliability of the deep learning markerless motion capture system during the overhead squat</article-title>. <source>Sci. Rep.</source> <volume>14</volume>:<fpage>29462</fpage>. doi: <pub-id pub-id-type="doi">10.1038/s41598-024-79707-2</pub-id>, <pub-id pub-id-type="pmid">39604407</pub-id></mixed-citation></ref>
<ref id="ref6"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Duan</surname><given-names>C.</given-names></name></person-group> (<year>2021</year>). <article-title>Design of online volleyball remote teaching system based on AR technology</article-title>. <source>Alex. Eng. J.</source> <volume>60</volume>, <fpage>4299</fpage>&#x2013;<lpage>4306</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.aej.2021.03.006</pub-id></mixed-citation></ref>
<ref id="ref7"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Esposito</surname><given-names>G.</given-names></name> <name><surname>Altavilla</surname><given-names>G.</given-names></name> <name><surname>Giardullo</surname><given-names>G.</given-names></name> <name><surname>Ceruso</surname><given-names>R.</given-names></name> <name><surname>D&#x2019;Isanto</surname><given-names>T.</given-names></name></person-group> (<year>2024</year>). <article-title>The effects of the use of plyometric exercises with and without the ball in the development of explosive strength in volleyball</article-title>. <source>J. Funct. Morphol. Kinesiol.</source> <volume>9</volume>:<fpage>126</fpage>. doi: <pub-id pub-id-type="doi">10.3390/jfmk9030126</pub-id></mixed-citation></ref>
<ref id="ref8"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hafer</surname><given-names>J. F.</given-names></name> <name><surname>Vitali</surname><given-names>R.</given-names></name> <name><surname>Gurchiek</surname><given-names>R.</given-names></name> <name><surname>Curtze</surname><given-names>C.</given-names></name> <name><surname>Shull</surname><given-names>P.</given-names></name> <name><surname>Cain</surname><given-names>S. M.</given-names></name></person-group> (<year>2023</year>). <article-title>Challenges and advances in the use of wearable sensors for lower extremity biomechanics</article-title>. <source>J. Biomech.</source> <volume>157</volume>:<fpage>111714</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.jbiomech.2023.111714</pub-id>, <pub-id pub-id-type="pmid">37423120</pub-id></mixed-citation></ref>
<ref id="ref9"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hern&#x00E1;ndez</surname><given-names>A.</given-names></name> <name><surname>Amig&#x00F3;</surname><given-names>J. M.</given-names></name></person-group> (<year>2021</year>). <article-title>Attention mechanisms and their applications to complex systems</article-title>. <source>Entropy</source> <volume>23</volume>:<fpage>283</fpage>. doi: <pub-id pub-id-type="doi">10.3390/e23030283</pub-id>, <pub-id pub-id-type="pmid">33652728</pub-id></mixed-citation></ref>
<ref id="ref10"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hribernik</surname><given-names>M.</given-names></name> <name><surname>Umek</surname><given-names>A.</given-names></name> <name><surname>Toma&#x017E;i&#x010D;</surname><given-names>S.</given-names></name> <name><surname>Kos</surname><given-names>A.</given-names></name></person-group> (<year>2022</year>). <article-title>Review of real-time biomechanical feedback systems in sport and rehabilitation</article-title>. <source>Sensors</source> <volume>22</volume>:<fpage>3006</fpage>. doi: <pub-id pub-id-type="doi">10.3390/s22083006</pub-id>, <pub-id pub-id-type="pmid">35458991</pub-id></mixed-citation></ref>
<ref id="ref11"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Huang</surname><given-names>X.</given-names></name> <name><surname>Cai</surname><given-names>Z.</given-names></name></person-group> (<year>2023</year>). <article-title>A review of video action recognition based on 3D convolution</article-title>. <source>Comput. Electr. Eng.</source> <volume>108</volume>:<fpage>108713</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.compeleceng.2023.108713</pub-id></mixed-citation></ref>
<ref id="ref12"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Le</surname><given-names>V. T.</given-names></name> <name><surname>Tran-Trung</surname><given-names>K.</given-names></name> <name><surname>Hoang</surname><given-names>V. T.</given-names></name></person-group> (<year>2022</year>). <article-title>A comprehensive review of recent deep learning techniques for human activity recognition</article-title>. <source>Comput. Intell. Neurosci.</source> <volume>2022</volume>:<fpage>8323962</fpage>. doi: <pub-id pub-id-type="doi">10.1155/2022/8323962</pub-id></mixed-citation></ref>
<ref id="ref13"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Lees</surname><given-names>T.</given-names></name> <name><surname>Buechel</surname><given-names>M.</given-names></name> <name><surname>Anderson</surname><given-names>B.</given-names></name> <name><surname>Slater</surname><given-names>L.</given-names></name> <name><surname>Reece</surname><given-names>S.</given-names></name> <name><surname>Coxon</surname><given-names>G.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>Benchmarking data-driven rainfall&#x2013;runoff models in Great Britain: a comparison of long short-term memory (LSTM)-based models with four lumped conceptual models</article-title>. <source>Hydrol. Earth Syst. Sci.</source> <volume>25</volume>, <fpage>5517</fpage>&#x2013;<lpage>5534</lpage>. doi: <pub-id pub-id-type="doi">10.5194/hess-25-5517-2021</pub-id></mixed-citation></ref>
<ref id="ref14"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Li</surname><given-names>X.</given-names></name> <name><surname>Fan</surname><given-names>D.</given-names></name> <name><surname>Deng</surname><given-names>Y.</given-names></name> <name><surname>Lei</surname><given-names>Y.</given-names></name> <name><surname>Omalley</surname><given-names>O.</given-names></name></person-group> (<year>2024</year>). <article-title>Sensor fusion-based virtual reality for enhanced physical training</article-title>. <source>Robot. Intellig. Autom.</source> <volume>44</volume>, <fpage>48</fpage>&#x2013;<lpage>67</lpage>. doi: <pub-id pub-id-type="doi">10.1108/ria-08-2023-0103</pub-id></mixed-citation></ref>
<ref id="ref15"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Lin</surname><given-names>H. S.</given-names></name> <name><surname>Wu</surname><given-names>H. J.</given-names></name> <name><surname>Wu</surname><given-names>C. C.</given-names></name> <name><surname>Chen</surname><given-names>J. Y.</given-names></name> <name><surname>Chang</surname><given-names>C. K.</given-names></name></person-group> (<year>2024</year>). <article-title>Quantifying internal and external training loads in collegiate male volleyball players during a competitive season</article-title>. <source>BMC Sports Sci. Med. Rehabil.</source> <volume>16</volume>:<fpage>168</fpage>. doi: <pub-id pub-id-type="doi">10.1186/s13102-024-00958-7</pub-id>, <pub-id pub-id-type="pmid">39129006</pub-id></mixed-citation></ref>
<ref id="ref16"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Lu</surname><given-names>S.</given-names></name> <name><surname>Liu</surname><given-names>M.</given-names></name> <name><surname>Yin</surname><given-names>L.</given-names></name> <name><surname>Yin</surname><given-names>Z.</given-names></name> <name><surname>Liu</surname><given-names>X.</given-names></name> <name><surname>Zheng</surname><given-names>W.</given-names></name></person-group> (<year>2023</year>). <article-title>The multi-modal fusion in visual question answering: a review of attention mechanisms</article-title>. <source>PeerJ Comput. Sci.</source> <volume>9</volume>:<fpage>e1400</fpage>. doi: <pub-id pub-id-type="doi">10.7717/peerj-cs.1400</pub-id>, <pub-id pub-id-type="pmid">37346665</pub-id></mixed-citation></ref>
<ref id="ref17"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Pawlik</surname><given-names>D.</given-names></name> <name><surname>Mroczek</surname><given-names>D.</given-names></name></person-group> (<year>2023</year>). <article-title>Influence of jump height on the game efficiency in elite volleyball players</article-title>. <source>Sci. Rep.</source> <volume>13</volume>:<fpage>8931</fpage>. doi: <pub-id pub-id-type="doi">10.1038/s41598-023-35729-w</pub-id>, <pub-id pub-id-type="pmid">37264052</pub-id></mixed-citation></ref>
<ref id="ref18"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Putranto</surname><given-names>J. S.</given-names></name> <name><surname>Heriyanto</surname><given-names>J.</given-names></name> <name><surname>Achmad</surname><given-names>S.</given-names></name> <name><surname>Kurniawan</surname><given-names>A.</given-names></name></person-group> (<year>2023</year>). <article-title>Implementation of virtual reality technology for sports education and training: systematic literature review</article-title>. <source>Proc. Comp. Sci.</source> <volume>216</volume>, <fpage>293</fpage>&#x2013;<lpage>300</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.procs.2022.12.138</pub-id></mixed-citation></ref>
<ref id="ref19"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Rebelo</surname><given-names>A.</given-names></name> <name><surname>Valente-dos-Santos</surname><given-names>J.</given-names></name> <name><surname>Pires</surname><given-names>I. G.</given-names></name> <name><surname>Arrais</surname><given-names>I.</given-names></name> <name><surname>Pereira</surname><given-names>J. R.</given-names></name> <name><surname>Turner</surname><given-names>A. N.</given-names></name></person-group> (<year>2022</year>). <article-title>Strength and conditioning for volleyball: a review</article-title>. <source>Strength Cond. J.</source> <volume>44</volume>, <fpage>10</fpage>&#x2013;<lpage>19</lpage>. doi: <pub-id pub-id-type="doi">10.1519/SSC.0000000000000674</pub-id></mixed-citation></ref>
<ref id="ref20"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Richlan</surname><given-names>F.</given-names></name> <name><surname>Wei&#x00DF;</surname><given-names>M.</given-names></name> <name><surname>Kastner</surname><given-names>P.</given-names></name> <name><surname>Braid</surname><given-names>J.</given-names></name></person-group> (<year>2023</year>). <article-title>Virtual training, real effects: a narrative review on sports performance enhancement through interventions in virtual reality</article-title>. <source>Front. Psychol.</source> <volume>14</volume>:<fpage>1240790</fpage>. doi: <pub-id pub-id-type="doi">10.3389/fpsyg.2023.1240790</pub-id>, <pub-id pub-id-type="pmid">37928573</pub-id></mixed-citation></ref>
<ref id="ref21"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Salim</surname><given-names>F. A.</given-names></name> <name><surname>Postma</surname><given-names>D. B.</given-names></name> <name><surname>Haider</surname><given-names>F.</given-names></name> <name><surname>Luz</surname><given-names>S.</given-names></name> <name><surname>Beijnum</surname><given-names>B. J. F. V.</given-names></name> <name><surname>Reidsma</surname><given-names>D.</given-names></name></person-group> (<year>2024</year>). <article-title>Enhancing volleyball training: empowering athletes and coaches through advanced sensing and analysis</article-title>. <source>Front. Sports Act. Living</source> <volume>6</volume>:<fpage>1326807</fpage>. doi: <pub-id pub-id-type="doi">10.3389/fspor.2024.1326807</pub-id></mixed-citation></ref>
<ref id="ref22"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Sinsomboonthong</surname><given-names>S.</given-names></name></person-group> (<year>2022</year>). <article-title>Performance comparison of new adjusted min-max with decimal scaling and statistical column normalization methods for artificial neural network classification</article-title>. <source>Int. J. Math. Math. Sci.</source> <volume>2022</volume>:<fpage>3584406</fpage>. doi: <pub-id pub-id-type="doi">10.1155/2022/3584406</pub-id></mixed-citation></ref>
<ref id="ref23"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Sousa</surname><given-names>A. C.</given-names></name> <name><surname>Marques</surname><given-names>D. L.</given-names></name> <name><surname>Marinho</surname><given-names>D. A.</given-names></name> <name><surname>Neiva</surname><given-names>H. P.</given-names></name> <name><surname>Marques</surname><given-names>M. C.</given-names></name></person-group> (<year>2023</year>). <article-title>Assessing and monitoring physical performance using wearable technologies in volleyball players: a systematic review</article-title>. <source>Appl. Sci.</source> <volume>13</volume>:<fpage>4102</fpage>. doi: <pub-id pub-id-type="doi">10.3390/app13074102</pub-id></mixed-citation></ref>
<ref id="ref24"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Su</surname><given-names>L.</given-names></name> <name><surname>Zuo</surname><given-names>X.</given-names></name> <name><surname>Li</surname><given-names>R.</given-names></name> <name><surname>Wang</surname><given-names>X.</given-names></name> <name><surname>Zhao</surname><given-names>H.</given-names></name> <name><surname>Huang</surname><given-names>B.</given-names></name></person-group> (<year>2025</year>). <article-title>A systematic review for transformer-based long-term series forecasting</article-title>. <source>Artif. Intell. Rev.</source> <volume>58</volume>:<fpage>80</fpage>. doi: <pub-id pub-id-type="doi">10.1007/s10462-024-11044-2</pub-id></mixed-citation></ref>
<ref id="ref25"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Suo</surname><given-names>X.</given-names></name> <name><surname>Tang</surname><given-names>W.</given-names></name> <name><surname>Li</surname><given-names>Z.</given-names></name></person-group> (<year>2024</year>). <article-title>Motion capture technology in sports scenarios: a survey</article-title>. <source>Sensors</source> <volume>24</volume>:<fpage>2947</fpage>. doi: <pub-id pub-id-type="doi">10.3390/s24092947</pub-id>, <pub-id pub-id-type="pmid">38733052</pub-id></mixed-citation></ref>
<ref id="ref26"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Tan</surname><given-names>B.</given-names></name> <name><surname>Tian</surname><given-names>S.</given-names></name> <name><surname>Wang</surname><given-names>E.</given-names></name> <name><surname>Xiao</surname><given-names>L.</given-names></name> <name><surname>Cao</surname><given-names>K.</given-names></name> <name><surname>Zheng</surname><given-names>B.</given-names></name> <etal/></person-group>. (<year>2023</year>). <article-title>Research on the development and testing methods of physical education and agility training equipment in universities</article-title>. <source>Front. Psychol.</source> <volume>14</volume>:<fpage>1155490</fpage>. doi: <pub-id pub-id-type="doi">10.3389/fpsyg.2023.1155490</pub-id>, <pub-id pub-id-type="pmid">37457097</pub-id></mixed-citation></ref>
<ref id="ref27"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Tang</surname><given-names>Q.</given-names></name> <name><surname>Liang</surname><given-names>J.</given-names></name> <name><surname>Zhu</surname><given-names>F.</given-names></name></person-group> (<year>2023</year>). <article-title>A comparative review on multi-modal sensors fusion based on deep learning</article-title>. <source>Signal Process.</source> <volume>213</volume>:<fpage>109165</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.sigpro.2023.109165</pub-id></mixed-citation></ref>
<ref id="ref28"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Tsanousa</surname><given-names>A.</given-names></name> <name><surname>Bektsis</surname><given-names>E.</given-names></name> <name><surname>Kyriakopoulos</surname><given-names>C.</given-names></name> <name><surname>Gonz&#x00E1;lez</surname><given-names>A. G.</given-names></name> <name><surname>Leturiondo</surname><given-names>U.</given-names></name> <name><surname>Gialampoukidis</surname><given-names>I.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>A review of multisensor data fusion solutions in smart manufacturing: systems and trends</article-title>. <source>Sensors</source> <volume>22</volume>:<fpage>1734</fpage>. doi: <pub-id pub-id-type="doi">10.3390/s22051734</pub-id>, <pub-id pub-id-type="pmid">35270880</pub-id></mixed-citation></ref>
<ref id="ref29"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Urrea</surname><given-names>C.</given-names></name> <name><surname>Agramonte</surname><given-names>R.</given-names></name></person-group> (<year>2021</year>). <article-title>Kalman filter: historical overview and review of its use in robotics 60 years after its creation</article-title>. <source>J. Sens.</source> <volume>2021</volume>:<fpage>9674015</fpage>. doi: <pub-id pub-id-type="doi">10.1155/2021/9674015</pub-id></mixed-citation></ref>
<ref id="ref30"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Vec</surname><given-names>V.</given-names></name> <name><surname>Toma&#x017E;i&#x010D;</surname><given-names>S.</given-names></name> <name><surname>Kos</surname><given-names>A.</given-names></name> <name><surname>Umek</surname><given-names>A.</given-names></name></person-group> (<year>2024</year>). <article-title>Trends in real-time artificial intelligence methods in sports: a systematic review</article-title>. <source>J. Big Data</source> <volume>11</volume>:<fpage>148</fpage>. doi: <pub-id pub-id-type="doi">10.1186/s40537-024-01026-0</pub-id></mixed-citation></ref>
<ref id="ref31"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname><given-names>M.</given-names></name> <name><surname>Liang</surname><given-names>Z.</given-names></name></person-group> (<year>2023</year>). <article-title>Cross-modal self-attention mechanism for controlling robot volleyball motion</article-title>. <source>Front. Neurorobot.</source> <volume>17</volume>:<fpage>1288463</fpage>. doi: <pub-id pub-id-type="doi">10.3389/fnbot.2023.1288463</pub-id>, <pub-id pub-id-type="pmid">38023451</pub-id></mixed-citation></ref>
<ref id="ref32"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname><given-names>Y.</given-names></name> <name><surname>Shan</surname><given-names>G.</given-names></name> <name><surname>Li</surname><given-names>H.</given-names></name> <name><surname>Wang</surname><given-names>L.</given-names></name></person-group> (<year>2022</year>). <article-title>A wearable-sensor system with AI technology for real-time biomechanical feedback training in hammer throw</article-title>. <source>Sensors</source> <volume>23</volume>:<fpage>425</fpage>. doi: <pub-id pub-id-type="doi">10.3390/s23010425</pub-id>, <pub-id pub-id-type="pmid">36617025</pub-id></mixed-citation></ref>
<ref id="ref33"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Waqas</surname><given-names>M.</given-names></name> <name><surname>Humphries</surname><given-names>U. W.</given-names></name></person-group> (<year>2024</year>). <article-title>A critical review of RNN and LSTM variants in hydrological time series predictions</article-title>. <source>MethodsX</source> <volume>13</volume>:<fpage>102946</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.mex.2024.102946</pub-id>, <pub-id pub-id-type="pmid">39324077</pub-id></mixed-citation></ref>
<ref id="ref34"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Yunchao</surname><given-names>M.</given-names></name> <name><surname>Mengyao</surname><given-names>R.</given-names></name> <name><surname>Xingman</surname><given-names>L.</given-names></name></person-group> (<year>2023</year>). <article-title>Application of virtual simulation technology in sports decision training: a systematic review</article-title>. <source>Front. Psychol.</source> <volume>14</volume>:<fpage>1164117</fpage>. doi: <pub-id pub-id-type="doi">10.3389/fpsyg.2023.1164117</pub-id>, <pub-id pub-id-type="pmid">37275736</pub-id></mixed-citation></ref>
</ref-list>
<fn-group>
<fn fn-type="custom" custom-type="edited-by" id="fn0001">
<p>Edited by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/285850/overview">Michalis Vrigkas</ext-link>, University of Western Macedonia, Greece</p>
</fn>
<fn fn-type="custom" custom-type="reviewed-by" id="fn0002">
<p>Reviewed by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3315028/overview">Sawan Kumar</ext-link>, L&#x0026;T Technology Services Limited, India</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3320577/overview">Abu Bony Amin</ext-link>, University of Massachusetts Amherst, United States</p>
</fn>
</fn-group>
</back>
</article>