<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article article-type="research-article" dtd-version="1.3" xml:lang="en" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Phys.</journal-id>
<journal-title-group>
<journal-title>Frontiers in Physics</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Phys.</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">2296-424X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1786937</article-id>
<article-id pub-id-type="doi">10.3389/fphy.2026.1786937</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Research</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Dynamic social network anomalous behavior detection based on spatiotemporal multi-view graph attention fusion network</article-title>
<alt-title alt-title-type="left-running-head">Wang</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fphy.2026.1786937">10.3389/fphy.2026.1786937</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Wang</surname>
<given-names>Jimin</given-names>
</name>
<xref ref-type="aff" rid="aff1"/>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/3347823"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x26; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review and editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
</contrib>
</contrib-group>
<aff id="aff1">
<institution>School of Information Engineering, Henan University of Science and Technology</institution>, <city>Luoyang</city>, <country country="CN">China</country>
</aff>
<author-notes>
<corresp id="c001">
<label>&#x2a;</label>Correspondence: Jimin Wang, <email xlink:href="mailto:wjm426@stu.haust.edu.cn">wjm426@stu.haust.edu.cn</email>
</corresp>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-02-27">
<day>27</day>
<month>02</month>
<year>2026</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2026</year>
</pub-date>
<volume>14</volume>
<elocation-id>1786937</elocation-id>
<history>
<date date-type="received">
<day>13</day>
<month>01</month>
<year>2026</year>
</date>
<date date-type="rev-recd">
<day>11</day>
<month>02</month>
<year>2026</year>
</date>
<date date-type="accepted">
<day>18</day>
<month>02</month>
<year>2026</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2026 Wang.</copyright-statement>
<copyright-year>2026</copyright-year>
<copyright-holder>Wang</copyright-holder>
<license>
<ali:license_ref start_date="2026-02-27">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<p>The development of online social networks is accompanied by intricate abnormal interaction phenomena severely impairing the ecosystem&#x2019;s credibility. Current anomaly detection approaches find it challenging to balance accuracy and robustness when tackling dynamic structural changes, heterogeneous relationships, and lack of labeled data. To address these challenges, this paper proposes ST-MVAN, a Spatio-Temporal Multi-View Attention Network for unsupervised anomaly detection. The proposed framework integrates three core components: (1) in the spatial dimension, we construct heterogeneous relational subgraphs and design an improved Graph Convolutional Network (GCN) that incorporates edge attributes as additive bias and leverages sparse attention to filter structural noise; (2) for feature fusion, an Efficient Channel Attention (ECA) mechanism is introduced to adaptively assign importance weights to multi-view features; and (3) in the temporal dimension, a bidirectional GRU captures dynamic evolutionary dependencies. Finally, a joint Encoder-Decoder framework calculates anomaly scores based on reconstruction errors. Furthermore, we perform experiments on the Digg and Yelp datasets to validate that our method achieves an AUC improvement of up to 12.26% compared to baseline methods. These results demonstrate that ST-MVAN can effectively mitigate structural noise and enhance the security of dynamic social network environments.</p>
</abstract>
<kwd-group>
<kwd>anomaly behavior detection</kwd>
<kwd>complex networks</kwd>
<kwd>graph neural network</kwd>
<kwd>multi-head attention mechanism</kwd>
<kwd>social networks</kwd>
</kwd-group>
<funding-group>
<funding-statement>The author(s) declared that financial support was not received for this work and/or its publication.</funding-statement>
</funding-group>
<counts>
<fig-count count="6"/>
<table-count count="2"/>
<equation-count count="19"/>
<ref-count count="32"/>
<page-count count="00"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Social Physics</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="s1">
<label>1</label>
<title>Introduction</title>
<p>Online social platforms&#x2019; deep penetration into daily life has driven social networks to transform information dissemination modes, interpersonal interaction approaches, and social collaboration mechanisms. Likes, reposts, comments, and other interactive acts have emerged as key pillars of the online ecosystem. However, the openness and anonymity of online platforms, while offering convenience to users, have also spawned numerous abnormal activities, including botnet assaults, malicious fake evaluations, and false public opinion manipulation (<xref ref-type="fig" rid="F1">Figure 1</xref>). These behaviors not only disrupt the normal network order, but may also trigger severe trust crises and social risks. Therefore, accurately detecting anomalous behaviors from massive, complex, and dynamic social data has become a pressing issue for both academia and industry.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>Normal users and anomalous users.</p>
</caption>
<graphic xlink:href="fphy-14-1786937-g001.tif">
<alt-text content-type="machine-generated">Diagram illustrating the flow of data between a social network service, user behavior data, network servers, and two user types: a normal user with an orange icon and an abnormal user depicted as a hooded figure.</alt-text>
</graphic>
</fig>
<p>Existing methods for anomaly behavior detection in online social networks can be roughly categorized into two types: traditional machine learning methods and deep learning methods. Traditional machine learning methods, such as logistic regression, support vector machine (SVM) and random forests, mainly rely on handcrafted statistical features for classification tasks [<xref ref-type="bibr" rid="B1">1</xref>, <xref ref-type="bibr" rid="B2">2</xref>]. However, such methods struggle to effectively capture the complex graph structural correlations and temporal evolution patterns of behaviors in social networks. In addition, they suffer from time-consuming and labor-intensive feature engineering, along with limited generalization ability when dealing with heterogeneous interaction data [<xref ref-type="bibr" rid="B3">3</xref>].</p>
<p>In recent years, deep learning has brought new tools for anomaly detection. In many real-world applications, researchers have built stronger recognition systems and robust frameworks to cope with complex inputs and noise [<xref ref-type="bibr" rid="B4">4</xref>, <xref ref-type="bibr" rid="B5">5</xref>]. These studies suggest that layered representations and removing negative information can significantly improve robustness [<xref ref-type="bibr" rid="B6">6</xref>, <xref ref-type="bibr" rid="B7">7</xref>]. This lesson is particularly relevant to dynamic social networks, where multiple relations exist and noisy links may hide abnormal behaviors. Graph Neural Networks (GNNs) and their variants, such as GCN and GAT, have shown strong results for social network anomaly detection by modeling non-Euclidean data [<xref ref-type="bibr" rid="B8">8</xref>&#x2013;<xref ref-type="bibr" rid="B10">10</xref>]. Nevertheless, many existing methods remain limited in handling structural camouflage and in effectively fusing multi-view spatio-temporal features in complex environments.</p>
<p>To address the aforementioned challenges, this paper proposes a deep graph neural network model named ST-MVAN, which is based on spatio-temporal multi-view attention. The model is designed to achieve accurate identification of anomalous behaviors through refined structure-aware learning and spatio-temporal feature fusion. Specifically, the core designs of the model are as follows: First, through heterogeneous view construction, the complex heterogeneous graph is decomposed into multiple homogeneous interaction subgraphs according to interaction types. Second, in the spatial feature extraction phase, a sparse graph attention mechanism integrated with edge attributes is proposed. This mechanism integrates edge features into multi-head attention computations as an additive offset with the Sparsemax mechanism embedded therein. Meanwhile the ECA-based adaptive multi-view fusion module utilizes the Efficient Channel Attention network to dynamically allocate significance weights to various interaction perspectives. Finally, a Bi-GRU is employed to capture the temporal evolution characteristics of user behaviors with anomaly detection achieved through reconstruction errors.</p>
<p>The main contributions of this paper are summarized as follows:<list list-type="bullet">
<list-item>
<p>This paper presents a Spatio-Temporal Multi-View Anomaly Detection Framework (ST-MVAN), a comprehensive framework incorporating heterogeneous view decoupling, relation-aware sparse aggregation, adaptive multi-view fusion, and temporal evolution modeling for handling the intricacy of social network data that efficiently overcomes the shortcomings of current approaches in managing structural noise and extracting spatio-temporal features.</p>
</list-item>
<list-item>
<p>We develop a relation-aware multi-head sparse attention module to address the drawback of graph convolutional networks in neighbor node weighted average aggregation, integrate edge features into multi-head attention calculations, combine with the Sparsemax strategy, achieve adaptive truncation and sparsification of neighbor weights, cut down computational costs, and boost the model&#x2019;s ability to filter camouflaged noisy neighbors.</p>
</list-item>
<list-item>
<p>We propose an ECA-based multi-view fusion with temporal modeling mechanism to tackle the challenges of weight differences among various interaction relations and dynamic changes in user behaviors in heterogeneous graphs, utilize ECA-Net to dynamically acquire view-specific importance weights, and Bi-GRU to capture temporal dependencies.</p>
</list-item>
</list>
</p>
<p>The rest of this paper is organized as follows: Section 2 reviews relevant studies regarding abnormal behavior detection; Section 3 details the complete framework of the ST-MVAN model along with the design concepts behind each core component; Section 4 presents experimental findings and their analysis for the proposed model across multiple datasets, where baseline approaches are chosen for comparative assessment; Section 5 summarizes the research conducted in this paper.</p>
</sec>
<sec id="s2">
<label>2</label>
<title>Related work</title>
<p>Anomaly detection in social networks has evolved from early manual feature engineering to advanced deep learning paradigms capable of automatic feature extraction. To comprehensively position our work, we review the literature along two complementary axes: temporal evolution modeling, which captures dynamic behavioral changes; and heterogeneous multi-relational modeling, which addresses complex structural interactions and multi-view fusion.</p>
<sec id="s2-1">
<label>2.1</label>
<title>Dynamic graph anomaly detection</title>
<p>Dynamic graph methodologies aim to identify anomalies by characterizing the temporal evolution of network topology and user behaviors. These approaches generally fall into three sub-categories: snapshot-based, stream-based, and Transformer-based frameworks.</p>
<p>Snapshot-based approaches conceptualize the dynamic graph as a sequence of discrete static snapshots. Early studies leveraged temporal random walks or cross-snapshot matrix factorization to capture structural variations over discrete intervals [<xref ref-type="bibr" rid="B11">11</xref>, <xref ref-type="bibr" rid="B12">12</xref>]. While effective for coarse-grained analysis, these methods often struggle to capture fine-grained temporal dependencies. To address real-time interaction updates, stream-based methods have been developed to update node embeddings incrementally. For instance, NetWalk [<xref ref-type="bibr" rid="B13">13</xref>] utilizes clique embedding and streaming clustering to detect anomalies in real-time. Similarly, AddGraph [<xref ref-type="bibr" rid="B14">14</xref>] employs an extended temporal window to model short-term dependencies, providing an end-to-end architecture for dynamic edge classification.</p>
<p>More recently, Transformer-based architectures have emerged as a dominant force for modeling long-range temporal dependencies. By leveraging self-attention mechanisms, these models can capture complex evolutionary patterns that RNN-based methods might miss. Liu et al. [<xref ref-type="bibr" rid="B15">15</xref>] proposed a Transformer-based framework that captures the velocity of connection variations, significantly improving the detection of abrupt structural anomalies. However, while these dynamic methods excel at temporal modeling, they often treat all interactions uniformly, potentially neglecting the heterogeneity inherent in social relationships.</p>
</sec>
<sec id="s2-2">
<label>2.2</label>
<title>Heterogeneous and multi-relational modeling</title>
<p>Real-world social networks are inherently heterogeneous, encompassing diverse node types and interaction patterns. Research in this domain focuses on distinguishing these patterns through meta-paths, hypergraphs, and multi-view graph neural networks.</p>
<p>To preserve the semantic information of different relations, meta-path-based methods define specific semantic sequences to aggregate neighbors [<xref ref-type="bibr" rid="B16">16</xref>, <xref ref-type="bibr" rid="B17">17</xref>], while recent works explore automatic relation weighting to reduce manual reliance [<xref ref-type="bibr" rid="B18">18</xref>]. To capture higher-order correlations beyond pairwise connections, hypergraph neural networks have been introduced to model complex group interactions [<xref ref-type="bibr" rid="B19">19</xref>]. Multi-view GNNs and structure-content fusion represent the current state-of-the-art, aiming to project node attributes and topology into a shared latent space [<xref ref-type="bibr" rid="B20">20</xref>, <xref ref-type="bibr" rid="B21">21</xref>], often leveraging contrastive learning to enhance discriminative power [<xref ref-type="bibr" rid="B22">22</xref>]. Furthermore, to combat &#x201c;structural camouflage&#x201d; where anomalies mimic normal connections, researchers have integrated adversarial learning and neighbor filtering strategies to mitigate graph inconsistency [<xref ref-type="bibr" rid="B23">23</xref>&#x2013;<xref ref-type="bibr" rid="B25">25</xref>].</p>
<p>However, ensuring robustness against noise and perturbations remains a challenge in heterogeneous graph modeling. Recent advances in robust multimodal learning offer valuable methodological insights for fusing diverse data signals. Specifically, strategies such as adversarial alignment [<xref ref-type="bibr" rid="B26">26</xref>&#x2013;<xref ref-type="bibr" rid="B28">28</xref>] and negative information removal [<xref ref-type="bibr" rid="B7">7</xref>, <xref ref-type="bibr" rid="B29">29</xref>] have demonstrated strong capability in mitigating noise. Furthermore, context-aware attention mechanisms [<xref ref-type="bibr" rid="B30">30</xref>] offer a principled way to refine feature weights. Recognizing that relation-specific views play a role analogous to modalities, and motivated by these robust mechanisms [<xref ref-type="bibr" rid="B31">31</xref>], ST-MVAN adapts such adaptive fusion concepts to resist view-specific noise and minimize aggregation bias.</p>
<p>While the aforementioned methods have advanced the field, ST-MVAN distinguishes itself in three key aspects. First, unlike the attention-window approach of AddGraph [<xref ref-type="bibr" rid="B14">14</xref>], ST-MVAN explicitly decouples heterogeneous interactions into distinct views, mitigating semantic interference across relation types. Second, compared with THGNN [<xref ref-type="bibr" rid="B21">21</xref>], our framework incorporates relation-aware sparse attention with edge-attribute bias, where Sparsemax enables adaptive neighbor pruning and improved resilience to structural camouflage noise. Finally, addressing the scalability limitations of Transformer-based dynamic graph models [<xref ref-type="bibr" rid="B15">15</xref>], ST-MVAN leverages a streamlined Bi-GRU with connectivity-restricted sparse aggregation to efficiently capture long-range evolutionary dependencies on large-scale sparse networks. Collectively, these spatial and temporal refinements enhance robustness against structural perturbations and dynamic network evolution.</p>
</sec>
</sec>
<sec id="s3">
<label>3</label>
<title>Construction of the ST-MVAN model</title>
<sec id="s3-1">
<label>3.1</label>
<title>Overall framework of the model</title>
<p>This paper presents a deep graph neural network architecture built upon Spatio-Temporal Multi-View Attention designated ST-MVAN to target the complex nature and heterogeneous characteristics of interaction patterns in dynamic graph structures. The architecture realizes accurate recognition of anomalous interaction patterns within an unsupervised learning framework by capturing spatial correlation features and temporal evolutionary regularities of nodes under multi-dimensional perspectives.</p>
<p>ST-MVAN takes an encoder-decoder framework (as shown in <xref ref-type="fig" rid="F2">Figure 2</xref>). The encoder harmonizes heterogeneous feature representations through feature projection, integrates neighboring node data across distinct relation perspectives via a sparse attention module with edge feature properties, adaptively amalgamates multi-perspective feature sets using an ECA mechanism, and leverages a bidirectional GRU to capture temporal dynamics for generating ultimate spatio-temporal node embeddings. The decoder uses such embeddings to jointly recover network topological structure and interaction feature attributes measuring anomaly level based on reconstruction residual values.</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>Architecture of the ST-MVAN model.</p>
</caption>
<graphic xlink:href="fphy-14-1786937-g002.tif">
<alt-text content-type="machine-generated">Diagram of a complex neural network model for anomaly detection in social graphs, showing modules for decoupling heterogeneous graph data, feature embedding, multi-head sparse attention, adaptive fusion, and a bidirectional GRU network ending in anomaly scoring.</alt-text>
</graphic>
</fig>
</sec>
<sec id="s3-2">
<label>3.2</label>
<title>Multi-view sparse attention encoder</title>
<p>To decouple spatial dependencies of different patterns from complex network interactions, the ST-MVAN model first formally defines dynamic graph snapshots and performs multi-view decomposition, followed by the alignment of heterogeneous features.</p>
<sec id="s3-2-1">
<label>3.2.1</label>
<title>Dynamic graph definition and multi-view subgraph construction</title>
<p>We define a dynamic graph as a sequence of temporal snapshots <inline-formula id="inf1">
<mml:math id="m1">
<mml:mrow>
<mml:mi mathvariant="script">G</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">{</mml:mo>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="script">G</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="script">G</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="script">G</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
<mml:mo stretchy="false">}</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>. Here, the network snapshot at any arbitrary time <inline-formula id="inf2">
<mml:math id="m2">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is denoted as <inline-formula id="inf3">
<mml:math id="m3">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="script">G</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi mathvariant="script">V</mml:mi>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="script">E</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi>X</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="script">E</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">attr</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, where <inline-formula id="inf4">
<mml:math id="m4">
<mml:mrow>
<mml:mi mathvariant="script">V</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">{</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">}</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> represents the set containing <inline-formula id="inf5">
<mml:math id="m5">
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> nodes, <inline-formula id="inf6">
<mml:math id="m6">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="script">E</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> is the set of interaction edges at time <inline-formula id="inf7">
<mml:math id="m7">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf8">
<mml:math id="m8">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>X</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> represents the node feature matrix, and <inline-formula id="inf9">
<mml:math id="m9">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="script">E</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">attr</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> contains the attribute features of all edges.</p>
<p>Considering that multiple types of interaction relations often exist between nodes in real-world networks, we define the set of all possible interaction relation types as <inline-formula id="inf10">
<mml:math id="m10">
<mml:mrow>
<mml:mi mathvariant="script">R</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">{</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>K</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">}</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>. To capture the topological structure under specific relations, the model decomposes the global snapshot into <inline-formula id="inf11">
<mml:math id="m11">
<mml:mrow>
<mml:mi>K</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> independent relation view subgraphs, as shown in <xref ref-type="disp-formula" rid="e1">Equation 1</xref>:<disp-formula id="e1">
<mml:math id="m12">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="script">S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo>&#x3d;</mml:mo>
<mml:mfenced open="{" close="}">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="script">G</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="script">G</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="script">G</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>K</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
<label>(1)</label>
</disp-formula>
</p>
<p>For the <inline-formula id="inf12">
<mml:math id="m13">
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>-th view subgraph <inline-formula id="inf13">
<mml:math id="m14">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="script">G</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi mathvariant="script">V</mml:mi>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="script">E</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, its node set is shared with the original graph, but the edge set only contains interaction edges belonging to relation type <inline-formula id="inf14">
<mml:math id="m15">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>. Formally, it is defined as <xref ref-type="disp-formula" rid="e2">Equation 2</xref>:<disp-formula id="e2">
<mml:math id="m16">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="script">E</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:mfenced open="{" close="}">
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>v</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="script">E</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:mi>&#x3c8;</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>e</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mi>v</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
<label>(2)</label>
</disp-formula>where <inline-formula id="inf15">
<mml:math id="m17">
<mml:mrow>
<mml:mi>&#x3c8;</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mo>&#x22c5;</mml:mo>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> is the relation type mapping function. Specifically, these views are defined by different interaction types. This decomposition strategy enables the model to learn spatial embeddings of nodes specifically for each distinct interaction pattern, avoiding mutual interference between different relational features.</p>
</sec>
<sec id="s3-2-2">
<label>3.2.2</label>
<title>Feature extraction and alignment initialization</title>
<p>Social network data shows heterogeneous multi-modal properties including unstructured text-based semantic features, structured statistical user attributes, and attribute characteristics of interactive edges; we map both node and edge information into a latent space <inline-formula id="inf16">
<mml:math id="m18">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">model</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> of identical dimensionality to integrate such heterogeneous information into an integrated deep learning framework.</p>
<p>Original node features comprise two components, semantic and statistical features, to fully capture comprehensive user characteristic profiles. For the semantic feature component, users&#x2019; historical behavior logs hold abundant text-based content reflecting personal interest tendencies and latent behavioral intentions. We extract and concatenate core textual content linked to user behavioral activities, specifically article titles and review content. We adopt the pre-trained RoBERTa-base model to encode these text sequences. Input sequences are truncated to a maximum length of 128 tokens. To ensure training efficiency, the parameters of RoBERTa are frozen to serve as a static feature extractor. Finally, we implement mean pooling on the output word vectors of the last hidden layer to yield a 768-dimensional feature vector <inline-formula id="inf17">
<mml:math id="m19">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">sem</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> containing complete deep semantic information.</p>
<p>Regarding statistical features, this work chooses structural metrics representing user impact and activity degrees: social connectivity gauges node connection scope while cumulative interaction count gauges general activity degree. We assemble these quantitative data into a vector, and then conduct log normalization to derive the statistical feature vector <inline-formula id="inf18">
<mml:math id="m20">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">stat</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
<p>To fuse these two types of information, the model concatenates them and maps them to the initial hidden state <inline-formula id="inf19">
<mml:math id="m21">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>h</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> of the node through a learnable linear projection layer, as shown in <xref ref-type="disp-formula" rid="e3">Equation 3</xref>:<disp-formula id="e3">
<mml:math id="m22">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>h</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>&#x3c3;</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>W</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">node</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">sem</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">&#x2016;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">stat</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>b</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">node</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
<label>(3)</label>
</disp-formula>where <inline-formula id="inf20">
<mml:math id="m23">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>W</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">node</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf21">
<mml:math id="m24">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>b</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">node</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> are the weight matrix and bias term respectively, <inline-formula id="inf22">
<mml:math id="m25">
<mml:mrow>
<mml:mo stretchy="false">&#x2016;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> denotes the concatenation operation, and <inline-formula id="inf23">
<mml:math id="m26">
<mml:mrow>
<mml:mi>&#x3c3;</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mo>&#x22c5;</mml:mo>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> employs the ReLU activation function. At this point, <inline-formula id="inf24">
<mml:math id="m27">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>h</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> serves as the node feature input to the graph neural network.</p>
<p>Edge features are used to characterize the attributes of interactions. For an edge <inline-formula id="inf25">
<mml:math id="m28">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>v</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>, interaction frequency and interaction timestamp are selected as raw features. The normalized frequency value is concatenated with the time feature encoded by a sinusoidal function, and then mapped to an edge embedding <inline-formula id="inf26">
<mml:math id="m29">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mi>v</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> of the same dimension as the node feature via a Multi-Layer Perceptron (MLP), as shown in <xref ref-type="disp-formula" rid="e4">Equation 4</xref>:<disp-formula id="e4">
<mml:math id="m30">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mi>v</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mtext>MLP</mml:mtext>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">edge</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:mi>f</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi>q</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mi>v</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">&#x2016;</mml:mo>
<mml:mtext>TimeEnc</mml:mtext>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>m</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi>e</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mi>v</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
<label>(4)</label>
</disp-formula>
</p>
</sec>
<sec id="s3-2-3">
<label>3.2.3</label>
<title>Sparse graph attention aggregation with edge attribute bias</title>
<p>To efficiently utilize network topology and edge attribute data during node representation updating, this paper proposes a sparse graph attention mechanism integrated with edge attribute bias; unlike conventional GCNs, which conduct average aggregation over neighboring nodes, this mechanism employs a multi-head attention mechanism to dynamically allocate weights based on node feature similarity and edge attributes.</p>
<p>Let the input node feature of the <inline-formula id="inf27">
<mml:math id="m31">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>-th layer be <inline-formula id="inf28">
<mml:math id="m32">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>h</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>. Under a specific relation view <inline-formula id="inf29">
<mml:math id="m33">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">G</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, for the <inline-formula id="inf30">
<mml:math id="m34">
<mml:mrow>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>-th attention head, the model first maps the source node <inline-formula id="inf31">
<mml:math id="m35">
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> and its neighbor <inline-formula id="inf32">
<mml:math id="m36">
<mml:mrow>
<mml:mi>v</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> to independent feature subspaces to calculate the query vector and key vector, as shown in <xref ref-type="disp-formula" rid="e5">Equation 5</xref>:<disp-formula id="e5">
<mml:math id="m37">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>q</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>W</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>Q</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:msubsup>
<mml:mrow>
<mml:mi>h</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:mspace width="1em"/>
<mml:msubsup>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>W</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>K</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:msubsup>
<mml:mrow>
<mml:mi>h</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
<label>(5)</label>
</disp-formula>where <inline-formula id="inf33">
<mml:math id="m38">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>W</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>Q</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf34">
<mml:math id="m39">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>W</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>K</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> are learnable linear projection matrices. The model incorporates the obtained edge embedding <inline-formula id="inf35">
<mml:math id="m40">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mi>v</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> into the relevance metric. This paper adopts an additive bias strategy, utilizing a nonlinear transformation function <inline-formula id="inf36">
<mml:math id="m41">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>&#x3d5;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>m</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:msup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mo>&#x22c5;</mml:mo>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> to map the high-dimensional edge embedding to a scalar bias, which is applied to the calculation process of the attention score, as shown in <xref ref-type="disp-formula" rid="e6">Equation 6</xref>:<disp-formula id="e6">
<mml:math id="m42">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>e</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mi>v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>q</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msup>
<mml:msubsup>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
<mml:mrow>
<mml:msqrt>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">head</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msqrt>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#x2b;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi>&#x3d5;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msup>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mi>v</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
<label>(6)</label>
</disp-formula>
</p>
<p>In the equation, the first term characterizes the association intensity of nodes in the feature space through a scaled dot product; the second term introduces edge attributes as a bias to correct this association intensity.</p>
<p>To filter out massive redundant connections and invalid information in the network structure, this paper adopts the Sparsemax activation function instead of the traditional Softmax. Sparsemax can truncate the weights of low-relevance neighbors to zero, thereby generating sparse and robust normalized weights, as shown in <xref ref-type="disp-formula" rid="e7">Equation 7</xref>:<disp-formula id="e7">
<mml:math id="m43">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3b1;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mi>v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mtext>Sparsemax</mml:mtext>
</mml:mrow>
<mml:mrow>
<mml:mi>v</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="script">N</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>e</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mi>v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
<label>(7)</label>
</disp-formula>
</p>
<p>Finally, weighted aggregation is performed on the neighbor value vectors based on these sparse weights. To promote gradient propagation and training stability in deep networks, the model introduces multi-head concatenation, residual connections, and layer normalization mechanisms to obtain the updated node representation <inline-formula id="inf37">
<mml:math id="m44">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> under the current view, as shown in <xref ref-type="disp-formula" rid="e8">Equation 8</xref>:<disp-formula id="e8">
<mml:math id="m45">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:mtext>LayerNorm</mml:mtext>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>h</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>W</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>O</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:munderover accentunder="false" accent="false">
<mml:mrow>
<mml:mo stretchy="false">&#x2016;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>H</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:mstyle displaystyle="true">
<mml:munder>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>v</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="script">N</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:munder>
</mml:mstyle>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3b1;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mi>v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:msubsup>
<mml:mrow>
<mml:mi>W</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>V</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:msubsup>
<mml:mrow>
<mml:mi>h</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
<label>(8)</label>
</disp-formula>where <inline-formula id="inf38">
<mml:math id="m46">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>W</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>O</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is used to fuse the feature information output by multi-head attention.</p>
</sec>
<sec id="s3-2-4">
<label>3.2.4</label>
<title>Multi-view adaptive fusion based on ECA</title>
<p>Since different types of interaction views contribute differently to defining node behavior patterns, simple averaging or concatenation cannot distinguish the importance of views. Therefore, this paper introduces Efficient Channel Attention (ECA) to achieve efficient fusion of multi-view features.</p>
<p>First, we concatenate the node representations <inline-formula id="inf39">
<mml:math id="m47">
<mml:mrow>
<mml:mo stretchy="false">{</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>K</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">}</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> under <inline-formula id="inf40">
<mml:math id="m48">
<mml:mrow>
<mml:mi>K</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> views to form a multi-channel feature stack <inline-formula id="inf41">
<mml:math id="m49">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>H</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">stack</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>K</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">model</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>. The module treats each view as an independent channel. It first aggregates the global information of each channel through Global Average Pooling (GAP), then captures local interaction information across channels using 1D convolution, and finally generates normalized weights <inline-formula id="inf42">
<mml:math id="m50">
<mml:mrow>
<mml:mi>w</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>K</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> for each view through a Sigmoid function, as shown in <xref ref-type="disp-formula" rid="e9">Equation 9</xref>:<disp-formula id="e9">
<mml:math id="m51">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>w</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>&#x3c3;</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>C</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>v</mml:mi>
<mml:mn>1</mml:mn>
<mml:msub>
<mml:mrow>
<mml:mi>D</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>G</mml:mi>
<mml:mi>A</mml:mi>
<mml:mi>P</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>H</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">stack</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
<label>(9)</label>
</disp-formula>
</p>
<p>After obtaining the weights, we perform a weighted summation of the representations from different views. To further fuse features and compress redundant information, the weighted features are reduced in dimension through a fully connected layer to obtain the final spatial feature representation <inline-formula id="inf43">
<mml:math id="m52">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">spatial</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> at time <inline-formula id="inf44">
<mml:math id="m53">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, as shown in <xref ref-type="disp-formula" rid="e10">Equation 10</xref>:<disp-formula id="e10">
<mml:math id="m54">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">spatial</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>W</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">fuse</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>K</mml:mi>
</mml:mrow>
</mml:munderover>
</mml:mstyle>
<mml:msub>
<mml:mrow>
<mml:mi>w</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x22c5;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>b</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">fuse</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
<label>(10)</label>
</disp-formula>
</p>
</sec>
</sec>
<sec id="s3-3">
<label>3.3</label>
<title>Bidirectional temporal evolution modeling</title>
<p>After processing by the spatial multi-view attention module, the aggregated spatial feature representation of nodes at each moment is obtained. To capture the dynamic evolution patterns of user behavior in the temporal dimension, the model inputs the sequence of spatial features within a continuous time window into a bidirectional GRU for temporal modeling.</p>
<p>Compared with LSTM, GRU retains the ability to capture long-range dependencies with fewer parameters and higher computational efficiency, making it more suitable for processing large-scale dynamic graph data. To comprehensively assess a node&#x2019;s state at time <inline-formula id="inf45">
<mml:math id="m55">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, ST-MVAN uses a bidirectional GRU to capture both historical context and future evolution trends simultaneously. Formally we abstract the GRU&#x2019;s internal gate update mechanism as a nonlinear transformation function. The model executes forward and backward propagation in parallel. Forward evolution transmits past information to the future capturing the cumulative impact of historical behavior sequences on the current state, as shown in <xref ref-type="disp-formula" rid="e11">Equation 11</xref>.<disp-formula id="e11">
<mml:math id="m56">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>h</mml:mi>
</mml:mrow>
<mml:mo>&#x20d7;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>G</mml:mi>
<mml:mi>R</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi>U</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">fwd</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>h</mml:mi>
</mml:mrow>
<mml:mo>&#x20d7;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
<label>(11)</label>
</disp-formula>
</p>
<p>Backward evolution passes information from the future to the past, utilizing observed data from subsequent moments to assist in judging the potential intent of the current behavior, as shown in <xref ref-type="disp-formula" rid="e12">Equation 12</xref>.<disp-formula id="e12">
<mml:math id="m57">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>h</mml:mi>
</mml:mrow>
<mml:mo>&#x20d6;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>G</mml:mi>
<mml:mi>R</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi>U</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">bwd</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>h</mml:mi>
</mml:mrow>
<mml:mo>&#x20d6;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
<label>(12)</label>
</disp-formula>where <inline-formula id="inf46">
<mml:math id="m58">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>h</mml:mi>
</mml:mrow>
<mml:mo>&#x20d7;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>h</mml:mi>
</mml:mrow>
<mml:mo>&#x20d6;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>h</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> represent the forward and backward hidden states at time <inline-formula id="inf47">
<mml:math id="m59">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, respectively. Finally, the hidden states from these two directions are concatenated to obtain the spatio-temporal embedding representation <inline-formula id="inf48">
<mml:math id="m60">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:msub>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>h</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> of the node at time <inline-formula id="inf49">
<mml:math id="m61">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, as shown in <xref ref-type="disp-formula" rid="e13">Equation 13</xref>:<disp-formula id="e13">
<mml:math id="m62">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>h</mml:mi>
</mml:mrow>
<mml:mo>&#x20d7;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">&#x2016;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>h</mml:mi>
</mml:mrow>
<mml:mo>&#x20d6;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
<label>(13)</label>
</disp-formula>
</p>
<p>This representation <inline-formula id="inf50">
<mml:math id="m63">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> fuses the spatial structural features under multi-views as well as the temporal evolution laws.</p>
</sec>
<sec id="s3-4">
<label>3.4</label>
<title>Anomaly detection based on dual reconstruction</title>
<p>ST-MVAN follows a self-supervised learning paradigm. The core assumption is that anomalous behaviors deviate from the latent spatio-temporal evolution laws of the network, thereby leading to higher reconstruction errors. The model contains dual decoders for structure and attributes, quantifying the degree of anomaly by jointly optimizing reconstruction tasks to capture normal behavior patterns.</p>
<sec id="s3-4-1">
<label>3.4.1</label>
<title>Dual decoding and joint optimization</title>
<p>To simultaneously capture topological structure associations and content patterns of interaction behaviors, the decoder reconstructs the link existence probability <inline-formula id="inf51">
<mml:math id="m64">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mi>v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> and the edge attribute <inline-formula id="inf52">
<mml:math id="m65">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>e</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mi>v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>, respectively, as shown in <xref ref-type="disp-formula" rid="e14">Equation 14</xref>:<disp-formula id="e14">
<mml:math id="m66">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mi>v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>&#x3c3;</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msup>
<mml:msubsup>
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
<mml:mo>,</mml:mo>
<mml:mspace width="1em"/>
<mml:msubsup>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>e</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mi>v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>M</mml:mi>
<mml:mi>L</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi>P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">dec</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo stretchy="false">&#x2016;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
<label>(14)</label>
</disp-formula>where <inline-formula id="inf53">
<mml:math id="m67">
<mml:mrow>
<mml:mi>&#x3c3;</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mo>&#x22c5;</mml:mo>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> is the Sigmoid function, and <inline-formula id="inf54">
<mml:math id="m68">
<mml:mrow>
<mml:mo stretchy="false">&#x2016;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> denotes the concatenation operation.</p>
<p>Model training aims to minimize the joint reconstruction loss. To prevent the model from predicting edges for all node pairs, a negative sampling strategy is introduced in the structure reconstruction task. For each moment <inline-formula id="inf55">
<mml:math id="m69">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, in addition to focusing on the set of existing positive sample edges, a set of negative sample edges <inline-formula id="inf56">
<mml:math id="m70">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="script">E</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">neg</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> of comparable size is constructed via random sampling. The structure reconstruction loss <inline-formula id="inf57">
<mml:math id="m71">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">struct</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> adopts the binary cross-entropy loss function, aiming to maximize the likelihood probability of positive samples while minimizing that of negative samples, as shown in <xref ref-type="disp-formula" rid="e15">Equation 15</xref>:<disp-formula id="e15">
<mml:math id="m72">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">struct</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mstyle displaystyle="true">
<mml:munder>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>v</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="script">E</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:munder>
</mml:mstyle>
<mml:mi>log</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mi>v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2212;</mml:mo>
<mml:mstyle displaystyle="true">
<mml:munder>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi>v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2208;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="script">E</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">neg</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:munder>
</mml:mstyle>
<mml:mi>log</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:msup>
<mml:mrow>
<mml:mi>v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
<label>(15)</label>
</disp-formula>
</p>
<p>For attribute reconstruction, the prediction error is calculated only within the set of real edges <inline-formula id="inf58">
<mml:math id="m73">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="script">E</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>, adopting mean squared error to quantify the difference between the reconstructed attribute and the real attribute <inline-formula id="inf59">
<mml:math id="m74">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>e</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mi>v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>, as shown in <xref ref-type="disp-formula" rid="e16">Equation 16</xref>:<disp-formula id="e16">
<mml:math id="m75">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">attr</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mstyle displaystyle="true">
<mml:munder>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>v</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="script">E</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:munder>
</mml:mstyle>
<mml:mo stretchy="false">&#x2016;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>e</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mi>v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x2212;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>e</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mi>v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:msubsup>
<mml:mrow>
<mml:mo stretchy="false">&#x2016;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
<label>(16)</label>
</disp-formula>
</p>
<p>The final total optimization objective is defined as the weighted sum of the above two parts, with an <inline-formula id="inf60">
<mml:math id="m76">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> regularization term introduced to prevent overfitting, as shown in <xref ref-type="disp-formula" rid="e17">Equation 17</xref>:<disp-formula id="e17">
<mml:math id="m77">
<mml:mrow>
<mml:mi mathvariant="script">L</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">struct</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>&#x3b2;</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">attr</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>&#x3b3;</mml:mi>
<mml:mo stretchy="false">&#x2016;</mml:mo>
<mml:mi mathvariant="normal">&#x398;</mml:mi>
<mml:msubsup>
<mml:mrow>
<mml:mo stretchy="false">&#x2016;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
<label>(17)</label>
</disp-formula>where <inline-formula id="inf61">
<mml:math id="m78">
<mml:mrow>
<mml:mi>&#x3b2;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the hyperparameter balancing structure and attribute losses, <inline-formula id="inf62">
<mml:math id="m79">
<mml:mrow>
<mml:mi mathvariant="normal">&#x398;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> represents all learnable parameters of the model, and <inline-formula id="inf63">
<mml:math id="m80">
<mml:mrow>
<mml:mi>&#x3b3;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the regularization weight coefficient.</p>
</sec>
<sec id="s3-4-2">
<label>3.4.2</label>
<title>Anomaly score determination</title>
<p>In the testing phase, the model calculates the anomaly score for each interaction edge <inline-formula id="inf64">
<mml:math id="m81">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>v</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> based on the reconstruction difficulty. We perform a weighted fusion of structural anomaly and attribute anomaly, defining the final anomaly score as follows in <xref ref-type="disp-formula" rid="e18">Equation 18</xref>:<disp-formula id="e18">
<mml:math id="m82">
<mml:mrow>
<mml:mi>S</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>v</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>&#x3bb;</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x22c5;</mml:mo>
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>log</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mi>v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>&#x3bb;</mml:mi>
<mml:mo>&#x22c5;</mml:mo>
<mml:mo stretchy="false">&#x2016;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>e</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mi>v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x2212;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>e</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mi>v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:msub>
<mml:mrow>
<mml:mo stretchy="false">&#x2016;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
<label>(18)</label>
</disp-formula>where <inline-formula id="inf65">
<mml:math id="m83">
<mml:mrow>
<mml:mi>&#x3bb;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is used to adjust the sensitivity of both components. When <inline-formula id="inf66">
<mml:math id="m84">
<mml:mrow>
<mml:mi>S</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>v</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> exceeds a preset threshold, the interaction is determined to be an anomalous behavior.</p>
</sec>
</sec>
<sec id="s3-5">
<label>3.5</label>
<title>Complexity analysis</title>
<p>To evaluate the scalability of the proposed ST-MVAN framework, we analyze the time complexity of its core components. Let <inline-formula id="inf67">
<mml:math id="m85">
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> denote the number of nodes, <inline-formula id="inf68">
<mml:math id="m86">
<mml:mrow>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:mi mathvariant="script">E</mml:mi>
<mml:mo stretchy="false">&#x7c;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> the number of edges, <inline-formula id="inf69">
<mml:math id="m87">
<mml:mrow>
<mml:mi>K</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> the number of relational views, <inline-formula id="inf70">
<mml:math id="m88">
<mml:mrow>
<mml:mi>L</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> the number of GNN layers, <inline-formula id="inf71">
<mml:math id="m89">
<mml:mrow>
<mml:mi>H</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> the number of attention heads, <inline-formula id="inf72">
<mml:math id="m90">
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> the hidden feature dimension, and <inline-formula id="inf73">
<mml:math id="m91">
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> the length of the temporal window.</p>
<p>For a single graph snapshot <inline-formula id="inf74">
<mml:math id="m92">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, the computational cost primarily stems from the spatial encoder and the reconstruction decoder. In the encoder, unlike standard Transformers with <inline-formula id="inf75">
<mml:math id="m93">
<mml:mrow>
<mml:mi>O</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> complexity, our sparse attention mechanism restricts computation to connected neighbors. Considering <inline-formula id="inf76">
<mml:math id="m94">
<mml:mrow>
<mml:mi>H</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> attention heads, the complexity for calculating attention coefficients and aggregating neighbors is <inline-formula id="inf77">
<mml:math id="m95">
<mml:mrow>
<mml:mi>O</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>H</mml:mi>
<mml:mo>&#x22c5;</mml:mo>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:mi mathvariant="script">E</mml:mi>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:mo>&#x22c5;</mml:mo>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>. Including the linear transformations <inline-formula id="inf78">
<mml:math id="m96">
<mml:mrow>
<mml:mi>O</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mo>&#x22c5;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> and the negligible cost of ECA-based fusion, the spatial encoding complexity per snapshot is <inline-formula id="inf79">
<mml:math id="m97">
<mml:mrow>
<mml:mi>O</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>L</mml:mi>
<mml:mo>&#x22c5;</mml:mo>
<mml:mi>K</mml:mi>
<mml:mo>&#x22c5;</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mo>&#x22c5;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>H</mml:mi>
<mml:mo>&#x22c5;</mml:mo>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:mi mathvariant="script">E</mml:mi>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:mo>&#x22c5;</mml:mo>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>. Furthermore, the reconstruction decoder, which is critical for anomaly detection, employs a negative sampling strategy. Instead of reconstructing the full adjacency matrix with <inline-formula id="inf80">
<mml:math id="m98">
<mml:mrow>
<mml:mi>O</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> cost, we only compute scores for positive edges and a sampled set of negative edges <inline-formula id="inf81">
<mml:math id="m99">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">E</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">neg</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, resulting in a decoding complexity of <inline-formula id="inf82">
<mml:math id="m100">
<mml:mrow>
<mml:mi>O</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:mi mathvariant="script">E</mml:mi>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:mo>&#x2b;</mml:mo>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">E</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">neg</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">&#x7c;</mml:mo>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x22c5;</mml:mo>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> per snapshot.</p>
<p>In the temporal dimension, the Bi-GRU captures evolutionary patterns over a window <inline-formula id="inf83">
<mml:math id="m101">
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>. For each node, the complexity of updating hidden states at each time step is <inline-formula id="inf84">
<mml:math id="m102">
<mml:mrow>
<mml:mi>O</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, resulting in a temporal complexity of <inline-formula id="inf85">
<mml:math id="m103">
<mml:mrow>
<mml:mi>O</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mo>&#x22c5;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> per snapshot. Summing the spatial encoder, decoder, and temporal components, the overall time complexity of ST-MVAN is expressed as <xref ref-type="disp-formula" rid="e19">Equation 19</xref>:<disp-formula id="e19">
<mml:math id="m104">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>O</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">total</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>O</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mo>&#x22c5;</mml:mo>
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:mi>L</mml:mi>
<mml:mo>&#x22c5;</mml:mo>
<mml:mi>K</mml:mi>
<mml:mo>&#x22c5;</mml:mo>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mo>&#x22c5;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>H</mml:mi>
<mml:mo>&#x22c5;</mml:mo>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:mi mathvariant="script">E</mml:mi>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:mo>&#x22c5;</mml:mo>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2b;</mml:mo>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:mi mathvariant="script">E</mml:mi>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:mo>&#x2b;</mml:mo>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">E</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">neg</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">&#x7c;</mml:mo>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x22c5;</mml:mo>
<mml:mi>d</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>N</mml:mi>
<mml:mo>&#x22c5;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
<label>(19)</label>
</disp-formula>
</p>
<p>Since real-world social networks are typically sparse (i.e., <inline-formula id="inf86">
<mml:math id="m105">
<mml:mrow>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:mi mathvariant="script">E</mml:mi>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:mo>&#x226a;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>) and <inline-formula id="inf87">
<mml:math id="m106">
<mml:mrow>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">E</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">neg</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">&#x7c;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> is comparable to <inline-formula id="inf88">
<mml:math id="m107">
<mml:mrow>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:mi mathvariant="script">E</mml:mi>
<mml:mo stretchy="false">&#x7c;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>, the overall complexity remains linear with respect to the number of edges <inline-formula id="inf89">
<mml:math id="m108">
<mml:mrow>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:mi mathvariant="script">E</mml:mi>
<mml:mo stretchy="false">&#x7c;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> and nodes <inline-formula id="inf90">
<mml:math id="m109">
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>. This confirms that ST-MVAN avoids the quadratic bottleneck of dense methods and maintains high efficiency and scalability for large-scale dynamic social networks.</p>
</sec>
</sec>
<sec id="s4">
<label>4</label>
<title>Experiments and analysis</title>
<sec id="s4-1">
<label>4.1</label>
<title>Datasets</title>
<p>To assess the proposed model&#x2019;s performance on dynamic multi-view graph anomaly detection tasks, we adopted the Digg dataset. Digg is a representative news aggregation and social sharing platform allowing users to build social connections by following others and show interest in news via voting (namely the Digg function). This dataset includes 279,631 users with abundant topological structures and time-stamped interaction data, well-suited for building heterogeneous multi-view networks. Given the original dataset&#x2019;s large size and numerous sparse nodes, we kept only active users in the candidate pool to guarantee experimental efficiency and model stability while conducting user alignment.</p>
<p>To capture the heterogeneity of user interactions, we constructed relation-specific subgraphs as distinct interaction views for each dataset. For Digg, we constructed a Following view to model explicit social links, and a Co-voting view to capture implicit interest similarity, where edges connect users who voted on the same post. Similarly, for Yelp, we constructed a Friendship view representing user social connections, and a Co-reviewing view derived from shared reviewed businesses, where edges connect users who reviewed the same business.</p>
</sec>
<sec id="s4-2">
<label>4.2</label>
<title>Baselines and experimental settings</title>
<sec id="s4-2-1">
<label>4.2.1</label>
<title>Baseline methods</title>
<p>We compare ST-MVAN with the following four baseline methods:<list list-type="bullet">
<list-item>
<p>DeepWalk [<xref ref-type="bibr" rid="B12">12</xref>]: A typical graph embedding approach that regards random walk sequences as text segments and nodes as vocabulary entries; it adopts the Skip-Gram model to acquire latent node representations by maximizing the co-occurrence probability of nodes.</p>
</list-item>
<list-item>
<p>GraphSAGE [<xref ref-type="bibr" rid="B32">32</xref>]: An inductive architecture for graph representation learning; it produces node embeddings through sampling and aggregating features from a node&#x2019;s local neighboring area; this approach fully leverages node attributes and processes each graph snapshot separately.</p>
</list-item>
<list-item>
<p>NetWalk [<xref ref-type="bibr" rid="B13">13</xref>]: A dynamic network anomaly detection approach enabling incremental learning of network representations; it uses clique embedding technologies to update node representations in real time and adopts a streaming clustering method to identify anomalies based on reconstruction deviations; this approach effectively copes with dynamic structural variations.</p>
</list-item>
<list-item>
<p>AddGraph [<xref ref-type="bibr" rid="B14">14</xref>]: A robust end-to-end architecture for dynamic graph anomaly detection; it models structural patterns and short-term temporal dependencies simultaneously; this approach adopts an attention-based time window to capture evolution trends.</p>
</list-item>
</list>
</p>
</sec>
<sec id="s4-2-2">
<label>4.2.2</label>
<title>Parameter settings</title>
<p>The experiments were built with the PyTorch framework (Python 3.8). The AdamW optimizer was chosen to boost generalization performance paired with a cosine annealing learning rate scheduler. The initial learning rate was set to 0.003 to enable fast convergence in early training phases and detailed parameter tuning in subsequent stages. Weight decay was set to <inline-formula id="inf91">
<mml:math id="m110">
<mml:mrow>
<mml:mn>5</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mn>10</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>7</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> and dropout rate to 0.5 to avoid overfitting. The model&#x2019;s structure includes 2 GCN layers, 4 attention heads and a hidden layer dimension of 64; the model underwent 100 training epochs. The dataset was split in chronological order with the first 50% used as training data and the next 50% as testing data.</p>
<p>Since the datasets lack ground-truth anomalies, we employ an anomaly injection strategy to generate synthetic anomalies exclusively for the testing phase. Specifically, we treat the existing edges in the test set as normal samples. To generate structural anomalies, we randomly sample node pairs <inline-formula id="inf92">
<mml:math id="m111">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>v</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> from the node set that have no observed interaction in the original data and assign them random timestamps within the test interval to create non-existent edges, simulating users forming unusual connections. To generate attribute anomalies, we randomly perturb the interaction frequency or timestamp features of existing edges by adding noise to deviate from their normal distribution. These anomalies are injected at ratios of 1%, 5%, and 10% relative to the normal edges. All experiments were repeated 10 times independently. Specifically, for each run, we re-generated the injected anomalies using different random seeds to evaluate the model&#x2019;s robustness against data variations. Within each specific run, the same set of anomalies was applied to all baseline methods to ensure a fair comparison. We report the mean AUC and standard deviation over these repeated runs to assess result stability.</p>
</sec>
<sec id="s4-2-3">
<label>4.2.3</label>
<title>Evaluation metrics</title>
<p>To conduct quantitative evaluation of the proposed model&#x2019;s performance, we employ the Area Under the Receiver Operating Characteristic Curve (AUC-ROC) as the core assessment index. This index offers a reliable gauge of the model&#x2019;s distinguishing capability, with a higher value indicating stronger capacity to accurately differentiate abnormal interactions from normal ones.</p>
</sec>
</sec>
<sec id="s4-3">
<label>4.3</label>
<title>Analysis of experimental results</title>
<sec id="s4-3-1">
<label>4.3.1</label>
<title>Robustness analysis under different anomaly ratios</title>
<p>
<xref ref-type="table" rid="T1">Table 1</xref> presents the quantitative results (Mean <inline-formula id="inf93">
<mml:math id="m112">
<mml:mrow>
<mml:mo>&#xb1;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> SD) of diverse models on the Digg and Yelp datasets under varying anomaly injection ratios. The experimental findings show that ST-MVAN attains the best AUC results across all settings, fully confirming the validity of the proposed framework. Moreover, the relatively low standard deviations observed across all settings indicate the high stability and robustness of ST-MVAN against experimental randomness. Taking the Digg dataset with 1% anomaly injection as an instance, ST-MVAN achieves an AUC of 87.89%, surpassing DeepWalk by around 17.09% and outperforming GraphSAGE by roughly 15.39%. This directly illustrates the vital role of capturing temporal dynamics in anomaly detection. Against NetWalk, ST-MVAN maintains a distinct advantage, exceeding it by approximately 12.26% and 8.00% under the 1% setting on the Digg and Yelp datasets respectively. When compared with AddGraph, ST-MVAN outperforms it by 4.48%, 1.84%, and 2.19% under 1%, 5%, and 10% anomaly ratios on the Digg dataset. Similarly, on the Yelp dataset, ST-MVAN delivers performance improvements of 4.06%, 2.28%, and 1.68% respectively. Overall, these findings confirm that the ST-MVAN model exhibits superior detection capability and robustness.</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>Comparison of the AUC performance of different models on the Digg and Yelp datasets (Mean <inline-formula id="inf94">
<mml:math id="m113">
<mml:mrow>
<mml:mo>&#xb1;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> SD).</p>
</caption>
<table>
<thead valign="top">
<tr>
<th rowspan="2" align="left">Model</th>
<th colspan="3" align="center">Digg</th>
<th colspan="3" align="center">Yelp</th>
</tr>
<tr>
<th align="center">1%</th>
<th align="center">5%</th>
<th align="center">10%</th>
<th align="center">1%</th>
<th align="center">5%</th>
<th align="center">10%</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">DeepWalk</td>
<td align="center">0.7080 <inline-formula id="inf95">
<mml:math id="m114">
<mml:mrow>
<mml:mo>&#xb1;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> 0.0154</td>
<td align="center">0.6881 <inline-formula id="inf96">
<mml:math id="m115">
<mml:mrow>
<mml:mo>&#xb1;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> 0.0142</td>
<td align="center">0.6398 <inline-formula id="inf97">
<mml:math id="m116">
<mml:mrow>
<mml:mo>&#xb1;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> 0.0168</td>
<td align="center">0.6243 <inline-formula id="inf98">
<mml:math id="m117">
<mml:mrow>
<mml:mo>&#xb1;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> 0.0132</td>
<td align="center">0.6185 <inline-formula id="inf99">
<mml:math id="m118">
<mml:mrow>
<mml:mo>&#xb1;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> 0.0157</td>
<td align="center">0.6054 <inline-formula id="inf100">
<mml:math id="m119">
<mml:mrow>
<mml:mo>&#xb1;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> 0.0126</td>
</tr>
<tr>
<td align="left">GraphSAGE</td>
<td align="center">0.7250 <inline-formula id="inf101">
<mml:math id="m120">
<mml:mrow>
<mml:mo>&#xb1;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> 0.0105</td>
<td align="center">0.7385 <inline-formula id="inf102">
<mml:math id="m121">
<mml:mrow>
<mml:mo>&#xb1;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> 0.0092</td>
<td align="center">0.7120 <inline-formula id="inf103">
<mml:math id="m122">
<mml:mrow>
<mml:mo>&#xb1;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> 0.0118</td>
<td align="center">0.7350 <inline-formula id="inf104">
<mml:math id="m123">
<mml:mrow>
<mml:mo>&#xb1;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> 0.0086</td>
<td align="center">0.7409 <inline-formula id="inf105">
<mml:math id="m124">
<mml:mrow>
<mml:mo>&#xb1;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> 0.0114</td>
<td align="center">0.7280 <inline-formula id="inf106">
<mml:math id="m125">
<mml:mrow>
<mml:mo>&#xb1;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> 0.0097</td>
</tr>
<tr>
<td align="left">NetWalk</td>
<td align="center">0.7563 <inline-formula id="inf107">
<mml:math id="m126">
<mml:mrow>
<mml:mo>&#xb1;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> 0.0082</td>
<td align="center">0.7176 <inline-formula id="inf108">
<mml:math id="m127">
<mml:mrow>
<mml:mo>&#xb1;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> 0.0115</td>
<td align="center">0.6837 <inline-formula id="inf109">
<mml:math id="m128">
<mml:mrow>
<mml:mo>&#xb1;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> 0.0096</td>
<td align="center">0.7524 <inline-formula id="inf110">
<mml:math id="m129">
<mml:mrow>
<mml:mo>&#xb1;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> 0.0108</td>
<td align="center">0.7478 <inline-formula id="inf111">
<mml:math id="m130">
<mml:mrow>
<mml:mo>&#xb1;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> 0.0085</td>
<td align="center">0.7396 <inline-formula id="inf112">
<mml:math id="m131">
<mml:mrow>
<mml:mo>&#xb1;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> 0.0121</td>
</tr>
<tr>
<td align="left">AddGraph</td>
<td align="center">0.8341 <inline-formula id="inf113">
<mml:math id="m132">
<mml:mrow>
<mml:mo>&#xb1;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> 0.0076</td>
<td align="center">0.8470 <inline-formula id="inf114">
<mml:math id="m133">
<mml:mrow>
<mml:mo>&#xb1;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> 0.0064</td>
<td align="center">0.8369 <inline-formula id="inf115">
<mml:math id="m134">
<mml:mrow>
<mml:mo>&#xb1;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> 0.0088</td>
<td align="center">0.7918 <inline-formula id="inf116">
<mml:math id="m135">
<mml:mrow>
<mml:mo>&#xb1;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> 0.0095</td>
<td align="center">0.8037 <inline-formula id="inf117">
<mml:math id="m136">
<mml:mrow>
<mml:mo>&#xb1;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> 0.0072</td>
<td align="center">0.7950 <inline-formula id="inf118">
<mml:math id="m137">
<mml:mrow>
<mml:mo>&#xb1;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> 0.0086</td>
</tr>
<tr>
<td align="left">Ours (ST-MVAN)</td>
<td align="center">
<bold>0.8789</bold> <inline-formula id="inf119">
<mml:math id="m138">
<mml:mrow>
<mml:mo mathvariant="bold">&#xb1;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> <bold>0.0079</bold>
</td>
<td align="center">
<bold>0.8654</bold> <inline-formula id="inf120">
<mml:math id="m139">
<mml:mrow>
<mml:mo mathvariant="bold">&#xb1;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> <bold>0.0053</bold>
</td>
<td align="center">
<bold>0.8588</bold> <inline-formula id="inf121">
<mml:math id="m140">
<mml:mrow>
<mml:mo mathvariant="bold">&#xb1;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> <bold>0.0064</bold>
</td>
<td align="center">
<bold>0.8324</bold> <inline-formula id="inf122">
<mml:math id="m141">
<mml:mrow>
<mml:mo mathvariant="bold">&#xb1;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> <bold>0.0084</bold>
</td>
<td align="center">
<bold>0.8265</bold> <inline-formula id="inf123">
<mml:math id="m142">
<mml:mrow>
<mml:mo mathvariant="bold">&#xb1;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> <bold>0.0069</bold>
</td>
<td align="center">
<bold>0.8118</bold> <inline-formula id="inf124">
<mml:math id="m143">
<mml:mrow>
<mml:mo mathvariant="bold">&#xb1;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> <bold>0.0063</bold>
</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Bold values indicate the best performance.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<p>To visually evaluate overall model performance under specific noise levels, <xref ref-type="fig" rid="F3">Figures 3</xref>, <xref ref-type="fig" rid="F4">4</xref> show ROC curve comparisons of all models at a 5% anomaly injection ratio. All curves present a sharp upward slope, attaining a high True Positive Rate (TPR) while maintaining a low False Positive Rate (FPR), demonstrating strong anomaly ranking ability. Notably, ST-MVAN&#x2019;s ROC curve covers the largest area and lies mostly above all baseline models, visually highlighting its edge in overall detection performance. Against AddGraph, ST-MVAN shows a notable advantage in the low FPR region with more distinct curvature. This trait enables ST-MVAN to detect more abnormal interactions with fewer false positives in real-world applications, thus delivering greater practical utility.</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>Comparison of ROC curves between ST-MVAN and baseline models on the Digg dataset.</p>
</caption>
<graphic xlink:href="fphy-14-1786937-g003.tif">
<alt-text content-type="machine-generated">ROC curve chart compares five models and a random baseline for anomaly detection on Digg data with 5 percent anomaly ratio. Ours (ST-MVAN) has the highest AUC at 0.8654, followed by AddGraph at 0.8470, GraphSAGE at 0.7385, NetWalk at 0.7176, and DeepWalk at 0.6881. The x-axis is false positive rate and the y-axis is true positive rate.</alt-text>
</graphic>
</fig>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>Comparison of ROC curves between ST-MVAN and baseline models on the Yelp dataset.</p>
</caption>
<graphic xlink:href="fphy-14-1786937-g004.tif">
<alt-text content-type="machine-generated">ROC curve comparison graphic for Yelp anomaly detection at 5 percent anomaly ratio, showing the performance of DeepWalk, GraphSAGE, NetWalk, AddGraph, and Ours (ST-MVAN) methods. Ours (ST-MVAN) achieves the highest area under the curve at zero point eight two six five, indicating superior true positive rates across most false positive rates.</alt-text>
</graphic>
</fig>
</sec>
<sec id="s4-3-2">
<label>4.3.2</label>
<title>Ablation studies</title>
<p>To further confirm the validity of the core components in the ST-MVAN model, we carried out ablation experiments on the Digg and Yelp datasets with a fixed 5% anomaly injection ratio. Experimental results presented in <xref ref-type="table" rid="T2">Table 2</xref> and <xref ref-type="fig" rid="F5">Figure 5</xref> show that the full ST-MVAN model achieved the best AUC performance on both datasets, reaching 86.54% and 82.65% respectively. Comparative analysis indicates that removing the Bi-GRU module causes the most notable performance drop. This result fully proves that capturing long- and short-term dynamic evolution patterns of user behaviors is critical for identifying abnormal behaviors in social networks. Additionally, replacing the multi-head attention mechanism with a traditional GCN adopting weighted average aggregation leads to the loss of edge attribute semantic information and the ability to adaptively assign neighbor weights. Also, excluding the ECA channel attention prevents the model from adaptively balancing the importance of different relational views, thus undermining the robustness of multi-view feature fusion. Overall, experimental results clearly demonstrate that ST-MVAN&#x2019;s superior performance originates from the synergistic effect of refined spatial neighborhood feature aggregation, adaptive multi-view subgraph fusion, and bidirectional temporal modeling. This synergy allows the model to efficiently learn spatiotemporal distribution patterns of normal interactions and accurately identify abnormal behaviors deviating from these patterns.</p>
<table-wrap id="T2" position="float">
<label>TABLE 2</label>
<caption>
<p>Ablation study results on Digg and Yelp datasets.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Model variants</th>
<th align="center">Digg</th>
<th align="center">Yelp</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">With GCN aggregation</td>
<td align="center">0.8492</td>
<td align="center">0.8094</td>
</tr>
<tr>
<td align="left">Without ECA</td>
<td align="center">0.8578</td>
<td align="center">0.8185</td>
</tr>
<tr>
<td align="left">Without Bi-GRU</td>
<td align="center">0.8390</td>
<td align="center">0.7961</td>
</tr>
<tr>
<td align="left">Ours (ST-MVAN)</td>
<td align="center">
<bold>0.8654</bold>
</td>
<td align="center">
<bold>0.8265</bold>
</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Bold values indicate the best performance.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>Visualization of ablation study results.</p>
</caption>
<graphic xlink:href="fphy-14-1786937-g005.tif">
<alt-text content-type="machine-generated">Bar chart comparing AUC values for Digg and Yelp datasets using four model variants: Without Bi-GRU, Without ECA, With GCN Aggregation, and Ours (ST-MVAN). Ours (ST-MVAN) achieves the highest AUC in both datasets.</alt-text>
</graphic>
</fig>
</sec>
<sec id="s4-3-3">
<label>4.3.3</label>
<title>Sensitivity analysis</title>
<p>To further assess ST-MVAN&#x2019;s stability across varying training proportions, we carried out sensitivity tests on the Digg dataset with a fixed 10% anomaly injection ratio. We steadily decreased the training proportion from 60% to 10% and documented AUC values for each timestamp in the testing phase.</p>
<p>From the experimental results in <xref ref-type="fig" rid="F6">Figure 6</xref>, it is evident that as the training set proportion drops from <inline-formula id="inf125">
<mml:math id="m144">
<mml:mrow>
<mml:mn>60</mml:mn>
<mml:mi>%</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> to <inline-formula id="inf126">
<mml:math id="m145">
<mml:mrow>
<mml:mn>10</mml:mn>
<mml:mi>%</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, the median and maximum AUC values of the model do not decline with reduced training data, instead showing a steady upward tendency. This is because the training set consists exclusively of normal samples, and the relative quantity of anomalous edge samples in the testing environment increases with the rising testing proportion. This data partitioning shift enables the model to better capture distribution differences between positive and negative samples during inference, enhancing the discriminability of anomaly scores and lifting the upper limit of detection performance. Yet when training data is reduced to <inline-formula id="inf127">
<mml:math id="m146">
<mml:mrow>
<mml:mn>10</mml:mn>
<mml:mi>%</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, the boxplot morphology changes noticeably, with an expanded vertical span and a decreased minimum AUC value, indicating that data scarcity increases model training uncertainty and fluctuation, impairing detection result stability to some extent. Even so, ST-MVAN maintains a highly competitive average AUC level under 10% training data, with its median exceeding that of higher training proportion settings. This fully verifies the proposed framework&#x2019;s excellent feature capture ability and robustness in label-scarce or weakly supervised scenarios, effectively addressing data limitation challenges in dynamic graph anomaly detection.</p>
<fig id="F6" position="float">
<label>FIGURE 6</label>
<caption>
<p>Boxplot of AUC results on the Digg dataset under different training ratios.</p>
</caption>
<graphic xlink:href="fphy-14-1786937-g006.tif">
<alt-text content-type="machine-generated">Box plot comparing AUC values across six training ratios ranging from sixty percent to ten percent. Each box shows variation in AUC, with generally higher medians and wider variability as the training ratio decreases.</alt-text>
</graphic>
</fig>
</sec>
</sec>
</sec>
<sec sec-type="conclusion" id="s5">
<label>5</label>
<title>Conclusion</title>
<p>Tackling issues related to anomalous interactions in dynamic social networks, including strong concealment, intricate evolution characteristics, and lack of labeled data, this paper presents an end-to-end deep learning detection architecture named ST-MVAN. This model creatively develops an attention mechanism integrating edge attribute bias and an ECA channel weighting module, realizing refined feature fusion for multi-view heterogeneous subgraphs. By combining Bi-GRU and an Encoder-Decoder self-supervised reconstruction architecture, the model successfully captures long- and short-term temporal dependencies of user behaviors, allowing accurate detection of anomalous edges in an unsupervised setting. Extensive experiments conducted on two real-world datasets, Digg and Yelp, have fully validated the effectiveness and robustness of ST-MVAN. Experimental results demonstrate that the proposed model significantly outperforms mainstream baseline methods, such as NetWalk and AddGraph, in terms of detection accuracy. Through ablation studies, we confirmed that biased attention aggregation, multi-view adaptive fusion, and bidirectional temporal modeling are critical components for enhancing model performance. Additionally, the model exhibited satisfactory stability in the sensitivity analysis.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s6">
<title>Data availability statement</title>
<p>The raw data supporting the conclusions of this article will be made available by the authors, without undue reservation.</p>
</sec>
<sec sec-type="author-contributions" id="s7">
<title>Author contributions</title>
<p>JW: Supervision, Methodology, Validation, Conceptualization, Writing &#x2013; review and editing, Data curation, Investigation, Software, Writing &#x2013; original draft, Formal Analysis, Resources, Visualization, Project administration.</p>
</sec>
<sec sec-type="COI-statement" id="s9">
<title>Conflict of interest</title>
<p>The author(s) declared that this work was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="ai-statement" id="s10">
<title>Generative AI statement</title>
<p>The author(s) declared that generative AI was not used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p>
</sec>
<sec sec-type="disclaimer" id="s11">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<label>1.</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xing</surname>
<given-names>L</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Q</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>H</given-names>
</name>
<name>
<surname>Ma</surname>
<given-names>H</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>X</given-names>
</name>
</person-group>. <article-title>A survey on social network&#x2019;s anomalous behavior detection</article-title>. <source>Complex and Intell Syst</source> (<year>2024</year>) <volume>10</volume>:<fpage>5917</fpage>&#x2013;<lpage>32</lpage>. <pub-id pub-id-type="doi">10.1007/s40747-024-01446-8</pub-id>
</mixed-citation>
</ref>
<ref id="B2">
<label>2.</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Guo</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>X</given-names>
</name>
<name>
<surname>Mu</surname>
<given-names>Z</given-names>
</name>
</person-group>. <article-title>Adversarial machine learning on social network: a survey</article-title>. <source>Front Phys</source> (<year>2021</year>) <volume>9</volume>:<fpage>766540</fpage>. <pub-id pub-id-type="doi">10.3389/fphy.2021.766540</pub-id>
</mixed-citation>
</ref>
<ref id="B3">
<label>3.</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ma</surname>
<given-names>X</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Xue</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>C</given-names>
</name>
<name>
<surname>Sheng</surname>
<given-names>QZ</given-names>
</name>
<etal/>
</person-group> <article-title>A comprehensive survey on graph anomaly detection with deep learning</article-title>. <source>IEEE Transact Knowledge Data Eng.</source> (<year>2021</year>) <volume>35</volume>:<fpage>12012</fpage>&#x2013;<lpage>38</lpage>. <pub-id pub-id-type="doi">10.1109/tkde.2021.3118815</pub-id>
</mixed-citation>
</ref>
<ref id="B4">
<label>4.</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhu</surname>
<given-names>X</given-names>
</name>
<name>
<surname>Guo</surname>
<given-names>C</given-names>
</name>
<name>
<surname>Feng</surname>
<given-names>H</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>Y</given-names>
</name>
<name>
<surname>Feng</surname>
<given-names>Y</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>X</given-names>
</name>
<etal/>
</person-group> <article-title>A review of key technologies for emotion analysis using multimodal information</article-title>. <source>Cogn Comput</source> (<year>2024</year>) <volume>16</volume>:<fpage>1504</fpage>&#x2013;<lpage>30</lpage>. <pub-id pub-id-type="doi">10.1007/s12559-024-10287-z</pub-id>
</mixed-citation>
</ref>
<ref id="B5">
<label>5.</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhu</surname>
<given-names>X</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>Z</given-names>
</name>
<name>
<surname>Cambria</surname>
<given-names>E</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>X</given-names>
</name>
<name>
<surname>Fan</surname>
<given-names>X</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>H</given-names>
</name>
<etal/>
</person-group> <article-title>A client&#x2013;server based recognition system: non-contact single/multiple emotional and behavioral state assessment methods</article-title>. <source>Comp Methods Programs Biomed</source> (<year>2025</year>) <volume>260</volume>:<fpage>108564</fpage>. <pub-id pub-id-type="doi">10.1016/j.cmpb.2024.108564</pub-id>
<pub-id pub-id-type="pmid">39732086</pub-id>
</mixed-citation>
</ref>
<ref id="B6">
<label>6.</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhu</surname>
<given-names>X</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>Y</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>X</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>R</given-names>
</name>
</person-group> <article-title>Emotion recognition based on brain-like multimodal hierarchical perception</article-title>. <source>Multimedia Tools Appl</source> (<year>2024</year>) <volume>83</volume>:<fpage>56039</fpage>&#x2013;<lpage>57</lpage>. <pub-id pub-id-type="doi">10.1007/s11042-023-17347-w</pub-id>
</mixed-citation>
</ref>
<ref id="B7">
<label>7.</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>R</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Y</given-names>
</name>
<name>
<surname>Cambria</surname>
<given-names>E</given-names>
</name>
<name>
<surname>Fan</surname>
<given-names>X</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>X</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>Y</given-names>
</name>
<etal/>
</person-group> <article-title>Contrastive-based removal of negative information in multimodal emotion analysis</article-title>. <source>Cogn Comput</source> (<year>2025</year>) <volume>17</volume>:<fpage>107</fpage>. <pub-id pub-id-type="doi">10.1007/s12559-025-10463-9</pub-id>
</mixed-citation>
</ref>
<ref id="B8">
<label>8.</label>
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Jiang</surname>
<given-names>B</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Z</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>D</given-names>
</name>
<name>
<surname>Tang</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Luo</surname>
<given-names>B</given-names>
</name>
</person-group> <article-title>Semi-supervised learning with graph learning-convolutional networks</article-title>. In: <source>Proceedings of the IEEE/CVF conference on computer vision and pattern recognition</source> (<year>2019</year>). p. <fpage>11313</fpage>&#x2013;<lpage>20</lpage>.</mixed-citation>
</ref>
<ref id="B9">
<label>9.</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Veli&#x10d;kovi&#x107;</surname>
<given-names>P</given-names>
</name>
<name>
<surname>Cucurull</surname>
<given-names>G</given-names>
</name>
<name>
<surname>Casanova</surname>
<given-names>A</given-names>
</name>
<name>
<surname>Romero</surname>
<given-names>A</given-names>
</name>
<name>
<surname>Lio</surname>
<given-names>P</given-names>
</name>
<name>
<surname>Bengio</surname>
<given-names>Y</given-names>
</name>
</person-group>. <article-title>Graph attention networks</article-title> (<year>2018</year>) <volume>6</volume>:<fpage>2</fpage>. <comment>Available online at: <ext-link ext-link-type="uri" xlink:href="https://openreview.net/forum?id=rJXMpikCZ">https://openreview.net/forum?id&#x3d;rJXMpikCZ</ext-link>
</comment>
</mixed-citation>
</ref>
<ref id="B10">
<label>10.</label>
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Ding</surname>
<given-names>K</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Bhanushali</surname>
<given-names>R</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>H</given-names>
</name>
</person-group>. <article-title>Deep anomaly detection on attributed networks</article-title>. In: <source>Proceedings of the 2019 SIAM international conference on data mining (SIAM)</source> (<year>2019</year>). p. <fpage>594</fpage>&#x2013;<lpage>602</lpage>.</mixed-citation>
</ref>
<ref id="B11">
<label>11.</label>
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Yoon</surname>
<given-names>M</given-names>
</name>
<name>
<surname>Hooi</surname>
<given-names>B</given-names>
</name>
<name>
<surname>Shin</surname>
<given-names>K</given-names>
</name>
<name>
<surname>Faloutsos</surname>
<given-names>C</given-names>
</name>
</person-group>. <article-title>Fast and accurate anomaly detection in dynamic graphs with a two-pronged approach</article-title>. In: <source>Proceedings of the 25th ACM SIGKDD international conference on knowledge discovery and data mining</source> (<year>2019</year>). p. <fpage>647</fpage>&#x2013;<lpage>57</lpage>.</mixed-citation>
</ref>
<ref id="B12">
<label>12.</label>
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Perozzi</surname>
<given-names>B</given-names>
</name>
<name>
<surname>Al-Rfou</surname>
<given-names>R</given-names>
</name>
<name>
<surname>Skiena</surname>
<given-names>S</given-names>
</name>
</person-group>. <article-title>Deepwalk: online learning of social representations</article-title>. In: <source>Proceedings of the 20th ACM SIGKDD international conference on knowledge discovery and data mining</source> (<year>2014</year>). p. <fpage>701</fpage>&#x2013;<lpage>10</lpage>.</mixed-citation>
</ref>
<ref id="B13">
<label>13.</label>
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Yu</surname>
<given-names>W</given-names>
</name>
<name>
<surname>Cheng</surname>
<given-names>W</given-names>
</name>
<name>
<surname>Aggarwal</surname>
<given-names>CC</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>K</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>H</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>W</given-names>
</name>
</person-group>. <article-title>Netwalk: a flexible deep embedding approach for anomaly detection in dynamic networks</article-title>. In: <source>Proceedings of the 24th ACM SIGKDD international conference on knowledge discovery and data mining</source> (<year>2018</year>). p. <fpage>2672</fpage>&#x2013;<lpage>81</lpage>.</mixed-citation>
</ref>
<ref id="B14">
<label>14.</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zheng</surname>
<given-names>L</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>Z</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>Z</given-names>
</name>
<name>
<surname>Gao</surname>
<given-names>J</given-names>
</name>
</person-group>. <article-title>Addgraph: anomaly detection in dynamic graph using attention-based temporal gcn</article-title>. <source>IJCAI</source> (<year>2019</year>). <fpage>4419</fpage>&#x2013;<lpage>4425</lpage>. <pub-id pub-id-type="doi">10.24963/ijcai.2019/614</pub-id>
</mixed-citation>
</ref>
<ref id="B15">
<label>15.</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>Y</given-names>
</name>
<name>
<surname>Pan</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>YG</given-names>
</name>
<name>
<surname>Xiong</surname>
<given-names>F</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>L</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Q</given-names>
</name>
<etal/>
</person-group> <article-title>Anomaly detection in dynamic graphs via transformer</article-title>. <source>IEEE Trans Knowledge Data Eng</source> (<year>2021</year>) <volume>35</volume>:<fpage>12081</fpage>&#x2013;<lpage>94</lpage>. <pub-id pub-id-type="doi">10.1109/tkde.2021.3124061</pub-id>
</mixed-citation>
</ref>
<ref id="B16">
<label>16.</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>X</given-names>
</name>
<name>
<surname>Lu</surname>
<given-names>Y</given-names>
</name>
<name>
<surname>Shi</surname>
<given-names>C</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>R</given-names>
</name>
<name>
<surname>Cui</surname>
<given-names>P</given-names>
</name>
<name>
<surname>Mou</surname>
<given-names>S</given-names>
</name>
</person-group>. <article-title>Dynamic heterogeneous information network embedding with meta-path based proximity</article-title>. <source>IEEE Trans Knowl Data Eng</source> (<year>2020</year>) <volume>34</volume>:<fpage>1117</fpage>&#x2013;<lpage>32</lpage>. <pub-id pub-id-type="doi">10.1109/tkde.2020.2993870</pub-id>
</mixed-citation>
</ref>
<ref id="B17">
<label>17.</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>X</given-names>
</name>
<name>
<surname>Ji</surname>
<given-names>H</given-names>
</name>
<name>
<surname>Shi</surname>
<given-names>C</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>B</given-names>
</name>
<name>
<surname>Ye</surname>
<given-names>Y</given-names>
</name>
<name>
<surname>Cui</surname>
<given-names>P</given-names>
</name>
<etal/>
</person-group> <article-title>Heterogeneous graph attention network</article-title>. <source>World Wide Web Conference</source> (<year>2019</year>) <fpage>2022</fpage>&#x2013;<lpage>32</lpage>. <pub-id pub-id-type="doi">10.1145/3308558.3313562</pub-id>
</mixed-citation>
</ref>
<ref id="B18">
<label>18.</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhao</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>X</given-names>
</name>
<name>
<surname>Shi</surname>
<given-names>C</given-names>
</name>
<name>
<surname>Hu</surname>
<given-names>B</given-names>
</name>
<name>
<surname>Song</surname>
<given-names>G</given-names>
</name>
<name>
<surname>Ye</surname>
<given-names>Y</given-names>
</name>
</person-group>. <article-title>Heterogeneous graph structure learning for graph neural networks</article-title>. <source>Proc AAAI Conference Artificial Intelligence</source> (<year>2021</year>) <volume>35</volume>:<fpage>4697</fpage>&#x2013;<lpage>705</lpage>. <pub-id pub-id-type="doi">10.1609/aaai.v35i5.16600</pub-id>
</mixed-citation>
</ref>
<ref id="B19">
<label>19.</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Alam</surname>
<given-names>MT</given-names>
</name>
<name>
<surname>Ahmed</surname>
<given-names>CF</given-names>
</name>
<name>
<surname>Leung</surname>
<given-names>CK</given-names>
</name>
</person-group>. <article-title>Hyperedge anomaly detection with hypergraph neural network</article-title>. <source>arXiv preprint arXiv:2412.05641</source>. (<year>2024</year>).</mixed-citation>
</ref>
<ref id="B20">
<label>20.</label>
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>L</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>P</given-names>
</name>
<name>
<surname>Xiong</surname>
<given-names>K</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>R</given-names>
</name>
</person-group>. <article-title>Modeling heterogeneous graph network on fraud detection: a community-based framework with attention mechanism</article-title>. In: <source>Proceedings of the 30th ACM international conference on information and knowledge management</source> (<year>2021</year>). p. <fpage>1959</fpage>&#x2013;<lpage>68</lpage>.</mixed-citation>
</ref>
<ref id="B21">
<label>21.</label>
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>Y</given-names>
</name>
<name>
<surname>Zhu</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>C</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>Y</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Qiao</surname>
<given-names>Y</given-names>
</name>
<etal/>
</person-group> <article-title>Thgnn: an embedding-based model for anomaly detection in dynamic heterogeneous social networks</article-title>. In: <source>Proceedings of the 32nd ACM international conference on information and knowledge management</source> (<year>2023</year>). p. <fpage>1368</fpage>&#x2013;<lpage>78</lpage>.</mixed-citation>
</ref>
<ref id="B22">
<label>22.</label>
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Jin</surname>
<given-names>M</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>Y</given-names>
</name>
<name>
<surname>Zheng</surname>
<given-names>Y</given-names>
</name>
<name>
<surname>Chi</surname>
<given-names>L</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>YF</given-names>
</name>
<name>
<surname>Pan</surname>
<given-names>S</given-names>
</name>
</person-group>. <article-title>Anemone: graph anomaly detection with multi-scale contrastive learning</article-title>. In: <source>Proceedings of the 30th ACM international conference on information and knowledge management</source> (<year>2021</year>). p. <fpage>3122</fpage>&#x2013;<lpage>6</lpage>.</mixed-citation>
</ref>
<ref id="B23">
<label>23.</label>
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Jin</surname>
<given-names>W</given-names>
</name>
<name>
<surname>Derr</surname>
<given-names>T</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Y</given-names>
</name>
<name>
<surname>Ma</surname>
<given-names>Y</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>Z</given-names>
</name>
<name>
<surname>Tang</surname>
<given-names>J</given-names>
</name>
</person-group>. <article-title>Node similarity preserving graph convolutional networks</article-title>. In: <source>Proceedings of the 14th ACM international conference on web search and data mining</source> (<year>2021</year>). p. <fpage>148</fpage>&#x2013;<lpage>56</lpage>.</mixed-citation>
</ref>
<ref id="B24">
<label>24.</label>
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Dou</surname>
<given-names>Y</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>Z</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>L</given-names>
</name>
<name>
<surname>Deng</surname>
<given-names>Y</given-names>
</name>
<name>
<surname>Peng</surname>
<given-names>H</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>PS</given-names>
</name>
</person-group>. <article-title>Enhancing graph neural network-based fraud detectors against camouflaged fraudsters</article-title>. In: <source>Proceedings of the 29th ACM international conference on information and knowledge management</source> (<year>2020</year>). p. <fpage>315</fpage>&#x2013;<lpage>24</lpage>.</mixed-citation>
</ref>
<ref id="B25">
<label>25.</label>
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>Y</given-names>
</name>
<name>
<surname>Ao</surname>
<given-names>X</given-names>
</name>
<name>
<surname>Qin</surname>
<given-names>Z</given-names>
</name>
<name>
<surname>Chi</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Feng</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>H</given-names>
</name>
<etal/>
</person-group> <article-title>Pick and choose: a gnn-based imbalanced learning approach for fraud detection</article-title>. In: <source>Proceedings of the web conference 2021</source> (<year>2021</year>). p. <fpage>3168</fpage>&#x2013;<lpage>77</lpage>.</mixed-citation>
</ref>
<ref id="B26">
<label>26.</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xiang</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Zhu</surname>
<given-names>X</given-names>
</name>
<name>
<surname>Cambria</surname>
<given-names>E</given-names>
</name>
</person-group>. <article-title>Integrating audio&#x2013;visual text generation with contrastive learning for enhanced multimodal emotion analysis</article-title>. <source>Inf Fusion</source> (<year>2025</year>) <volume>127</volume>:<fpage>103809</fpage>. <pub-id pub-id-type="doi">10.1016/j.inffus.2025.103809</pub-id>
</mixed-citation>
</ref>
<ref id="B27">
<label>27.</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>R</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>D</given-names>
</name>
<name>
<surname>Cascone</surname>
<given-names>L</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Y</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>H</given-names>
</name>
<name>
<surname>Zheng</surname>
<given-names>J</given-names>
</name>
<etal/>
</person-group> <article-title>Raft: robust adversarial fusion transformer for multimodal sentiment analysis</article-title>. <source>Array</source> (<year>2025</year>) <volume>27</volume>:<fpage>100445</fpage>. <pub-id pub-id-type="doi">10.1016/j.array.2025.100445</pub-id>
</mixed-citation>
</ref>
<ref id="B28">
<label>28.</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhu</surname>
<given-names>X</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Y</given-names>
</name>
<name>
<surname>Cambria</surname>
<given-names>E</given-names>
</name>
<name>
<surname>Rida</surname>
<given-names>I</given-names>
</name>
<name>
<surname>L&#xf3;pez</surname>
<given-names>JS</given-names>
</name>
<name>
<surname>Cui</surname>
<given-names>L</given-names>
</name>
<etal/>
</person-group> <article-title>Rmer-dt: robust multimodal emotion recognition in conversational contexts based on diffusion and transformers</article-title>. <source>Inf Fusion</source> (<year>2025</year>) <volume>123</volume>:<fpage>103268</fpage>. <pub-id pub-id-type="doi">10.1016/j.inffus.2025.103268</pub-id>
</mixed-citation>
</ref>
<ref id="B29">
<label>29.</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>Y</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>H</given-names>
</name>
<name>
<surname>Rida</surname>
<given-names>I</given-names>
</name>
<name>
<surname>Zhu</surname>
<given-names>X</given-names>
</name>
</person-group>. <article-title>A generative random modality dropout framework for robust multimodal emotion recognition</article-title>. <source>IEEE Intell Syst</source> (<year>2025</year>) <volume>40</volume>:<fpage>62</fpage>&#x2013;<lpage>9</lpage>. <pub-id pub-id-type="doi">10.1109/mis.2025.3597120</pub-id>
</mixed-citation>
</ref>
<ref id="B30">
<label>30.</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>R</given-names>
</name>
<name>
<surname>Guo</surname>
<given-names>C</given-names>
</name>
<name>
<surname>Shabaz</surname>
<given-names>M</given-names>
</name>
<name>
<surname>Rida</surname>
<given-names>I</given-names>
</name>
<name>
<surname>Cambria</surname>
<given-names>E</given-names>
</name>
<name>
<surname>Zhu</surname>
<given-names>X</given-names>
</name>
</person-group>. <article-title>Cime: contextual interaction-based multimodal emotion analysis with enhanced semantic information</article-title>. <source>IEEE Trans Comput Soc Syst</source> (<year>2025</year>). <fpage>1</fpage>&#x2013;<lpage>11</lpage>. <pub-id pub-id-type="doi">10.1109/tcss.2025.3572495</pub-id>
</mixed-citation>
</ref>
<ref id="B31">
<label>31.</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhu</surname>
<given-names>X</given-names>
</name>
<name>
<surname>Feng</surname>
<given-names>H</given-names>
</name>
<name>
<surname>Cambria</surname>
<given-names>E</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>Y</given-names>
</name>
<name>
<surname>Ju</surname>
<given-names>M</given-names>
</name>
<name>
<surname>Yuan</surname>
<given-names>H</given-names>
</name>
<etal/>
</person-group> <article-title>Emvas: end-to-end multimodal emotion visualization analysis system</article-title>. <source>Complex Intell Syst</source> (<year>2025</year>) <volume>11</volume>:<fpage>1</fpage>&#x2013;<lpage>15</lpage>. <pub-id pub-id-type="doi">10.1007/s40747-025-01931-8</pub-id>
</mixed-citation>
</ref>
<ref id="B32">
<label>32.</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hamilton</surname>
<given-names>W</given-names>
</name>
<name>
<surname>Ying</surname>
<given-names>Z</given-names>
</name>
<name>
<surname>Leskovec</surname>
<given-names>J</given-names>
</name>
</person-group>. <article-title>Inductive representation learning on large graphs</article-title>. <source>Adv Neural Inform Process Syst</source> (<year>2017</year>) <volume>30</volume>:<fpage>1025</fpage>&#x2013;<lpage>35</lpage>. <comment>Available online at: <ext-link ext-link-type="uri" xlink:href="https://dl.acm.org/doi/10.5555/3294771.3294869">https://dl.acm.org/doi/10.5555/3294771.3294869</ext-link>.</comment>
</mixed-citation>
</ref>
</ref-list>
<fn-group>
<fn fn-type="custom" custom-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2116069/overview">Amin Ul Haq</ext-link>, University of Electronic Science and Technology of China, China</p>
</fn>
<fn fn-type="custom" custom-type="reviewed-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3200084/overview">Syed Mohd. Faisal</ext-link>, Malla Reddy University, India</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3223935/overview">Xianxun Zhu</ext-link>, Shanghai University, China</p>
</fn>
</fn-group>
</back>
</article>