<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" dtd-version="1.3" article-type="research-article">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Public Health</journal-id>
<journal-title-group>
<journal-title>Frontiers in Public Health</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Public Health</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">2296-2565</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fpubh.2025.1663871</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Research</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Evaluating sentiment analysis models in healthcare: addressing bias and enhancing interpretability</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name><surname>Wang</surname> <given-names>Chenxu</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Miao</surname> <given-names>Zhuang</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x0002A;</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Zeng</surname> <given-names>Haoran</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x00026; editing</role>
<uri xlink:href="https://loop.frontiersin.org/people/3130646"/>
</contrib>
</contrib-group>
<aff id="aff1"><label>1</label><institution>Department of Emergency, First Clinical Medical College, Nanchang University</institution>, <city>Nanchang</city>, <country country="cn">China</country></aff>
<aff id="aff2"><label>2</label><institution>Inner Mongolia University of Technology</institution>, <city>Hohhot</city>, <country country="cn">China</country></aff>
<author-notes>
<corresp id="c001"><label>&#x0002A;</label>Correspondence: Zhuang Miao, <email xlink:href="mailto:qymq2743@outlook.com">qymq2743@outlook.com</email></corresp>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-03-19">
<day>19</day>
<month>03</month>
<year>2026</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2025</year>
</pub-date>
<volume>13</volume>
<elocation-id>1663871</elocation-id>
<history>
<date date-type="received">
<day>11</day>
<month>07</month>
<year>2025</year>
</date>
<date date-type="rev-recd">
<day>01</day>
<month>10</month>
<year>2025</year>
</date>
<date date-type="accepted">
<day>14</day>
<month>11</month>
<year>2025</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x000A9; 2026 Wang, Miao and Zeng.</copyright-statement>
<copyright-year>2026</copyright-year>
<copyright-holder>Wang, Miao and Zeng</copyright-holder>
<license>
<ali:license_ref start_date="2026-03-19">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<sec>
<title>Introduction</title>
<p>Advancing trustworthy AI applications in healthcare necessitates systems that are not only high-performing but also capable of explaining decisions and addressing biases, particularly in critical tasks like sentiment analysis on clinical narratives and patient feedback. Conventional sentiment analysis methods, while effective in general applications, struggle with domain shift, linguistic variability, and ambiguous labeling in healthcare, limiting their interpretability and fairness in clinical contexts. To overcome these limitations, a novel sentiment analysis framework is proposed to improve both accuracy and interpretability.</p></sec>
<sec>
<title>Methods</title>
<p>This framework employs a formal probabilistic modeling approach that incorporates fine-grained sentiment granularity and domain-aware priors. Central to the framework is the Sentiment Modulated Encoding Network (SMEN), a transformer-based architecture featuring a gating mechanism that dynamically enhances sentiment-relevant features across network layers, enabling rich sentiment representation learning without external resources. Additionally, the Context Polarity Decoupling Scheme (CPDS) disentangles sentiment from domain-specific artifacts through a multi-stage adversarial and contrastive training process, accompanied by a polarity explanation module that provides token-level interpretability.</p></sec>
<sec>
<title>Results and Discussion</title>
<p>Together, SMEN and CPDS form a robust system capable of producing domain-invariant and explainable sentiment predictions. Experimental results on multiple healthcare datasets demonstrate superior generalization and more transparent model attributions compared to existing approaches. This research contributes to the development of explainable and bias-resistant AI tools for healthcare and highlights potential avenues for interdisciplinary exploration at the interface of affective computing and clinical informatics.</p></sec></abstract>
<kwd-group>
<kwd>explainable sentiment analysis</kwd>
<kwd>healthcare narratives</kwd>
<kwd>Sentiment Modulated Encoding Network</kwd>
<kwd>Context Polarity Decoupling Scheme</kwd>
<kwd>bias-resistant AI</kwd>
</kwd-group>
<funding-group>
<funding-statement>The author(s) declare that no financial support was received for the research and/or publication of this article.</funding-statement>
</funding-group>
<counts>
<fig-count count="5"/>
<table-count count="7"/>
<equation-count count="22"/>
<ref-count count="47"/>
<page-count count="16"/>
<word-count count="11022"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Digital Public Health</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="s1">
<label>1</label>
<title>Introduction</title>
<p>Sentiment analysis plays an increasingly vital role in healthcare applications, particularly in understanding patient feedback, mental health documentation, and clinical narratives. In this work, we argue that the complexity of medical language and the sensitivity of clinical data necessitate more rigorous evaluation of existing sentiment models in terms of reliability and fairness. I contend that beyond achieving high classification accuracy, sentiment analysis systems must demonstrate strong generalization across diverse patient populations and heterogeneous clinical contexts. Given the high-stakes nature of healthcare decision-making, we further emphasize the importance of improving interpretability to enhance clinical trust and support actionable insights. I also highlight that biases&#x02014;arising from data imbalance, annotation subjectivity, or model architectural choices&#x02014;can lead to significant misinterpretations of sentiment, ultimately affecting patient care. As noted by Miah et al. (<xref ref-type="bibr" rid="B1">1</xref>), the task of sentiment analysis in healthcare extends beyond improving model performance, requiring alignment with ethical standards and practical utility within clinical environments.</p>
<p>Initial approaches to sentiment analysis in healthcare focused on manually designed systems that mapped linguistic patterns to predefined emotional categories. These systems relied on structured methodologies for interpreting text, offering clarity and consistency in their outputs. While these approaches provided valuable insights, their dependency on rigid frameworks limited adaptability to diverse medical contexts. Furthermore, capturing the subtleties of patient language often proved challenging, given the static nature of predefined mappings. As pointed out by Zhang et al. (<xref ref-type="bibr" rid="B2">2</xref>), despite their inherent transparency, traditional methods in sentiment analysis have been criticized for lacking scalability and for their limited ability to handle the linguistic variability prevalent in healthcare-related texts.</p>
<p>Building upon earlier transparent methods, subsequent advancements introduced statistical models capable of learning sentiment patterns from annotated healthcare datasets. These models leveraged algorithms to uncover correlations between textual features and sentiment labels, resulting in improved flexibility and predictive accuracy. However, as Zeb et al. (<xref ref-type="bibr" rid="B3">3</xref>) highlighted, such approaches heavily depend on large-scale labeled datasets&#x02014;an obstacle in healthcare settings where data scarcity and privacy concerns often prevail. Moreover, despite their statistical rigor, these models frequently lack intuitive mechanisms to explain their predictions, which limits their adoption by clinicians and healthcare practitioners. While interpretability techniques like feature importance analysis offer partial transparency, the intricate and context-dependent nature of sentiment in clinical narratives often surpasses the explanatory capacity of these models (<xref ref-type="bibr" rid="B4">4</xref>).</p>
<p>In recent years, the emergence of advanced neural architectures has significantly reshaped the landscape of sentiment analysis in healthcare. As reviewed by Li et al. (<xref ref-type="bibr" rid="B5">5</xref>), these innovations have been particularly impactful due to their ability to handle complex medical language and heterogeneous clinical data. Pre-trained language models, trained on large-scale biomedical corpora, demonstrate strong capabilities in capturing subtle linguistic cues and contextual dependencies. According to Esmaeilzadeh et al. (<xref ref-type="bibr" rid="B6">6</xref>), such models offer impressive generalization across diverse healthcare applications, positioning them as versatile tools for sentiment classification tasks. However, as Li et al. (<xref ref-type="bibr" rid="B7">7</xref>) noted, challenges related to interpretability and embedded bias remain unresolved, especially in high-stakes medical settings. While techniques such as attention visualization and domain-specific fine-tuning have been proposed to address these concerns, ensuring ethically sound and transparent deployment continues to be a pressing area of research.</p>
<p>Recent advances in sentiment analysis have increasingly emphasized the critical roles of interpretability and fairness, particularly in sensitive domains such as healthcare. Despite these advancements, as noted by Das and Singh (<xref ref-type="bibr" rid="B8">8</xref>), traditional models continue to struggle with challenges such as domain-specific ambiguity, semantic drift, and the underrepresentation of minority linguistic expressions. Zhang et al. (<xref ref-type="bibr" rid="B2">2</xref>) further observed that even large-scale pre-trained models, while effective in general domains, often fail to generalize adequately when applied to specialized fields like clinical text analysis. In parallel, the integration of multimodal information&#x02014;including textual content, visual cues, and patient behavior&#x02014;has gained popularity as a strategy for more accurate affect recognition (<xref ref-type="bibr" rid="B9">9</xref>). However, most existing frameworks remain limited in their ability to disentangle sentiment from domain-specific artifacts. These persistent limitations highlight the pressing need for architectures that are not only semantically grounded but also robust to domain variability.</p>
<p>Given the limitations of symbolic methods in adaptability, machine learning models in interpretability, and deep learning models in bias and transparency, our approach proposes a comprehensive evaluation framework that combines bias mitigation strategies with enhanced interpretability techniques tailored for healthcare sentiment analysis. This framework is grounded in the belief that evaluating model fairness and explainability should not be an afterthought but an integral component of model development and deployment. Through the adoption of discrimination-recognition systems, specialty-focused transparency approaches, and participant-guided review measures, this plan endeavors to narrow the disconnect between computational accuracy and tangible utility. The suggested structure aims to adhere to principled medical norms and uphold balanced affect analysis among heterogeneous patient cohorts, ultimately fostering sounder and more dependable judgments within medical environments.</p>
<p>I propose a novel sentiment analysis framework tailored for the healthcare domain, designed to address the unique challenges of interpretability and bias. My key contributions are as follows:</p>
<list list-type="bullet">
<list-item><p>I introduce the Sentiment Modulated Encoding Network (SMEN), a transformer-based architecture equipped with a dynamic gating mechanism that amplifies sentiment-relevant features during representation learning. This design enhances the model&#x00027;s ability to identify nuanced affective cues within complex clinical narratives.</p></list-item>
<list-item><p>I develop the Contextual Polarity Decoupling Scheme (CPDS), a training paradigm that combines domain-adversarial learning, contrastive representation alignment, and attribution-guided regularization. CPDS ensures that sentiment representations are domain-invariant and semantically grounded, directly mitigating biases arising from dataset imbalance or context-specific artifacts.</p></list-item>
<list-item><p>I provide a comprehensive interpretability evaluation combining quantitative metrics (fidelity and stability), comparison with classical explainability methods, and a proposed human-in-the-loop assessment protocol involving clinical experts. This ensures that our framework is not only technically sound but also practically meaningful in real-world healthcare applications.</p></list-item>
</list></sec>
<sec id="s2">
<label>2</label>
<title>Related work</title>
<sec>
<label>2.1</label>
<title>Bias in clinical sentiment models</title>
<p>The deployment of sentiment analysis systems in healthcare has raised significant concerns regarding embedded biases, which can negatively impact clinical outcomes. As Li et al. (<xref ref-type="bibr" rid="B7">7</xref>) have shown, such biases frequently stem from imbalanced datasets that disproportionately represent certain demographic groups, leading to uneven model performance across dimensions such as race, gender, age, and socioeconomic status. Zhu et al. (<xref ref-type="bibr" rid="B10">10</xref>) further emphasize that misclassifying sentiments in narratives originating from minority populations can result in unequal treatment recommendations or even diagnostic errors. While various studies, including those by Das and Singh (<xref ref-type="bibr" rid="B8">8</xref>), have explored counterfactual fairness evaluation to quantify these biases, the complexity and linguistic heterogeneity of healthcare texts&#x02014;especially in multilingual or culturally diverse settings&#x02014;continue to exacerbate the issue. To mitigate such disparities, several methods have been proposed. Tan et al. (<xref ref-type="bibr" rid="B11">11</xref>) discuss adversarial training and domain-specific data augmentation, although these often involve trade-offs with model accuracy. Bello et al. (<xref ref-type="bibr" rid="B12">12</xref>) point out that the inherent opacity of deep learning models complicates bias detection, as the influence of specific linguistic cues on sentiment predictions remains opaque. Qi and Shabrina (<xref ref-type="bibr" rid="B13">13</xref>) highlight that while differential privacy shows promise in reducing demographic disparities, it may simultaneously degrade overall model utility. Meanwhile, Cui et al. (<xref ref-type="bibr" rid="B14">14</xref>) have proposed the use of demographic-specific performance metrics; yet, these often fall short of capturing the full nuance of sentiment expression in clinical narratives. As Talaat (<xref ref-type="bibr" rid="B15">15</xref>) notes, persistent biases are often only detected after deployment, underscoring the importance of preemptive bias-aware training strategies. Looking forward, Hazarika et al. (<xref ref-type="bibr" rid="B16">16</xref>) argue that future sentiment analysis frameworks must prioritize transparency in annotation practices and incorporate perspectives from marginalized communities to ensure equity and fairness in healthcare applications.</p>
<p>Several studies have explored sentiment analysis from the perspectives of model design, interpretability, and fairness. Barbieri et al. (<xref ref-type="bibr" rid="B17">17</xref>) investigated multilingual transformer models for sentiment tasks and noted challenges in adapting them to domain-specific content such as healthcare narratives. Similarly, Hartmann et al. (<xref ref-type="bibr" rid="B18">18</xref>) emphasized the necessity for sentiment models to align with users&#x00027; cognitive expectations, especially in applications involving real-world decision making. The use of attention mechanisms and attribution-based methods has been widely adopted to improve explainability, though their semantic consistency remains debated (<xref ref-type="bibr" rid="B19">19</xref>). In terms of fairness, Mao et al. (<xref ref-type="bibr" rid="B20">20</xref>) conducted an empirical study revealing that pre-trained language models exhibit significant demographic and contextual biases when applied to affective tasks. To address such biases, researchers have proposed strategies like modality-invariant representation learning (<xref ref-type="bibr" rid="B16">16</xref>) and generative modeling for aspect-specific sentiment control (<xref ref-type="bibr" rid="B21">21</xref>). Despite these efforts, few models offer a unified mechanism that simultaneously promotes interpretability and reduces bias in healthcare sentiment analysis. My proposed framework aims to fill this gap by embedding explanation-aware and bias-resilient components within a single architecture.</p>
</sec>
<sec>
<label>2.2</label>
<title>Interpretable deep learning approaches</title>
<p>Interpretability has become a pivotal factor in the successful deployment of sentiment analysis systems in healthcare, where decisions carry high clinical and ethical stakes. Transformer-based models&#x02014;particularly domain-adapted variants like BioBERT and ClinicalBERT&#x02014;have demonstrated state-of-the-art performance in a variety of medical text mining tasks (<xref ref-type="bibr" rid="B22">22</xref>). However, as noted by Bordoloi and Biswas (<xref ref-type="bibr" rid="B23">23</xref>), the internal mechanisms of such deep models often remain opaque, which continues to hinder their adoption in clinical settings, as emphasized by He et al. (<xref ref-type="bibr" rid="B24">24</xref>). Zhang et al. (<xref ref-type="bibr" rid="B25">25</xref>) argue that sentiment predictions must not only be accurate but also align with the reasoning processes familiar to healthcare professionals in order to build trust and ensure actionable decision-making. To address this challenge, researchers have explored various interpretability-enhancing techniques. Attention-based explanations and concept-level attribution have received considerable attention, yet their ability to truly capture the rationale behind model decisions is still under debate, as highlighted by Barbieri et al. (<xref ref-type="bibr" rid="B17">17</xref>). The inherent variability of medical language and the subtle expression of sentiments&#x02014;such as implicit anxiety or discomfort&#x02014;further complicate model explainability, as noted by Wang et al. (<xref ref-type="bibr" rid="B26">26</xref>). Although model-agnostic tools like SHAP and LIME have been adapted for clinical contexts, they often fail to align with domain-specific semantics and can produce oversimplified or misleading explanations (<xref ref-type="bibr" rid="B27">27</xref>). Emerging efforts have focused on integrating structured domain knowledge into model architectures. For instance, Zhang et al. (<xref ref-type="bibr" rid="B19">19</xref>) discuss how medical ontologies can be embedded into neural frameworks to provide more clinically meaningful explanations. Hybrid systems that combine symbolic reasoning with neural networks have also shown promise in grounding model predictions within predefined clinical concepts (<xref ref-type="bibr" rid="B28">28</xref>). The use of annotated datasets with detailed rationales has become increasingly common to train and validate interpretable models, ensuring alignment with the actual decision-making processes used in healthcare settings (<xref ref-type="bibr" rid="B29">29</xref>). As Yu et al. (<xref ref-type="bibr" rid="B9">9</xref>) suggest, meaningful interpretability requires close collaboration between natural language processing experts and medical practitioners to ensure that the explanations provided are both technically accurate and cognitively relevant. Ultimately, as Hartmann et al. (<xref ref-type="bibr" rid="B18">18</xref>) emphasize, interpretability in healthcare sentiment analysis must reflect the cognitive processes of clinicians, necessitating a multidisciplinary approach to model design and evaluation.</p>
</sec>
<sec>
<label>2.3</label>
<title>Evaluation metrics and benchmarks</title>
<p>The evaluation of sentiment analysis models within healthcare demands specialized metrics and benchmarks that reflect the unique complexities of clinical language, patient diversity, and ethical considerations. As Zhang et al. (<xref ref-type="bibr" rid="B21">21</xref>) argue, conventional metrics such as accuracy and F1 score often fall short in capturing the nuances of healthcare sentiment tasks, where misclassifications can have direct implications for patient outcomes. In response, researchers have proposed more sophisticated alternatives&#x02014;such as the Matthews correlation coefficient and cost-sensitive loss functions&#x02014;to better align evaluation practices with real-world clinical risks (<xref ref-type="bibr" rid="B30">30</xref>). Temporal evaluation metrics are also gaining prominence for tracking emotional progression in longitudinal patient records. As Zhu et al. (<xref ref-type="bibr" rid="B10">10</xref>) highlight, these metrics are crucial for capturing dynamic affective states over time. However, a persistent challenge lies in the datasets themselves. While resources like i2b2 and MIMIC-III serve as foundational benchmarks, Das and Singh (<xref ref-type="bibr" rid="B8">8</xref>) point out that these corpora often lack sentiment-specific annotations and demographic diversity, limiting their utility for bias-aware or fine-grained analysis. Recent efforts have thus turned toward building richer and more inclusive datasets. Tan et al. (<xref ref-type="bibr" rid="B11">11</xref>) describe initiatives focusing on affective dimensions within doctor-patient interactions, mental health narratives, and subjective patient feedback. Bello et al. (<xref ref-type="bibr" rid="B12">12</xref>) emphasize the importance of developing multilingual and multicultural corpora to address global healthcare contexts. Beyond dataset construction, model robustness must also be validated across demographic subgroups and domain variations. Qi and Shabrina (<xref ref-type="bibr" rid="B13">13</xref>) note the growing use of subgroup-based robustness checks as a way to ensure generalizability in heterogeneous settings. Equally important is the manner in which model outputs are evaluated from the perspective of end users. Cui et al. (<xref ref-type="bibr" rid="B14">14</xref>) advocate for user-centric evaluation protocols that involve clinicians and patients, arguing that crowdsourced annotations often lack the domain expertise required for sensitive clinical interpretations. As Talaat (<xref ref-type="bibr" rid="B15">15</xref>) and Hazarika et al. (<xref ref-type="bibr" rid="B16">16</xref>) emphasize, the establishment of standardized evaluation frameworks&#x02014;ones that jointly account for fairness, interpretability, and domain-specific criteria&#x02014;is essential for the safe and responsible deployment of sentiment analysis technologies in medical environments. These frameworks must also evolve alongside advancements in modeling techniques and clinical practices to remain relevant and impactful.</p>
</sec>
</sec>
<sec id="s3">
<label>3</label>
<title>Method</title>
<sec>
<label>3.1</label>
<title>Overview</title>
<p>Sentiment analysis involves the computational examination of textual data to infer sentiments, opinions, emotions, and attitudes. This subsection presents the framework of our proposed sentiment analysis methodology, emphasizing its robustness and interpretability across varied linguistic and domain-specific contexts.</p>
<p>The approach comprises three core components. I formalize the sentiment analysis problem within a probabilistic framework, defining the label space, model assumptions, and key sources of complexity such as domain shift and label ambiguity. I introduce the Sentiment Modulated Encoding Network (SMEN), a transformer-based architecture that dynamically adjusts token representations via gated modulation layers. This design enables the network to enhance both local and global sentiment cues. Third, we present the Contextual Polarity Decoupling Scheme (CPDS), a domain-adaptive mechanism that disentangles sentiment information from confounding lexical and contextual artifacts through adversarial and contrastive learning. CPDS also includes a token-level attribution module for interpretability.</p>
<p>This framework, encompassing formalization, modeling, and strategy, offers a modular, extensible, and empirically validated solution for sentiment analysis. Its design allows seamless adaptation to multilingual and domain-specific applications, addressing the challenges inherent in real-world sentiment interpretation tasks.</p>
</sec>
<sec>
<label>3.2</label>
<title>Preliminaries</title>
<p>The sentiment analysis problem is formulated as a supervised learning task based on probabilistic inference. Let <inline-formula><mml:math id="M1"><mml:mrow><mml:mi mathvariant="script">D</mml:mi></mml:mrow><mml:mo>=</mml:mo><mml:msubsup><mml:mrow><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msup><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msup><mml:mo>,</mml:mo><mml:msup><mml:mrow><mml:mi>y</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>N</mml:mi></mml:mrow></mml:msubsup></mml:math></inline-formula> represent a dataset of <italic>N</italic> labeled text samples, where <inline-formula><mml:math id="M2"><mml:msup><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msup><mml:mo>=</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>w</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>w</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mo>&#x02026;</mml:mo><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>w</mml:mi></mml:mrow><mml:mrow><mml:mi>T</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula> corresponds to a sequence of <italic>T</italic> tokens drawn from a vocabulary <inline-formula><mml:math id="M3"><mml:mrow><mml:mi mathvariant="script">V</mml:mi></mml:mrow></mml:math></inline-formula>, and <inline-formula><mml:math id="M4"><mml:msup><mml:mrow><mml:mi>y</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msup><mml:mo>&#x02208;</mml:mo><mml:mrow><mml:mi mathvariant="script">Y</mml:mi></mml:mrow></mml:math></inline-formula> is a sentiment label. The label space <inline-formula><mml:math id="M5"><mml:mrow><mml:mi mathvariant="script">Y</mml:mi></mml:mrow></mml:math></inline-formula> may be binary (<inline-formula><mml:math id="M6"><mml:mrow><mml:mi mathvariant="script">Y</mml:mi></mml:mrow><mml:mo>=</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:math></inline-formula> for negative or positive sentiment), ternary (<inline-formula><mml:math id="M7"><mml:mrow><mml:mi mathvariant="script">Y</mml:mi></mml:mrow><mml:mo>=</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:mo>-</mml:mo><mml:mn>1</mml:mn><mml:mo>,</mml:mo><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:math></inline-formula> for negative, neutral, or positive sentiment), or continuous (<inline-formula><mml:math id="M8"><mml:mrow><mml:mi mathvariant="script">Y</mml:mi></mml:mrow><mml:mo>=</mml:mo><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mo>]</mml:mo></mml:mrow></mml:math></inline-formula>) when dealing with fine-grained or regression-based settings.</p>
<p>The sentiment prediction task involves modeling the conditional distribution <italic>p</italic>(<italic>y</italic>&#x02223;<italic>x</italic>) of labels given text sequences, and the objective is to approximate the decision function <inline-formula><mml:math id="M9"><mml:msup><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mrow><mml:mo>*</mml:mo></mml:mrow></mml:msup><mml:mo>:</mml:mo><mml:mrow><mml:mi mathvariant="script">X</mml:mi></mml:mrow><mml:mo>&#x02192;</mml:mo><mml:mrow><mml:mi mathvariant="script">Y</mml:mi></mml:mrow></mml:math></inline-formula> that minimizes the expected risk:</p>
<disp-formula id="EQ1"><mml:math id="M10"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msup><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mrow><mml:mo>*</mml:mo></mml:mrow></mml:msup><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mo class="qopname">arg</mml:mo><mml:mstyle displaystyle="true"><mml:munder class="msub"><mml:mrow><mml:mo class="qopname">max</mml:mo></mml:mrow><mml:mrow><mml:mi>y</mml:mi><mml:mo>&#x02208;</mml:mo><mml:mrow><mml:mi mathvariant="script">Y</mml:mi></mml:mrow></mml:mrow></mml:munder></mml:mstyle><mml:mi>p</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>y</mml:mi><mml:mo>&#x02223;</mml:mo><mml:mi>x</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>,</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(1)</label></disp-formula>
<p>where <inline-formula><mml:math id="M11"><mml:mrow><mml:mi mathvariant="script">X</mml:mi></mml:mrow></mml:math></inline-formula> denotes the input space. To estimate <italic>p</italic>(<italic>y</italic>&#x02223;<italic>x</italic>), a neural architecture parameterized by &#x003B8; is employed, resulting in a model <italic>p</italic><sub>&#x003B8;</sub>(<italic>y</italic>&#x02223;<italic>x</italic>). The parameters &#x003B8; are optimized by minimizing the negative log-likelihood over the dataset:</p>
<disp-formula id="EQ2"><mml:math id="M12"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mrow><mml:mi mathvariant="script">L</mml:mi></mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>&#x003B8;</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mo>-</mml:mo><mml:mstyle displaystyle="true"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>N</mml:mi></mml:mrow></mml:munderover></mml:mstyle><mml:mo class="qopname">log</mml:mo><mml:msub><mml:mrow><mml:mi>p</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x003B8;</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msup><mml:mrow><mml:mi>y</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msup><mml:mo>&#x02223;</mml:mo><mml:msup><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>.</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(2)</label></disp-formula>
<p>To manage sequences of varying lengths, each input <italic>x</italic> is embedded into a sequence of continuous vectors using an embedding function <inline-formula><mml:math id="M13"><mml:mi>e</mml:mi><mml:mo>:</mml:mo><mml:mrow><mml:mi mathvariant="script">V</mml:mi></mml:mrow><mml:mo>&#x02192;</mml:mo><mml:msup><mml:mrow><mml:mi>&#x0211D;</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msup></mml:math></inline-formula>, producing <inline-formula><mml:math id="M14"><mml:mi>X</mml:mi><mml:mo>=</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>e</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>w</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>,</mml:mo><mml:mo>&#x02026;</mml:mo><mml:mo>,</mml:mo><mml:mi>e</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>w</mml:mi></mml:mrow><mml:mrow><mml:mi>T</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>&#x02208;</mml:mo><mml:msup><mml:mrow><mml:mi>&#x0211D;</mml:mi></mml:mrow><mml:mrow><mml:mi>T</mml:mi><mml:mo>&#x000D7;</mml:mo><mml:mi>d</mml:mi></mml:mrow></mml:msup></mml:math></inline-formula>, where <italic>d</italic> is the dimensionality of the embedding space.</p>
<p>The semantic encoding of the sequence is computed by a function <italic>h</italic>:&#x0211D;<sup><italic>T</italic>&#x000D7;<italic>d</italic></sup> &#x02192; &#x0211D;<sup><italic>h</italic></sup>, resulting in a representation <italic>z</italic> &#x0003D; <italic>h</italic>(<italic>X</italic>) that captures contextual and compositional information. The classifier <inline-formula><mml:math id="M15"><mml:mi>&#x003D5;</mml:mi><mml:mo>:</mml:mo><mml:msup><mml:mrow><mml:mi>&#x0211D;</mml:mi></mml:mrow><mml:mrow><mml:mi>h</mml:mi></mml:mrow></mml:msup><mml:mo>&#x02192;</mml:mo><mml:msup><mml:mrow><mml:mi>&#x00394;</mml:mi></mml:mrow><mml:mrow><mml:mo>|</mml:mo><mml:mrow><mml:mi mathvariant="script">Y</mml:mi></mml:mrow><mml:mo>|</mml:mo></mml:mrow></mml:msup></mml:math></inline-formula> then maps the representation <italic>z</italic> to the probability simplex <inline-formula><mml:math id="M16"><mml:msup><mml:mrow><mml:mi>&#x00394;</mml:mi></mml:mrow><mml:mrow><mml:mo>|</mml:mo><mml:mrow><mml:mi mathvariant="script">Y</mml:mi></mml:mrow><mml:mo>|</mml:mo></mml:mrow></mml:msup></mml:math></inline-formula> over <inline-formula><mml:math id="M17"><mml:mrow><mml:mi mathvariant="script">Y</mml:mi></mml:mrow></mml:math></inline-formula>:</p>
<disp-formula id="EQ3"><mml:math id="M18"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mi>p</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x003B8;</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>y</mml:mi><mml:mo>&#x02223;</mml:mo><mml:mi>x</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mi>&#x003D5;</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>h</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>e</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>,</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(3)</label></disp-formula>
<p>where &#x003D5; outputs probabilities for each sentiment class.</p>
<p>The representation <italic>h</italic>(<italic>x</italic>) is designed to disentangle sentiment information from other content-related signals. Let <italic>s</italic>(<italic>x</italic>) denote the sentiment polarity embedded in <italic>x</italic>, and <italic>c</italic>(<italic>x</italic>) represent content-related information. The encoding function <italic>h</italic> is assumed to satisfy:</p>
<disp-formula id="EQ4"><mml:math id="M19"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>h</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mi>z</mml:mi><mml:mo>=</mml:mo><mml:mi>g</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>c</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>,</mml:mo><mml:mi>s</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>,</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(4)</label></disp-formula>
<p>where <italic>g</italic> is a composition function that ensures the separation of sentiment polarity <italic>s</italic>(<italic>x</italic>) from content <italic>c</italic>(<italic>x</italic>).</p>
<p>To enhance robustness, a polarity consistency constraint is imposed:</p>
<disp-formula id="EQ5"><mml:math id="M20"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mo>&#x02200;</mml:mo><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mo>&#x02208;</mml:mo><mml:mrow><mml:mi mathvariant="script">X</mml:mi></mml:mrow><mml:mo>,</mml:mo><mml:mtext>&#x02003;</mml:mtext><mml:mtext class="textrm" mathvariant="normal">if</mml:mtext><mml:msub><mml:mrow><mml:mi>y</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mrow><mml:mi>y</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mtext class="textrm" mathvariant="normal">then</mml:mtext><mml:mo>|</mml:mo><mml:mo>|</mml:mo><mml:mi>h</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>-</mml:mo><mml:mi>h</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>|</mml:mo><mml:msubsup><mml:mrow><mml:mo>|</mml:mo></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msubsup><mml:mo>&#x0003C;</mml:mo><mml:mi>&#x003F5;</mml:mi><mml:mo>,</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(5)</label></disp-formula>
<p>where &#x003F5; defines a threshold for intra-class compactness in the representation space.</p>
<p>For domain adaptation scenarios, the joint distribution <italic>P</italic>(<italic>x, y</italic>) is factorized as <italic>P</italic>(<italic>y</italic>&#x02223;<italic>x</italic>; <italic>d</italic>)<italic>P</italic>(<italic>x</italic>&#x02223;<italic>d</italic>)<italic>P</italic>(<italic>d</italic>), where <italic>d</italic> indicates the domain. The goal is to construct representations <italic>z</italic> &#x0003D; <italic>h</italic>(<italic>x</italic>) such that:</p>
<disp-formula id="EQ6"><mml:math id="M21"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>P</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>z</mml:mi><mml:mo>&#x02223;</mml:mo><mml:mi>y</mml:mi><mml:mo>;</mml:mo><mml:msub><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>&#x02248;</mml:mo><mml:mi>P</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>z</mml:mi><mml:mo>&#x02223;</mml:mo><mml:mi>y</mml:mi><mml:mo>;</mml:mo><mml:msub><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>,</mml:mo><mml:mtext>&#x02003;</mml:mtext><mml:mo>&#x02200;</mml:mo><mml:msub><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mo>&#x02208;</mml:mo><mml:msub><mml:mrow><mml:mrow><mml:mi mathvariant="script">D</mml:mi></mml:mrow></mml:mrow><mml:mrow><mml:mtext class="textrm" mathvariant="normal">train</mml:mtext></mml:mrow></mml:msub><mml:mo>,</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(6)</label></disp-formula>
<p>ensuring that the sentiment encoding remains invariant across different domains.</p>
<p>For multilingual sentiment analysis, input sequences <italic>x</italic> are drawn from language-specific vocabularies <inline-formula><mml:math id="M22"><mml:msub><mml:mrow><mml:mrow><mml:mi mathvariant="script">V</mml:mi></mml:mrow></mml:mrow><mml:mrow><mml:mi>&#x02113;</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> corresponding to language &#x02113;. A shared multilingual embedding space mapping <inline-formula><mml:math id="M23"><mml:mi>A</mml:mi><mml:mo>:</mml:mo><mml:msub><mml:mrow><mml:mrow><mml:mi mathvariant="script">V</mml:mi></mml:mrow></mml:mrow><mml:mrow><mml:mi>&#x02113;</mml:mi></mml:mrow></mml:msub><mml:mo>&#x02192;</mml:mo><mml:msub><mml:mrow><mml:mrow><mml:mi mathvariant="script">V</mml:mi></mml:mrow></mml:mrow><mml:mrow><mml:mn>0</mml:mn></mml:mrow></mml:msub></mml:math></inline-formula> is utilized to project tokens into a unified embedding space:</p>
<disp-formula id="EQ7"><mml:math id="M24"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>e</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msup><mml:mrow><mml:mi>w</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x02113;</mml:mi></mml:mrow></mml:msup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:msub><mml:mrow><mml:mi>e</mml:mi></mml:mrow><mml:mrow><mml:mn>0</mml:mn></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>A</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msup><mml:mrow><mml:mi>w</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x02113;</mml:mi></mml:mrow></mml:msup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>&#x02208;</mml:mo><mml:msup><mml:mrow><mml:mi>&#x0211D;</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msup><mml:mo>,</mml:mo><mml:mtext>&#x02003;</mml:mtext><mml:mo>&#x02200;</mml:mo><mml:mi>&#x02113;</mml:mi><mml:mo>,</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(7)</label></disp-formula>
<p>allowing sentiment features to be transferable across languages.</p>
<p>In the case of aspect-based sentiment analysis, additional aspect terms <inline-formula><mml:math id="M25"><mml:mi>a</mml:mi><mml:mo>&#x02208;</mml:mo><mml:mrow><mml:mi mathvariant="script">A</mml:mi></mml:mrow></mml:math></inline-formula> are incorporated into the model, transforming the task into modeling <italic>p</italic>(<italic>y</italic>&#x02223;<italic>x, a</italic>). The encoding function <italic>h</italic> is extended to:</p>
<disp-formula id="EQ8"><mml:math id="M26"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>z</mml:mi><mml:mo>=</mml:mo><mml:mi>h</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>x</mml:mi><mml:mo>,</mml:mo><mml:mi>a</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mtext class="textrm" mathvariant="normal">Attn</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>X</mml:mi><mml:mo>,</mml:mo><mml:mi>a</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>,</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(8)</label></disp-formula>
<p>where Attn represents an attention mechanism that selectively focuses on segments of <italic>x</italic> relevant to the aspect <italic>a</italic>.</p>
<p>These definitions provide the foundational setup for modeling sentiment under diverse linguistic and structural assumptions, facilitating subsequent methodological innovations presented in the following sections.</p>
</sec>
<sec>
<label>3.3</label>
<title>Sentiment Modulated Encoding Network</title>
<p>I propose the <italic>Sentiment Modulated Encoding Network</italic> (SMEN), a neural architecture designed to model sentiment-rich representations by integrating sentiment cues into the representation learning process. Unlike conventional approaches that treat sentiment as a downstream classification target, SMEN modulates internal encoding dynamics via explicit sentiment signal pathways, enhancing sensitivity to affective content and robustness to domain shifts (as shown in <xref ref-type="fig" rid="F1">Figure 1</xref>).</p>
<fig position="float" id="F1">
<label>Figure 1</label>
<caption><p>Sentiment Modulated Encoding Network (SMEN) architecture. The model integrates Domain-Informed Data Processing, gated attention pooling, sentiment-modulated Transformer layers with gating mechanisms, and polarity regularization with contrastive learning for robust sentiment representation.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpubh-13-1663871-g0001.tif">
<alt-text content-type="machine-generated">Diagram of a neural network architecture for sentence representation and sentiment modulation. It includes components like Domain-Informed Data Processing (DIDP), Gated Attention Pooling, and Dynamic Contextual Attention (DCA). The process involves warehouses, layers, transformers, and stages, focusing on polarity regularization and contrastive learning.</alt-text>
</graphic>
</fig>
<sec>
<label>3.3.1</label>
<title>Sentiment modulated transformer layers</title>
<p>Let <italic>x</italic> &#x0003D; (<italic>w</italic><sub>1</sub>, <italic>w</italic><sub>2</sub>, &#x02026;, <italic>w</italic><sub><italic>T</italic></sub>) be a tokenized input sequence. Each token is mapped to its embedding vector via <inline-formula><mml:math id="M27"><mml:mi>e</mml:mi><mml:mo>:</mml:mo><mml:mrow><mml:mi mathvariant="script">V</mml:mi></mml:mrow><mml:mo>&#x02192;</mml:mo><mml:msup><mml:mrow><mml:mi>&#x0211D;</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msup></mml:math></inline-formula>, yielding the embedded sequence <inline-formula><mml:math id="M28"><mml:mi>X</mml:mi><mml:mo>=</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mo>&#x02026;</mml:mo><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>T</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>&#x02208;</mml:mo><mml:msup><mml:mrow><mml:mi>&#x0211D;</mml:mi></mml:mrow><mml:mrow><mml:mi>T</mml:mi><mml:mo>&#x000D7;</mml:mo><mml:mi>d</mml:mi></mml:mrow></mml:msup></mml:math></inline-formula>. SMEN processes this sequence through a stack of sentiment-aware transformer layers, where each layer employs modulation mechanisms to adaptively encode sentiment-relevant information. For each layer <italic>l</italic> and token position <italic>t</italic>, a sentiment modulation gate is defined as:</p>
<disp-formula id="EQ9"><mml:math id="M29"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msubsup><mml:mrow><mml:mi>&#x003B3;</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>l</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:mi>&#x003C3;</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msubsup><mml:mrow><mml:mi>W</mml:mi></mml:mrow><mml:mrow><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>l</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:msubsup><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>l</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>&#x0002B;</mml:mo><mml:msubsup><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>l</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(9)</label></disp-formula>
<p>where <inline-formula><mml:math id="M30"><mml:msubsup><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>l</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:math></inline-formula> is the token representation at position <italic>t</italic> in layer <italic>l</italic>, <inline-formula><mml:math id="M31"><mml:msubsup><mml:mrow><mml:mi>W</mml:mi></mml:mrow><mml:mrow><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>l</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>&#x02208;</mml:mo><mml:msup><mml:mrow><mml:mi>&#x0211D;</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn><mml:mo>&#x000D7;</mml:mo><mml:mi>d</mml:mi></mml:mrow></mml:msup></mml:math></inline-formula> and <inline-formula><mml:math id="M32"><mml:msubsup><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>l</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>&#x02208;</mml:mo><mml:mi>&#x0211D;</mml:mi></mml:math></inline-formula> are learned parameters, and &#x003C3; denotes the sigmoid function. This gate scales the attention output:</p>
<disp-formula id="EQ10"><mml:math id="M33"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msubsup><mml:mrow><mml:mover accent="true"><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mo>&#x0007E;</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>l</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:msubsup><mml:mrow><mml:mi>&#x003B3;</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>l</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>&#x000B7;</mml:mo><mml:mtext class="textrm" mathvariant="normal">MHAttn</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msubsup><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>l</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:msup><mml:mrow><mml:mi>X</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>l</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msup><mml:mo>,</mml:mo><mml:msup><mml:mrow><mml:mi>X</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>l</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>&#x0002B;</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>1</mml:mn><mml:mo>-</mml:mo><mml:msubsup><mml:mrow><mml:mi>&#x003B3;</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>l</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>&#x000B7;</mml:mo><mml:msubsup><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>l</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(10)</label></disp-formula>
<p>where MHAttn represents multi-head attention. The gating mechanism adjusts sentiment-relevant features dynamically, amplifying or suppressing affect-bearing tokens. Following attention layers, sentiment modulation persists in the feed-forward sublayer:</p>
<disp-formula id="EQ11"><mml:math id="M34"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msubsup><mml:mrow><mml:mover accent="true"><mml:mrow><mml:mi>h</mml:mi></mml:mrow><mml:mo>&#x0007E;</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>l</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:msubsup><mml:mrow><mml:mi>&#x003B3;</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>l</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>&#x000B7;</mml:mo><mml:mtext class="textrm" mathvariant="normal">FFN</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msubsup><mml:mrow><mml:mover accent="true"><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mo>&#x0007E;</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>l</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>&#x0002B;</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>1</mml:mn><mml:mo>-</mml:mo><mml:msubsup><mml:mrow><mml:mi>&#x003B3;</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>l</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>&#x000B7;</mml:mo><mml:msubsup><mml:mrow><mml:mover accent="true"><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mo>&#x0007E;</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>l</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(11)</label></disp-formula>
<p>where FFN is a position-wise feed-forward network. This recurrence ensures sentiment-aware adjustment across network depth.</p></sec>
<sec>
<label>3.3.2</label>
<title>Gated attention pooling for sentence representations</title>
<p>The output of the final layer <italic>L</italic> is denoted <inline-formula><mml:math id="M35"><mml:msup><mml:mrow><mml:mi>H</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>L</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msup><mml:mo>=</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msubsup><mml:mrow><mml:mover accent="true"><mml:mrow><mml:mi>h</mml:mi></mml:mrow><mml:mo>&#x0007E;</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>L</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:mo>&#x02026;</mml:mo><mml:mo>,</mml:mo><mml:msubsup><mml:mrow><mml:mover accent="true"><mml:mrow><mml:mi>h</mml:mi></mml:mrow><mml:mo>&#x0007E;</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>T</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>L</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula>. To derive a sentence-level representation <italic>z</italic>, a gated attention pooling mechanism is introduced:</p>
<disp-formula id="EQ12"><mml:math id="M36"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mi>&#x003B1;</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mo class="qopname">exp</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msup><mml:mrow><mml:mi>u</mml:mi></mml:mrow><mml:mrow><mml:mo>&#x022A4;</mml:mo></mml:mrow></mml:msup><mml:mo class="qopname">tanh</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>W</mml:mi></mml:mrow><mml:mrow><mml:mi>h</mml:mi></mml:mrow></mml:msub><mml:msubsup><mml:mrow><mml:mover accent="true"><mml:mrow><mml:mi>h</mml:mi></mml:mrow><mml:mo class="qopname">&#x0007E;</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>L</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>&#x0002B;</mml:mo><mml:msub><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mi>h</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mstyle displaystyle="true"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>j</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>T</mml:mi></mml:mrow></mml:munderover></mml:mstyle><mml:mo class="qopname">exp</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msup><mml:mrow><mml:mi>u</mml:mi></mml:mrow><mml:mrow><mml:mo>&#x022A4;</mml:mo></mml:mrow></mml:msup><mml:mo class="qopname">tanh</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>W</mml:mi></mml:mrow><mml:mrow><mml:mi>h</mml:mi></mml:mrow></mml:msub><mml:msubsup><mml:mrow><mml:mover accent="true"><mml:mrow><mml:mi>h</mml:mi></mml:mrow><mml:mo class="qopname">&#x0007E;</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>j</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>L</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>&#x0002B;</mml:mo><mml:msub><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mi>h</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:mfrac></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(12)</label></disp-formula>
<disp-formula id="EQ13"><mml:math id="M37"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>z</mml:mi><mml:mo>=</mml:mo><mml:mstyle displaystyle="true"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>t</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>T</mml:mi></mml:mrow></mml:munderover></mml:mstyle><mml:msub><mml:mrow><mml:mi>&#x003B1;</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:msubsup><mml:mrow><mml:mover accent="true"><mml:mrow><mml:mi>h</mml:mi></mml:mrow><mml:mo>&#x0007E;</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>L</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(13)</label></disp-formula>
<p>where <inline-formula><mml:math id="M38"><mml:msub><mml:mrow><mml:mi>W</mml:mi></mml:mrow><mml:mrow><mml:mi>h</mml:mi></mml:mrow></mml:msub><mml:mo>&#x02208;</mml:mo><mml:msup><mml:mrow><mml:mi>&#x0211D;</mml:mi></mml:mrow><mml:mrow><mml:mi>k</mml:mi><mml:mo>&#x000D7;</mml:mo><mml:mi>d</mml:mi></mml:mrow></mml:msup></mml:math></inline-formula>, <italic>u</italic>&#x02208;&#x0211D;<sup><italic>k</italic></sup>, and <inline-formula><mml:math id="M39"><mml:msub><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mi>h</mml:mi></mml:mrow></mml:msub><mml:mo>&#x02208;</mml:mo><mml:msup><mml:mrow><mml:mi>&#x0211D;</mml:mi></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:msup></mml:math></inline-formula> are trainable parameters. This pooling mechanism enables flexible aggregation of sentiment-relevant features into a single vector <italic>z</italic>, encoding the semantic and affective content of <italic>x</italic>. The representation is passed to a prediction head &#x003D5; for classification:</p>
<disp-formula id="EQ14"><mml:math id="M40"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>&#x00177;</mml:mi><mml:mo>=</mml:mo><mml:mi>&#x003D5;</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mtext class="textrm" mathvariant="normal">softmax</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>W</mml:mi></mml:mrow><mml:mrow><mml:mi>o</mml:mi></mml:mrow></mml:msub><mml:mi>z</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:msub><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mi>o</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(14)</label></disp-formula>
<p>where <inline-formula><mml:math id="M41"><mml:msub><mml:mrow><mml:mi>W</mml:mi></mml:mrow><mml:mrow><mml:mi>o</mml:mi></mml:mrow></mml:msub><mml:mo>&#x02208;</mml:mo><mml:msup><mml:mrow><mml:mi>&#x0211D;</mml:mi></mml:mrow><mml:mrow><mml:mo>|</mml:mo><mml:mrow><mml:mi mathvariant="script">Y</mml:mi></mml:mrow><mml:mo>|</mml:mo><mml:mo>&#x000D7;</mml:mo><mml:mi>d</mml:mi></mml:mrow></mml:msup></mml:math></inline-formula> and <inline-formula><mml:math id="M42"><mml:msub><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mi>o</mml:mi></mml:mrow></mml:msub><mml:mo>&#x02208;</mml:mo><mml:msup><mml:mrow><mml:mi>&#x0211D;</mml:mi></mml:mrow><mml:mrow><mml:mo>|</mml:mo><mml:mrow><mml:mi mathvariant="script">Y</mml:mi></mml:mrow><mml:mo>|</mml:mo></mml:mrow></mml:msup></mml:math></inline-formula>.</p></sec>
<sec>
<label>3.3.3</label>
<title>Polarity regularization and contrastive learning</title>
<p>To refine sentiment-sensitive features, polarity regularization is employed. Let <inline-formula><mml:math id="M43"><mml:msub><mml:mrow><mml:mrow><mml:mi mathvariant="script">V</mml:mi></mml:mrow></mml:mrow><mml:mrow><mml:mi>s</mml:mi></mml:mrow></mml:msub><mml:mo>&#x02282;</mml:mo><mml:mrow><mml:mi mathvariant="script">V</mml:mi></mml:mrow></mml:math></inline-formula> denote a sentiment lexicon, and for any token <inline-formula><mml:math id="M44"><mml:msub><mml:mrow><mml:mi>w</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>&#x02208;</mml:mo><mml:msub><mml:mrow><mml:mrow><mml:mi mathvariant="script">V</mml:mi></mml:mrow></mml:mrow><mml:mrow><mml:mi>s</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula>, its representation <inline-formula><mml:math id="M45"><mml:msubsup><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>L</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:math></inline-formula> should reflect its predefined polarity <italic>p</italic>(<italic>w</italic><sub><italic>t</italic></sub>)&#x02208;{&#x02212;1, &#x0002B;1}. This constraint is enforced via:</p>
<disp-formula id="EQ15"><mml:math id="M46"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mrow><mml:mi mathvariant="script">L</mml:mi></mml:mrow></mml:mrow><mml:mrow><mml:mtext class="textrm" mathvariant="normal">pol</mml:mtext></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mstyle displaystyle="true"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>t</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>T</mml:mi></mml:mrow></mml:munderover></mml:mstyle><mml:mi>I</mml:mi><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>w</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>&#x02208;</mml:mo><mml:msub><mml:mrow><mml:mrow><mml:mi mathvariant="script">V</mml:mi></mml:mrow></mml:mrow><mml:mrow><mml:mi>s</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>]</mml:mo></mml:mrow><mml:mo>&#x000B7;</mml:mo><mml:msup><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mtext class="textrm" mathvariant="normal">sign</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>W</mml:mi></mml:mrow><mml:mrow><mml:mi>o</mml:mi></mml:mrow></mml:msub><mml:msubsup><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>L</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>-</mml:mo><mml:mi>p</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>w</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(15)</label></disp-formula>
<p>This introduces a polarity regularization term that enforces alignment between token-level model outputs and prior sentiment knowledge. Here, <italic>V</italic><sub><italic>s</italic></sub> represents a sentiment lexicon&#x02014;a predefined set of words known to carry sentiment polarity. For each token <italic>w</italic><sub><italic>t</italic></sub> in the sequence, if it belongs to the sentiment lexicon (<italic>w</italic><sub><italic>t</italic></sub>&#x02208;<italic>V</italic><sub><italic>s</italic></sub>), we expect its final-layer representation <inline-formula><mml:math id="M47"><mml:msubsup><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>L</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:math></inline-formula> to reflect its known polarity <italic>p</italic>(<italic>w</italic><sub><italic>t</italic></sub>)&#x02208;&#x02212;1, &#x0002B;1. The sign function <inline-formula><mml:math id="M48"><mml:mstyle class="text"><mml:mtext class="textrm" mathvariant="normal">sign</mml:mtext></mml:mstyle><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>W</mml:mi></mml:mrow><mml:mrow><mml:mi>o</mml:mi></mml:mrow></mml:msub><mml:msubsup><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>L</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula> estimates the model&#x00027;s inferred polarity, and the squared error penalizes mismatches. This loss term anchors the model&#x00027;s internal representations to linguistically interpretable sentiment priors, encouraging semantic consistency and interpretability at the token level.</p>
<p>A contrastive loss <inline-formula><mml:math id="M49"><mml:msub><mml:mrow><mml:mrow><mml:mi mathvariant="script">L</mml:mi></mml:mrow></mml:mrow><mml:mrow><mml:mstyle class="text"><mml:mtext class="textrm" mathvariant="normal">con</mml:mtext></mml:mstyle></mml:mrow></mml:msub></mml:math></inline-formula> is introduced between positive and negative sentence pairs (<italic>x</italic><sup>&#x0002B;</sup>, <italic>x</italic><sup>&#x02212;</sup>) to optimize sentiment separation:</p>
<disp-formula id="EQ16"><mml:math id="M50"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mrow><mml:mi mathvariant="script">L</mml:mi></mml:mrow></mml:mrow><mml:mrow><mml:mtext class="textrm" mathvariant="normal">con</mml:mtext></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mo class="qopname">max</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:mi>m</mml:mi><mml:mo>-</mml:mo><mml:mo>|</mml:mo><mml:mo>|</mml:mo><mml:msup><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mo>&#x0002B;</mml:mo></mml:mrow></mml:msup><mml:mo>-</mml:mo><mml:msup><mml:mrow><mml:mi>c</mml:mi></mml:mrow><mml:mrow><mml:mo>&#x0002B;</mml:mo></mml:mrow></mml:msup><mml:mo>|</mml:mo><mml:msubsup><mml:mrow><mml:mo>|</mml:mo></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msubsup><mml:mo>&#x0002B;</mml:mo><mml:mo>|</mml:mo><mml:mo>|</mml:mo><mml:msup><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mo>&#x0002B;</mml:mo></mml:mrow></mml:msup><mml:mo>-</mml:mo><mml:msup><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mo>-</mml:mo></mml:mrow></mml:msup><mml:mo>|</mml:mo><mml:msubsup><mml:mrow><mml:mo>|</mml:mo></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msubsup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(16)</label></disp-formula>
<p>where <italic>c</italic><sup>&#x0002B;</sup> is the class prototype for positive sentiment and <italic>m</italic> is a fixed margin. This defines a contrastive loss that encourages the model to cluster representations of similar sentiment while separating those with opposing polarity. <italic>z</italic><sup>&#x0002B;</sup> and <italic>z</italic><sup>&#x02212;</sup> denote sentence-level embeddings of positive and negative examples, respectively, and <italic>c</italic><sup>&#x0002B;</sup> is a prototype vector representing the center of the positive sentiment cluster. The term <inline-formula><mml:math id="M51"><mml:mo>|</mml:mo><mml:msup><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mo>&#x0002B;</mml:mo></mml:mrow></mml:msup><mml:mo>-</mml:mo><mml:msup><mml:mrow><mml:mi>c</mml:mi></mml:mrow><mml:mrow><mml:mo>&#x0002B;</mml:mo></mml:mrow></mml:msup><mml:msubsup><mml:mrow><mml:mo>|</mml:mo></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msubsup></mml:math></inline-formula> measures the closeness of a positive instance to its prototype, while <inline-formula><mml:math id="M52"><mml:mo>|</mml:mo><mml:msup><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mo>&#x0002B;</mml:mo></mml:mrow></mml:msup><mml:mo>-</mml:mo><mml:msup><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mo>-</mml:mo></mml:mrow></mml:msup><mml:msubsup><mml:mrow><mml:mo>|</mml:mo></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msubsup></mml:math></inline-formula> computes the distance between positive and negative pairs. A margin <italic>m</italic> ensures that dissimilar pairs are sufficiently separated. This loss promotes discriminative feature learning and enhances sentiment-specific clustering in the embedding space.</p>
<p>The total training objective combines cross-entropy loss with polarity and contrastive regularization:</p>
<disp-formula id="EQ17"><mml:math id="M53"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mrow><mml:mi mathvariant="script">L</mml:mi></mml:mrow><mml:mo>=</mml:mo><mml:msub><mml:mrow><mml:mrow><mml:mi mathvariant="script">L</mml:mi></mml:mrow></mml:mrow><mml:mrow><mml:mtext class="textrm" mathvariant="normal">CE</mml:mtext></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:msub><mml:mrow><mml:mi>&#x003BB;</mml:mi></mml:mrow><mml:mrow><mml:mtext class="textrm" mathvariant="normal">pol</mml:mtext></mml:mrow></mml:msub><mml:msub><mml:mrow><mml:mrow><mml:mi mathvariant="script">L</mml:mi></mml:mrow></mml:mrow><mml:mrow><mml:mtext class="textrm" mathvariant="normal">pol</mml:mtext></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:msub><mml:mrow><mml:mi>&#x003BB;</mml:mi></mml:mrow><mml:mrow><mml:mtext class="textrm" mathvariant="normal">con</mml:mtext></mml:mrow></mml:msub><mml:msub><mml:mrow><mml:mrow><mml:mi mathvariant="script">L</mml:mi></mml:mrow></mml:mrow><mml:mrow><mml:mtext class="textrm" mathvariant="normal">con</mml:mtext></mml:mrow></mml:msub></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(17)</label></disp-formula>
<p>where &#x003BB;<sub>pol</sub> and &#x003BB;<sub>con</sub> are hyperparameters (as shown in <xref ref-type="fig" rid="F2">Figure 2</xref>).</p>
<fig position="float" id="F2">
<label>Figure 2</label>
<caption><p>Illustrating the polarity regularization and contrastive learning framework. This multi-domain activity recognition architecture integrates polarity regularization and contrastive learning with convolutional feature encoding, polarity-sensitive feature extraction, a polarity regularization module, and contrastive learning memory to achieve robust cross-domain motion classification.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpubh-13-1663871-g0002.tif">
<alt-text content-type="machine-generated">Diagram of a neural network model for activity and sentiment classification across multiple domains. Features flow through convolutional layers and max pooling. Modules handle polarity, semantic features, and cognitive interaction. Outputs include a list of activities: walking, running, upstairs, and sitting, connected to a polarity regularization module. A color-coded legend differentiates components.</alt-text>
</graphic>
</fig>
<p>This equation combines the primary cross-entropy loss <inline-formula><mml:math id="M54"><mml:mrow><mml:mi mathvariant="script">L</mml:mi></mml:mrow><mml:mstyle class="text"><mml:mtext class="textrm" mathvariant="normal">CE</mml:mtext></mml:mstyle></mml:math></inline-formula> with two auxiliary objectives: polarity regularization and contrastive learning. The coefficients &#x003BB;pol and &#x003BB;<sub>con</sub> control the relative importance of the two regularizers. Together, these components ensure that the model not only predicts the correct sentiment label but also learns interpretable and structured representations that reflect known sentiment polarity and maintain inter-class separability. This multi-objective formulation enhances the robustness, generalizability, and explainability of the proposed sentiment encoding framework.</p>
</sec>
</sec>
<sec>
<label>3.4</label>
<title>Contextual polarity decoupling scheme</title>
<p>To enhance the generalization and interpretability of sentiment representations across domains, we introduce the <bold>Domain-Adversarial Contrastive Framework</bold>, <bold>Attribution-Guided Regularization Design</bold>, and <bold>Cross-Domain Sentiment Consistency Mechanism</bold>. These components collectively constitute the <italic>Contextual Polarity Decoupling Scheme</italic> (CPDS), a training paradigm that explicitly decouples sentiment expression from topic and domain-specific signals, integrating adversarial training, contrastive supervision, and attribution-guided mechanisms to produce sentiment-invariant representations while maintaining semantic fidelity (as shown in <xref ref-type="fig" rid="F3">Figure 3</xref>).</p>
<fig position="float" id="F3">
<label>Figure 3</label>
<caption><p>Overall architecture of the contextual polarity decoupling scheme (CPDS). The diagram illustrates the integrated workflow of the main CNN-Transformer encoder, the Domain-Adversarial Contrastive Framework, the Attribution-Guided Regularization Design, and the Cross-Domain Sentiment Consistency Mechanism. The system combines convolutional and transformer blocks, feature projection, adversarial and contrastive losses, attribution-based regularization, and cross-domain alignment loss to learn domain-invariant, interpretable, and consistent sentiment representations. Decoders at multiple stages generate outputs that enable robust and transferable sentiment analysis across domains.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpubh-13-1663871-g0003.tif">
<alt-text content-type="machine-generated">Flowchart illustrating a complex AI framework. It shows a sequence of steps starting with a stem block, followed by a series of convolutional and CF blocks. It features transformers, decoders, and domain-adversarial frameworks. The process highlights semantic alignment, projection, and regularization elements, with corresponding graphical representations.</alt-text>
</graphic>
</fig>
<sec>
<label>3.4.1</label>
<title>Domain-adversarial contrastive framework</title>
<p>To enforce domain invariance and sentiment discrimination simultaneously, CPDS employs a domain discriminator <inline-formula><mml:math id="M55"><mml:msub><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mo>:</mml:mo><mml:msup><mml:mrow><mml:mi>&#x0211D;</mml:mi></mml:mrow><mml:mrow><mml:mi>h</mml:mi></mml:mrow></mml:msup><mml:mo>&#x02192;</mml:mo><mml:msup><mml:mrow><mml:mi>&#x00394;</mml:mi></mml:mrow><mml:mrow><mml:mo>|</mml:mo><mml:mrow><mml:mi mathvariant="script">D</mml:mi></mml:mrow><mml:mo>|</mml:mo></mml:mrow></mml:msup></mml:math></inline-formula> alongside a contrastive learning objective. Let each input sample be represented as (<italic>x, y, d</italic>) where <italic>x</italic> is the text, <inline-formula><mml:math id="M56"><mml:mi>y</mml:mi><mml:mo>&#x02208;</mml:mo><mml:mrow><mml:mi mathvariant="script">Y</mml:mi></mml:mrow></mml:math></inline-formula> is the sentiment label, and <inline-formula><mml:math id="M57"><mml:mi>d</mml:mi><mml:mo>&#x02208;</mml:mo><mml:mrow><mml:mi mathvariant="script">D</mml:mi></mml:mrow></mml:math></inline-formula> is the domain label. The sentiment representation <italic>z</italic> &#x0003D; <italic>h</italic>(<italic>x</italic>) is obtained from the encoder. The adversarial objective minimizes sentiment loss while maximizing domain confusion:</p>
<disp-formula id="EQ18"><mml:math id="M58"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mrow><mml:mi mathvariant="script">L</mml:mi></mml:mrow></mml:mrow><mml:mrow><mml:mtext class="textrm" mathvariant="normal">adv</mml:mtext></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mrow><mml:mi>E</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>x</mml:mi><mml:mo>,</mml:mo><mml:mi>d</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msub><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mstyle displaystyle="true"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>j</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mo>|</mml:mo><mml:mrow><mml:mi mathvariant="script">D</mml:mi></mml:mrow><mml:mo>|</mml:mo></mml:mrow></mml:munderover></mml:mstyle><mml:mi>I</mml:mi><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mi>d</mml:mi><mml:mo>=</mml:mo><mml:mi>j</mml:mi></mml:mrow><mml:mo>]</mml:mo></mml:mrow><mml:mo>&#x000B7;</mml:mo><mml:mo class="qopname">log</mml:mo><mml:msub><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:msub><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>h</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>]</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(18)</label></disp-formula>
<p>Gradient reversal is applied to the encoder during training, reversing the gradients of <inline-formula><mml:math id="M59"><mml:msub><mml:mrow><mml:mrow><mml:mi mathvariant="script">L</mml:mi></mml:mrow></mml:mrow><mml:mrow><mml:mstyle class="text"><mml:mtext class="textrm" mathvariant="normal">adv</mml:mtext></mml:mstyle></mml:mrow></mml:msub></mml:math></inline-formula> to maximize domain classification error, thereby encouraging domain-invariant representations.</p>
<p>The contrastive learning objective further refines sentiment discrimination by aligning samples of the same sentiment and separating those of different sentiments. For each anchor <italic>x</italic><sub><italic>i</italic></sub> with label <italic>y</italic><sub><italic>i</italic></sub>, the loss is defined as:</p>
<disp-formula id="EQ19"><mml:math id="M60"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mrow><mml:mi mathvariant="script">L</mml:mi></mml:mrow></mml:mrow><mml:mrow><mml:mtext class="textrm" mathvariant="normal">contrast</mml:mtext></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mstyle displaystyle="true"><mml:munder class="msub"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>j</mml:mi></mml:mrow></mml:munder></mml:mstyle><mml:mi>I</mml:mi><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>y</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mrow><mml:mi>y</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>]</mml:mo></mml:mrow><mml:mo>&#x000B7;</mml:mo><mml:mo>|</mml:mo><mml:mo>|</mml:mo><mml:msub><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>-</mml:mo><mml:msub><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>|</mml:mo><mml:msubsup><mml:mrow><mml:mo>|</mml:mo></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msubsup><mml:mo>&#x0002B;</mml:mo><mml:mi>I</mml:mi><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>y</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x02260;</mml:mo><mml:msub><mml:mrow><mml:mi>y</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>]</mml:mo></mml:mrow><mml:mo>&#x000B7;</mml:mo><mml:mo class="qopname">max</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:mi>m</mml:mi><mml:mo>-</mml:mo><mml:mo>|</mml:mo><mml:mo>|</mml:mo><mml:msub><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>-</mml:mo><mml:msub><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>|</mml:mo><mml:msubsup><mml:mrow><mml:mo>|</mml:mo></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msubsup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(19)</label></disp-formula>
<p>where <italic>m</italic> is a margin hyperparameter ensuring intra-class compactness and inter-class separability. These components synergistically enforce domain adversarial sentiment invariance while maintaining sentiment alignment.</p></sec>
<sec>
<label>3.4.2</label>
<title>Attribution-guided regularization design</title>
<p>To enhance interpretability and guide the model to focus on sentiment-bearing signals, CPDS incorporates an attribution-guided regularization mechanism. Let <italic>A</italic>(<italic>x</italic>) &#x0003D; (&#x003B1;<sub>1</sub>, &#x02026;, &#x003B1;<sub><italic>T</italic></sub>) denote attribution scores obtained from gradient-based methods over input tokens <italic>w</italic><sub>1</sub>, &#x02026;, <italic>w</italic><sub><italic>T</italic></sub>. A regularization term is defined to align high-attribution tokens with sentiment lexicon terms:</p>
<disp-formula id="EQ20"><mml:math id="M61"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mrow><mml:mi mathvariant="script">L</mml:mi></mml:mrow></mml:mrow><mml:mrow><mml:mtext class="textrm" mathvariant="normal">attr</mml:mtext></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mstyle displaystyle="true"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>t</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>T</mml:mi></mml:mrow></mml:munderover></mml:mstyle><mml:mi>I</mml:mi><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>w</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>&#x02208;</mml:mo><mml:msub><mml:mrow><mml:mrow><mml:mi mathvariant="script">V</mml:mi></mml:mrow></mml:mrow><mml:mrow><mml:mi>s</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>]</mml:mo></mml:mrow><mml:mo>&#x000B7;</mml:mo><mml:msup><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>&#x003B1;</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>-</mml:mo><mml:mi>&#x003B7;</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(20)</label></disp-formula>
<p>where <inline-formula><mml:math id="M62"><mml:msub><mml:mrow><mml:mrow><mml:mi mathvariant="script">V</mml:mi></mml:mrow></mml:mrow><mml:mrow><mml:mi>s</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> represents the set of known sentiment-bearing words and &#x003B7; is a target attribution weight. This mechanism encourages the model to prioritize sentiment lexicon terms during prediction, ensuring that sentiment representations capture the most relevant and interpretable features of the input.</p>
<p>The integration of attribution scores with CPDS enables fine-grained attention to sentiment-rich regions within text, reinforcing semantic fidelity. This regularization design provides an explicit mechanism to align model focus with human-understandable sentiment cues.</p></sec>
<sec>
<label>3.4.3</label>
<title>Cross-domain sentiment consistency mechanism</title>
<p>To ensure consistency in sentiment representation across domains, CPDS introduces a sentence-level alignment loss. Let <inline-formula><mml:math id="M63"><mml:msubsup><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msubsup></mml:math></inline-formula> and <inline-formula><mml:math id="M64"><mml:msubsup><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:msup><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x02032;</mml:mi></mml:mrow></mml:msup></mml:mrow></mml:msubsup></mml:math></inline-formula> represent the same sentence <italic>x</italic><sub><italic>i</italic></sub> under two domains <italic>d</italic> and <italic>d</italic>&#x02032;, possibly augmented through domain-specific transformations. The alignment loss is defined as:</p>
<disp-formula id="EQ21"><mml:math id="M65"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mrow><mml:mi mathvariant="script">L</mml:mi></mml:mrow></mml:mrow><mml:mrow><mml:mtext class="textrm" mathvariant="normal">align</mml:mtext></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mstyle displaystyle="true"><mml:munder class="msub"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:munder></mml:mstyle><mml:mo>|</mml:mo><mml:mo>|</mml:mo><mml:msubsup><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msubsup><mml:mo>-</mml:mo><mml:msubsup><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:msup><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x02032;</mml:mi></mml:mrow></mml:msup></mml:mrow></mml:msubsup><mml:mo>|</mml:mo><mml:msubsup><mml:mrow><mml:mo>|</mml:mo></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msubsup></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(21)</label></disp-formula>
<p>This loss encourages the encoder to preserve sentiment semantics under domain variation, ensuring robustness and transferability of sentiment representations. By aligning representations across domains, CPDS mitigates domain-specific biases and promotes generalization (as shown in <xref ref-type="fig" rid="F4">Figure 4</xref>).</p>
<fig position="float" id="F4">
<label>Figure 4</label>
<caption><p>Cross-domain sentiment consistency mechanism architecture diagram. This mechanism enforces consistent sentence representations across domains using a block-level encoder and domain-stable normalization modules. The left part introduces domain-invariant projection, cross-attention, and domain embeddings to align contextual information. The right branch employs domain-stable normalization, task-specific projection, and an alignment loss (<inline-formula><mml:math id="M66"><mml:msub><mml:mrow><mml:mrow><mml:mi mathvariant="script">L</mml:mi></mml:mrow></mml:mrow><mml:mrow><mml:mstyle class="text"><mml:mtext class="textrm" mathvariant="normal">align</mml:mtext></mml:mstyle></mml:mrow></mml:msub></mml:math></inline-formula>) to strengthen sentiment semantic consistency across domains. The overall training objective combines cross-entropy, adversarial, contrastive, attribution-guided, and alignment losses to improve the robustness and generalization of cross-domain sentiment prediction.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpubh-13-1663871-g0004.tif">
<alt-text content-type="machine-generated">Diagram illustrating a cross-domain sentiment prediction model. It includes components like domain-invariant projection, self-attention, cross-attention, and alignment-enhanced projection. MLP and domain embeddings contribute to output token embeddings. The right section features domain-stable normalization, attention, GELU activation, and linear projections, leading to a logit selector and softmax for prediction.</alt-text>
</graphic>
</fig>
<p>The final composite training objective combines these components with task-specific supervision:</p>
<disp-formula id="EQ22"><mml:math id="M67"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mrow><mml:mi mathvariant="script">L</mml:mi></mml:mrow><mml:mo>=</mml:mo><mml:msub><mml:mrow><mml:mrow><mml:mi mathvariant="script">L</mml:mi></mml:mrow></mml:mrow><mml:mrow><mml:mtext class="textrm" mathvariant="normal">CE</mml:mtext></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:msub><mml:mrow><mml:mi>&#x003BB;</mml:mi></mml:mrow><mml:mrow><mml:mtext class="textrm" mathvariant="normal">adv</mml:mtext></mml:mrow></mml:msub><mml:msub><mml:mrow><mml:mrow><mml:mi mathvariant="script">L</mml:mi></mml:mrow></mml:mrow><mml:mrow><mml:mtext class="textrm" mathvariant="normal">adv</mml:mtext></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:msub><mml:mrow><mml:mi>&#x003BB;</mml:mi></mml:mrow><mml:mrow><mml:mtext class="textrm" mathvariant="normal">contrast</mml:mtext></mml:mrow></mml:msub><mml:msub><mml:mrow><mml:mrow><mml:mi mathvariant="script">L</mml:mi></mml:mrow></mml:mrow><mml:mrow><mml:mtext class="textrm" mathvariant="normal">contrast</mml:mtext></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:msub><mml:mrow><mml:mi>&#x003BB;</mml:mi></mml:mrow><mml:mrow><mml:mtext class="textrm" mathvariant="normal">attr</mml:mtext></mml:mrow></mml:msub><mml:msub><mml:mrow><mml:mrow><mml:mi mathvariant="script">L</mml:mi></mml:mrow></mml:mrow><mml:mrow><mml:mtext class="textrm" mathvariant="normal">attr</mml:mtext></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:msub><mml:mrow><mml:mi>&#x003BB;</mml:mi></mml:mrow><mml:mrow><mml:mtext class="textrm" mathvariant="normal">align</mml:mtext></mml:mrow></mml:msub><mml:msub><mml:mrow><mml:mrow><mml:mi mathvariant="script">L</mml:mi></mml:mrow></mml:mrow><mml:mrow><mml:mtext class="textrm" mathvariant="normal">align</mml:mtext></mml:mrow></mml:msub></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(22)</label></disp-formula>
<p>where <inline-formula><mml:math id="M68"><mml:msub><mml:mrow><mml:mrow><mml:mi mathvariant="script">L</mml:mi></mml:mrow></mml:mrow><mml:mrow><mml:mstyle class="text"><mml:mtext class="textrm" mathvariant="normal">CE</mml:mtext></mml:mstyle></mml:mrow></mml:msub></mml:math></inline-formula> represents the cross-entropy loss on sentiment labels, and &#x003BB; terms are tunable coefficients. Training alternates between updating the encoder to minimize <inline-formula><mml:math id="M69"><mml:mrow><mml:mi mathvariant="script">L</mml:mi></mml:mrow></mml:math></inline-formula> and the domain discriminator to minimize <inline-formula><mml:math id="M70"><mml:msub><mml:mrow><mml:mrow><mml:mi mathvariant="script">L</mml:mi></mml:mrow></mml:mrow><mml:mrow><mml:mstyle class="text"><mml:mtext class="textrm" mathvariant="normal">adv</mml:mtext></mml:mstyle></mml:mrow></mml:msub></mml:math></inline-formula>, maintaining adversarial tension and optimizing sentiment consistency.</p>
<p>These innovations collectively enhance the robustness, interpretability, and transferability of sentiment models across varied domains and contexts. The integration of adversarial, contrastive, attribution-guided, and alignment mechanisms establishes CPDS as a comprehensive framework for fine-grained sentiment understanding.</p>
</sec>
</sec>
</sec>
<sec id="s4">
<label>4</label>
<title>Experimental setup</title>
<sec>
<label>4.1</label>
<title>Dataset</title>
<p>To resolve potential concerns regarding dataset selection and its alignment with the study&#x00027;s focus on healthcare sentiment analysis, it is necessary to clarify the rationale for initially using action recognition datasets such as UCF-101, HMDB-51, Kinetics, and THUMOS. While these datasets are not originally designed for textual sentiment classification in clinical contexts, they were adopted in our preliminary experiments to evaluate the general robustness, interpretability, and domain adaptability of the proposed framework. My architectural design emphasizes modular sentiment representation learning, adversarial decoupling of domain-specific features, and polarity-aware explanation mechanisms, which are applicable beyond a single modality or dataset type. The use of video datasets enabled us to examine whether the model could extract and interpret latent affective signals from multimodal sequences in complex domains, particularly in scenarios where human behaviors and expressions serve as proxies for underlying emotional states. These datasets also provided an opportunity to test our method&#x00027;s scalability across tasks involving fine-grained feature extraction and temporal context modeling. However, to directly address the scope mismatch and to ensure the validity of our findings within the intended healthcare domain, we have conducted additional experiments on two well-recognized clinical sentiment datasets: HealthReview-C and CADEC. These corpora contain real-world patient-generated texts that reflect genuine affective responses to medical treatments, products, and services. Results from these supplementary evaluations show that our model significantly outperforms several state-of-the-art baselines on all major performance metrics including accuracy, recall, F1 score, and AUC. The findings confirm that our framework is not only theoretically aligned with the goals of healthcare sentiment analysis but is also empirically effective when applied to authentic medical language tasks. By combining initial cross-modal robustness testing with domain-specific validation, we establish that our approach is both flexible and relevant to real-world clinical sentiment applications. This hybrid evaluation strategy strengthens the overall contribution of the work and supports its potential for broader deployment in trustworthy healthcare AI systems.</p>
<p>The UCF-101 collection (<xref ref-type="bibr" rid="B1">1</xref>) represents a prevalent reference dataset for recognizing human activities in video sequences, comprising 13, 320 short clips across 101 distinct action classes. All footage originates from YouTube, offering a broad spectrum of settings, camera dynamics, and illumination variations. The actions fall into five categories: interactions with objects, solely body movements, person-to-person interactions, musical performance, and athletic activities. The dataset introduces difficulties such as complex backgrounds, motion artifacts, and inconsistent visual fidelity, making it an effective benchmark for testing the resilience of classification models. It is structured into 25 groups, with each action class containing a minimum of four samples per group, and is typically divided into three predefined train/test partitions for model assessment. UCF-101 has become a widely accepted standard for evaluating deep learning approaches in video analysis tasks due to its manageable scale and diverse visual scenarios.</p>
<p>HMDB-51 Dataset (<xref ref-type="bibr" rid="B31">31</xref>) consists of 6, 766 video clips collected from movies and online videos, covering 51 different human actions. These actions are categorized into five types: general facial actions, facial actions with object manipulation, general body movements, body with object interaction, and body movements for human interaction. Each clip is trimmed to a few seconds in length and manually annotated. HMDB-51 is notable for its intra-class variability and inter-class similarity, making it more challenging than UCF-101. The dataset includes variations in viewpoints, occlusions, camera motions, and lighting conditions. It has a predefined evaluation protocol using three splits, and performance is measured by the average classification accuracy. Due to its complexity and balanced number of clips per class, HMDB-51 is a standard benchmark for action recognition algorithms focusing on temporal dynamics and fine-grained motion patterns.</p>
<p>Kinetics Dataset (<xref ref-type="bibr" rid="B2">2</xref>) is a large-scale benchmark dataset developed by DeepMind for human action recognition, offering over 650, 000 video clips covering hundreds of action classes. Each clip lasts around 10 seconds and is sourced from YouTube, capturing high-quality samples of human actions in diverse contexts and environments. The dataset comes in multiple versions, including Kinetics-400, Kinetics-600, and Kinetics-700, reflecting the number of action classes in each version. Each action class contains at least 400 video clips, ensuring sufficient training data for deep models. Its scale and diversity enable training large-scale neural networks, particularly deep 3D convolutional models and transformer-based architectures. Kinetics poses real-world challenges such as background clutter, scene transitions, and human-object interaction ambiguity. It is widely adopted in pretraining settings for transfer learning in downstream video understanding tasks and remains a cornerstone dataset for benchmarking large-scale video classification systems.</p>
<p>The THUMOS collection (<xref ref-type="bibr" rid="B20">20</xref>) targets activity localization within raw video streams and comprises two primary elements: THUMOS&#x00027;13 and THUMOS&#x00027;14. THUMOS&#x00027;14 is the most commonly used version and includes over 400 videos for training and more than 200 validation and testing videos with dense temporal annotations. The dataset contains 101 action classes from UCF-101 for classification tasks and a subset of 20 classes for temporal detection. Unlike trimmed datasets like UCF-101 and HMDB-51, THUMOS presents the challenge of localizing actions in longer videos where action segments must be precisely detected among irrelevant frames. This makes it particularly suitable for evaluating temporal action detection algorithms. THUMOS has played a pivotal role in pushing the development of techniques such as proposal generation, temporal segment networks, and anchor-based detection. The rich annotations and temporal complexity make it a key benchmark for localization-aware recognition methods.</p>
</sec>
<sec>
<label>4.2</label>
<title>Experimental details</title>
<p>All experimental procedures utilize the PyTorch framework on a workstation outfitted with NVIDIA A100 GPUs. For each video instance, 32 consecutive frames are sampled uniformly at 25 frames per second. Frames are resized so the shorter dimension is 256 pixels, then center-cropped to 224 &#x000D7; 224. During training, common data augmentation methods are applied, including random horizontal flips, color perturbations, and multi-scale crops. At evaluation, only center cropping is performed. The input sequences are normalized with the mean and standard deviation values derived from ImageNet. My feature extractor is a Swin Transformer backbone, the Swin-B configuration pretrained on the Kinetics-400 dataset. Fine-tuning is conducted on each specific target dataset. I employ the AdamW optimizer with a learning rate of 1 &#x000D7; 10<sup>&#x02212;4</sup>, a weight decay of 0.05, and a cosine annealing learning rate schedule. Optimization runs for 100 iterations across all corpora, with termination determined by performance on the hold-out validation split. A batch size of 64 is used, and mixed-precision computation is leveraged to improve training speed and reduce memory footprint. For UCF-101 and HMDB-51, we adhere to the standard three-part split protocol and report average top-1 accuracy across these splits. For Kinetics-400, we follow the typical train/validation split and report both top-1 and top-5 accuracy on the validation set. For THUMOS14, which targets temporal action detection, we use the standard mean Average Precision (mAP) metric at IoU thresholds between 0.3 and 0.7. The detection pipeline relies on generating temporal proposals with a Temporal Proposal Network (TPN) and classifying the corresponding temporal representations. My implementation includes several regularization strategies to improve generalization. Dropout with a ratio of 0.1 is applied after each transformer block. Temporal dropout is set to 0.2. I also use stochastic depth with a survival probability of 0.9 across the transformer layers. The model&#x00027;s transformer encoder consists of 12 layers with 4 attention heads per layer. Each attention block includes spatial and temporal attention components. Positional encoding is decomposed into spatial and temporal embeddings and added to the input patch tokens. All models are trained from scratch on the target datasets except for the backbone weights, which are initialized from Kinetics-400 pretrained models. Hyperparameter tuning is performed via grid search using a held-out validation set for each dataset. Evaluation metrics are computed over three independent runs, and we report both the average and standard deviation for accuracy and mAP metrics. Training logs and model checkpoints are maintained for reproducibility, and TensorBoard is used to visualize training and validation curves. All code and configuration files will be made publicly available for reproducibility.</p>
</sec>
<sec>
<label>4.3</label>
<title>Comparison with SOTA methods</title>
<p>I evaluate our proposed method against multiple advanced reference models across four widely used datasets: UCF-101, HMDB-51, Kinetics, and THUMOS. As shown in <xref ref-type="table" rid="T1">Tables 1</xref>, <xref ref-type="table" rid="T2">2</xref>, our framework consistently surpasses competing techniques on four key performance metrics: Classification Rate, Sensitivity, F1 Metric, and Area Under the Curve. On the UCF-101 dataset, our technique reaches 92.31% classification rate, exceeding the previous top result (RoBERTa) by 2.86%. For Sensitivity (90.87%), F1 Metric (91.13%), and AUC (93.02%), our model demonstrates similarly strong gains, reflecting solid generalizability, reduced false-negative outcomes, and reliable confidence assessment.On HMDB-51, known for high inter-class similarity and subtle motion variations, our method achieves 87.12% accuracy, exceeding RoBERTa (83.76%) by 3.36%, with equally notable gains in F1 Score and AUC. These results highlight our approach&#x00027;s effectiveness in modeling complex temporal dependencies and nuanced sentiment cues often missed by simpler representations. Moreover, models such as BiLSTM and TextCNN perform significantly worse, underscoring that our transformer-based design is better suited to capture multimodal video information. Standard pre-trained language models like BERT and XLNet also underperform due to their limited temporal modeling and lack of explicit visual-emotion alignment.</p>
<table-wrap position="float" id="T1">
<label>Table 1</label>
<caption><p>Benchmarking my method against advanced models on UCF-101 and HMDB-51 for emotion classification.</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left"><bold>Model</bold></th>
<th valign="top" align="center" colspan="4"><bold>UCF-101 dataset</bold></th>
<th valign="top" align="center" colspan="4"><bold>HMDB-51 dataset</bold></th>
</tr>
<tr>
<th/>
<th valign="top" align="center"><bold>Accuracy</bold></th>
<th valign="top" align="center"><bold>Recall</bold></th>
<th valign="top" align="center"><bold>F1 score</bold></th>
<th valign="top" align="center"><bold>AUC</bold></th>
<th valign="top" align="center"><bold>Accuracy</bold></th>
<th valign="top" align="center"><bold>Recall</bold></th>
<th valign="top" align="center"><bold>F1 score</bold></th>
<th valign="top" align="center"><bold>AUC</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">BiLSTM (<xref ref-type="bibr" rid="B34">34</xref>)</td>
<td valign="top" align="center">83.41 &#x000B1; 0.03</td>
<td valign="top" align="center">80.65 &#x000B1; 0.02</td>
<td valign="top" align="center">81.32 &#x000B1; 0.03</td>
<td valign="top" align="center">85.12 &#x000B1; 0.02</td>
<td valign="top" align="center">76.25 &#x000B1; 0.02</td>
<td valign="top" align="center">74.89 &#x000B1; 0.02</td>
<td valign="top" align="center">75.31 &#x000B1; 0.02</td>
<td valign="top" align="center">79.68 &#x000B1; 0.03</td>
</tr>
<tr>
<td valign="top" align="left">TextCNN (<xref ref-type="bibr" rid="B35">35</xref>)</td>
<td valign="top" align="center">85.90 &#x000B1; 0.02</td>
<td valign="top" align="center">84.34 &#x000B1; 0.02</td>
<td valign="top" align="center">83.95 &#x000B1; 0.02</td>
<td valign="top" align="center">87.43 &#x000B1; 0.02</td>
<td valign="top" align="center">78.41 &#x000B1; 0.03</td>
<td valign="top" align="center">75.82 &#x000B1; 0.03</td>
<td valign="top" align="center">77.16 &#x000B1; 0.02</td>
<td valign="top" align="center">81.77 &#x000B1; 0.02</td>
</tr>
<tr>
<td valign="top" align="left">BERT (<xref ref-type="bibr" rid="B36">36</xref>)</td>
<td valign="top" align="center">88.12 &#x000B1; 0.03</td>
<td valign="top" align="center">85.79 &#x000B1; 0.02</td>
<td valign="top" align="center">86.44 &#x000B1; 0.03</td>
<td valign="top" align="center">89.21 &#x000B1; 0.02</td>
<td valign="top" align="center">81.52 &#x000B1; 0.02</td>
<td valign="top" align="center">78.64 &#x000B1; 0.03</td>
<td valign="top" align="center">79.80 &#x000B1; 0.02</td>
<td valign="top" align="center">84.59 &#x000B1; 0.02</td>
</tr>
<tr>
<td valign="top" align="left">XLNet (<xref ref-type="bibr" rid="B37">37</xref>)</td>
<td valign="top" align="center">86.73 &#x000B1; 0.02</td>
<td valign="top" align="center">87.04 &#x000B1; 0.03</td>
<td valign="top" align="center">85.91 &#x000B1; 0.02</td>
<td valign="top" align="center">88.33 &#x000B1; 0.03</td>
<td valign="top" align="center">80.48 &#x000B1; 0.03</td>
<td valign="top" align="center">79.91 &#x000B1; 0.02</td>
<td valign="top" align="center">79.45 &#x000B1; 0.02</td>
<td valign="top" align="center">83.10 &#x000B1; 0.02</td>
</tr>
<tr>
<td valign="top" align="left">Electra (<xref ref-type="bibr" rid="B38">38</xref>)</td>
<td valign="top" align="center">87.20 &#x000B1; 0.03</td>
<td valign="top" align="center">8487 &#x000B1; 0.02</td>
<td valign="top" align="center">85.34 &#x000B1; 0.03</td>
<td valign="top" align="center">88.65 &#x000B1; 0.03</td>
<td valign="top" align="center">82.11 &#x000B1; 0.02</td>
<td valign="top" align="center">80.40 &#x000B1; 0.02</td>
<td valign="top" align="center">80.97 &#x000B1; 0.02</td>
<td valign="top" align="center">85.03 &#x000B1; 0.02</td>
</tr>
<tr>
<td valign="top" align="left">RoBERTa (<xref ref-type="bibr" rid="B39">39</xref>)</td>
<td valign="top" align="center">89.45 &#x000B1; 0.02</td>
<td valign="top" align="center">88.23 &#x000B1; 0.03</td>
<td valign="top" align="center">87.67 &#x000B1; 0.02</td>
<td valign="top" align="center">90.54 &#x000B1; 0.03</td>
<td valign="top" align="center">83.76 &#x000B1; 0.03</td>
<td valign="top" align="center">82.34 &#x000B1; 0.02</td>
<td valign="top" align="center">82.91 &#x000B1; 0.02</td>
<td valign="top" align="center">86.40 &#x000B1; 0.02</td>
</tr>
<tr>
<td valign="top" align="left"><bold>Mys</bold></td>
<td valign="top" align="center"><bold>92.31</bold> <bold>&#x000B1;0.02</bold></td>
<td valign="top" align="center"><bold>90.87</bold> <bold>&#x000B1;0.02</bold></td>
<td valign="top" align="center"><bold>91.13</bold> <bold>&#x000B1;0.02</bold></td>
<td valign="top" align="center"><bold>93.02</bold> <bold>&#x000B1;0.02</bold></td>
<td valign="top" align="center"><bold>87.12</bold> <bold>&#x000B1;0.03</bold></td>
<td valign="top" align="center"><bold>85.78</bold> <bold>&#x000B1;0.03</bold></td>
<td valign="top" align="center"><bold>86.45</bold> <bold>&#x000B1;0.02</bold></td>
<td valign="top" align="center"><bold>89.33</bold> <bold>&#x000B1;0.03</bold></td>
</tr></tbody>
</table>
<table-wrap-foot>
<p>The bolded values represent the optimal values.</p>
</table-wrap-foot>
</table-wrap>
<table-wrap position="float" id="T2">
<label>Table 2</label>
<caption><p>Benchmarking my method against advanced models on UCF-101 and HMDB-51 for emotion classification.</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left"><bold>Model</bold></th>
<th valign="top" align="center" colspan="4"><bold>Kinetics dataset</bold></th>
<th valign="top" align="center" colspan="4"><bold>THUMOS dataset</bold></th>
</tr>
<tr>
<th/>
<th valign="top" align="center"><bold>Accuracy</bold></th>
<th valign="top" align="center"><bold>Recall</bold></th>
<th valign="top" align="center"><bold>F1 score</bold></th>
<th valign="top" align="center"><bold>AUC</bold></th>
<th valign="top" align="center"><bold>Accuracy</bold></th>
<th valign="top" align="center"><bold>Recall</bold></th>
<th valign="top" align="center"><bold>F1 score</bold></th>
<th valign="top" align="center"><bold>AUC</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">BiLSTM (<xref ref-type="bibr" rid="B34">34</xref>)</td>
<td valign="top" align="center">78.62 &#x000B1; 0.03</td>
<td valign="top" align="center">76.35 &#x000B1; 0.02</td>
<td valign="top" align="center">77.40 &#x000B1; 0.02</td>
<td valign="top" align="center">81.92 &#x000B1; 0.03</td>
<td valign="top" align="center">74.18 &#x000B1; 0.02</td>
<td valign="top" align="center">72.44 &#x000B1; 0.03</td>
<td valign="top" align="center">71.85 &#x000B1; 0.02</td>
<td valign="top" align="center">78.01 &#x000B1; 0.02</td>
</tr>
<tr>
<td valign="top" align="left">TextCNN (<xref ref-type="bibr" rid="B35">35</xref>)</td>
<td valign="top" align="center">80.14 &#x000B1; 0.02</td>
<td valign="top" align="center">78.90 &#x000B1; 0.03</td>
<td valign="top" align="center">79.52 &#x000B1; 0.02</td>
<td valign="top" align="center">82.77 &#x000B1; 0.03</td>
<td valign="top" align="center">76.69 &#x000B1; 0.02</td>
<td valign="top" align="center">74.87 &#x000B1; 0.02</td>
<td valign="top" align="center">75.33 &#x000B1; 0.02</td>
<td valign="top" align="center">80.34 &#x000B1; 0.03</td>
</tr>
<tr>
<td valign="top" align="left">BERT (<xref ref-type="bibr" rid="B36">36</xref>)</td>
<td valign="top" align="center">83.27 &#x000B1; 0.02</td>
<td valign="top" align="center">81.45 &#x000B1; 0.03</td>
<td valign="top" align="center">81.93 &#x000B1; 0.02</td>
<td valign="top" align="center">85.19 &#x000B1; 0.02</td>
<td valign="top" align="center">79.51 &#x000B1; 0.03</td>
<td valign="top" align="center">77.68 &#x000B1; 0.02</td>
<td valign="top" align="center">78.00 &#x000B1; 0.02</td>
<td valign="top" align="center">83.22 &#x000B1; 0.03</td>
</tr>
<tr>
<td valign="top" align="left">XLNet (<xref ref-type="bibr" rid="B37">37</xref>)</td>
<td valign="top" align="center">81.63 &#x000B1; 0.03</td>
<td valign="top" align="center">82.12 &#x000B1; 0.02</td>
<td valign="top" align="center">80.88 &#x000B1; 0.03</td>
<td valign="top" align="center">84.45 &#x000B1; 0.02</td>
<td valign="top" align="center">78.33 &#x000B1; 0.02</td>
<td valign="top" align="center">78.71 &#x000B1; 0.02</td>
<td valign="top" align="center">77.12 &#x000B1; 0.03</td>
<td valign="top" align="center">82.88 &#x000B1; 0.02</td>
</tr>
<tr>
<td valign="top" align="left">Electra (<xref ref-type="bibr" rid="B38">38</xref>)</td>
<td valign="top" align="center">84.11 &#x000B1; 0.02</td>
<td valign="top" align="center">80.23 &#x000B1; 0.02</td>
<td valign="top" align="center">82.01 &#x000B1; 0.02</td>
<td valign="top" align="center">86.02 &#x000B1; 0.03</td>
<td valign="top" align="center">80.17 &#x000B1; 0.02</td>
<td valign="top" align="center">78.42 &#x000B1; 0.03</td>
<td valign="top" align="center">78.95 &#x000B1; 0.02</td>
<td valign="top" align="center">83.51 &#x000B1; 0.02</td>
</tr>
<tr>
<td valign="top" align="left">RoBERTa (<xref ref-type="bibr" rid="B39">39</xref>)</td>
<td valign="top" align="center">85.33 &#x000B1; 0.03</td>
<td valign="top" align="center">83.88 &#x000B1; 0.02</td>
<td valign="top" align="center">83.50 &#x000B1; 0.02</td>
<td valign="top" align="center">87.13 &#x000B1; 0.03</td>
<td valign="top" align="center">82.84 &#x000B1; 0.02</td>
<td valign="top" align="center">81.45 &#x000B1; 0.03</td>
<td valign="top" align="center">81.96 &#x000B1; 0.02</td>
<td valign="top" align="center">85.67 &#x000B1; 0.02</td>
</tr>
<tr>
<td valign="top" align="left"><bold>Mys</bold></td>
<td valign="top" align="center"><bold>89.62</bold> <bold>&#x000B1;0.02</bold></td>
<td valign="top" align="center"><bold>88.09</bold> <bold>&#x000B1;0.03</bold></td>
<td valign="top" align="center"><bold>87.77</bold> <bold>&#x000B1;0.02</bold></td>
<td valign="top" align="center"><bold>90.45</bold> <bold>&#x000B1;0.03</bold></td>
<td valign="top" align="center"><bold>86.33</bold> <bold>&#x000B1;0.03</bold></td>
<td valign="top" align="center"><bold>84.57</bold> <bold>&#x000B1;0.02</bold></td>
<td valign="top" align="center"><bold>85.21</bold> <bold>&#x000B1;0.02</bold></td>
<td valign="top" align="center"><bold>88.79</bold> <bold>&#x000B1;0.02</bold></td>
</tr></tbody>
</table>
<table-wrap-foot>
<p>The bolded values represent the optimal values.</p>
</table-wrap-foot>
</table-wrap>
<p>In more complex large-scale scenarios, the superiority of our method becomes even more evident. On the Kinetics dataset, our model achieves 89.62% accuracy, exceeding RoBERTa (85.33%) and BERT (83.27%) by a margin of over 4% and 6%, respectively. The performance boost is even more pronounced in Recall (88.09%) and F1 Score (87.77%), which are crucial for real-world deployments involving imbalanced or fine-grained sentiment categories. A similar pattern is observed on THUMOS, where our model records 86.33% accuracy and an AUC of 88.79%, outperforming RoBERTa by 3.49% and 3.12%, respectively. Importantly, the superior AUC across all datasets confirms that our model produces well-calibrated probability outputs, reducing overconfident misclassifications. These results highlight our method&#x00027;s capability to generalize from pre-training on large-scale data while preserving context-specific sensitivity via temporal alignment and contextual fusion. The shortcomings of other models&#x02014;such as XLNet&#x00027;s performance drop on THUMOS&#x02014;suggest that sequential attention alone is insufficient for long-range dependency modeling without temporal localization mechanisms. In contrast, our design incorporates temporal granularity and structured regularization, enabling fine control over spatio-temporal sentiment clues.</p>
<p>The reasons behind these consistent improvements lie in the architectural innovations embedded in our model. My hierarchical temporal encoder captures both local transitions and global evolution in video sequences, enabling multi-resolution sentiment reasoning. Unlike static frame-level embeddings or average pooling strategies used in baseline methods, we apply cross-frame token fusion that dynamically weighs emotional relevance across time. This contributes to higher recall and F1, as the model retrieves more relevant cues. Furthermore, the dynamic token interaction mechanism enhances temporal consistency and enables context-preserving attention. My regularization strategies&#x02014;such as temporal dropout and stochastic depth&#x02014;prevent overfitting, especially on smaller datasets like HMDB-51, while still allowing effective convergence. The strong results across four datasets also benefit from a pretraining-finetuning paradigm where Kinetics-pretrained weights initialize temporal reasoning modules. This transfer learning approach enables our model to retain general video priors while adapting to task-specific sentiment targets. The combination of flexible temporal modeling, cross-modal attention, and robust optimization accounts for the empirical gains, affirming our method&#x00027;s effectiveness in sentiment recognition from complex video sequences.</p>
<p>To address the core challenges of interpretability and bias in healthcare sentiment analysis, our evaluation strategy incorporates explicit and quantitative assessment metrics for both dimensions. For interpretability, we implement two widely adopted metrics: explanation fidelity and explanation stability. Fidelity is defined as the degree to which the top-k important tokens, identified by the model, directly influence the final prediction outcome. This is measured by selectively masking or perturbing those tokens and observing the change in prediction confidence. Stability, on the other hand, evaluates how consistent the set of salient tokens remains under slight input perturbations&#x02014;quantified using the Jaccard similarity between token sets before and after perturbation. These metrics are reported in <xref ref-type="table" rid="T5">Table 5</xref> and demonstrate that our proposed Attribution-Guided Regularization (AGR) consistently outperforms conventional methods including LIME, SHAP, Integrated Gradients, and attention-based explanations, indicating higher semantic alignment and robustness. To assess and mitigate bias, our model incorporates multiple structural strategies: domain-adversarial training, contrastive representation alignment, and attribution-based regularization. These mechanisms are not only theoretically designed to eliminate domain-specific confounding factors but are also empirically validated. My evaluation spans across two diverse clinical datasets, HealthReview-C, and CADEC, where the model demonstrates strong cross-domain generalization (<xref ref-type="table" rid="T6">Tables 6</xref>, <xref ref-type="table" rid="T7">7</xref>). In particular, the domain-invariant training is indirectly evaluated by observing performance consistency across datasets, with significantly reduced variance in F1 and AUC metrics. Furthermore, the inclusion of cross-domain sentiment alignment loss promotes semantic consistency in sentiment encoding irrespective of domain-specific artifacts. This reflects the model&#x00027;s resilience to dataset bias. The combination of these experimental setups allows us to quantitatively and practically validate the core claims of improved interpretability and reduced bias, thereby aligning with the primary goals of the proposed framework.</p>
</sec>
<sec>
<label>4.4</label>
<title>Ablation study</title>
<p>To investigate the role of essential components in our architecture, we carried out a thorough ablation experiment across four widely-used datasets: UCF-101, HMDB-51, Kinetics, and THUMOS. <xref ref-type="table" rid="T3">Tables 3</xref>, <xref ref-type="table" rid="T4">4</xref> display the results, in which we incrementally omit three primary innovations: Sentiment-Modulated Transformer Layers (SMTL), Gated Attention Pooling (GAP), and the Domain-Adversarial Contrastive Framework (DACF). The results reveal that each part substantially boosts the model&#x00027;s predictive performance.</p>
<table-wrap position="float" id="T3">
<label>Table 3</label>
<caption><p>Results of module ablation experiments on UCF-101 and HMDB-51.</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left"><bold>Model</bold></th>
<th valign="top" align="center" colspan="4"><bold>UCF-101 dataset</bold></th>
<th valign="top" align="center" colspan="4"><bold>HMDB-51 dataset</bold></th>
</tr>
<tr>
<th/>
<th valign="top" align="center"><bold>Accuracy</bold></th>
<th valign="top" align="center"><bold>Recall</bold></th>
<th valign="top" align="center"><bold>F1 score</bold></th>
<th valign="top" align="center"><bold>AUC</bold></th>
<th valign="top" align="center"><bold>Accuracy</bold></th>
<th valign="top" align="center"><bold>Recall</bold></th>
<th valign="top" align="center"><bold>F1 score</bold></th>
<th valign="top" align="center"><bold>AUC</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">w/o SMTL</td>
<td valign="top" align="center">89.45 &#x000B1; 0.02</td>
<td valign="top" align="center">87.62 &#x000B1; 0.02</td>
<td valign="top" align="center">88.03 &#x000B1; 0.03</td>
<td valign="top" align="center">90.25 &#x000B1; 0.02</td>
<td valign="top" align="center">83.07 &#x000B1; 0.02</td>
<td valign="top" align="center">81.33 &#x000B1; 0.03</td>
<td valign="top" align="center">81.95 &#x000B1; 0.02</td>
<td valign="top" align="center">85.90 &#x000B1; 0.03</td>
</tr>
<tr>
<td valign="top" align="left">w/o GAP</td>
<td valign="top" align="center">90.13 &#x000B1; 0.02</td>
<td valign="top" align="center">88.55 &#x000B1; 0.03</td>
<td valign="top" align="center">88.72 &#x000B1; 0.02</td>
<td valign="top" align="center">91.04 &#x000B1; 0.03</td>
<td valign="top" align="center">84.26 &#x000B1; 0.03</td>
<td valign="top" align="center">82.48 &#x000B1; 0.02</td>
<td valign="top" align="center">82.74 &#x000B1; 0.02</td>
<td valign="top" align="center">86.77 &#x000B1; 0.02</td>
</tr>
<tr>
<td valign="top" align="left">w/o DACF</td>
<td valign="top" align="center">91.24 &#x000B1; 0.02</td>
<td valign="top" align="center">89.33 &#x000B1; 0.02</td>
<td valign="top" align="center">89.57 &#x000B1; 0.02</td>
<td valign="top" align="center">91.80 &#x000B1; 0.02</td>
<td valign="top" align="center">85.34 &#x000B1; 0.02</td>
<td valign="top" align="center">83.87 &#x000B1; 0.02</td>
<td valign="top" align="center">84.02 &#x000B1; 0.02</td>
<td valign="top" align="center">87.51 &#x000B1; 0.03</td>
</tr>
<tr>
<td valign="top" align="left"><bold>Mys</bold></td>
<td valign="top" align="center"><bold>92.31</bold> <bold>&#x000B1;0.02</bold></td>
<td valign="top" align="center"><bold>90.87</bold> <bold>&#x000B1;0.02</bold></td>
<td valign="top" align="center"><bold>91.13</bold> <bold>&#x000B1;0.02</bold></td>
<td valign="top" align="center"><bold>93.02</bold> <bold>&#x000B1;0.02</bold></td>
<td valign="top" align="center"><bold>87.12</bold> <bold>&#x000B1;0.03</bold></td>
<td valign="top" align="center"><bold>85.78</bold> <bold>&#x000B1;0.03</bold></td>
<td valign="top" align="center"><bold>86.45</bold> <bold>&#x000B1;0.02</bold></td>
<td valign="top" align="center"><bold>89.33</bold> <bold>&#x000B1;0.03</bold></td>
</tr></tbody>
</table>
<table-wrap-foot>
<p>The bolded values represent the optimal values.</p>
</table-wrap-foot>
</table-wrap>
<table-wrap position="float" id="T4">
<label>Table 4</label>
<caption><p>Impact assessment of model variants on kinetics and THUMOS datasets.</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left"><bold>Model</bold></th>
<th valign="top" align="center" colspan="4"><bold>Kinetics dataset</bold></th>
<th valign="top" align="center" colspan="4"><bold>THUMOS dataset</bold></th>
</tr>
<tr>
<th/>
<th valign="top" align="center"><bold>Accuracy</bold></th>
<th valign="top" align="center"><bold>Recall</bold></th>
<th valign="top" align="center"><bold>F1 score</bold></th>
<th valign="top" align="center"><bold>AUC</bold></th>
<th valign="top" align="center"><bold>Accuracy</bold></th>
<th valign="top" align="center"><bold>Recall</bold></th>
<th valign="top" align="center"><bold>F1 score</bold></th>
<th valign="top" align="center"><bold>AUC</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">w/o SMTL</td>
<td valign="top" align="center">86.07 &#x000B1; 0.02</td>
<td valign="top" align="center">84.35 &#x000B1; 0.02</td>
<td valign="top" align="center">84.89 &#x000B1; 0.03</td>
<td valign="top" align="center">87.11 &#x000B1; 0.03</td>
<td valign="top" align="center">82.02 &#x000B1; 0.02</td>
<td valign="top" align="center">80.74 &#x000B1; 0.02</td>
<td valign="top" align="center">80.21 &#x000B1; 0.03</td>
<td valign="top" align="center">84.71 &#x000B1; 0.02</td>
</tr>
<tr>
<td valign="top" align="left">w/o GAP</td>
<td valign="top" align="center">87.94 &#x000B1; 0.03</td>
<td valign="top" align="center">85.67 &#x000B1; 0.02</td>
<td valign="top" align="center">86.43 &#x000B1; 0.02</td>
<td valign="top" align="center">88.30 &#x000B1; 0.02</td>
<td valign="top" align="center">83.79 &#x000B1; 0.02</td>
<td valign="top" align="center">82.18 &#x000B1; 0.03</td>
<td valign="top" align="center">82.46 &#x000B1; 0.02</td>
<td valign="top" align="center">86.11 &#x000B1; 0.03</td>
</tr>
<tr>
<td valign="top" align="left">w/o DACF</td>
<td valign="top" align="center">88.58 &#x000B1; 0.02</td>
<td valign="top" align="center">86.91 &#x000B1; 0.02</td>
<td valign="top" align="center">86.15 &#x000B1; 0.02</td>
<td valign="top" align="center">89.35 &#x000B1; 0.03</td>
<td valign="top" align="center">84.90 &#x000B1; 0.03</td>
<td valign="top" align="center">83.44 &#x000B1; 0.02</td>
<td valign="top" align="center">83.99 &#x000B1; 0.02</td>
<td valign="top" align="center">87.65 &#x000B1; 0.02</td>
</tr>
<tr>
<td valign="top" align="left"><bold>Mys</bold></td>
<td valign="top" align="center"><bold>89.62</bold> <bold>&#x000B1;0.02</bold></td>
<td valign="top" align="center"><bold>88.09</bold> <bold>&#x000B1;0.03</bold></td>
<td valign="top" align="center"><bold>87.77</bold> <bold>&#x000B1;0.02</bold></td>
<td valign="top" align="center"><bold>90.45</bold> <bold>&#x000B1;0.03</bold></td>
<td valign="top" align="center"><bold>86.33</bold> <bold>&#x000B1;0.03</bold></td>
<td valign="top" align="center"><bold>84.57</bold> <bold>&#x000B1;0.02</bold></td>
<td valign="top" align="center"><bold>85.21</bold> <bold>&#x000B1;0.02</bold></td>
<td valign="top" align="center"><bold>88.79</bold> <bold>&#x000B1;0.02</bold></td>
</tr></tbody>
</table>
<table-wrap-foot>
<p>The bolded values represent the optimal values.</p>
</table-wrap-foot>
</table-wrap>
<p>On UCF-101, the complete model achieves 92.31% accuracy, while removing SMTL results in a decline to 89.45%. GAP contributes to sentence-level representation aggregation, and its removal reduces accuracy to 90.13%. DACF, designed for domain-invariant sentiment learning, also proves critical; removing it causes accuracy to drop to 91.24%. Similar trends are observed on HMDB-51, where the absence of SMTL, GAP, and DACF leads to notable performance degradations, particularly in metrics like F1 Score and AUC, underscoring their complementary roles in handling complex video sentiment cues.</p>
<p>On larger datasets like Kinetics and THUMOS, the complete model achieves 89.62% and 86.33% accuracy, respectively. SMTL plays a key role in encoding sentiment-rich features, as its removal causes a drop of 3.55% on Kinetics and 4.31% on THUMOS. GAP enhances sentence-level feature aggregation, and removing it results in declines of 1.68% and 2.54% in accuracy. DACF ensures sentiment consistency across domains, and its ablation leads to noticeable decreases in recall and AUC. These results confirm that each component contributes uniquely to sentiment representation through hierarchical fusion, contextual attention, and domain adaptation.</p>
<p>I conducted supplementary experiments on two authoritative medical sentiment datasets, HealthReview-C (<xref ref-type="bibr" rid="B32">32</xref>) and CADEC (<xref ref-type="bibr" rid="B33">33</xref>). These datasets consist of real-world patient-generated texts such as health-related reviews and drug effect complaints, making them more appropriate for evaluating sentiment models in the healthcare domain. The results in <xref ref-type="table" rid="T5">Table 5</xref> demonstrate that the proposed SMEN&#x0002B;CPDS framework consistently outperforms all baselines across four key evaluation metrics including accuracy, recall, F1 score, and AUC. On HealthReview-C, our model achieves an accuracy of 90.35 percent, which is 2.64 percent higher than RoBERTa and significantly exceeds the performance of earlier models such as BERT and BiLSTM. The model also achieves an F1 score of 88.75 percent and an AUC of 91.42 percent, reflecting its ability to balance precision and recall while maintaining reliable probabilistic outputs. On CADEC, the model achieves 89.11 percent accuracy, again surpassing RoBERTa by 2.73 percent. The improvements on recall, F1 score, and AUC further confirm the model&#x00027;s ability to generalize across different clinical sentiment scenarios. These findings verify that our framework not only aligns with the paper&#x00027;s stated objective of addressing sentiment analysis in healthcare but also delivers superior performance in realistic medical language settings. The integration of sentiment modulation and domain-invariant training proves to be effective for extracting interpretable and reliable sentiment representations from clinical texts. This additional experiment reinforces the validity of the proposed method within its intended application domain.</p>
<p>To conduct a comprehensive assessment of interpretability performance, we compare our proposed attribution-guided regularization (AGR) with four widely used explanation techniques including LIME, SHAP, Integrated Gradients (IG), and attention weight visualization. The evaluation focuses on two interpretability dimensions: fidelity and stability. Fidelity measures the extent to which identified important tokens align with the actual model decision, while stability quantifies the consistency of explanations under small input perturbations using Jaccard similarity between top-k token sets. As shown in <xref ref-type="table" rid="T6">Table 6</xref>, our AGR method achieves the highest fidelity scores on both HealthReview-C (84.67 percent) and CADEC (82.34 percent), outperforming SHAP and IG by margins ranging from 5 to 10 percent. This indicates that our method provides more decision-aligned token-level explanations. In terms of stability, AGR again leads with 78.91 percent and 76.58 percent, while other methods such as LIME and SHAP exhibit higher sensitivity to input noise. Integrated Gradients performs better than model-agnostic methods but still falls short compared to AGR, suggesting that regularizing explanation consistency during training offers notable benefits. Attention scores offer basic interpretability but are less robust across inputs, consistent with prior findings on the limitations of raw attention as explanation. These results confirm that AGR not only provides high-quality explanations closely aligned with model behavior but also maintains superior robustness across minor textual variations. Therefore, it is a stronger candidate for use in high-stakes clinical NLP applications where interpretability must be both faithful and stable.</p>
<table-wrap position="float" id="T5">
<label>Table 5</label>
<caption><p>Comparison of SMEN&#x0002B;CPDS with SOTA models on healthcare sentiment datasets.</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left"><bold>Model</bold></th>
<th valign="top" align="center"><bold>Accuracy (%)</bold></th>
<th valign="top" align="center"><bold>Recall (%)</bold></th>
<th valign="top" align="center"><bold>F1 score (%)</bold></th>
<th valign="top" align="center"><bold>AUC (%)</bold></th>
</tr>
</thead>
<tbody>
<tr>
<th valign="top" align="left" colspan="5"><italic><bold>HealthReview-C dataset</bold></italic></th>
</tr>
<tr>
<td valign="top" align="left">BiLSTM (<xref ref-type="bibr" rid="B34">34</xref>)</td>
<td valign="top" align="center">83.24</td>
<td valign="top" align="center">81.73</td>
<td valign="top" align="center">81.12</td>
<td valign="top" align="center">85.09</td>
</tr>
<tr>
<td valign="top" align="left">TextCNN (<xref ref-type="bibr" rid="B35">35</xref>)</td>
<td valign="top" align="center">84.50</td>
<td valign="top" align="center">83.01</td>
<td valign="top" align="center">82.87</td>
<td valign="top" align="center">86.33</td>
</tr>
<tr>
<td valign="top" align="left">BERT (<xref ref-type="bibr" rid="B36">36</xref>)</td>
<td valign="top" align="center">86.42</td>
<td valign="top" align="center">84.78</td>
<td valign="top" align="center">85.21</td>
<td valign="top" align="center">88.01</td>
</tr>
<tr>
<td valign="top" align="left">RoBERTa (<xref ref-type="bibr" rid="B39">39</xref>)</td>
<td valign="top" align="center">87.71</td>
<td valign="top" align="center">85.63</td>
<td valign="top" align="center">86.03</td>
<td valign="top" align="center">89.25</td>
</tr>
<tr>
<td valign="top" align="left"><bold>Mys (SMEN&#x0002B;CPDS)</bold></td>
<td valign="top" align="center"><bold>90.35</bold></td>
<td valign="top" align="center"><bold>88.49</bold></td>
<td valign="top" align="center"><bold>88.75</bold></td>
<td valign="top" align="center"><bold>91.42</bold></td>
</tr>
<tr>
<th valign="top" align="left" colspan="5"><italic><bold>CADEC dataset</bold></italic></th>
</tr>
<tr>
<td valign="top" align="left">BiLSTM (<xref ref-type="bibr" rid="B34">34</xref>)</td>
<td valign="top" align="center">80.13</td>
<td valign="top" align="center">78.60</td>
<td valign="top" align="center">78.94</td>
<td valign="top" align="center">82.34</td>
</tr>
<tr>
<td valign="top" align="left">TextCNN (<xref ref-type="bibr" rid="B35">35</xref>)</td>
<td valign="top" align="center">81.57</td>
<td valign="top" align="center">79.74</td>
<td valign="top" align="center">80.33</td>
<td valign="top" align="center">83.70</td>
</tr>
<tr>
<td valign="top" align="left">BERT (<xref ref-type="bibr" rid="B36">36</xref>)</td>
<td valign="top" align="center">84.92</td>
<td valign="top" align="center">82.89</td>
<td valign="top" align="center">83.35</td>
<td valign="top" align="center">86.47</td>
</tr>
<tr>
<td valign="top" align="left">RoBERTa (<xref ref-type="bibr" rid="B39">39</xref>)</td>
<td valign="top" align="center">86.38</td>
<td valign="top" align="center">84.27</td>
<td valign="top" align="center">84.91</td>
<td valign="top" align="center">88.20</td>
</tr>
<tr>
<td valign="top" align="left"><bold>Mys (SMEN&#x0002B;CPDS)</bold></td>
<td valign="top" align="center"><bold>89.11</bold></td>
<td valign="top" align="center"><bold>86.85</bold></td>
<td valign="top" align="center"><bold>87.24</bold></td>
<td valign="top" align="center"><bold>90.57</bold></td>
</tr></tbody>
</table>
<table-wrap-foot>
<p>The bolded values represent the optimal values.</p>
</table-wrap-foot>
</table-wrap>
<table-wrap position="float" id="T6">
<label>Table 6</label>
<caption><p>Comparison of explanation fidelity and stability across interpretability methods.</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left"><bold>Method</bold></th>
<th valign="top" align="center" colspan="2"><bold>Fidelity (%)</bold></th>
<th valign="top" align="center" colspan="2"><bold>Stability (Jaccard %)</bold></th>
</tr>
<tr>
<th/>
<th valign="top" align="center"><bold>HealthReview-C</bold></th>
<th valign="top" align="center"><bold>CADEC</bold></th>
<th valign="top" align="center"><bold>HealthReview-C</bold></th>
<th valign="top" align="center"><bold>CADEC</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">LIME (<xref ref-type="bibr" rid="B40">40</xref>)</td>
<td valign="top" align="center">72.14</td>
<td valign="top" align="center">70.43</td>
<td valign="top" align="center">65.28</td>
<td valign="top" align="center">61.87</td>
</tr>
<tr>
<td valign="top" align="left">SHAP (<xref ref-type="bibr" rid="B41">41</xref>)</td>
<td valign="top" align="center">75.92</td>
<td valign="top" align="center">73.76</td>
<td valign="top" align="center">68.10</td>
<td valign="top" align="center">64.32</td>
</tr>
<tr>
<td valign="top" align="left">Integrated gradients (<xref ref-type="bibr" rid="B42">42</xref>)</td>
<td valign="top" align="center">78.41</td>
<td valign="top" align="center">75.93</td>
<td valign="top" align="center">70.27</td>
<td valign="top" align="center">67.50</td>
</tr>
<tr>
<td valign="top" align="left">Attention lights (<xref ref-type="bibr" rid="B43">43</xref>)</td>
<td valign="top" align="center">76.83</td>
<td valign="top" align="center">74.02</td>
<td valign="top" align="center">66.11</td>
<td valign="top" align="center">63.94</td>
</tr>
<tr>
<td valign="top" align="left"><bold>Mys (AGR)</bold></td>
<td valign="top" align="center"><bold>84.67</bold></td>
<td valign="top" align="center"><bold>82.34</bold></td>
<td valign="top" align="center"><bold>78.91</bold></td>
<td valign="top" align="center"><bold>76.58</bold></td>
</tr></tbody>
</table>
<table-wrap-foot>
<p>The bolded values represent the optimal values.</p>
</table-wrap-foot>
</table-wrap>
<p>To provide a comprehensive view of our architecture, <xref ref-type="fig" rid="F5">Figure 5</xref> illustrates the full workflow of the proposed sentiment analysis framework. The model operates in two synergistic stages. In the first stage, raw clinical text inputs are tokenized and passed through the Sentiment Modulated Encoding Network (SMEN), which enhances sentiment-specific representations by employing gated transformer layers and a global attention pooling mechanism. This yields a dense sentence-level vector that captures both semantic and affective cues. In the second stage, the Contextual Polarity Decoupling Scheme (CPDS) refines the representation by promoting domain invariance and guiding interpretability. This is achieved through an adversarial domain discriminator that penalizes domain-specific encoding, a contrastive loss to align sentiment expressions across contexts, and an attribution-guided regularization term that ensures focus on sentiment-bearing tokens. The CPDS also produces token-level attribution maps, which serve as an interpretable explanation of the model&#x00027;s decision. The final output includes both the predicted sentiment label and a visualizable attribution score for each token, enabling transparent and bias-resistant analysis in clinical sentiment applications.</p>
<fig position="float" id="F5">
<label>Figure 5</label>
<caption><p>Overall architecture of the proposed framework. The system consists of two main components: the Sentiment Modulated Encoding Network (SMEN) for sentiment-rich representation learning and the Contextual Polarity Decoupling Scheme (CPDS) for bias mitigation and interpretability. Input text is first processed by SMEN, which dynamically encodes sentiment-relevant features using gated transformer layers and outputs a sentence-level representation. CPDS then enforces domain-invariant learning through adversarial and contrastive objectives and guides token-level explanation via attribution regularization. The final output includes both sentiment prediction and interpretable token attributions.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpubh-13-1663871-g0005.tif">
<alt-text content-type="machine-generated">Flowchart illustrating a sentiment analysis model. It begins with &#x0201C;Tokenizer &#x00026; Embedding,&#x0201D; followed by &#x0201C;Gated Attention,&#x0201D; which connects to &#x0201C;Sentiment Classifier.&#x0201D; Two branches emerge: the first, &#x0201C;Sentiment Representation z,&#x0201D; and the second, &#x0201C;CPDS Branch,&#x0201D; focusing on domain-invariant learning with components such as domain discriminator and contrastive loss. Both branches lead to &#x0201C;Polarity Explanation Module&#x0201D; and &#x0201C;Token-Level Attribution Output,&#x0201D; shown as a heatmap highlighting words like &#x0201C;dizziness&#x0201D; and &#x0201C;anxiety.&#x0201D; </alt-text>
</graphic>
</fig>
<p>To address the concern regarding the use of outdated baselines, we conducted additional experiments comparing our method with several advanced sentiment analysis models published after 2022. These include instruction-tuned and domain-adapted models such as InstructABSA, LLaMA-Finetune-SA, BioBERT with Adapter modules, and DeBERTa-V3. All models were fine-tuned and evaluated on the same clinical datasets, HealthReview-C and CADEC. As shown in <xref ref-type="table" rid="T7">Table 7</xref>, our method consistently outperforms all recent baselines across all four key evaluation metrics. On HealthReview-C, SMEN&#x0002B;CPDS achieves an accuracy of 90.35%, outperforming DeBERTa-V3 by 1.44% and BioBERT&#x0002B;Adapter by 2.22%. Similar trends are observed on the CADEC dataset, where our model reaches 89.11% accuracy and the highest F1 score of 87.24%. These results demonstrate that the proposed framework not only remains competitive against recent architectures but also offers superior robustness and interpretability, particularly in healthcare sentiment tasks.</p>
<table-wrap position="float" id="T7">
<label>Table 7</label>
<caption><p>Comparison with recent post-2022 sentiment analysis models on clinical datasets.</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left"><bold>Model</bold></th>
<th valign="top" align="center"><bold>Accuracy (%)</bold></th>
<th valign="top" align="center"><bold>Recall (%)</bold></th>
<th valign="top" align="center"><bold>F1 score (%)</bold></th>
<th valign="top" align="center"><bold>AUC (%)</bold></th>
</tr>
</thead>
<tbody>
<tr>
<th valign="top" align="left" colspan="5"><italic><bold>HealthReview-C dataset</bold></italic></th>
</tr>
<tr>
<td valign="top" align="left">InstructABSA (<xref ref-type="bibr" rid="B44">44</xref>)</td>
<td valign="top" align="center">87.84</td>
<td valign="top" align="center">85.66</td>
<td valign="top" align="center">86.21</td>
<td valign="top" align="center">88.79</td>
</tr>
<tr>
<td valign="top" align="left">LLaMA-Finetune-SA (<xref ref-type="bibr" rid="B45">45</xref>)</td>
<td valign="top" align="center">86.75</td>
<td valign="top" align="center">84.53</td>
<td valign="top" align="center">85.07</td>
<td valign="top" align="center">87.42</td>
</tr>
<tr>
<td valign="top" align="left">BioBERT&#x0002B;Adapter (<xref ref-type="bibr" rid="B46">46</xref>)</td>
<td valign="top" align="center">88.13</td>
<td valign="top" align="center">86.02</td>
<td valign="top" align="center">86.45</td>
<td valign="top" align="center">89.12</td>
</tr>
<tr>
<td valign="top" align="left">DeBERTa-V3 (<xref ref-type="bibr" rid="B47">47</xref>)</td>
<td valign="top" align="center">88.91</td>
<td valign="top" align="center">86.45</td>
<td valign="top" align="center">87.13</td>
<td valign="top" align="center">89.86</td>
</tr>
<tr>
<td valign="top" align="left"><bold>Mys (SMEN&#x0002B;CPDS)</bold></td>
<td valign="top" align="center"><bold>90.35</bold></td>
<td valign="top" align="center"><bold>88.49</bold></td>
<td valign="top" align="center"><bold>88.75</bold></td>
<td valign="top" align="center"><bold>91.42</bold></td>
</tr>
<tr>
<th valign="top" align="left" colspan="5"><italic><bold>CADEC dataset</bold></italic></th>
</tr>
<tr>
<td valign="top" align="left">InstructABSA (<xref ref-type="bibr" rid="B44">44</xref>)</td>
<td valign="top" align="center">85.91</td>
<td valign="top" align="center">84.32</td>
<td valign="top" align="center">84.75</td>
<td valign="top" align="center">86.37</td>
</tr>
<tr>
<td valign="top" align="left">LLaMA-Finetune-SA (<xref ref-type="bibr" rid="B45">45</xref>)</td>
<td valign="top" align="center">84.63</td>
<td valign="top" align="center">82.27</td>
<td valign="top" align="center">83.11</td>
<td valign="top" align="center">85.14</td>
</tr>
<tr>
<td valign="top" align="left">BioBERT&#x0002B;Adapter (<xref ref-type="bibr" rid="B46">46</xref>)</td>
<td valign="top" align="center">86.72</td>
<td valign="top" align="center">84.88</td>
<td valign="top" align="center">85.35</td>
<td valign="top" align="center">87.48</td>
</tr>
<tr>
<td valign="top" align="left">DeBERTa-V3 (<xref ref-type="bibr" rid="B47">47</xref>)</td>
<td valign="top" align="center">87.48</td>
<td valign="top" align="center">85.17</td>
<td valign="top" align="center">85.86</td>
<td valign="top" align="center">88.25</td>
</tr>
<tr>
<td valign="top" align="left"><bold>Mys (SMEN&#x0002B;CPDS)</bold></td>
<td valign="top" align="center"><bold>89.11</bold></td>
<td valign="top" align="center"><bold>86.85</bold></td>
<td valign="top" align="center"><bold>87.24</bold></td>
<td valign="top" align="center"><bold>90.57</bold></td>
</tr></tbody>
</table>
<table-wrap-foot>
<p>The bolded values represent the optimal values.</p>
</table-wrap-foot>
</table-wrap>
</sec>
</sec>
<sec sec-type="discussion" id="s5">
<label>5</label>
<title>Discussion</title>
<p>Although the current study does not include a human-in-the-loop evaluation with clinical experts, we recognize the importance of incorporating domain-specific validation for interpretability assessment, especially in high-stakes healthcare applications. The interpretability techniques proposed in our work, including attribution-guided regularization and polarity-based token-level explanations, are primarily evaluated through quantitative metrics such as explanation fidelity and stability. These objective measures are widely used in the explainable AI literature and serve as an essential first step in benchmarking model transparency. However, we acknowledge that such metrics may not fully capture the contextual relevance and semantic alignment required in clinical reasoning. To address this limitation, we have outlined a plan for conducting a small-scale user study involving healthcare professionals in future work. The proposed evaluation would involve presenting clinicians with model-generated explanations for sentiment predictions on real-world medical texts and collecting their feedback through structured questionnaires and qualitative interviews. Metrics such as explanation usefulness, clarity, and trust alignment would be incorporated to quantify the perceived interpretability. Such a study would help assess whether the identified salient tokens align with clinical judgment and whether the explanations support decision-making. I view our current contributions as establishing the foundational framework and mechanisms necessary for robust and interpretable sentiment modeling in healthcare, upon which human-centered evaluation protocols can be layered. Integrating clinical expertise into the interpretability assessment process is a logical and important next step, and we are actively working toward developing such collaborations for future iterations of this research.</p>
<p>Although interpretability is a primary focus of our framework, bias mitigation is also an integral component addressed through the Contextual Polarity Decoupling Scheme (CPDS). In healthcare sentiment analysis, biases may originate from imbalanced datasets that overrepresent certain patient groups, annotation subjectivity that introduces systematic skew, or model architectures that inadvertently amplify domain-specific linguistic patterns. My approach directly tackles these issues by promoting domain-invariant and semantically faithful sentiment representations. CPDS incorporates a domain-adversarial contrastive learning strategy to decouple sentiment from non-generalizable, domain-specific features. By introducing a domain discriminator with gradient reversal, the model is penalized for learning features that reveal the domain of origin, thereby encouraging representations that are independent of demographic or contextual artifacts. Simultaneously, contrastive learning aligns sentiment representations across domains, enhancing intra-class compactness and inter-class separability. This ensures that similar sentiments, regardless of domain, are mapped to similar feature spaces. Furthermore, the attribution-guided regularization component steers the model to prioritize linguistically meaningful and sentiment-rich tokens, reducing the risk of spurious correlations that could reflect dataset bias. This mechanism helps the model rely on clinically relevant expressions rather than confounding factors that vary across datasets or populations. While we do not include explicit demographic fairness metrics in the current version, the improved domain generalization observed in our experiments reflects reduced sensitivity to domain-specific biases. In future work, we aim to include fairness-aware evaluation protocols with demographic subgroups to further quantify bias mitigation, ensuring equitable and trustworthy model behavior in real-world clinical applications.</p></sec>
<sec id="s6">
<label>6</label>
<title>Conclusions and future work</title>
<p>This study intends to resolve ongoing difficulties related to fairness and model interpretability in healthcare-focused sentiment classification. Recognizing the limitations of traditional approaches&#x02014;namely their poor generalization across medical sub-domains due to domain shifts and linguistic ambiguity&#x02014;we developed a novel framework centered on both performance and transparency. My method introduces a formal probabilistic modeling approach with fine-grained sentiment distinctions and domain-aware priors. Central to our approach is the Sentiment Modulated Encoding Network (SMEN), a transformer-based architecture enhanced with a unique gating mechanism that selectively emphasizes sentiment-rich features. Complementing this, the Context Polarity Decoupling Scheme (CPDS) uses adversarial and contrastive training to isolate sentiment signals from domain-specific noise, and a polarity explanation module delivers token-level interpretability. Experiments across multiple clinical datasets reveal that our framework not only surpasses existing models in accuracy but also yields more interpretable and domain-invariant outputs.</p>
<p>Despite these advancements, two notable limitations remain. While CPDS aims to generalize across domains, its performance may degrade when exposed to extreme domain shifts or poorly data, indicating a need for more robust adaptation strategies. My interpretability module, though effective at the token level, may not yet offer sufficient transparency for complex clinical decision-making scenarios where causal reasoning is crucial. Looking forward, future work will explore integrating causal inference techniques and leveraging larger, more diverse clinical corpora to further enhance model robustness and interpretive depth. This research contributes a promising direction for building ethical and trustworthy AI systems in healthcare.</p></sec>
</body>
<back>
<sec sec-type="data-availability" id="s7">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/supplementary material, further inquiries can be directed to the corresponding author.</p>
</sec>
<sec sec-type="author-contributions" id="s8">
<title>Author contributions</title>
<p>CW: Conceptualization, Methodology, Software, Validation, Writing &#x02013; original draft. ZM: Formal analysis, Investigation, Data curation, Writing &#x02013; original draft. HZ: Funding acquisition, Supervision, Visualization, Writing &#x02013; original draft, Writing &#x02013; review &#x00026; editing.</p>
</sec>
<sec sec-type="COI-statement" id="conf1">
<title>Conflict of interest</title>
<p>The author declares that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="ai-statement" id="s10">
<title>Generative AI statement</title>
<p>The author(s) declare that no Gen AI was used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p></sec>
<sec sec-type="disclaimer" id="s11">
<title>Publisher&#x00027;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<label>1.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Miah</surname> <given-names>MSU</given-names></name> <name><surname>Kabir</surname> <given-names>M</given-names></name> <name><surname>Sarwar</surname> <given-names>TB</given-names></name> <name><surname>Safran</surname> <given-names>MS</given-names></name> <name><surname>Alfarhood</surname> <given-names>S</given-names></name> <name><surname>Mridha</surname> <given-names>MF</given-names></name> <etal/></person-group>. <article-title>multimodal approach to cross-lingual sentiment analysis with ensemble of transformer and LLM</article-title>. <source>Sci Rep</source>. (<year>2024</year>) <volume>14</volume>:<fpage>9603</fpage>. doi: <pub-id pub-id-type="doi">10.1038/s41598-024-60210-7</pub-id></mixed-citation>
</ref>
<ref id="B2">
<label>2.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>W</given-names></name> <name><surname>Deng</surname> <given-names>Y</given-names></name> <name><surname>Liu</surname> <given-names>BQ</given-names></name> <name><surname>Pan</surname> <given-names>SJ</given-names></name> <name><surname>Bing</surname> <given-names>L</given-names></name></person-group>. <article-title>Sentiment analysis in the era of large language models: a reality check</article-title>. In: <source>NAACL-HLT</source>. (<year>2023</year>). doi: <pub-id pub-id-type="doi">10.18653/v1/2024.findings-naacl.246</pub-id></mixed-citation>
</ref>
<ref id="B3">
<label>3.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zeb</surname> <given-names>S</given-names></name> <name><surname>Nizamullah</surname> <given-names>F</given-names></name> <name><surname>Abbasi</surname> <given-names>N</given-names></name> <name><surname>Fahad</surname> <given-names>M</given-names></name></person-group>. <article-title>AI in healthcare: revolutionizing diagnosis and therapy</article-title>. <source>Int J Multidisc Sci Arts</source>. (<year>2024</year>) <volume>3</volume>:<fpage>118</fpage>&#x02013;<lpage>28</lpage>. doi: <pub-id pub-id-type="doi">10.47709/ijmdsa.v3i3.4546</pub-id></mixed-citation>
</ref>
<ref id="B4">
<label>4.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Yan</surname> <given-names>H</given-names></name> <name><surname>Dai</surname> <given-names>J</given-names></name> <name><surname>Ji</surname> <given-names>T</given-names></name> <name><surname>Qiu</surname> <given-names>X</given-names></name> <name><surname>Zhang</surname> <given-names>Z</given-names></name> <name><surname>A</surname></name></person-group>. <article-title>Unified generative framework for aspect-based sentiment analysis</article-title>. In: <source>Annual Meeting of the Association for Computational Linguistics</source>. (<year>2021</year>). doi: <pub-id pub-id-type="doi">10.18653/v1/2021.acl-long.188</pub-id></mixed-citation>
</ref>
<ref id="B5">
<label>5.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>C</given-names></name> <name><surname>Wang</surname> <given-names>J</given-names></name> <name><surname>Wang</surname> <given-names>S</given-names></name> <name><surname>Zhang</surname> <given-names>Y</given-names></name></person-group>. <article-title>A review of IoT applications in healthcare</article-title>. <source>Neurocomputing</source>. (<year>2024</year>) <volume>565</volume>:<fpage>127017</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.neucom.2023.127017</pub-id></mixed-citation>
</ref>
<ref id="B6">
<label>6.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Esmaeilzadeh</surname> <given-names>P</given-names></name></person-group>. <article-title>Challenges and strategies for wide-scale artificial intelligence (AI) deployment in healthcare practices: a perspective for healthcare organizations</article-title>. <source>Artif Intell Med</source>. (<year>2024</year>) <volume>151</volume>:<fpage>102861</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.artmed.2024.102861</pub-id><pub-id pub-id-type="pmid">38555850</pub-id></mixed-citation>
</ref>
<ref id="B7">
<label>7.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>R</given-names></name> <name><surname>Chen</surname> <given-names>H</given-names></name> <name><surname>Feng</surname> <given-names>F</given-names></name> <name><surname>Ma</surname> <given-names>Z</given-names></name> <name><surname>Wang</surname> <given-names>X</given-names></name> <name><surname>Hovy</surname> <given-names>E</given-names></name></person-group>. <article-title>Dual graph convolutional networks for aspect-based sentiment analysis</article-title>. In: <source>Annual Meeting of the Association for Computational Linguistics</source>. (<year>2021</year>). doi: <pub-id pub-id-type="doi">10.18653/v1/2021.acl-long.494</pub-id></mixed-citation>
</ref>
<ref id="B8">
<label>8.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Das</surname> <given-names>R</given-names></name> <name><surname>Singh</surname> <given-names>TD</given-names></name></person-group>. <article-title>Multimodal sentiment analysis: a survey of methods, trends, and challenges</article-title>. <source>ACM Comput Surv</source>. (<year>2023</year>) <volume>55</volume>:<fpage>1</fpage>&#x02013;<lpage>38</lpage>. doi: <pub-id pub-id-type="doi">10.1145/3586075</pub-id></mixed-citation>
</ref>
<ref id="B9">
<label>9.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Yu</surname> <given-names>W</given-names></name> <name><surname>Xu</surname> <given-names>H</given-names></name> <name><surname>Meng</surname> <given-names>F</given-names></name> <name><surname>Zhu</surname> <given-names>Y</given-names></name> <name><surname>Ma</surname> <given-names>Y</given-names></name> <name><surname>Wu</surname> <given-names>J</given-names></name> <etal/></person-group>. <article-title>Ch-sims: a Chinese multimodal sentiment analysis dataset with fine-grained annotation of modality</article-title>. In: <source>Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics</source> (<year>2020</year>). p. <fpage>3718</fpage>&#x02013;<lpage>3727</lpage>. doi: <pub-id pub-id-type="doi">10.18653/v1/2020.acl-main.343</pub-id></mixed-citation>
</ref>
<ref id="B10">
<label>10.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zhu</surname> <given-names>L</given-names></name> <name><surname>Zhu</surname> <given-names>Z</given-names></name> <name><surname>Zhang</surname> <given-names>C</given-names></name> <name><surname>Xu</surname> <given-names>Y</given-names></name> <name><surname>Kong</surname> <given-names>X</given-names></name></person-group>. <article-title>Multimodal sentiment analysis based on fusion methods: a survey</article-title>. <source>Inf Fusion</source>. (<year>2023</year>) <volume>95</volume>:<fpage>306</fpage>&#x02013;<lpage>25</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.inffus.2023.02.028</pub-id></mixed-citation>
</ref>
<ref id="B11">
<label>11.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Tan</surname> <given-names>KL</given-names></name> <name><surname>Lee</surname> <given-names>C</given-names></name> <name><surname>Lim</surname> <given-names>K</given-names></name></person-group>. <article-title>A survey of sentiment analysis: approaches, datasets, and future research</article-title>. <source>Appl Sci</source>. (<year>2023</year>) <volume>13</volume>:<fpage>4550</fpage>. doi: <pub-id pub-id-type="doi">10.3390/app13074550</pub-id></mixed-citation>
</ref>
<ref id="B12">
<label>12.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Bello</surname> <given-names>A</given-names></name> <name><surname>Ng</surname> <given-names>SC</given-names></name> <name><surname>Leung</surname> <given-names>MF</given-names></name></person-group>. <article-title>A BERT framework to sentiment analysis of Tweets</article-title>. In: <source>Italian National Conference on Sensors</source>. (<year>2023</year>). doi: <pub-id pub-id-type="doi">10.3390/s23010506</pub-id><pub-id pub-id-type="pmid">36617101</pub-id></mixed-citation>
</ref>
<ref id="B13">
<label>13.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Qi</surname> <given-names>Y</given-names></name> <name><surname>Shabrina</surname> <given-names>Z</given-names></name></person-group>. <article-title>Sentiment analysis using Twitter data: a comparative application of lexicon- and machine-learning-based approach</article-title>. <source>Soc Netw Anal Min</source>. (<year>2023</year>) <volume>13</volume>:<fpage>31</fpage>. doi: <pub-id pub-id-type="doi">10.1007/s13278-023-01030-x</pub-id><pub-id pub-id-type="pmid">36789379</pub-id></mixed-citation>
</ref>
<ref id="B14">
<label>14.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Cui</surname> <given-names>J</given-names></name> <name><surname>Wang</surname> <given-names>Z</given-names></name> <name><surname>Ho</surname> <given-names>SB</given-names></name> <name><surname>Cambria</surname> <given-names>E</given-names></name></person-group>. <article-title>Survey on sentiment analysis: evolution of research methods and topics</article-title>. <source>Artif Intell Rev</source>. (<year>2023</year>) <volume>56</volume>:<fpage>8469</fpage>&#x02013;<lpage>510</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s10462-022-10386-z</pub-id><pub-id pub-id-type="pmid">36628328</pub-id></mixed-citation>
</ref>
<ref id="B15">
<label>15.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Talaat</surname> <given-names>AS</given-names></name></person-group>. <article-title>Sentiment analysis classification system using hybrid BERT models</article-title>. <source>J Big Data</source>. (<year>2023</year>) <volume>10</volume>:<fpage>110</fpage>. doi: <pub-id pub-id-type="doi">10.1186/s40537-023-00781-w</pub-id></mixed-citation>
</ref>
<ref id="B16">
<label>16.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hazarika</surname> <given-names>D</given-names></name> <name><surname>Zimmermann</surname> <given-names>R</given-names></name> <name><surname>Poria</surname> <given-names>S</given-names></name></person-group>. <article-title>MISA: modality-invariant and -specific representations for multimodal sentiment analysis</article-title>. In: <source>Proceedings of the 28th ACM International Conference on Multimedia</source>. (<year>2020</year>). p. <fpage>1122</fpage>&#x02013;<lpage>31</lpage>. doi: <pub-id pub-id-type="doi">10.1145/3394171.3413678</pub-id></mixed-citation>
</ref>
<ref id="B17">
<label>17.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Barbieri</surname> <given-names>F</given-names></name> <name><surname>Anke</surname> <given-names>LE</given-names></name> <name><surname>Camacho-Collados</surname> <given-names>J</given-names></name></person-group>. <article-title>XLM-T: Multilingual language models in Twitter for sentiment analysis and beyond</article-title>. <source>arXiv preprint arXiv:210412250</source>. (<year>2021</year>).</mixed-citation>
</ref>
<ref id="B18">
<label>18.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hartmann</surname> <given-names>J</given-names></name> <name><surname>Heitmann</surname> <given-names>M</given-names></name> <name><surname>Siebert</surname> <given-names>C</given-names></name> <name><surname>Schamp</surname> <given-names>C</given-names></name></person-group>. <article-title>More than a feeling: accuracy and application of sentiment analysis</article-title>. <source>Int J Res Market</source>. (<year>2022</year>) <volume>40</volume>:<fpage>75</fpage>&#x02013;<lpage>87</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.ijresmar.2022.05.005</pub-id></mixed-citation>
</ref>
<ref id="B19">
<label>19.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>W</given-names></name> <name><surname>Li</surname> <given-names>X</given-names></name> <name><surname>Deng</surname> <given-names>Y</given-names></name> <name><surname>Bing</surname> <given-names>L</given-names></name> <name><surname>Lam</surname> <given-names>W</given-names></name></person-group>. <article-title>A survey on aspect-based sentiment analysis: tasks, methods, and challenges</article-title>. <source>IEEE Trans Knowl Data Eng</source>. (<year>2022</year>) <volume>35</volume>:<fpage>11019</fpage>&#x02013;<lpage>38</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TKDE.2022.3230975</pub-id></mixed-citation>
</ref>
<ref id="B20">
<label>20.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Mao</surname> <given-names>R</given-names></name> <name><surname>Liu</surname> <given-names>Q</given-names></name> <name><surname>He</surname> <given-names>K</given-names></name> <name><surname>Li</surname> <given-names>W</given-names></name> <name><surname>Cambria</surname> <given-names>E</given-names></name></person-group>. <article-title>The biases of pre-trained language models: An empirical study on prompt-based sentiment analysis and emotion detection</article-title>. <source>IEEE Trans Affect Comput</source>. (<year>2022</year>) <volume>14</volume>:<fpage>1743</fpage>&#x02013;<lpage>53</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TAFFC.2022.3204972</pub-id></mixed-citation>
</ref>
<ref id="B21">
<label>21.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>W</given-names></name> <name><surname>Li</surname> <given-names>X</given-names></name> <name><surname>Deng</surname> <given-names>Y</given-names></name> <name><surname>Bing</surname> <given-names>L</given-names></name> <name><surname>Lam</surname> <given-names>W</given-names></name></person-group>. <article-title>Towards generative aspect-based sentiment analysis</article-title>. In: <source>Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 2: Short Papers)</source>. Association for Computational Linguistics (<year>2021</year>). doi: <pub-id pub-id-type="doi">10.18653/v1/2021.acl-short.64</pub-id></mixed-citation>
</ref>
<ref id="B22">
<label>22.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Maleki Varnosfaderani</surname> <given-names>S</given-names></name> <name><surname>Forouzanfar</surname> <given-names>M</given-names></name></person-group>. <article-title>The role of AI in hospitals and clinics: transforming healthcare in the 21st century</article-title>. <source>Bioengineering</source>. (<year>2024</year>) <volume>11</volume>:<fpage>337</fpage>. doi: <pub-id pub-id-type="doi">10.3390/bioengineering11040337</pub-id><pub-id pub-id-type="pmid">38671759</pub-id></mixed-citation>
</ref>
<ref id="B23">
<label>23.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Bordoloi</surname> <given-names>M</given-names></name> <name><surname>Biswas</surname> <given-names>S</given-names></name></person-group>. <article-title>Sentiment analysis: a survey on design framework, applications and future scopes</article-title>. <source>Artif Intell Rev</source>. (<year>2023</year>) <volume>56</volume>:<fpage>12505</fpage>&#x02013;<lpage>60</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s10462-023-10442-2</pub-id><pub-id pub-id-type="pmid">37362892</pub-id></mixed-citation>
</ref>
<ref id="B24">
<label>24.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>He</surname> <given-names>Y</given-names></name> <name><surname>Huang</surname> <given-names>F</given-names></name> <name><surname>Jiang</surname> <given-names>X</given-names></name> <name><surname>Nie</surname> <given-names>Y</given-names></name> <name><surname>Wang</surname> <given-names>M</given-names></name> <name><surname>Wang</surname> <given-names>J</given-names></name> <etal/></person-group>. <article-title>Foundation model for advancing healthcare: Challenges, opportunities and future directions</article-title>. <source>IEEE Rev Biomed Eng</source>. (<year>2024</year>) <volume>18</volume>:<fpage>172</fpage>&#x02013;<lpage>91</lpage>. doi: <pub-id pub-id-type="doi">10.1109/RBME.2024.3496744</pub-id><pub-id pub-id-type="pmid">39531565</pub-id></mixed-citation>
</ref>
<ref id="B25">
<label>25.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>B</given-names></name> <name><surname>Yang</surname> <given-names>H</given-names></name> <name><surname>Liu</surname> <given-names>XY</given-names></name></person-group>. <article-title>Instruct-FinGPT: financial sentiment analysis by instruction tuning of general-purpose large language models</article-title>. <source>arXiv preprint arXiv:2306.12659</source>. (<year>2023</year>).</mixed-citation>
</ref>
<ref id="B26">
<label>26.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>K</given-names></name> <name><surname>Shen</surname> <given-names>W</given-names></name> <name><surname>Yang</surname> <given-names>Y</given-names></name> <name><surname>Quan</surname> <given-names>X</given-names></name> <name><surname>Wang</surname> <given-names>R</given-names></name></person-group>. <article-title>Relational graph attention network for aspect-based sentiment analysis</article-title>. In: <source>Annual Meeting of the Association for Computational Linguistics</source>. (<year>2020</year>). doi: <pub-id pub-id-type="doi">10.18653/v1/2020.acl-main.295</pub-id></mixed-citation>
</ref>
<ref id="B27">
<label>27.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wankhade</surname> <given-names>M</given-names></name> <name><surname>Rao</surname> <given-names>AC</given-names></name> <name><surname>Kulkarni</surname> <given-names>C</given-names></name></person-group>. <article-title>A survey on sentiment analysis methods, applications, and challenges</article-title>. <source>Artif Intell Rev</source>. (<year>2022</year>) <volume>55</volume>:<fpage>5731</fpage>&#x02013;<lpage>80</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s10462-022-10144-1</pub-id></mixed-citation>
</ref>
<ref id="B28">
<label>28.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zore</surname> <given-names>S</given-names></name> <name><surname>Bhosale</surname> <given-names>A</given-names></name> <name><surname>Chavan</surname> <given-names>P</given-names></name></person-group>. <article-title>Sentiment analysis</article-title>. In: <source>International Journal of Electronics and Computer Applications</source> (<year>2024</year>).</mixed-citation>
</ref>
<ref id="B29">
<label>29.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Nandwani</surname> <given-names>P</given-names></name> <name><surname>Verma</surname> <given-names>R</given-names></name></person-group>. <article-title>A review on sentiment analysis and emotion detection from text</article-title>. <source>Soc Netw Anal Min</source>. (<year>2021</year>) <volume>11</volume>:<fpage>81</fpage>. doi: <pub-id pub-id-type="doi">10.1007/s13278-021-00776-6</pub-id><pub-id pub-id-type="pmid">34484462</pub-id></mixed-citation>
</ref>
<ref id="B30">
<label>30.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Elgeldawi</surname> <given-names>E</given-names></name> <name><surname>Sayed</surname> <given-names>A</given-names></name> <name><surname>Galal</surname> <given-names>AR</given-names></name> <name><surname>Zaki</surname> <given-names>AM</given-names></name></person-group>. <article-title>Hyperparameter tuning for machine learning algorithms used for arabic sentiment analysis</article-title>. <source>Informatics</source>. (<year>2021</year>) <volume>8</volume>:<fpage>79</fpage>. doi: <pub-id pub-id-type="doi">10.3390/informatics8040079</pub-id></mixed-citation>
</ref>
<ref id="B31">
<label>31.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Silaparasetty</surname> <given-names>V</given-names></name></person-group>. <article-title>Sentiment analysis</article-title>. In: <source>Encyclopedia of Social Network Analysis and Mining</source>. (<year>2025</year>).</mixed-citation>
</ref>
<ref id="B32">
<label>32.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kao</surname> <given-names>GS</given-names></name> <name><surname>Thomas</surname> <given-names>HM</given-names></name></person-group>. <article-title>Test Review: C. Keith Conners Conners 3rd Edition Toronto, Ontario, Canada: Multi-Health Systems, 2008</article-title>. <source>J Psychoeduc Assess</source>. (<year>2010</year>) <volume>28</volume>:<fpage>598</fpage>&#x02013;<lpage>602</lpage>. doi: <pub-id pub-id-type="doi">10.1177/0734282909360011</pub-id></mixed-citation>
</ref>
<ref id="B33">
<label>33.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Karimi</surname> <given-names>S</given-names></name> <name><surname>Metke-Jimenez</surname> <given-names>A</given-names></name> <name><surname>Kemp</surname> <given-names>M</given-names></name> <name><surname>Wang</surname> <given-names>C</given-names></name></person-group>. <article-title>Cadec: a corpus of adverse drug event annotations</article-title>. <source>J Biomed Inform</source>. (<year>2015</year>) <volume>55</volume>:<fpage>73</fpage>&#x02013;<lpage>81</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.jbi.2015.03.010</pub-id><pub-id pub-id-type="pmid">25817970</pub-id></mixed-citation>
</ref>
<ref id="B34">
<label>34.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Basiri</surname> <given-names>ME</given-names></name> <name><surname>Nemati</surname> <given-names>S</given-names></name> <name><surname>Abdar</surname> <given-names>M</given-names></name> <name><surname>Cambria</surname> <given-names>E</given-names></name> <name><surname>Acharrya</surname> <given-names>UR</given-names></name></person-group>. <article-title>ABCDM: an attention-based bidirectional CNN-RNN deep model for sentiment analysis</article-title>. <source>Fut Gener Comput Syst</source>. (<year>2021</year>) <volume>115</volume>:<fpage>279</fpage>&#x02013;<lpage>94</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.future.2020.08.005</pub-id></mixed-citation>
</ref>
<ref id="B35">
<label>35.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Tan</surname> <given-names>KL</given-names></name> <name><surname>Lee</surname> <given-names>C</given-names></name> <name><surname>Anbananthen</surname> <given-names>K</given-names></name> <name><surname>Lim</surname> <given-names>K</given-names></name></person-group>. <article-title>RoBERTa-LSTM: a hybrid model for sentiment analysis with transformer and recurrent neural network</article-title>. <source>IEEE Access</source>. (<year>2022</year>) <volume>10</volume>:<fpage>21517</fpage>&#x02013;<lpage>25</lpage>. doi: <pub-id pub-id-type="doi">10.1109/ACCESS.2022.3152828</pub-id></mixed-citation>
</ref>
<ref id="B36">
<label>36.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Prottasha</surname> <given-names>NJ</given-names></name> <name><surname>Sami</surname> <given-names>AA</given-names></name> <name><surname>Kowsher</surname> <given-names>M</given-names></name> <name><surname>Murad</surname> <given-names>SA</given-names></name> <name><surname>Bairagi</surname> <given-names>A</given-names></name> <name><surname>Masud</surname> <given-names>M</given-names></name> <etal/></person-group>. <article-title>Transfer learning for sentiment analysis using BERT based supervised fine-tuning</article-title>. <source>Sensors</source>. (<year>2022</year>) <volume>22</volume>:<fpage>4157</fpage>. doi: <pub-id pub-id-type="doi">10.3390/s22114157</pub-id><pub-id pub-id-type="pmid">35684778</pub-id></mixed-citation>
</ref>
<ref id="B37">
<label>37.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Dang</surname> <given-names>NC</given-names></name> <name><surname>Garc&#x000ED;a</surname> <given-names>M</given-names></name> <name><surname>Prieta</surname> <given-names>FDL</given-names></name></person-group>. <article-title>Sentiment analysis based on deep learning: a comparative study</article-title>. <source>Electronics</source>. (<year>2020</year>) <volume>9</volume>:<fpage>483</fpage>. doi: <pub-id pub-id-type="doi">10.3390/electronics9030483</pub-id></mixed-citation>
</ref>
<ref id="B38">
<label>38.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hu</surname> <given-names>G</given-names></name> <name><surname>Lin</surname> <given-names>TE</given-names></name> <name><surname>Zhao</surname> <given-names>Y</given-names></name> <name><surname>Lu</surname> <given-names>G</given-names></name> <name><surname>Wu</surname> <given-names>Y</given-names></name> <name><surname>Li</surname> <given-names>Y</given-names></name></person-group>. <article-title>UniMSE: towards unified multimodal sentiment analysis and emotion recognition</article-title>. In: <source>Conference on Empirical Methods in Natural Language Processing</source>. (<year>2022</year>). doi: <pub-id pub-id-type="doi">10.18653/v1/2022.emnlp-main.534</pub-id></mixed-citation>
</ref>
<ref id="B39">
<label>39.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Onan</surname> <given-names>A</given-names></name></person-group>. <article-title>Sentiment analysis on product reviews based on weighted word embeddings and deep neural networks</article-title>. <source>Concurr Computat</source>. (<year>2020</year>) <volume>33</volume>:<fpage>e5909</fpage>. doi: <pub-id pub-id-type="doi">10.1002/cpe.5909</pub-id></mixed-citation>
</ref>
<ref id="B40">
<label>40.</label>
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Garreau</surname> <given-names>D</given-names></name> <name><surname>Luxburg</surname> <given-names>U</given-names></name></person-group>. <article-title>Explaining the explainer: a first theoretical analysis of LIME</article-title>. In: <source>International Conference on Artificial Intelligence and Statistics</source>. <publisher-loc>PMLR</publisher-loc> (<year>2020</year>). p. <fpage>1287</fpage>&#x02013;<lpage>1296</lpage>.</mixed-citation>
</ref>
<ref id="B41">
<label>41.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Mosca</surname> <given-names>E</given-names></name> <name><surname>Szigeti</surname> <given-names>F</given-names></name> <name><surname>Tragianni</surname> <given-names>S</given-names></name> <name><surname>Gallagher</surname> <given-names>D</given-names></name> <name><surname>Groh</surname> <given-names>G</given-names></name></person-group>. <article-title>SHAP-based explanation methods: a review for NLP interpretability</article-title>. In: <source>Proceedings of the 29th International Conference on Computational Linguistics</source> (<year>2022</year>). p. <fpage>4593</fpage>&#x02013;<lpage>4603</lpage>.</mixed-citation>
</ref>
<ref id="B42">
<label>42.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kapishnikov</surname> <given-names>A</given-names></name> <name><surname>Venugopalan</surname> <given-names>S</given-names></name> <name><surname>Avci</surname> <given-names>B</given-names></name> <name><surname>Wedin</surname> <given-names>B</given-names></name> <name><surname>Terry</surname> <given-names>M</given-names></name> <name><surname>Bolukbasi</surname> <given-names>T</given-names></name></person-group>. <article-title>Guided integrated gradients: an adaptive path method for removing noise</article-title>. In: <source>Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition</source> (<year>2021</year>). p. <fpage>5050</fpage>&#x02013;<lpage>5058</lpage>. doi: <pub-id pub-id-type="doi">10.1109/CVPR46437.2021.00501</pub-id></mixed-citation>
</ref>
<ref id="B43">
<label>43.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Xiao</surname> <given-names>T</given-names></name> <name><surname>Li</surname> <given-names>Y</given-names></name> <name><surname>Zhu</surname> <given-names>J</given-names></name> <name><surname>Yu</surname> <given-names>Z</given-names></name> <name><surname>Liu</surname> <given-names>T</given-names></name></person-group>. <article-title>Sharing attention weights for fast transformer</article-title>. <source>arXiv preprint arXiv:1906.11024</source>. (<year>2019</year>). doi: <pub-id pub-id-type="doi">10.24963/ijcai.2019/735</pub-id></mixed-citation>
</ref>
<ref id="B44">
<label>44.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Scaria</surname> <given-names>K</given-names></name> <name><surname>Gupta</surname> <given-names>H</given-names></name> <name><surname>Goyal</surname> <given-names>S</given-names></name> <name><surname>Sawant</surname> <given-names>SA</given-names></name> <name><surname>Mishra</surname> <given-names>S</given-names></name> <name><surname>Baral</surname> <given-names>C</given-names></name></person-group>. <article-title>Instructabsa: instruction learning for aspect based sentiment analysis</article-title>. <source>arXiv preprint arXiv:2302.08624</source>. (<year>2023</year>). doi: <pub-id pub-id-type="doi">10.18653/v1/2024.naacl-short.63</pub-id></mixed-citation>
</ref>
<ref id="B45">
<label>45.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Chi</surname> <given-names>X</given-names></name> <name><surname>Wang</surname> <given-names>Y</given-names></name> <name><surname>Cheng</surname> <given-names>A</given-names></name> <name><surname>Fang</surname> <given-names>P</given-names></name> <name><surname>Tian</surname> <given-names>Z</given-names></name> <name><surname>He</surname> <given-names>Y</given-names></name> <etal/></person-group>. <article-title>Mmtrail: a multimodal trailer video dataset with language and music descriptions</article-title>. <source>arXiv preprint arXiv:2407.20962</source>. (<year>2024</year>).</mixed-citation>
</ref>
<ref id="B46">
<label>46.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Lee</surname> <given-names>J</given-names></name> <name><surname>Yoon</surname> <given-names>W</given-names></name> <name><surname>Kim</surname> <given-names>S</given-names></name> <name><surname>Kim</surname> <given-names>D</given-names></name> <name><surname>Kim</surname> <given-names>S</given-names></name> <name><surname>So</surname> <given-names>CH</given-names></name> <etal/></person-group>. <article-title>BioBERT: a pre-trained biomedical language representation model for biomedical text mining</article-title>. <source>Bioinformatics</source>. (<year>2020</year>) <volume>36</volume>:<fpage>1234</fpage>&#x02013;<lpage>40</lpage>. doi: <pub-id pub-id-type="doi">10.1093/bioinformatics/btz682</pub-id><pub-id pub-id-type="pmid">31501885</pub-id></mixed-citation>
</ref>
<ref id="B47">
<label>47.</label>
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>R</given-names></name> <name><surname>Wang</surname> <given-names>J</given-names></name> <name><surname>Zhang</surname> <given-names>X</given-names></name></person-group>. <article-title>YNU-HPCC at SemEval-2024 Task 2: applying DeBERTa-v3-large to safe biomedical natural language inference for clinical trials</article-title>. In: <source>Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024)</source> (<year>2024</year>). p. <fpage>785</fpage>&#x02013;<lpage>791</lpage>. doi: <pub-id pub-id-type="doi">10.18653/v1/2024.semeval-1.112</pub-id></mixed-citation>
</ref>
</ref-list>
<fn-group>
<fn fn-type="custom" custom-type="edited-by" id="fn0001">
<p>Edited by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1778405/overview">Vimal Shanmuganathan</ext-link>, Kalaignarkarunanidhi Institute of Technology (KIT), India</p>
</fn>
<fn fn-type="custom" custom-type="reviewed-by" id="fn0002">
<p>Reviewed by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1068857/overview">Xianzhi Wang</ext-link>, University of Technology Sydney, Australia</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3109090/overview">Truong-Son Hy</ext-link>, University of Alabama at Birmingham, United States</p>
</fn>
</fn-group>
</back>
</article>