<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="1.3" xml:lang="en">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Immunol.</journal-id>
<journal-title-group>
<journal-title>Frontiers in Immunology</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Immunol.</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">1664-3224</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fimmu.2025.1613479</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Research</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Enhancing Named Entity Recognition for immunology and immune-mediated disorders</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name><surname>Chen</surname><given-names>Songyue</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>*</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Che</surname><given-names>Jinshan</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/3039110/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Sun</surname><given-names>Mingming</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Wang</surname><given-names>Yuhong</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
</contrib>
</contrib-group>
<aff id="aff1"><label>1</label><institution>Department of Rheumatology and Immunology, First Affiliated Hospital of Bengbu Medical University</institution>, <city>Bengbu</city>,&#xa0;<country country="CN">China</country></aff>
<aff id="aff2"><label>2</label><institution>Fourth Clinical College of Xinxiang Medical College, Xinxiang Central Hospital</institution>, <city>Xinxiang</city>,&#xa0;<country country="CN">China</country></aff>
<author-notes>
<corresp id="c001"><label>*</label>Correspondence: Songyue Chen, <email xlink:href="mailto:nanovcarron@hotmail.com">nanovcarron@hotmail.com</email></corresp>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-02-04">
<day>04</day>
<month>02</month>
<year>2026</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2025</year>
</pub-date>
<volume>16</volume>
<elocation-id>1613479</elocation-id>
<history>
<date date-type="received">
<day>17</day>
<month>04</month>
<year>2025</year>
</date>
<date date-type="accepted">
<day>16</day>
<month>12</month>
<year>2025</year>
</date>
<date date-type="rev-recd">
<day>02</day>
<month>11</month>
<year>2025</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2026 Chen, Che, Sun and Wang.</copyright-statement>
<copyright-year>2026</copyright-year>
<copyright-holder>Chen, Che, Sun and Wang</copyright-holder>
<license>
<ali:license_ref start_date="2026-02-04">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<sec>
<title>Introduction</title>
<p>Named Entity Recognition (NER) in the biomedical domain, particularly within immunology and immune-mediated disorders, presents unique challenges due to the presence of complex, nested, and overlapping entities. Existing NER systems often struggle with the specialized terminologies and contextual ambiguity of immunological texts, which limits their effectiveness in downstream biomedical applications.</p>
</sec>
<sec>
<title>Methods</title>
<p>To address these challenges, we propose a domain-specific NER framework that integrates structured span encoding and knowledge-guided decoding. The framework is designed to enhance recognition accuracy under low-resource and weak supervision conditions by combining a hierarchical span encoder (SpanStructEncoder) with a constraint-based decoding strategy (Contextual Constraint Decoding, CCD). We evaluate our model on three immunology-specific datasets: the NCBI Disease Corpus (immune-related diseases), SNPPhenA (genetic variants and phenotype associations), and HLA-SPREAD (HLA-disease and drug-response relations). These datasets were selected because they represent key immunological concepts such as cytokines, immune cell types, and genetic markers that underlie immune responses and disease mechanisms.</p>
</sec>
<sec>
<title>Results and discussion</title>
<p>Experimental results demonstrate that our model achieves consistent improvements in F1-score over strong biomedical baselines including BioGPT, BioLinkBERT, and SciFive. Our results confirm that incorporating structured span representations and ontology-aware decoding significantly improves entity extraction for immunology-related texts. The proposed framework provides a robust and interpretable solution for immunology-focused biomedical text mining, facilitating applications in literature curation, biomarker discovery, and clinical decision support.</p>
</sec>
</abstract>
<kwd-group>
<kwd>named entity recognition</kwd>
<kwd>biomedical NLP</kwd>
<kwd>immunology</kwd>
<kwd>structural span encoding</kwd>
<kwd>constraint-based decoding</kwd>
</kwd-group>
<funding-group>
<funding-statement>The author(s) declared that financial support was not received for this work and/or its publication.</funding-statement>
</funding-group>
<counts>
<fig-count count="4"/>
<table-count count="4"/>
<equation-count count="35"/>
<ref-count count="36"/>
<page-count count="14"/>
<word-count count="8390"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Systems Immunology</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec id="s1" sec-type="intro">
<label>1</label>
<title>Introduction</title>
<p>Named Entity Recognition (NER) is an essential task in the biomedical domain, especially in immunology and immune-mediated disorders, due to the rapid growth of scientific literature and clinical data. Identifying entities like immune cell types, cytokines, disease names, biomarkers, and therapeutic agents is crucial for knowledge extraction, literature mining, and clinical decision-making Li et&#xa0;al. (<xref ref-type="bibr" rid="B1">1</xref>). However, general-purpose NER models often fail to capture the specialized terms and relationships found in immunological texts, leading to incomplete or inaccurate annotations Sahin et&#xa0;al. (<xref ref-type="bibr" rid="B2">2</xref>). This gap highlights the need for NER systems tailored specifically to the immunology domain Jian et&#xa0;al. (<xref ref-type="bibr" rid="B3">3</xref>). These systems not only improve biomedical knowledge curation but also support the integration of multi-omics data and assist in the discovery of new insights into immune-mediated diseases Mi and Yi (<xref ref-type="bibr" rid="B4">4</xref>). Furthermore, as personalized medicine becomes more important in treating autoimmune diseases, allergies, and cancer immunotherapy, accurate entity extraction is crucial for creating patient profiles and enabling targeted treatments Weber et&#xa0;al. (<xref ref-type="bibr" rid="B5">5</xref>).</p>
<p>Early approaches to immunology-specific NER relied on structured linguistic patterns and terminological resources Hernandez-Lareda and Auccahuasi (<xref ref-type="bibr" rid="B6">6</xref>). These systems used curated dictionaries, synonym lists, and pattern-based rules to detect relevant biomedical terms in predefined contexts Khouya et&#xa0;al. (<xref ref-type="bibr" rid="B7">7</xref>). While these methods were precise and interpretable by domain experts, they required significant manual effort and struggled with new or unseen terms Bade et&#xa0;al. (<xref ref-type="bibr" rid="B8">8</xref>). Their inability to adapt to evolving language and capture nested structures limited their scalability in broader immunological datasets Yossy et&#xa0;al. (<xref ref-type="bibr" rid="B9">9</xref>). These limitations led to a shift toward more adaptive approaches Zhang et&#xa0;al. (<xref ref-type="bibr" rid="B10">10</xref>).</p>
<p>Later developments introduced probabilistic modeling techniques to increase flexibility and learning capacity in biomedical text processing Ushio and Camacho-Collados (<xref ref-type="bibr" rid="B11">11</xref>). Methods like Conditional Random Fields (CRF), Support Vector Machines (SVM), and Hidden Markov Models (HMM) were widely adopted, as they could learn patterns from annotated data and generalize to related contexts Ray et&#xa0;al. (<xref ref-type="bibr" rid="B12">12</xref>). These models captured dependencies among tokens and incorporated various syntactic and semantic features, improving recall and consistency across immunology subdomains Chen et&#xa0;al. (<xref ref-type="bibr" rid="B13">13</xref>). However, their reliance on high-quality labeled data and the need for extensive feature engineering limited their scalability Au et&#xa0;al. (<xref ref-type="bibr" rid="B14">14</xref>). As biomedical literature grew in volume and complexity, models capable of capturing deeper semantic relationships became essential Yu et&#xa0;al. (<xref ref-type="bibr" rid="B15">15</xref>).</p>
<p>Recent advancements have introduced deep learning architectures and pretrained models specifically fine-tuned for biomedical applications Li and Meng (<xref ref-type="bibr" rid="B16">16</xref>). Architectures like BiLSTM-CRF combinations enabled automatic feature learning from sequences, significantly reducing the need for handcrafted features. Models like BioBERT, SciBERT, and PubMedBERT&#x2014;pretrained on vast biomedical corpora&#x2014;have further improved performance by capturing domain-specific semantics and contextual nuances Zhang et&#xa0;al. (<xref ref-type="bibr" rid="B17">17</xref>). These models have proven effective in recognizing complex biomedical entities, but challenges remain in entity disambiguation, low-resource settings, and model generalization Taher et&#xa0;al. (<xref ref-type="bibr" rid="B18">18</xref>). Moreover, pretrained models may not always be well-suited to the terminological complexity of immunology, limiting their clinical utility in this specialized field.</p>
<p>To address the limitations of symbolic, machine learning, and general pretrained models, we propose a domain-adaptive NER framework specifically designed for immunology and immune-mediated disorders. Our approach integrates domain-specific knowledge with state-of-the-art language models, effectively leveraging both contextual and ontological information. By incorporating immunology-focused vocabularies and curating a specialized annotated corpus, the proposed method tackles the lack of granularity and contextual accuracy seen in existing systems. This framework also emphasizes adaptability to emerging terms and scalability to new subdomains in immune research. By combining expert-validated labels with deep contextual embeddings, it improves both precision and recall while enhancing model interpretability, which is crucial for clinical applications. Thus, our method addresses a critical gap in biomedical NLP by providing an entity recognition system tailored to the evolving landscape of immunology and immune-mediated disorders.</p>
<p>The proposed method has several key advantages:</p>
<list list-type="bullet">
<list-item>
<p>We introduce a novel domain-specific fine-tuning pipeline combining BioBERT with immune-specific lexical constraints, improving recognition of rare or nested entities.</p></list-item>
<list-item>
<p>Our method achieves cross-domain adaptability and high efficiency across multiple tasks including literature mining, patient stratification, and biomarker discovery.</p></list-item>
<list-item>
<p>Experimental results on benchmark and curated datasets demonstrate a 12% F1-score improvement over existing baselines, especially in identifying complex immune-related terms.</p></list-item>
</list>
</sec>
<sec id="s2">
<label>2</label>
<title>Related work</title>
<sec id="s2_1">
<label>2.1</label>
<title>Biomedical NER model advances</title>
<p>Advancements in biomedical NER models have been driven by deep learning architectures and pre-trained language models. Traditional approaches, such as rule-based and dictionary-based systems, often failed to generalize across different contexts and terminologies, particularly in highly specialized subfields like immunology Zheng et&#xa0;al. (<xref ref-type="bibr" rid="B19">19</xref>). The advent of machine learning-based methods, particularly those using Conditional Random Fields (CRFs), marked an improvement by introducing data-driven pattern recognition capabilities Shen et&#xa0;al. (<xref ref-type="bibr" rid="B20">20</xref>). However, these methods still required substantial feature engineering and struggled with domain-specific vocabulary. Recent innovations have been spearheaded by deep learning, particularly models utilizing Bidirectional Long Short-Term Memory networks (BiLSTMs) and, more recently, transformers such as BERT Hu et&#xa0;al. (<xref ref-type="bibr" rid="B21">21</xref>). Domain-specific variants of BERT, such as BioBERT, SciBERT, and PubMedBERT, have shown remarkable performance improvements on biomedical NER tasks due to their pretraining on large-scale biomedical corpora. These models better capture the syntactic and semantic nuances of medical terminology, enabling improved entity boundary detection and classification Jarrar et&#xa0;al. (<xref ref-type="bibr" rid="B22">22</xref>). For immunology, where entities such as cytokines, immune cells, and genetic markers have complex naming conventions, such pretrained models reduce reliance on annotated corpora and enhance generalizability. Another line of work has focused on multitask learning and transfer learning. These strategies aim to leverage knowledge from related tasks or domains to improve performance on low-resource tasks, which is particularly beneficial for immunology, where annotated data may be sparse Zhou et&#xa0;al. (<xref ref-type="bibr" rid="B23">23</xref>). 
Incorporating auxiliary tasks such as part-of-speech tagging or syntactic parsing has been shown to improve entity recognition by enforcing syntactic coherence. Challenges remain, particularly regarding entity normalization and disambiguation Zaratiana et&#xa0;al. (<xref ref-type="bibr" rid="B24">24</xref>). Immunology includes a high degree of synonymy and polysemy, such as interleukin-2 being referred to as IL-2 or simply IL. Deep models integrated with external knowledge bases, such as the Unified Medical Language System (UMLS) or MeSH, have been proposed to address this. Incorporating graph-based approaches, such as graph neural networks (GNNs), into NER pipelines further allows the modeling of relationships between entities, which is crucial for capturing immunological interactions Ding et&#xa0;al. (<xref ref-type="bibr" rid="B25">25</xref>).</p>
</sec>
<sec id="s2_2">
<label>2.2</label>
<title>Entity normalization and linking techniques</title>
<p>Entity normalization plays a pivotal role in transforming recognized named entities into standardized concepts within knowledge bases. This step is crucial for downstream biomedical applications, such as literature-based discovery and knowledge graph construction Shen et&#xa0;al. (<xref ref-type="bibr" rid="B26">26</xref>). The task is especially challenging in immunology due to diverse and evolving terminologies, frequent abbreviations, and context-dependent meanings of many entities Durango et&#xa0;al. (<xref ref-type="bibr" rid="B27">27</xref>). Approaches to normalization have evolved from string-matching and heuristic-based methods to more sophisticated machine learning and neural approaches Chen et&#xa0;al. (<xref ref-type="bibr" rid="B28">28</xref>). Early systems such as MetaMap or cTAKES utilized hand-crafted rules and dictionary lookups against ontologies like UMLS. While useful, these methods often failed in the presence of novel terms or ambiguous abbreviations, which are common in immunological literature Qu et&#xa0;al. (<xref ref-type="bibr" rid="B29">29</xref>). Modern normalization methods increasingly employ neural models that consider the contextual embeddings of both the detected mention and potential candidates from the knowledge base Jarrar et&#xa0;al. (<xref ref-type="bibr" rid="B30">30</xref>). Siamese networks and BERT-based dual encoders have been utilized to compute semantic similarity between mention-context pairs and canonical concepts. These models are trained on large-scale annotated datasets, which may include manually curated mappings or automatically derived weak supervision signals Darji et&#xa0;al. (<xref ref-type="bibr" rid="B31">31</xref>). Disambiguation is particularly pertinent in immunology, where entities like T cell may refer to a general immune cell type or a specific subtype with unique functional roles. 
Techniques that incorporate surrounding textual context, such as attention mechanisms or contextualized embeddings from large language models, provide improved disambiguation by capturing local semantic cues. Joint models that perform NER and normalization simultaneously have shown promise. By aligning the two tasks during training, these models can leverage inter-task dependencies, improving performance on both fronts. For instance, a model that knows IL-10 is a cytokine can use this information to better disambiguate it among other IL entities. In addition to textual signals, leveraging structured data from biomedical ontologies has become a key strategy. Integrating ontology-aware representations or using graph-based learning frameworks allows systems to capture the hierarchical and relational structure of domain knowledge. In immunology, this includes linking to concepts in resources like the ImmPort database, which offers detailed descriptions of immune-related genes, proteins, and pathways.</p>
</sec>
<sec id="s2_3">
<label>2.3</label>
<title>Domain-specific challenges in immunology</title>
<p>The application of NER to immunology and immune-mediated disorders introduces a range of domain-specific challenges that necessitate tailored solutions Varad&#xe9; et&#xa0;al. (<xref ref-type="bibr" rid="B32">32</xref>). One of the primary obstacles is the highly specialized and rapidly evolving terminology. Immunological literature frequently introduces novel biomarkers, therapeutic targets, and molecular pathways, often with inconsistent naming conventions or shorthand notations Cui et&#xa0;al. (<xref ref-type="bibr" rid="B33">33</xref>). This leads to difficulties in both entity recognition and normalization. Entities in immunology are often nested or compositional, such as CD4+ T-helper cells, which encapsulate multiple layers of semantic information including cell type, surface marker, and functional role. Detecting and correctly categorizing such nested entities requires models capable of hierarchical understanding, which is not well-supported by traditional flat NER architectures Mi and Yi (<xref ref-type="bibr" rid="B4">4</xref>). Recent work has explored layered tagging schemes or span-based classification methods to address this, allowing models to represent overlapping and nested structures effectively. Furthermore, immune-mediated disorders encompass a wide array of diseases ranging from autoimmune conditions like lupus to inflammatory diseases such as Crohn&#x2019;s disease, each with unique vocabularies and clinical descriptors. This diversity complicates the construction of comprehensive annotated corpora, which are essential for supervised learning methods Weber et&#xa0;al. (<xref ref-type="bibr" rid="B5">5</xref>). To mitigate this, few-shot and zero-shot learning approaches, often based on large pre-trained models like GPT or T5, have been explored to generalize across underrepresented conditions with limited annotations. 
Another challenge lies in the ambiguity and contextual variability of terms Hernandez-Lareda and Auccahuasi (<xref ref-type="bibr" rid="B6">6</xref>). For instance, the term interleukin may reference a family of proteins or a specific molecular subtype, depending on the context. Disambiguating such terms often requires domain-specific context and may benefit from integrating structured knowledge sources and ontologies. Recent efforts have focused on integrating multimodal data sources such as clinical notes, lab reports, and biomedical literature to enhance model robustness and context awareness. For instance, fusing text with gene expression profiles or protein interaction networks can provide richer representations that help in identifying and classifying immune-related entities with greater accuracy.</p>
</sec>
</sec>
<sec id="s3">
<label>3</label>
<title>Method</title>
<sec id="s3_1">
<label>3.1</label>
<title>Overview</title>
<p>NLP serves as a critical component for a variety of downstream applications including information retrieval, question answering, knowledge base construction, and automated content analysis. Despite decades of progress, achieving high performance on entity extraction in open-domain or domain-adaptive scenarios remains a non-trivial challenge, particularly due to the presence of ambiguous expressions, limited context, and evolving entity ontologies.</p>
<p>This section introduces the methodological structure of our proposed approach to entity extraction. The overall goal of our method is to construct a framework that not only captures the surface form of entity mentions but also effectively encodes their contextual semantics and structural dependencies. In Section 3.2, we provide the formalization of the entity extraction task within a probabilistic modeling framework. We define the input-output structure of the problem, introduce the notational conventions used throughout the paper, and clarify the assumptions regarding label space, token segmentation, and structural annotations. Particular attention is given to the representation of token-level predictions and their alignment with annotated spans. This section lays the mathematical groundwork necessary to understand how our method generalizes beyond conventional sequence labeling formulations. The following Section 3.3 presents the central component of our method, a contextualized representation module we refer to as SpanStructEncoder. Unlike standard sequence encoders that focus solely on individual token embeddings, SpanStructEncoder is designed to jointly embed token-level and span-level semantics while capturing structural correlations between overlapping and nested mentions. By employing multi-layered attention aggregation and hierarchical span filtering mechanisms, this module allows for efficient inference over variable-length entity candidates. The construction of the representation is coupled with a global span scoring function that considers both local lexical clues and contextual entity cues. In Section 3.4, we turn to the strategic dimension of our methodology. We propose a flexible knowledge-aware decoding scheme named Contextual Constraint Decoding (CCD). The aim of this strategy is to guide the prediction process using syntactic, semantic, and external knowledge-based constraints. 
CCD introduces a dynamic constraint propagation graph over candidate spans, enabling the model to suppress contradictory or overlapping predictions that violate linguistic priors or external schema consistency. This decoding strategy also supports partial supervision and cross-document entity consistency, thus broadening the applicability of the model to weakly-supervised or distantly-supervised environments. To avoid ambiguity regarding the input modality and task setting, we clarify that: All inputs to the proposed framework are exclusively biomedical text sequences derived from scientific literature or curated biomedical corpora. The model is designed specifically for named entity recognition (NER) tasks in the biomedical and immunology domains, where the input consists of tokenized sentence-level text without any visual or sensor-derived modalities. Contrary to prior versions or general-purpose architectures that included multimodal components such as image or LiDAR processing, the current version is strictly unimodal and text-centric. No voxelization, feature fusion, or spatial embedding mechanisms are involved in this pipeline. This text-only focus ensures conceptual consistency across all components, from input encoding through to constraint-aware decoding, and aligns directly with the use of biomedical ontologies for guided prediction.</p>
</sec>
<sec id="s3_2">
<label>3.2</label>
<title>Preliminaries</title>
<p>Let <inline-formula>
<mml:math display="inline" id="im1"><mml:mrow><mml:mi mathvariant="script">D</mml:mi><mml:mo>=</mml:mo><mml:mo>{</mml:mo><mml:msup><mml:mi>x</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mn>1</mml:mn><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msup><mml:mo>,</mml:mo><mml:msup><mml:mi>x</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mn>2</mml:mn><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msup><mml:mo>,</mml:mo><mml:mo>&#x2026;</mml:mo><mml:mo>,</mml:mo><mml:msup><mml:mi>x</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>N</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msup><mml:mo>}</mml:mo></mml:mrow></mml:math></inline-formula> be a corpus of <inline-formula>
<mml:math display="inline" id="im2"><mml:mi>N</mml:mi></mml:math></inline-formula> textual documents, where each document <inline-formula>
<mml:math display="inline" id="im3"><mml:mrow><mml:msup><mml:mi>x</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>i</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msup><mml:mo>=</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:msubsup><mml:mi>w</mml:mi><mml:mn>1</mml:mn><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>i</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:msubsup><mml:mi>w</mml:mi><mml:mn>2</mml:mn><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>i</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:mo>&#x2026;</mml:mo><mml:mo>,</mml:mo><mml:msubsup><mml:mi>w</mml:mi><mml:mrow><mml:msub><mml:mi>T</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>i</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula> consists of <inline-formula>
<mml:math display="inline" id="im4"><mml:mrow><mml:msub><mml:mi>T</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> tokens drawn from a finite vocabulary <inline-formula>
<mml:math display="inline" id="im5"><mml:mi mathvariant="script">V</mml:mi></mml:math></inline-formula>.</p>
<p>Formally, the extraction task can be seen as learning a mapping <xref ref-type="disp-formula" rid="eq1">Equation 1</xref>:</p>
<disp-formula id="eq1"><label>(1)</label>
<mml:math display="block" id="M1"><mml:mrow><mml:mi>f</mml:mi><mml:mo>:</mml:mo><mml:mi mathvariant="script">X</mml:mi><mml:mo>&#x2192;</mml:mo><mml:mi mathvariant="script">P</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi mathvariant="script">Y</mml:mi><mml:mo>&#xd7;</mml:mo><mml:mi mathvariant="script">I</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>,</mml:mo></mml:mrow></mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math display="inline" id="im6"><mml:mi mathvariant="script">X</mml:mi></mml:math></inline-formula> denotes the space of token sequences, <inline-formula>
<mml:math display="inline" id="im7"><mml:mi mathvariant="script">I</mml:mi></mml:math></inline-formula> denotes the set of index spans <inline-formula>
<mml:math display="inline" id="im8"><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>s</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula>, and <inline-formula>
<mml:math display="inline" id="im9"><mml:mi mathvariant="script">P</mml:mi></mml:math></inline-formula> denotes the power set. For each input <inline-formula>
<mml:math display="inline" id="im10"><mml:mrow><mml:mi>x</mml:mi><mml:mo>&#x2208;</mml:mo><mml:mi mathvariant="script">X</mml:mi></mml:mrow></mml:math></inline-formula>, the function <inline-formula>
<mml:math display="inline" id="im11"><mml:mi>f</mml:mi></mml:math></inline-formula> predicts a set of labeled spans.</p>
<p>We denote by <inline-formula>
<mml:math display="inline" id="im12"><mml:mrow><mml:msub><mml:mi>h</mml:mi><mml:mi>t</mml:mi></mml:msub><mml:mo>&#x2208;</mml:mo><mml:msup><mml:mi>&#x211d;</mml:mi><mml:mi>d</mml:mi></mml:msup></mml:mrow></mml:math></inline-formula> the contextual representation of token <inline-formula>
<mml:math display="inline" id="im13"><mml:mrow><mml:msub><mml:mi>w</mml:mi><mml:mi>t</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> obtained from a base encoder. Let <inline-formula>
<mml:math display="inline" id="im14"><mml:mrow><mml:msub><mml:mi>h</mml:mi><mml:mrow><mml:mi>s</mml:mi><mml:mo>:</mml:mo><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mi>&#x3d5;</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>h</mml:mi><mml:mi>s</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>h</mml:mi><mml:mrow><mml:mi>s</mml:mi><mml:mo>+</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mo>&#x2026;</mml:mo><mml:mo>,</mml:mo><mml:msub><mml:mi>h</mml:mi><mml:mi>t</mml:mi></mml:msub><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula> be a function that aggregates token representations over a span. A standard choice is (<xref ref-type="disp-formula" rid="eq2">Equation 2</xref>):</p>
<disp-formula id="eq2"><label>(2)</label>
<mml:math display="block" id="M2"><mml:mrow><mml:msub><mml:mi>h</mml:mi><mml:mrow><mml:mi>s</mml:mi><mml:mo>:</mml:mo><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mtext>Concat</mml:mtext><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>h</mml:mi><mml:mi>s</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>h</mml:mi><mml:mi>t</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msubsup><mml:mrow><mml:mtext>mean</mml:mtext></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mi>s</mml:mi></mml:mrow><mml:mi>t</mml:mi></mml:msubsup><mml:msub><mml:mi>h</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msubsup><mml:mrow><mml:mtext>max</mml:mtext></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mi>s</mml:mi></mml:mrow><mml:mi>t</mml:mi></mml:msubsup><mml:msub><mml:mi>h</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mo>,</mml:mo></mml:mrow></mml:math>
</disp-formula>
<p>where Concat (&#xb7;) denotes vector concatenation.</p>
<p>Each candidate span is then scored by a classification function (<xref ref-type="disp-formula" rid="eq3">Equation 3</xref>):</p>
<disp-formula id="eq3"><label>(3)</label>
<mml:math display="block" id="M3"><mml:mrow><mml:mi>P</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>y</mml:mi><mml:mo>&#xa0;</mml:mo><mml:mo>|</mml:mo><mml:mo>&#xa0;</mml:mo><mml:mi>s</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi><mml:mo>,</mml:mo><mml:mi>x</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mi>exp</mml:mi><mml:mo>&#xa0;</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:msubsup><mml:mi>&#x3b8;</mml:mi><mml:mi>y</mml:mi><mml:mo>&#x22a4;</mml:mo></mml:msubsup><mml:msub><mml:mi>h</mml:mi><mml:mrow><mml:mi>s</mml:mi><mml:mo>:</mml:mo><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mi>b</mml:mi><mml:mi>y</mml:mi></mml:msub><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mrow><mml:msub><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:msup><mml:mi>y</mml:mi><mml:mo>&#x2032;</mml:mo></mml:msup><mml:mo>&#x2208;</mml:mo><mml:mi mathvariant="script">Y</mml:mi><mml:mo>&#x222a;</mml:mo><mml:mo>{</mml:mo><mml:mtext>None</mml:mtext><mml:mo>}</mml:mo></mml:mrow></mml:msub><mml:mi>exp</mml:mi><mml:mo>&#xa0;</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:msubsup><mml:mi>&#x3b8;</mml:mi><mml:msup><mml:mi>y</mml:mi><mml:mo>&#x2032;</mml:mo></mml:msup><mml:mo>&#x22a4;</mml:mo></mml:msubsup><mml:msub><mml:mi>h</mml:mi><mml:mrow><mml:mi>s</mml:mi><mml:mo>:</mml:mo><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mi>b</mml:mi><mml:msup><mml:mi>y</mml:mi><mml:mo>&#x2032;</mml:mo></mml:msup></mml:msub><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mfrac><mml:mo>,</mml:mo></mml:mrow></mml:math>
</disp-formula>
<p>where <italic>&#x3b8;<sub>y</sub></italic> and <italic>b<sub>y</sub></italic> are class-specific parameters. A span is predicted as an entity mention if (<xref ref-type="disp-formula" rid="eq4">Equation 4</xref>):</p>
<disp-formula id="eq4"><label>(4)</label>
<mml:math display="block" id="M4"><mml:mrow><mml:mi>arg</mml:mi><mml:mi>&#xa0;</mml:mi><mml:munder><mml:mrow><mml:mi>max</mml:mi></mml:mrow><mml:mrow><mml:mi>y</mml:mi><mml:mo>&#x2208;</mml:mo><mml:mi mathvariant="script">Y</mml:mi><mml:mo>&#x222a;</mml:mo><mml:mo>{</mml:mo><mml:mtext>None</mml:mtext><mml:mo>}</mml:mo></mml:mrow></mml:munder><mml:mi>P</mml:mi><mml:mo>(</mml:mo><mml:mi>y</mml:mi><mml:mo>&#xa0;</mml:mo><mml:mo>|</mml:mo><mml:mo>&#xa0;</mml:mo><mml:mi>s</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi><mml:mo>,</mml:mo><mml:mi>x</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>&#x2260;</mml:mo><mml:mtext>None</mml:mtext><mml:mo>.</mml:mo></mml:mrow></mml:math>
</disp-formula>
<p>Unlike traditional sequence labeling approaches that rely on token-level tagging schemes such as BIOES (Begin-Inside-Outside-End-Single), our framework adopts a span-based annotation strategy. In this formulation, each candidate entity mention is defined by its start and end positions along with a corresponding entity type. This span-centric design aligns closely with the characteristics of biomedical texts, particularly in immunology, where nested and overlapping entities are common. BIOES tagging often struggles to represent such structural complexity, especially when entities partially or fully overlap, as is frequent with multi-token biomedical terms (e.g., <italic>T cell</italic> nested within <italic>CD4+ T cell activation</italic>).</p>
<p>In terms of computational complexity, span enumeration for a sequence of length <italic>T</italic> with maximum span length <italic>L</italic><sub>max</sub> results in &#x1d4aa;(<italic>T</italic> &#xd7; <italic>L</italic><sub>max</sub>) candidate spans. While this quadratic growth may appear prohibitive, we implement an efficient span pruning mechanism that eliminates low-probability candidates based on initial encoder logits and ontological compatibility constraints. The decoding process, powered by our Contextual Constraint Decoding (CCD) module, performs graph-based selection and filtering in approximately linear time with respect to the number of high-confidence spans, which are significantly fewer than the theoretical maximum. On practical hardware (NVIDIA A100), our model processes batches of 512-token documents in under 120 ms per training iteration and completes inference in under 15 ms per document on average. This balance between flexibility and computational efficiency allows our method to remain scalable in real-world biomedical applications, even under dense annotation conditions or weak supervision scenarios.</p>
<p>To formulate span enumeration, we define the set of all possible spans in document <italic>x</italic> up to a maximum length <italic>L</italic><sub>max</sub> (<xref ref-type="disp-formula" rid="eq5">Equation 5</xref>):</p>
<disp-formula id="eq5"><label>(5)</label>
<mml:math display="block" id="M5"><mml:mrow><mml:mi mathvariant="script">S</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>x</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mo>{</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:mi>s</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mi>&#xa0;</mml:mi><mml:mo>|</mml:mo><mml:mi>&#xa0;</mml:mi><mml:mn>1</mml:mn><mml:mo>&#x2264;</mml:mo><mml:mi>s</mml:mi><mml:mo>&#x2264;</mml:mo><mml:mi>t</mml:mi><mml:mo>&#x2264;</mml:mo><mml:mi>T</mml:mi><mml:mo>,</mml:mo><mml:mi>&#xa0;</mml:mi><mml:mi>t</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mi>s</mml:mi><mml:mo>+</mml:mo><mml:mn>1</mml:mn><mml:mo>&#x2264;</mml:mo><mml:msub><mml:mi>L</mml:mi><mml:mrow><mml:mtext>max</mml:mtext></mml:mrow></mml:msub><mml:mo>}</mml:mo><mml:mo>.</mml:mo></mml:mrow></mml:math>
</disp-formula>
<p>The total number of candidate spans is <italic>O</italic>(<italic>T</italic> &#xb7; <italic>L</italic><sub>max</sub>).</p>
<p>Let <inline-formula>
<mml:math display="inline" id="im15"><mml:mrow><mml:msub><mml:mi>e</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>s</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>t</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>y</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula> be a labeled entity. The full prediction of a model for document <italic>x</italic> is a set (<xref ref-type="disp-formula" rid="eq6">Equation 6</xref>):</p>
<disp-formula id="eq6"><label>(6)</label>
<mml:math display="block" id="M6"><mml:mrow><mml:mover accent="true"><mml:mi mathvariant="script">E</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:mo>=</mml:mo><mml:mo>{</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:mi>s</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi><mml:mo>,</mml:mo><mml:mi>y</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>&#x2208;</mml:mo><mml:mi mathvariant="script">S</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>x</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>&#xd7;</mml:mo><mml:mi mathvariant="script">Y</mml:mi><mml:mo>&#xa0;</mml:mo><mml:mo stretchy="false">|</mml:mo><mml:mi>arg</mml:mi><mml:mi>&#xa0;</mml:mi><mml:munder><mml:mrow><mml:mi>max</mml:mi></mml:mrow><mml:mrow><mml:msup><mml:mi>y</mml:mi><mml:mo>&#x2032;</mml:mo></mml:msup><mml:mo>&#x2208;</mml:mo><mml:mi mathvariant="script">Y</mml:mi></mml:mrow></mml:munder><mml:mi>P</mml:mi><mml:mo>(</mml:mo><mml:msup><mml:mi>y</mml:mi><mml:mo>&#x2032;</mml:mo></mml:msup><mml:mo>&#xa0;</mml:mo><mml:mo stretchy="false">|</mml:mo><mml:mo>&#xa0;</mml:mo><mml:mi>s</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi><mml:mo>,</mml:mo><mml:mi>x</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mi>y</mml:mi><mml:mo>}</mml:mo><mml:mo>.</mml:mo></mml:mrow></mml:math>
</disp-formula>
<p>In order to capture higher-order dependencies between overlapping spans, we define a global scoring function <inline-formula>
<mml:math display="inline" id="im16"><mml:mrow><mml:mi>F</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>x</mml:mi><mml:mo>,</mml:mo><mml:mi mathvariant="script">E</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula> over sets of candidate spans (<xref ref-type="disp-formula" rid="eq7">Equation 7</xref>):</p>
<disp-formula id="eq7"><label>(7)</label>
<mml:math display="block" id="M7"><mml:mrow><mml:mi>F</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>x</mml:mi><mml:mo>,</mml:mo><mml:mi mathvariant="script">E</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:munder><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>s</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi><mml:mo>,</mml:mo><mml:mi>y</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>&#x2208;</mml:mo><mml:mi mathvariant="script">E</mml:mi></mml:mrow></mml:munder><mml:mi>log</mml:mi><mml:mi>&#xa0;</mml:mi><mml:mi>P</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>y</mml:mi><mml:mo>&#xa0;</mml:mo><mml:mo>|</mml:mo><mml:mo>&#xa0;</mml:mo><mml:mi>s</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi><mml:mo>,</mml:mo><mml:mi>x</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>&#x2212;</mml:mo><mml:mi>&#x3bb;</mml:mi><mml:munder><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mtable equalrows="true" equalcolumns="true"><mml:mtr><mml:mtd><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>s</mml:mi><mml:mn>1</mml:mn></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>t</mml:mi><mml:mn>1</mml:mn></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>y</mml:mi><mml:mn>1</mml:mn></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mo>&#x2260;</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>s</mml:mi><mml:mn>2</mml:mn></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>t</mml:mi><mml:mn>2</mml:mn></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>y</mml:mi><mml:mn>2</mml:mn></mml:msub><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>s</mml:mi><mml:mn>1</mml:mn></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>t</mml:mi><mml:mn>1</mml:mn></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mo>&#x2229;</mml:mo><mml:mo 
stretchy="false">(</mml:mo><mml:msub><mml:mi>s</mml:mi><mml:mn>2</mml:mn></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>t</mml:mi><mml:mn>2</mml:mn></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mo>&#x2260;</mml:mo><mml:mo>&#x2205;</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:munder><mml:mtext>&#x3a9;</mml:mtext><mml:mo stretchy="false">(</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>s</mml:mi><mml:mn>1</mml:mn></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>t</mml:mi><mml:mn>1</mml:mn></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mo>,</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>s</mml:mi><mml:mn>2</mml:mn></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>t</mml:mi><mml:mn>2</mml:mn></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mo stretchy="false">)</mml:mo><mml:mo>,</mml:mo></mml:mrow></mml:math>
</disp-formula>
<p>where &#x2126; is a penalty term encoding conflict between overlapping spans, and <inline-formula>
<mml:math display="inline" id="im17"><mml:mrow><mml:mi>&#x3bb;</mml:mi><mml:mo>&gt;</mml:mo><mml:mn>0</mml:mn></mml:mrow></mml:math></inline-formula> controls the regularization strength.</p>
<p>We further introduce a compatibility function <inline-formula>
<mml:math display="inline" id="im18"><mml:mrow><mml:mtext>&#x3a8;</mml:mtext><mml:mo stretchy="false">(</mml:mo><mml:mi>y</mml:mi><mml:mo>,</mml:mo><mml:msup><mml:mi>y</mml:mi><mml:mo>&#x2032;</mml:mo></mml:msup><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula> between labels that governs allowable co-occurrence patterns of neighboring or overlapping entities (<xref ref-type="disp-formula" rid="eq8">Equation 8</xref>):</p>
<disp-formula id="eq8"><label>(8)</label>
<mml:math display="block" id="M8"><mml:mrow><mml:mtext>&#x3a8;</mml:mtext><mml:mo stretchy="false">(</mml:mo><mml:mi>y</mml:mi><mml:mo>,</mml:mo><mml:msup><mml:mi>y</mml:mi><mml:mo>&#x2032;</mml:mo></mml:msup><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mo>{</mml:mo><mml:mtable columnalign="left" equalrows="true" equalcolumns="true"><mml:mtr columnalign="left"><mml:mtd columnalign="left"><mml:mn>1</mml:mn></mml:mtd><mml:mtd columnalign="left"><mml:mrow><mml:mtext>if&#x2004;</mml:mtext><mml:mo stretchy="false">(</mml:mo><mml:mi>y</mml:mi><mml:mo>,</mml:mo><mml:msup><mml:mi>y</mml:mi><mml:mo>&#x2032;</mml:mo></mml:msup><mml:mo stretchy="false">)</mml:mo><mml:mo>&#x2208;</mml:mo><mml:mi mathvariant="script">C</mml:mi><mml:mo>,</mml:mo></mml:mrow></mml:mtd></mml:mtr><mml:mtr columnalign="left"><mml:mtd columnalign="left"><mml:mrow><mml:mo>&#x2212;</mml:mo><mml:mi>&#x221e;</mml:mi></mml:mrow></mml:mtd><mml:mtd columnalign="left"><mml:mrow><mml:mtext>otherwise</mml:mtext><mml:mo>,</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math display="inline" id="im19"><mml:mi mathvariant="script">C</mml:mi></mml:math></inline-formula> &#x2286; <inline-formula>
<mml:math display="inline" id="im20"><mml:mi mathvariant="script">Y</mml:mi></mml:math></inline-formula> &#xd7; <inline-formula>
<mml:math display="inline" id="im21"><mml:mi mathvariant="script">Y</mml:mi></mml:math></inline-formula> is the set of admissible label transitions.</p>
<p>The goal is to compute the most probable, compatible subset of spans (<xref ref-type="disp-formula" rid="eq9">Equation 9</xref>):</p>
<disp-formula id="eq9"><label>(9)</label>
<mml:math display="block" id="M9"><mml:mrow><mml:mover accent="true"><mml:mi mathvariant="script">E</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:mo>=</mml:mo><mml:mi>arg</mml:mi><mml:mi>&#xa0;</mml:mi><mml:munder><mml:mrow><mml:mi>max</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="script">E</mml:mi><mml:mo>&#x2286;</mml:mo><mml:mi mathvariant="script">S</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>x</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:munder><mml:mi>F</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>x</mml:mi><mml:mo>,</mml:mo><mml:mi mathvariant="script">E</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mtext>&#x2003;s</mml:mtext><mml:mo>.</mml:mo><mml:mtext>t</mml:mtext><mml:mo>.</mml:mo><mml:mtext>&#x2003;</mml:mtext><mml:mo>&#x2200;</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:mi>s</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi><mml:mo>,</mml:mo><mml:mi>y</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>,</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:msup><mml:mi>s</mml:mi><mml:mo>&#x2032;</mml:mo></mml:msup><mml:mo>,</mml:mo><mml:msup><mml:mi>t</mml:mi><mml:mo>&#x2032;</mml:mo></mml:msup><mml:mo>,</mml:mo><mml:msup><mml:mi>y</mml:mi><mml:mo>&#x2032;</mml:mo></mml:msup><mml:mo stretchy="false">)</mml:mo><mml:mo>&#x2208;</mml:mo><mml:mi mathvariant="script">E</mml:mi><mml:mo>,</mml:mo><mml:mtext>&#xa0;&#x3a8;</mml:mtext><mml:mo stretchy="false">(</mml:mo><mml:mi>y</mml:mi><mml:mo>,</mml:mo><mml:msup><mml:mi>y</mml:mi><mml:mo>&#x2032;</mml:mo></mml:msup><mml:mo stretchy="false">)</mml:mo><mml:mo>&gt;</mml:mo><mml:mo>&#x2212;</mml:mo><mml:mi>&#x221e;</mml:mi><mml:mo>.</mml:mo></mml:mrow></mml:math>
</disp-formula>
<p>To reason about nested entities, we define a span hierarchy &#x210b; such that (<xref ref-type="disp-formula" rid="eq10">Equation 10</xref>):</p>
<disp-formula id="eq10"><label>(10)</label>
<mml:math display="block" id="M10"><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:msup><mml:mi>s</mml:mi><mml:mo>&#x2032;</mml:mo></mml:msup><mml:mo>,</mml:mo><mml:msup><mml:mi>t</mml:mi><mml:mo>&#x2032;</mml:mo></mml:msup><mml:mo stretchy="false">)</mml:mo><mml:mo>&#x227a;</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:mi>s</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mtext>&#x2004;</mml:mtext><mml:mo>&#x21d4;</mml:mo><mml:mtext>&#x2004;</mml:mtext><mml:mi>s</mml:mi><mml:mo>&#x2264;</mml:mo><mml:msup><mml:mi>s</mml:mi><mml:mo>&#x2032;</mml:mo></mml:msup><mml:mo>&#x2264;</mml:mo><mml:msup><mml:mi>t</mml:mi><mml:mo>&#x2032;</mml:mo></mml:msup><mml:mo>&#x2264;</mml:mo><mml:mi>t</mml:mi><mml:mtext>&#x2004;and&#x2004;</mml:mtext><mml:mo stretchy="false">(</mml:mo><mml:msup><mml:mi>s</mml:mi><mml:mo>&#x2032;</mml:mo></mml:msup><mml:mo>,</mml:mo><mml:msup><mml:mi>t</mml:mi><mml:mo>&#x2032;</mml:mo></mml:msup><mml:mo stretchy="false">)</mml:mo><mml:mo>&#x2260;</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:mi>s</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>,</mml:mo></mml:mrow></mml:math>
</disp-formula>
<p>and impose a structural prior that encourages consistency between nested spans. We define (<xref ref-type="disp-formula" rid="eq11">Equation 11</xref>):</p>
<disp-formula id="eq11"><label>(11)</label>
<mml:math display="block" id="M11"><mml:mrow><mml:msub><mml:mi>R</mml:mi><mml:mrow><mml:mtext>nest</mml:mtext></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi mathvariant="script">E</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mstyle displaystyle="true"><mml:munder><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>s</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi><mml:mo>,</mml:mo><mml:mi>y</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>&#x2208;</mml:mo><mml:mi mathvariant="script">E</mml:mi></mml:mrow></mml:munder><mml:mrow><mml:mstyle displaystyle="true"><mml:munder><mml:mo>&#x2211;</mml:mo><mml:mtable columnalign="left"><mml:mtr><mml:mtd><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msup><mml:mi>s</mml:mi><mml:mo>&#x2032;</mml:mo></mml:msup><mml:mo>,</mml:mo><mml:msup><mml:mi>t</mml:mi><mml:mo>&#x2032;</mml:mo></mml:msup><mml:mo>,</mml:mo><mml:msup><mml:mi>y</mml:mi><mml:mo>&#x2032;</mml:mo></mml:msup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>&#x2208;</mml:mo><mml:mi mathvariant="script">E</mml:mi></mml:mrow></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msup><mml:mi>s</mml:mi><mml:mo>&#x2032;</mml:mo></mml:msup><mml:mo>,</mml:mo><mml:msup><mml:mi>t</mml:mi><mml:mo>&#x2032;</mml:mo></mml:msup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>&#x227a;</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>s</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:munder><mml:mrow><mml:mi>&#x3b4;</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>y</mml:mi><mml:mo>,</mml:mo><mml:msup><mml:mi>y</mml:mi><mml:mo>&#x2032;</mml:mo></mml:msup></mml:mrow><mml:mo 
stretchy="false">)</mml:mo></mml:mrow><mml:mo>&#xb7;</mml:mo><mml:msub><mml:mi>&#x3b3;</mml:mi><mml:mrow><mml:mi>y</mml:mi><mml:mo>,</mml:mo><mml:msup><mml:mi>y</mml:mi><mml:mo>&#x2032;</mml:mo></mml:msup></mml:mrow></mml:msub><mml:mo>,</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:mrow></mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math display="inline" id="im22"><mml:mi>&#x3b4;</mml:mi></mml:math></inline-formula> is the Kronecker delta, and <inline-formula>
<mml:math display="inline" id="im23"><mml:mrow><mml:msub><mml:mi>&#x3b3;</mml:mi><mml:mrow><mml:mi>y</mml:mi><mml:mo>,</mml:mo><mml:msup><mml:mi>y</mml:mi><mml:mo>&#x2032;</mml:mo></mml:msup></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula> is a type-dependent consistency prior.</p>
<p>Another dimension of our formulation involves the modeling of cross-token dependencies via adjacency aware similarity functions. For any two tokens <italic>i</italic> and <italic>j</italic>, define (<xref ref-type="disp-formula" rid="eq12">Equation 12</xref>):</p>
<disp-formula id="eq12"><label>(12)</label>
<mml:math display="block" id="M12"><mml:mrow><mml:mtext>Sim</mml:mtext><mml:mo stretchy="false">(</mml:mo><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>j</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mi>&#x3c3;</mml:mi><mml:mo>(</mml:mo><mml:msubsup><mml:mi>h</mml:mi><mml:mi>i</mml:mi><mml:mo>&#x22a4;</mml:mo></mml:msubsup><mml:mi>W</mml:mi><mml:msub><mml:mi>h</mml:mi><mml:mi>j</mml:mi></mml:msub><mml:mo>)</mml:mo><mml:mo>,</mml:mo></mml:mrow></mml:math>
</disp-formula>
<p>where <italic>&#x3c3;</italic> is a sigmoid function and <italic>W</italic> is a learned bilinear interaction matrix. This is used to construct a token similarity graph <italic>G</italic> = (<italic>V, E</italic>), where <italic>V</italic> = {1<italic>,&#x2026;,T</italic>} and edges are defined by a threshold over Sim(<italic>i, j</italic>).</p>
<p>The graph structure is incorporated into span encoding via a graph-augmented attention mechanism (<xref ref-type="disp-formula" rid="eq13">Equation 13</xref>):</p>
<disp-formula id="eq13"><label>(13)</label>
<mml:math display="block" id="M13"><mml:mrow><mml:mover accent="true"><mml:msub><mml:mi>h</mml:mi><mml:mi>t</mml:mi></mml:msub><mml:mo>&#x7e;</mml:mo></mml:mover><mml:mo>=</mml:mo><mml:msub><mml:mi>h</mml:mi><mml:mi>t</mml:mi></mml:msub><mml:mo>+</mml:mo><mml:munder><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>j</mml:mi><mml:mo>:</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:mi>t</mml:mi><mml:mo>,</mml:mo><mml:mi>j</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>&#x2208;</mml:mo><mml:mi>E</mml:mi></mml:mrow></mml:munder><mml:msub><mml:mi>&#x3b1;</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:msub><mml:mi>h</mml:mi><mml:mi>j</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:mtext>&#x2003;</mml:mtext><mml:msub><mml:mi>&#x3b1;</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mtext>exp</mml:mtext><mml:mo stretchy="false">(</mml:mo><mml:mtext>Sim</mml:mtext><mml:mo stretchy="false">(</mml:mo><mml:mi>t</mml:mi><mml:mo>,</mml:mo><mml:mi>j</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mrow><mml:msub><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>k</mml:mi><mml:mo>:</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:mi>t</mml:mi><mml:mo>,</mml:mo><mml:mi>k</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>&#x2208;</mml:mo><mml:mi>E</mml:mi></mml:mrow></mml:msub><mml:mtext>exp</mml:mtext><mml:mo stretchy="false">(</mml:mo><mml:mtext>Sim</mml:mtext><mml:mo stretchy="false">(</mml:mo><mml:mi>t</mml:mi><mml:mo>,</mml:mo><mml:mi>k</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mfrac><mml:mo>.</mml:mo></mml:mrow></mml:math>
</disp-formula>
</sec>
<sec id="s3_3">
<label>3.3</label>
<title>SpanStructEncoder</title>
<p>We propose a novel architectural component, SpanStructEncoder, designed to encode and score textual spans for entity extraction in immunology-related texts. Unlike conventional sequence labeling architectures, SpanStructEncoder jointly models hierarchical span semantics and structural dependencies to better capture nested and overlapping entities (as shown in <xref ref-type="fig" rid="f1"><bold>Figure&#xa0;1</bold></xref>).</p>
<fig id="f1" position="float">
<label>Figure&#xa0;1</label>
<caption>
<p>Schematic diagram of SpanStructEncoder. The figure presents the full pipeline of SpanStructEncoder, which integrates multi-channel span aggregation for capturing both boundary-sensitive and internal semantic features, structural attention graph modeling to encode span-level interactions, and type-aware coherence regularization that aligns span embeddings according to semantic type prototypes under weak supervision settings. Each component contributes to constructing robust and discriminative span representations suitable for biomedical named entity recognition tasks with nested and overlapping structures.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fimmu-16-1613479-g001.tif">
<alt-text content-type="machine-generated">Diagram illustrating a multi-step process for span aggregation and scoring in text processing. Tokens are processed to enumerate candidate spans, undergoing multi-channel span aggregation with concatenation, convolution, and pooling. A structural attention graph processes genes, diseases, and chemicals, while type-aware coherence regularization applies contrastive loss. Key terms include Conv1d for convolution and span-span relations.</alt-text>
</graphic></fig>
<sec id="s3_3_1">
<label>3.3.1</label>
<title>Multi-Channel Span Aggregation</title>
<p>To generate expressive span representations for Named Entity Recognition (NER), we adopt a multi-channel aggregation strategy that integrates both boundary and internal span information (as shown in <xref ref-type="fig" rid="f2"><bold>Figure&#xa0;2</bold></xref>).</p>
<fig id="f2" position="float">
<label>Figure&#xa0;2</label>
<caption>
<p>Schematic diagram of multi-channel span aggregation. The figure illustrates how the model integrates span-level semantic features from multiple attention heads by computing query, key, and value projections over the input and aggregating their outputs through a dedicated multi-channel span aggregation module. The resulting representations are concatenated and passed through a linear projection to form a unified span embedding, capturing both contextual and boundary-level cues across varying receptive fields.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fimmu-16-1613479-g002.tif">
<alt-text content-type="machine-generated">Diagram of a multi-head attention mechanism. Span embeddings are inputted into a linear projection, leading to three heads labeled Head 1, Head 2, and Head 3. Each head outputs with R values of 3, 2, and 1, respectively. All outputs combine into a single output leading to spans.</alt-text>
</graphic></fig>
<p>Let <inline-formula>
<mml:math display="inline" id="im24"><mml:mrow><mml:mi>x</mml:mi><mml:mo>=</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>w</mml:mi><mml:mn>1</mml:mn></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>w</mml:mi><mml:mn>2</mml:mn></mml:msub><mml:mo>,</mml:mo><mml:mo>&#x2026;</mml:mo><mml:mo>,</mml:mo><mml:msub><mml:mi>w</mml:mi><mml:mi>T</mml:mi></mml:msub><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula> be an input token sequence and <inline-formula>
<mml:math display="inline" id="im25"><mml:mrow><mml:mi mathvariant="script">S</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>x</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mo>{</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:mi>s</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>&#xa0;</mml:mo><mml:mo>|</mml:mo><mml:mi>&#xa0;</mml:mi><mml:mn>1</mml:mn><mml:mo>&#x2264;</mml:mo><mml:mi>s</mml:mi><mml:mo>&#x2264;</mml:mo><mml:mi>t</mml:mi><mml:mo>&#x2264;</mml:mo><mml:mi>T</mml:mi><mml:mo>,</mml:mo><mml:mi>&#xa0;</mml:mi><mml:mi>t</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mi>s</mml:mi><mml:mo>+</mml:mo><mml:mn>1</mml:mn><mml:mo>&#x2264;</mml:mo><mml:msub><mml:mi>L</mml:mi><mml:mrow><mml:mtext>max</mml:mtext></mml:mrow></mml:msub><mml:mo>}</mml:mo></mml:mrow></mml:math></inline-formula> denote the set of candidate spans, where <inline-formula>
<mml:math display="inline" id="im26"><mml:mrow><mml:msub><mml:mi>L</mml:mi><mml:mrow><mml:mtext>max</mml:mtext></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula> controls maximum span width. Each span <inline-formula>
<mml:math display="inline" id="im27"><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>s</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula> is represented as a fixed-dimensional embedding vector <inline-formula>
<mml:math display="inline" id="im28"><mml:mrow><mml:msub><mml:mi>r</mml:mi><mml:mrow><mml:mi>s</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2208;</mml:mo><mml:msup><mml:mi>&#x211d;</mml:mi><mml:mrow><mml:msub><mml:mi>d</mml:mi><mml:mi>r</mml:mi></mml:msub></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula> that combines lexical, contextual, and positional features. Token-level contextual embeddings <inline-formula>
<mml:math display="inline" id="im29"><mml:mrow><mml:msub><mml:mi>h</mml:mi><mml:mi>t</mml:mi></mml:msub><mml:mo>&#x2208;</mml:mo><mml:msup><mml:mi>&#x211d;</mml:mi><mml:mi>d</mml:mi></mml:msup></mml:mrow></mml:math></inline-formula> are first obtained using a pre-trained transformer-based encoder (<xref ref-type="disp-formula" rid="eq14">Equation 14</xref>):</p>
<disp-formula id="eq14"><label>(14)</label>
<mml:math display="block" id="M14"><mml:mrow><mml:msub><mml:mi>h</mml:mi><mml:mi>t</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mtext>Encoder</mml:mtext><mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>w</mml:mi><mml:mn>1</mml:mn></mml:msub><mml:mo>,</mml:mo><mml:mo>&#x2026;</mml:mo><mml:mo>,</mml:mo><mml:msub><mml:mi>w</mml:mi><mml:mi>T</mml:mi></mml:msub><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mi>t</mml:mi></mml:msub><mml:mo>.</mml:mo></mml:mrow></mml:math>
</disp-formula>
<p>We construct <inline-formula>
<mml:math display="inline" id="im30"><mml:mrow><mml:msub><mml:mi>r</mml:mi><mml:mrow><mml:mi>s</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula> by aggregating information from multiple channels: the start token embedding <inline-formula>
<mml:math display="inline" id="im31"><mml:mrow><mml:msub><mml:mi>h</mml:mi><mml:mi>s</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>, the end token embedding <inline-formula>
<mml:math display="inline" id="im32"><mml:mrow><mml:msub><mml:mi>h</mml:mi><mml:mi>t</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>, the mean of the embeddings over the span, the element-wise max over the span, and a positional encoding that encodes span width. Formally, the span embedding is given by <xref ref-type="disp-formula" rid="eq15">Equation 15</xref>:</p>
<disp-formula id="eq15"><label>(15)</label>
<mml:math display="block" id="M15"><mml:mrow><mml:msub><mml:mi>r</mml:mi><mml:mrow><mml:mi>s</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mtext>Concat</mml:mtext><mml:mi>&#xa0;</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>h</mml:mi><mml:mi>s</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>h</mml:mi><mml:mi>t</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msubsup><mml:mrow><mml:mtext>mean</mml:mtext></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mi>s</mml:mi></mml:mrow><mml:mi>t</mml:mi></mml:msubsup><mml:msub><mml:mi>h</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msubsup><mml:mrow><mml:mtext>max</mml:mtext></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mi>s</mml:mi></mml:mrow><mml:mi>t</mml:mi></mml:msubsup><mml:msub><mml:mi>h</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>&#x3c8;</mml:mi><mml:mrow><mml:mi>s</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mo>,</mml:mo></mml:mrow></mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math display="inline" id="im33"><mml:mrow><mml:msub><mml:mi>&#x3c8;</mml:mi><mml:mrow><mml:mi>s</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2208;</mml:mo><mml:msup><mml:mi>&#x211d;</mml:mi><mml:mi>d</mml:mi></mml:msup></mml:mrow></mml:math></inline-formula> is a learned embedding that captures positional priors based on span length <inline-formula>
<mml:math display="inline" id="im34"><mml:mrow><mml:mi>&#x2113;</mml:mi><mml:mo>=</mml:mo><mml:mi>t</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mi>s</mml:mi><mml:mo>+</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:math></inline-formula>, computed as <xref ref-type="disp-formula" rid="eq16">Equation 16</xref>:</p>
<disp-formula id="eq16"><label>(16)</label>
<mml:math display="block" id="M16"><mml:mrow><mml:msub><mml:mi>&#x3c8;</mml:mi><mml:mrow><mml:mi>s</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mi>W</mml:mi><mml:mrow><mml:mtext>len</mml:mtext></mml:mrow></mml:msub><mml:mo stretchy="false">[</mml:mo><mml:mi>&#x2113;</mml:mi><mml:mo stretchy="false">]</mml:mo><mml:mo>+</mml:mo><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:mtext>len</mml:mtext></mml:mrow></mml:msub><mml:mo>.</mml:mo></mml:mrow></mml:math>
</disp-formula>
<p>To further enhance expressiveness, we introduce a gating mechanism that adaptively weights different channels based on their relevance. Let <italic>g<sub>s,t</sub></italic> = <italic>&#x3c3;</italic>(<italic>W<sub>g</sub>r<sub>s,t</sub></italic> + <italic>b<sub>g</sub></italic>) be a learned gate vector, where <italic>&#x3c3;</italic>(&#xb7;) denotes the sigmoid activation, and the final span representation becomes <xref ref-type="disp-formula" rid="eq17">Equation 17</xref>:</p>
<disp-formula id="eq17"><label>(17)</label>
<mml:math display="block" id="M17"><mml:mrow><mml:msubsup><mml:mi>r</mml:mi><mml:mrow><mml:mi>s</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mtext>final</mml:mtext></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:msub><mml:mi>g</mml:mi><mml:mrow><mml:mi>s</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2299;</mml:mo><mml:msub><mml:mi>r</mml:mi><mml:mrow><mml:mi>s</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo></mml:mrow></mml:math>
</disp-formula>
<p>with &#x2299; denoting element-wise multiplication. This gated formulation allows the model to suppress noisy signals and prioritize the most informative components. By combining multiple lexical aggregators and position-aware embeddings, this approach effectively encodes both boundary-sensitive and content-sensitive features, which is critical for detecting variable-length biomedical entities with ambiguous or discontinuous mentions. Moreover, this structure lays the groundwork for subsequent span-level interactions by ensuring that each span is represented with high semantic granularity and structural awareness.</p>
</sec>
<sec id="s3_3_2">
<label>3.3.2</label>
<title>Structural attention graph</title>
<p>To effectively capture the rich structural interactions among overlapping and nested entity spans, we construct a structural attention graph <italic>G</italic> = (<italic>V,E</italic>), where each node represents a candidate span (<italic>s, t</italic>) &#x2208; <inline-formula>
<mml:math display="inline" id="im35"><mml:mi mathvariant="script">S</mml:mi></mml:math></inline-formula>(<italic>x</italic>) and edges connect spans based on predefined structural relations such as inclusion (one span nested in another), overlap, or adjacency. These relations are critical in biomedical texts where entities frequently appear in nested forms or share overlapping tokens. For each node <italic>i</italic>, we define a neighborhood <italic>N</italic>(<italic>i</italic>) comprising spans with structural relevance to <italic>i</italic>. To model the influence of neighboring spans, we apply a graph-based attention mechanism that computes a weighted structural context vector <italic>c<sub>i</sub></italic> for each span <italic>i</italic> (<xref ref-type="disp-formula" rid="eq18">Equation 18</xref>):</p>
<disp-formula id="eq18"><label>(18)</label>
<mml:math display="block" id="M18"><mml:mrow><mml:msub><mml:mi>c</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:munder><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>j</mml:mi><mml:mo>&#x2208;</mml:mo><mml:mi>N</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>i</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:munder><mml:msub><mml:mi>&#x3b1;</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>&#xb7;</mml:mo><mml:msub><mml:mi>r</mml:mi><mml:mi>j</mml:mi></mml:msub><mml:mo>,</mml:mo></mml:mrow></mml:math>
</disp-formula>
<p>where the attention weight <italic>&#x3b1;<sub>ij</sub></italic> measures the compatibility between span <italic>i</italic> and neighbor <italic>j</italic> and is defined as <xref ref-type="disp-formula" rid="eq19">Equation 19</xref>:</p>
<disp-formula id="eq19"><label>(19)</label>
<mml:math display="block" id="M19"><mml:mrow><mml:msub><mml:mi>&#x3b1;</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mtext>exp</mml:mtext><mml:mo stretchy="false">(</mml:mo><mml:msubsup><mml:mi>r</mml:mi><mml:mi>i</mml:mi><mml:mo>&#x22a4;</mml:mo></mml:msubsup><mml:msub><mml:mi>W</mml:mi><mml:mi>a</mml:mi></mml:msub><mml:msub><mml:mi>r</mml:mi><mml:mi>j</mml:mi></mml:msub><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mrow><mml:msub><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>k</mml:mi><mml:mo>&#x2208;</mml:mo><mml:mi>N</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>i</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msub><mml:mtext>exp</mml:mtext><mml:mo stretchy="false">(</mml:mo><mml:msubsup><mml:mi>r</mml:mi><mml:mi>i</mml:mi><mml:mo>&#x22a4;</mml:mo></mml:msubsup><mml:msub><mml:mi>W</mml:mi><mml:mi>a</mml:mi></mml:msub><mml:msub><mml:mi>r</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mfrac><mml:mo>,</mml:mo></mml:mrow></mml:math>
</disp-formula>
<p>with <inline-formula>
<mml:math display="inline" id="im36"><mml:mrow><mml:msub><mml:mi>W</mml:mi><mml:mi>a</mml:mi></mml:msub><mml:mo>&#x2208;</mml:mo><mml:msup><mml:mi>&#x211d;</mml:mi><mml:mrow><mml:msub><mml:mi>d</mml:mi><mml:mi>r</mml:mi></mml:msub><mml:mo>&#xd7;</mml:mo><mml:msub><mml:mi>d</mml:mi><mml:mi>r</mml:mi></mml:msub></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula> being a trainable bilinear projection. This allows the model to dynamically weigh neighbor contributions based on semantic similarity and structural roles. The resulting context vector is passed through a feed-forward network and combined with the original span embedding via residual connection and normalization to produce the enhanced embedding (<xref ref-type="disp-formula" rid="eq20">Equation 20</xref>):</p>
<disp-formula id="eq20"><label>(20)</label>
<mml:math display="block" id="M20"><mml:mrow><mml:msub><mml:mover accent="true"><mml:mi>r</mml:mi><mml:mo>&#x2dc;</mml:mo></mml:mover><mml:mi>i</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mtext>LayerNorm</mml:mtext><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>r</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>+</mml:mo><mml:mtext>FFN</mml:mtext><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>c</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mo stretchy="false">)</mml:mo><mml:mo>.</mml:mo></mml:mrow></mml:math>
</disp-formula>
<p>To further strengthen span interactions, we integrate a span-level self-attention mechanism using a Transformer encoder that operates on the full set of span embeddings <italic>R</italic> = {<italic>r<sub>i</sub></italic>}. Attention scores are computed as <xref ref-type="disp-formula" rid="eq21">Equation 21</xref>:</p>
<disp-formula id="eq21"><label>(21)</label>
<mml:math display="block" id="M21"><mml:mrow><mml:mtext>Attn</mml:mtext><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>r</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>r</mml:mi><mml:mi>j</mml:mi></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>r</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:msub><mml:mi>W</mml:mi><mml:mi>Q</mml:mi></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:msup><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>r</mml:mi><mml:mi>j</mml:mi></mml:msub><mml:msub><mml:mi>W</mml:mi><mml:mi>K</mml:mi></mml:msub><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>&#x22a4;</mml:mo></mml:msup></mml:mrow><mml:mrow><mml:msqrt><mml:mi>d</mml:mi></mml:msqrt></mml:mrow></mml:mfrac><mml:mo>+</mml:mo><mml:mi>&#x3d5;</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>&#xa0;</mml:mi><mml:mi>j</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>,</mml:mo></mml:mrow></mml:math>
</disp-formula>
<p>where <italic>W<sub>Q</sub></italic>, <inline-formula>
<mml:math display="inline" id="im37"><mml:mrow><mml:msub><mml:mi>W</mml:mi><mml:mi>K</mml:mi></mml:msub><mml:mo>&#x2208;</mml:mo><mml:msup><mml:mi>&#x211d;</mml:mi><mml:mrow><mml:msub><mml:mi>d</mml:mi><mml:mi>r</mml:mi></mml:msub><mml:mo>&#xd7;</mml:mo><mml:msub><mml:mi>d</mml:mi><mml:mi>r</mml:mi></mml:msub></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula> are query/key projections and <italic>&#x3d5;</italic>(<italic>i,j</italic>) encodes relative span distance and nesting depth, allowing the model to incorporate both semantic similarity and positional structure. These layers of structural modeling enable SpanStructEncoder to build context-aware, interaction-sensitive span representations that capture the intricate dependencies often encountered in biomedical NER tasks, particularly in domains like immunology where hierarchical concepts and overlapping boundaries are prevalent.</p>
</sec>
<sec id="s3_3_3">
<label>3.3.3</label>
<title>Type-aware coherence regularization</title>
<p>To enhance the model&#x2019;s robustness in low-resource and weakly-supervised scenarios, common in biomedical NER, we introduce a type-aware coherence regularization strategy that encourages embedding consistency within each entity type. The core idea is to maintain compactness in the latent space by aligning predicted spans of the same type around a learned type-specific centroid. Given a batch of documents <inline-formula>
<mml:math display="inline" id="im38"><mml:mi mathvariant="script">B</mml:mi></mml:math></inline-formula> and predicted spans <inline-formula>
<mml:math display="inline" id="im39"><mml:mrow><mml:msub><mml:mi mathvariant="script">E</mml:mi><mml:mi>b</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> from document <inline-formula>
<mml:math display="inline" id="im40"><mml:mrow><mml:mi>b</mml:mi><mml:mo>&#x2208;</mml:mo><mml:mi mathvariant="script">B</mml:mi></mml:mrow></mml:math></inline-formula>, we collect all span representations predicted with the same top-1 label <inline-formula>
<mml:math display="inline" id="im41"><mml:mi>y</mml:mi></mml:math></inline-formula> across the batch to form a type-specific support set <inline-formula>
<mml:math display="inline" id="im42"><mml:mrow><mml:msub><mml:mi mathvariant="script">B</mml:mi><mml:mi>y</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mo>&#x222a;</mml:mo><mml:mrow><mml:mi>b</mml:mi><mml:mo>&#x2208;</mml:mo><mml:mi mathvariant="script">B</mml:mi></mml:mrow></mml:msub><mml:mo>{</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:mi>s</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>&#x2208;</mml:mo><mml:msub><mml:mi mathvariant="script">E</mml:mi><mml:mi>b</mml:mi></mml:msub><mml:mo>|</mml:mo><mml:mi>y</mml:mi><mml:mo>=</mml:mo><mml:mtext>arg&#xa0;max&#xa0;</mml:mtext><mml:mi>P</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>y</mml:mi><mml:mi>&#xa0;</mml:mi><mml:mo>|</mml:mo><mml:mi>&#xa0;</mml:mi><mml:mi>s</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi><mml:mo>,</mml:mo><mml:mi>x</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>}</mml:mo></mml:mrow></mml:math></inline-formula>. We then compute the centroid <inline-formula>
<mml:math display="inline" id="im43"><mml:mrow><mml:msub><mml:mi>&#x3bc;</mml:mi><mml:mi>y</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> of span embeddings for type <inline-formula>
<mml:math display="inline" id="im44"><mml:mi>y</mml:mi></mml:math></inline-formula> (<xref ref-type="disp-formula" rid="eq22">Equation 22</xref>):</p>
<disp-formula id="eq22"><label>(22)</label>
<mml:math display="block" id="M22"><mml:mrow><mml:msub><mml:mi>&#x3bc;</mml:mi><mml:mi>y</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mrow><mml:mo>|</mml:mo><mml:msub><mml:mi mathvariant="script">B</mml:mi><mml:mi>y</mml:mi></mml:msub><mml:mo>|</mml:mo></mml:mrow></mml:mfrac><mml:munder><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>s</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>&#x2208;</mml:mo><mml:msub><mml:mi mathvariant="script">B</mml:mi><mml:mi>y</mml:mi></mml:msub></mml:mrow></mml:munder><mml:msub><mml:mover accent="true"><mml:mi>r</mml:mi><mml:mo>&#x2dc;</mml:mo></mml:mover><mml:mrow><mml:mi>s</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>.</mml:mo></mml:mrow></mml:math>
</disp-formula>
<p>This centroid represents a prototype embedding for entity type <italic>y</italic>. To ensure that span representations do not deviate significantly from this type prototype, we impose a regularization loss <inline-formula>
<mml:math display="inline" id="im45"><mml:mrow><mml:msub><mml:mi mathvariant="script">L</mml:mi><mml:mrow><mml:mtext>center</mml:mtext></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula> that penalizes intra-class variance (<xref ref-type="disp-formula" rid="eq23">Equation 23</xref>):</p>
<disp-formula id="eq23"><label>(23)</label>
<mml:math display="block" id="M23"><mml:mrow><mml:msub><mml:mi mathvariant="script">L</mml:mi><mml:mrow><mml:mtext>center</mml:mtext></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:munder><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>s</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi><mml:mo>,</mml:mo><mml:mi>y</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>&#x2208;</mml:mo><mml:msub><mml:mi mathvariant="script">E</mml:mi><mml:mi>b</mml:mi></mml:msub></mml:mrow></mml:munder><mml:mo>&#x2225;</mml:mo><mml:msub><mml:mover accent="true"><mml:mi>r</mml:mi><mml:mo>&#x2dc;</mml:mo></mml:mover><mml:mrow><mml:mi>s</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>&#x3bc;</mml:mi><mml:mi>y</mml:mi></mml:msub><mml:msup><mml:mo>&#x2225;</mml:mo><mml:mn>2</mml:mn></mml:msup><mml:mo>.</mml:mo></mml:mrow></mml:math>
</disp-formula>
<p>This regularization acts as a soft constraint that implicitly aligns the model&#x2019;s output distribution with a type-consistent geometry in the latent space. To further enhance type-level discrimination, especially under noisy supervision, we extend the centroid alignment with a margin-based contrastive variant. Let <inline-formula>
<mml:math display="inline" id="im46"><mml:mrow><mml:msub><mml:mi>&#x3bc;</mml:mi><mml:msup><mml:mi>y</mml:mi><mml:mo>&#x2032;</mml:mo></mml:msup></mml:msub></mml:mrow></mml:math></inline-formula> denote centroids of competing labels <inline-formula>
<mml:math display="inline" id="im47"><mml:mrow><mml:msup><mml:mi>y</mml:mi><mml:mo>&#x2032;</mml:mo></mml:msup><mml:mo>&#x2260;</mml:mo><mml:mi>y</mml:mi></mml:mrow></mml:math></inline-formula>, and define a margin-based penalty (<xref ref-type="disp-formula" rid="eq24">Equation 24</xref>):</p>
<disp-formula id="eq24"><label>(24)</label>
<mml:math display="block" id="M24"><mml:mrow><mml:msub><mml:mi mathvariant="script">L</mml:mi><mml:mrow><mml:mtext>contrast</mml:mtext></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:munder><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>s</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi><mml:mo>,</mml:mo><mml:mi>y</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:munder><mml:munder><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:msup><mml:mi>y</mml:mi><mml:mo>&#x2032;</mml:mo></mml:msup><mml:mo>&#x2260;</mml:mo><mml:mi>y</mml:mi></mml:mrow></mml:munder><mml:msub><mml:mrow><mml:mo>[</mml:mo><mml:mi>&#x3b4;</mml:mi><mml:mo>+</mml:mo><mml:mo>&#x2225;</mml:mo><mml:msub><mml:mover accent="true"><mml:mi>r</mml:mi><mml:mo>&#x2dc;</mml:mo></mml:mover><mml:mrow><mml:mi>s</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>&#x3bc;</mml:mi><mml:mi>y</mml:mi></mml:msub><mml:msup><mml:mo>&#x2225;</mml:mo><mml:mn>2</mml:mn></mml:msup><mml:mo>&#x2212;</mml:mo><mml:mo>&#x2225;</mml:mo><mml:msub><mml:mover accent="true"><mml:mi>r</mml:mi><mml:mo>&#x2dc;</mml:mo></mml:mover><mml:mrow><mml:mi>s</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>&#x3bc;</mml:mi><mml:msup><mml:mi>y</mml:mi><mml:mo>&#x2032;</mml:mo></mml:msup></mml:msub><mml:msup><mml:mo>&#x2225;</mml:mo><mml:mn>2</mml:mn></mml:msup><mml:mo>]</mml:mo></mml:mrow><mml:mo>+</mml:mo></mml:msub><mml:mo>,</mml:mo></mml:mrow></mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math display="inline" id="im48"><mml:mi>&#x3b4;</mml:mi></mml:math></inline-formula> is a fixed margin and [&#xb7;]<sub>+</sub> denotes the hinge loss. This encourages embeddings to stay closer to the correct type centroid while maintaining a minimum distance from incorrect ones. We incorporate the coherence term into the global inference objective alongside exclusivity constraints and compatibility scoring, yielding the final decoding formulation (<xref ref-type="disp-formula" rid="eq25">Equation 25</xref>):</p>
<disp-formula id="eq25"><label>(25)</label>
<mml:math display="block" id="M25"><mml:mrow><mml:mover accent="true"><mml:mi mathvariant="script">E</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:mo>=</mml:mo><mml:mi>arg</mml:mi><mml:mi>&#xa0;</mml:mi><mml:munder><mml:mrow><mml:mi>max</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="script">E</mml:mi><mml:mo>&#x2286;</mml:mo><mml:mi mathvariant="script">S</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>x</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:munder><mml:mi>C</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi mathvariant="script">E</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>+</mml:mo><mml:mi>&#x3b2;</mml:mi><mml:mo>&#xb7;</mml:mo><mml:msub><mml:mi mathvariant="script">L</mml:mi><mml:mrow><mml:mtext>center</mml:mtext></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:mi>&#x3b3;</mml:mi><mml:mo>&#xb7;</mml:mo><mml:mtext>OverlapPenalty</mml:mtext><mml:mo stretchy="false">(</mml:mo><mml:mi mathvariant="script">E</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>,</mml:mo></mml:mrow></mml:math>
</disp-formula>
<p>where hyperparameters <inline-formula>
<mml:math display="inline" id="im49"><mml:mi>&#x3b2;</mml:mi></mml:math></inline-formula> and <inline-formula>
<mml:math display="inline" id="im50"><mml:mi>&#x3b3;</mml:mi></mml:math></inline-formula> balance entity coherence with structural exclusivity. By aligning span embeddings with semantic type anchors, this regularization substantially improves generalization under domain shift and mitigates prediction noise in weakly labeled corpora.</p>
</sec>
</sec>
<sec id="s3_4">
<label>3.4</label>
<title>Contextual Constraint Decoding</title>
<p>To complement the SpanStructEncoder architecture, we propose Contextual Constraint Decoding (CCD), a decoding framework that refines span predictions using structural constraints, contextual coherence, and ontology-aware inference. CCD addresses challenges such as overlapping predictions, semantic inconsistency, and noise from weak supervision by performing constrained optimization over candidate spans S(<italic>x</italic>) (as shown in <xref ref-type="fig" rid="f3"><bold>Figure&#xa0;3</bold></xref>).</p>
<fig id="f3" position="float">
<label>Figure&#xa0;3</label>
<caption>
<p>Schematic diagram of the contextual constraint decoding framework. The figure presents the full pipeline of CCD, which includes graph-guided context propagation for modeling span-level dependencies, ontology-aware label filtering for enforcing type consistency based on the biomedical schema, and a unified decoding module that integrates intra-channel and inter-channel attention mechanisms to refine and select entity spans in a globally coherent manner. Each module contributes to improving decoding precision under dense and overlapping annotation scenarios.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fimmu-16-1613479-g003.tif">
<alt-text content-type="machine-generated">Flowchart illustrating a process for contextual constraint decoding (CCD) in entity prediction. Steps include text tokenization, span encoding, and embedding. Span embeddings undergo feature transformation and transformation again, leading to graph-guided context propagation. Span attention applies softmax for context refinement. The process concludes with classification and entity prediction.</alt-text>
</graphic></fig>
<sec id="s3_4_1">
<label>3.4.1</label>
<title>Constraint-based span selection</title>
<p>The core of CCD lies in refining the span prediction set <inline-formula>
<mml:math display="inline" id="im51"><mml:mrow><mml:msub><mml:mover accent="true"><mml:mi mathvariant="script">E</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:mrow><mml:mtext>raw</mml:mtext></mml:mrow></mml:msub><mml:mo>&#xa0;</mml:mo></mml:mrow></mml:math></inline-formula> into a coherent and structurally valid subset <inline-formula>
<mml:math display="inline" id="im52"><mml:mrow><mml:msup><mml:mover accent="true"><mml:mi mathvariant="script">E</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:mo>*</mml:mo></mml:msup></mml:mrow></mml:math></inline-formula> by solving a constrained optimization problem. Rather than treating span predictions independently, CCD performs global selection guided by prior knowledge and contextual consistency (as shown in <xref ref-type="fig" rid="f4"><bold>Figure&#xa0;4</bold></xref>).</p>
<fig id="f4" position="float">
<label>Figure&#xa0;4</label>
<caption>
<p>Schematic diagram of constraint-based span selection. The figure illustrates how domain-specific and domain-invariant features are extracted through a multi-branch convolutional encoder, followed by span selection guided by motion dynamics, environmental embeddings, and temporal memory. A dynamic motion predictor and a modular classifier are jointly used to resolve overlapping and noisy span predictions under contextual and structural constraints, leading to robust activity inference across heterogeneous domains.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fimmu-16-1613479-g004.tif">
<alt-text content-type="machine-generated">Flowchart depicting a system for entity predictions. It starts with &#x201c;Span Representations&#x201d; followed by &#x201c;Graph-Guided Context Propagation,&#x201d; then &#x201c;Ontology-Aware Label Filtering.&#x201d; Next, &#x201c;Constraint-Based Span Selection&#x201d; leads to &#x201c;Classification Head.&#x201d; An inset diagram shows relationships between &#x201c;Gene,&#x201d; &#x201c;Protein,&#x201d; and another entity, contributing to predictions for gene, protein, and disease entities.</alt-text>
</graphic></fig>
<p>The objective balances three competing goals: maximizing model confidence over spans, minimizing structural inconsistencies, and rewarding semantic coherence. Formally, the optimization target is defined as <xref ref-type="disp-formula" rid="eq26">Equation 26</xref>:</p>
<disp-formula id="eq26"><label>(26)</label>
<mml:math display="block" id="M26"><mml:mrow><mml:msup><mml:mover accent="true"><mml:mi mathvariant="script">E</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:mo>*</mml:mo></mml:msup><mml:mo>=</mml:mo><mml:mi>arg</mml:mi><mml:mi>&#xa0;</mml:mi><mml:munder><mml:mrow><mml:mi>max</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="script">E</mml:mi><mml:mo>&#x2286;</mml:mo><mml:msub><mml:mover accent="true"><mml:mi mathvariant="script">E</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:mrow><mml:mtext>raw</mml:mtext><mml:mi>&#xa0;</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:munder><mml:munder><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>s</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi><mml:mo>,</mml:mo><mml:mi>y</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>&#x2208;</mml:mo><mml:mi mathvariant="script">E</mml:mi></mml:mrow></mml:munder><mml:mi>log</mml:mi><mml:mi>&#xa0;</mml:mi><mml:mi>P</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>y</mml:mi><mml:mo stretchy="false">|</mml:mo><mml:mi>s</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi><mml:mo>,</mml:mo><mml:mi>x</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>&#x3bb;</mml:mi><mml:mn>1</mml:mn></mml:msub><mml:mo>&#xb7;</mml:mo><mml:msub><mml:mi mathvariant="script">C</mml:mi><mml:mrow><mml:mtext>overlap</mml:mtext></mml:mrow></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:mi mathvariant="script">E</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>&#x3bb;</mml:mi><mml:mn>2</mml:mn></mml:msub><mml:mo>&#xb7;</mml:mo><mml:msub><mml:mi mathvariant="script">C</mml:mi><mml:mrow><mml:mtext>conflict</mml:mtext></mml:mrow></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:mi mathvariant="script">E</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>+</mml:mo><mml:msub><mml:mi>&#x3bb;</mml:mi><mml:mn>3</mml:mn></mml:msub><mml:mo>&#xb7;</mml:mo><mml:msub><mml:mi mathvariant="script">K</mml:mi><mml:mrow><mml:mtext>context</mml:mtext></mml:mrow></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:mi mathvariant="script">E</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>,</mml:mo></mml:mrow></mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math display="inline" id="im53"><mml:mrow><mml:mi>P</mml:mi><mml:mo>(</mml:mo><mml:mi>y</mml:mi><mml:mo>&#xa0;</mml:mo><mml:mo>|</mml:mo><mml:mo>&#xa0;</mml:mo><mml:mi>s</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi><mml:mo>,</mml:mo><mml:mi>x</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula> is the predicted probability from SpanStructEncoder. The first constraint, <inline-formula>
<mml:math display="inline" id="im54"><mml:mrow><mml:msub><mml:mi mathvariant="script">C</mml:mi><mml:mrow><mml:mtext>overlap</mml:mtext></mml:mrow></mml:msub><mml:mo>,</mml:mo></mml:mrow></mml:math></inline-formula> penalizes overlapping spans assigned different types, preventing multiple entities from occupying the same token space (<xref ref-type="disp-formula" rid="eq27">Equation 27</xref>):</p>
<disp-formula id="eq27"><label>(27)</label>
<mml:math display="block" id="M27"><mml:mrow><mml:msub><mml:mi mathvariant="script">C</mml:mi><mml:mrow><mml:mtext>overlap</mml:mtext></mml:mrow></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:mi mathvariant="script">E</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:munder><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mtable equalrows="true" equalcolumns="true"><mml:mtr><mml:mtd><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>s</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi><mml:mo>,</mml:mo><mml:mi>y</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>,</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:msup><mml:mi>s</mml:mi><mml:mo>&#x2032;</mml:mo></mml:msup><mml:mo>,</mml:mo><mml:msup><mml:mi>t</mml:mi><mml:mo>&#x2032;</mml:mo></mml:msup><mml:mo>,</mml:mo><mml:msup><mml:mi>y</mml:mi><mml:mo>&#x2032;</mml:mo></mml:msup><mml:mo stretchy="false">)</mml:mo><mml:mo>&#x2208;</mml:mo><mml:mi>E</mml:mi></mml:mrow></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>s</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>&#x2229;</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:msup><mml:mi>s</mml:mi><mml:mo>&#x2032;</mml:mo></mml:msup><mml:mo>,</mml:mo><mml:msup><mml:mi>t</mml:mi><mml:mo>&#x2032;</mml:mo></mml:msup><mml:mo stretchy="false">)</mml:mo><mml:mo>&#x2260;</mml:mo><mml:mo>&#x2205;</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:munder><mml:mn>1</mml:mn><mml:mo stretchy="false">[</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:mi>s</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>&#x2260;</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:msup><mml:mi>s</mml:mi><mml:mo>&#x2032;</mml:mo></mml:msup><mml:mo>,</mml:mo><mml:msup><mml:mi>t</mml:mi><mml:mo>&#x2032;</mml:mo></mml:msup><mml:mo stretchy="false">)</mml:mo><mml:mo stretchy="false">]</mml:mo><mml:mo>.</mml:mo></mml:mrow></mml:math>
</disp-formula>
<p>The second constraint, <inline-formula>
<mml:math display="inline" id="im55"><mml:mrow><mml:msub><mml:mi mathvariant="script">C</mml:mi><mml:mrow><mml:mtext>conflict</mml:mtext></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula>, incorporates type-level incompatibilities defined via a schema-aware function <inline-formula>
<mml:math display="inline" id="im56"><mml:mrow><mml:mtext>&#x3a8;</mml:mtext><mml:mo stretchy="false">(</mml:mo><mml:mi>y</mml:mi><mml:mo>,</mml:mo><mml:msup><mml:mi>y</mml:mi><mml:mo>&#x2032;</mml:mo></mml:msup><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula> and penalizes semantically invalid co-occurrence (<xref ref-type="disp-formula" rid="eq28">Equation 28</xref>):</p>
<disp-formula id="eq28"><label>(28)</label>
<mml:math display="block" id="M28"><mml:mrow><mml:msub><mml:mi mathvariant="script">C</mml:mi><mml:mrow><mml:mtext>conflict</mml:mtext></mml:mrow></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:mi mathvariant="script">E</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:munder><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mtable equalrows="true" equalcolumns="true"><mml:mtr><mml:mtd><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>s</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi><mml:mo>,</mml:mo><mml:mi>y</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>,</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:msup><mml:mi>s</mml:mi><mml:mo>&#x2032;</mml:mo></mml:msup><mml:mo>,</mml:mo><mml:msup><mml:mi>t</mml:mi><mml:mo>&#x2032;</mml:mo></mml:msup><mml:mo>,</mml:mo><mml:msup><mml:mi>y</mml:mi><mml:mo>&#x2032;</mml:mo></mml:msup><mml:mo stretchy="false">)</mml:mo><mml:mo>&#x2208;</mml:mo><mml:mi>E</mml:mi></mml:mrow></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mrow><mml:mtext>&#x3a8;</mml:mtext><mml:mo stretchy="false">(</mml:mo><mml:mi>y</mml:mi><mml:mo>,</mml:mo><mml:msup><mml:mi>y</mml:mi><mml:mo>&#x2032;</mml:mo></mml:msup><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mn>0</mml:mn></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:munder><mml:mi>&#x3b4;</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mtext>rel</mml:mtext><mml:mo stretchy="false">(</mml:mo><mml:mi>s</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi><mml:mo>,</mml:mo><mml:msup><mml:mi>s</mml:mi><mml:mo>&#x2032;</mml:mo></mml:msup><mml:mo>,</mml:mo><mml:msup><mml:mi>t</mml:mi><mml:mo>&#x2032;</mml:mo></mml:msup><mml:mo stretchy="false">)</mml:mo><mml:mo stretchy="false">)</mml:mo><mml:mo>.</mml:mo></mml:mrow></mml:math>
</disp-formula>
<p>Here, <inline-formula>
<mml:math display="inline" id="im57"><mml:mrow><mml:mi>&#x3b4;</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mo>&#xb7;</mml:mo><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula> accounts for structural relations like nesting or adjacency, making the constraint sensitive to span layout. To counterbalance these penalties, CCD introduces a contextual consistency reward <inline-formula>
<mml:math display="inline" id="im58"><mml:mrow><mml:msub><mml:mi mathvariant="script">K</mml:mi><mml:mrow><mml:mtext>context</mml:mtext></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula>, which boosts configurations where type-homogeneous mentions co-occur meaningfully (<xref ref-type="disp-formula" rid="eq29">Equation 29</xref>):</p>
<disp-formula id="eq29"><label>(29)</label>
<mml:math display="block" id="M29"><mml:mrow><mml:msub><mml:mi mathvariant="script">K</mml:mi><mml:mrow><mml:mtext>context</mml:mtext></mml:mrow></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:mi mathvariant="script">E</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:munder><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>y</mml:mi><mml:mo>&#x2208;</mml:mo><mml:mi mathvariant="script">Y</mml:mi></mml:mrow></mml:munder><mml:msub><mml:mi>&#x3b7;</mml:mi><mml:mi>y</mml:mi></mml:msub><mml:mo>&#xb7;</mml:mo><mml:mi>log</mml:mi><mml:mi>&#xa0;</mml:mi><mml:mo>(</mml:mo><mml:mn>1</mml:mn><mml:mo>+</mml:mo><mml:munder><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>s</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi><mml:mo>,</mml:mo><mml:mi>y</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>&#x2208;</mml:mo><mml:mi mathvariant="script">E</mml:mi></mml:mrow></mml:munder><mml:mtext>exp</mml:mtext><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>&#x3b1;</mml:mi><mml:mrow><mml:mi>s</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mo>)</mml:mo><mml:mo>,</mml:mo></mml:mrow></mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math display="inline" id="im59"><mml:mrow><mml:msub><mml:mi>&#x3b1;</mml:mi><mml:mrow><mml:mi>s</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula> reflects confidence from attention scores and <italic>&#x3b7;<sub>y</sub></italic> is a frequency-aware prior. Altogether, this span selection approach enforces both structural and semantic validity, yielding entity sets that are not only probable under the model&#x2019;s local outputs but also globally consistent with linguistic priors, domain constraints, and discourse-level coherence. CCD performs this optimization efficiently via beam search, progressively constructing candidate sets while pruning those violating critical constraints. This principled formulation greatly improves robustness to over-prediction and type confusion, especially in settings with dense, overlapping annotations common in biomedical texts.</p>
</sec>
<sec id="s3_4_2">
<label>3.4.2</label>
<title>Graph-guided context propagation</title>
<p>To model the global interactions between span candidates and promote coherent predictions, CCD constructs a span-level graph <inline-formula>
<mml:math display="inline" id="im60"><mml:mrow><mml:mi>G</mml:mi><mml:mo>=</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:mi>V</mml:mi><mml:mo>,</mml:mo><mml:mi>E</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula> where each node represents a candidate span <inline-formula>
<mml:math display="inline" id="im61"><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>s</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi><mml:mo>,</mml:mo><mml:mi>y</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula> and edges capture either semantic similarity or structural proximity. This graph-based formulation enables the decoding process to go beyond local predictions by aggregating contextual evidence from related spans across the entire input. Edge formation relies on both content similarity and geometric relations such as overlap or adjacency. Two spans are connected if their contextual embeddings exhibit high semantic similarity or if they share overlapping tokens. The semantic similarity between spans is computed using a learned attention function (<xref ref-type="disp-formula" rid="eq30">Equation 30</xref>):</p>
<disp-formula id="eq30"><label>(30)</label>
<mml:math display="block" id="M30"><mml:mrow><mml:mtext>Sim</mml:mtext><mml:mo stretchy="false">(</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:mi>s</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>,</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:msup><mml:mi>s</mml:mi><mml:mo>&#x2032;</mml:mo></mml:msup><mml:mo>,</mml:mo><mml:msup><mml:mi>t</mml:mi><mml:mo>&#x2032;</mml:mo></mml:msup><mml:mo stretchy="false">)</mml:mo><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mi>&#x3c3;</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:msubsup><mml:mover accent="true"><mml:mi>r</mml:mi><mml:mo>&#x2dc;</mml:mo></mml:mover><mml:mrow><mml:mi>s</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi></mml:mrow><mml:mo>&#x22a4;</mml:mo></mml:msubsup><mml:msub><mml:mi>W</mml:mi><mml:mi>g</mml:mi></mml:msub><mml:msub><mml:mover accent="true"><mml:mi>r</mml:mi><mml:mo>&#x2dc;</mml:mo></mml:mover><mml:mrow><mml:msup><mml:mi>s</mml:mi><mml:mo>&#x2032;</mml:mo></mml:msup><mml:mo>,</mml:mo><mml:msup><mml:mi>t</mml:mi><mml:mo>&#x2032;</mml:mo></mml:msup></mml:mrow></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mo>,</mml:mo></mml:mrow></mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math display="inline" id="im62"><mml:mrow><mml:msub><mml:mover accent="true"><mml:mi>r</mml:mi><mml:mo>&#x2dc;</mml:mo></mml:mover><mml:mrow><mml:mi>s</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula> and <inline-formula>
<mml:math display="inline" id="im63"><mml:mrow><mml:msub><mml:mover accent="true"><mml:mi>r</mml:mi><mml:mo>&#x2dc;</mml:mo></mml:mover><mml:mrow><mml:msup><mml:mi>s</mml:mi><mml:mo>&#x2032;</mml:mo></mml:msup><mml:mo>,</mml:mo><mml:msup><mml:mi>t</mml:mi><mml:mo>&#x2032;</mml:mo></mml:msup></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula> are the structure-aware span representations, <inline-formula>
<mml:math display="inline" id="im64"><mml:mrow><mml:msub><mml:mi>W</mml:mi><mml:mi>g</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> is a trainable projection matrix, and <inline-formula>
<mml:math display="inline" id="im65"><mml:mrow><mml:mi>&#x3c3;</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mo>&#xb7;</mml:mo><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula> denotes the sigmoid activation. Edges are added if <inline-formula>
<mml:math display="inline" id="im66"><mml:mrow><mml:mtext>Sim</mml:mtext><mml:mo>&#x2265;</mml:mo><mml:mi>&#x3f5;</mml:mi></mml:mrow></mml:math></inline-formula> or if the spans overlap. Once the graph is constructed, CCD propagates confidence scores across connected spans via a message-passing mechanism inspired by graph neural networks. At each step <inline-formula>
<mml:math display="inline" id="im67"><mml:mi>k</mml:mi></mml:math></inline-formula>, the activation of node <italic>i</italic> is updated as <xref ref-type="disp-formula" rid="eq31">Equation 31</xref>:</p>
<disp-formula id="eq31"><label>(31)</label>
<mml:math display="block" id="M31"><mml:mrow><mml:msubsup><mml:mi>z</mml:mi><mml:mi>i</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>k</mml:mi><mml:mo>+</mml:mo><mml:mn>1</mml:mn><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:mi>&#x3c3;</mml:mi><mml:mo>(</mml:mo><mml:munder><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>j</mml:mi><mml:mo>&#x2208;</mml:mo><mml:mi>N</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>i</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:munder><mml:msub><mml:mi>A</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>&#xb7;</mml:mo><mml:mi>W</mml:mi><mml:msubsup><mml:mi>z</mml:mi><mml:mi>j</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>k</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo>+</mml:mo><mml:mi>U</mml:mi><mml:msub><mml:mover accent="true"><mml:mi>r</mml:mi><mml:mo>&#x2dc;</mml:mo></mml:mover><mml:mi>i</mml:mi></mml:msub><mml:mo>)</mml:mo><mml:mo>,</mml:mo></mml:mrow></mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math display="inline" id="im68"><mml:mrow><mml:msub><mml:mi>A</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula> is the normalized adjacency weight, <inline-formula>
<mml:math display="inline" id="im69"><mml:mi>W</mml:mi></mml:math></inline-formula> and <inline-formula>
<mml:math display="inline" id="im70"><mml:mi>U</mml:mi></mml:math></inline-formula> are trainable matrices, and <inline-formula>
<mml:math display="inline" id="im71"><mml:mrow><mml:msubsup><mml:mi>z</mml:mi><mml:mi>i</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mn>0</mml:mn><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup></mml:mrow></mml:math></inline-formula> is initialized from the span logits or representation. After <inline-formula>
<mml:math display="inline" id="im72"><mml:mi>K</mml:mi></mml:math></inline-formula> rounds of propagation, the final contextualized confidence score for each span is computed using <xref ref-type="disp-formula" rid="eq32">Equation 32</xref>:</p>
<disp-formula id="eq32"><label>(32)</label>
<mml:math display="block" id="M32"><mml:mrow><mml:mover accent="true"><mml:mi>P</mml:mi><mml:mo>&#x2dc;</mml:mo></mml:mover><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>y</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>&#xa0;</mml:mo><mml:mo>|</mml:mo><mml:mo>&#xa0;</mml:mo><mml:msub><mml:mi>s</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>t</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mtext>softmax</mml:mtext><mml:mo stretchy="false">(</mml:mo><mml:msubsup><mml:mi>z</mml:mi><mml:mi>i</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>K</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo stretchy="false">)</mml:mo><mml:mo>.</mml:mo></mml:mrow></mml:math>
</disp-formula>
<p>This iterative process allows each span&#x2019;s prediction to be influenced by semantically similar or structurally related spans, which is especially useful for handling ambiguous mentions and reinforcing consistent labeling across repeated expressions in biomedical documents. The message-passing mechanism serves as a form of regularization, smoothing predictions across correlated spans while suppressing isolated outliers.</p>
</sec>
<sec id="s3_4_3">
<label>3.4.3</label>
<title>Ontology-aware label filtering</title>
<p>To improve semantic validity and reduce noisy or incompatible predictions, CCD integrates biomedical ontologies and external type schemas <inline-formula>
<mml:math display="inline" id="im73"><mml:mi mathvariant="script">O</mml:mi></mml:math></inline-formula> into the decoding process through ontology-aware label filtering. Biomedical entity types often follow hierarchical structures or exhibit mutual exclusivity. To encode such schema-level constraints, we define a label projection matrix <inline-formula>
<mml:math display="inline" id="im74"><mml:mrow><mml:mtext>&#x3a9;</mml:mtext><mml:mo>&#x2208;</mml:mo><mml:msup><mml:mi>&#x211d;</mml:mi><mml:mrow><mml:mo>|</mml:mo><mml:mi mathvariant="script">Y</mml:mi><mml:mo>|</mml:mo><mml:mo>&#xd7;</mml:mo><mml:msub><mml:mi>d</mml:mi><mml:mi>o</mml:mi></mml:msub></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula>, where each row corresponds to a type and each entry <inline-formula>
<mml:math display="inline" id="im75"><mml:mrow><mml:msub><mml:mtext>&#x3a9;</mml:mtext><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula> captures a directed relationship such as subclassing (<xref ref-type="disp-formula" rid="eq33">Equation 33</xref>):</p>
<disp-formula id="eq33"><label>(33)</label>
<mml:math display="block" id="M33"><mml:mrow><mml:msub><mml:mtext>&#x3a9;</mml:mtext><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mo>{</mml:mo><mml:mtable columnalign="left" equalrows="true" equalcolumns="true"><mml:mtr columnalign="left"><mml:mtd columnalign="left"><mml:mn>1</mml:mn></mml:mtd><mml:mtd columnalign="left"><mml:mrow><mml:mtext>if&#x2004;</mml:mtext><mml:msub><mml:mi>y</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:msub><mml:mo>&#x227a;</mml:mo><mml:mi mathvariant="script">O</mml:mi></mml:msub><mml:msub><mml:mi>y</mml:mi><mml:mi>j</mml:mi></mml:msub><mml:mo>,</mml:mo></mml:mrow></mml:mtd></mml:mtr><mml:mtr columnalign="left"><mml:mtd columnalign="left"><mml:mn>0</mml:mn></mml:mtd><mml:mtd columnalign="left"><mml:mrow><mml:mtext>otherwise</mml:mtext><mml:mo>.</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:math>
</disp-formula>
<p>This matrix can be derived from curated ontologies like UMLS or MeSH, and enables the model to perform schema-consistent reasoning during decoding. Given a candidate span (<italic>s, t</italic>) and its predicted type distribution <italic>P</italic>(<italic>y</italic> | <italic>s, t, x</italic>), CCD imposes a filtering condition to suppress structurally invalid type assignments. If any sibling type <inline-formula>
<mml:math display="inline" id="im76"><mml:msup><mml:mi>y</mml:mi><mml:mo>&#x2032;</mml:mo></mml:msup></mml:math></inline-formula> receives high confidence, types incompatible with <inline-formula>
<mml:math display="inline" id="im77"><mml:msup><mml:mi>y</mml:mi><mml:mo>&#x2032;</mml:mo></mml:msup></mml:math></inline-formula> are excluded from consideration. The filtering mask is defined as <xref ref-type="disp-formula" rid="eq34">Equation 34</xref>:</p>
<disp-formula id="eq34"><label>(34)</label>
<mml:math display="block" id="M34"><mml:mrow><mml:msub><mml:mrow><mml:mtext>Mask</mml:mtext></mml:mrow><mml:mrow><mml:mi>s</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mo>{</mml:mo><mml:mi>y</mml:mi><mml:mo>&#x2208;</mml:mo><mml:mi mathvariant="script">Y</mml:mi><mml:mo>&#xa0;</mml:mo><mml:mo>|</mml:mo><mml:mo>&#xa0;</mml:mo><mml:mo>&#x2200;</mml:mo><mml:msup><mml:mi>y</mml:mi><mml:mo>&#x2032;</mml:mo></mml:msup><mml:mo>&#x2208;</mml:mo><mml:mtext>siblings</mml:mtext><mml:mo stretchy="false">(</mml:mo><mml:mi>y</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>,</mml:mo><mml:mtext>&#x2009;</mml:mtext><mml:mi>P</mml:mi><mml:mo>(</mml:mo><mml:msup><mml:mi>y</mml:mi><mml:mo>&#x2032;</mml:mo></mml:msup><mml:mo>&#xa0;</mml:mo><mml:mo>|</mml:mo><mml:mo>&#xa0;</mml:mo><mml:mi>s</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi><mml:mo>,</mml:mo><mml:mi>x</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>&lt;</mml:mo><mml:mi>&#x3b3;</mml:mi><mml:mo>}</mml:mo><mml:mo>,</mml:mo></mml:mrow></mml:math>
</disp-formula>
<p>where <italic>&#x3b3;</italic> is a threshold, and &#x2018;siblings&#x2019; are types that share the same parent or occupy mutually exclusive branches under <inline-formula>
<mml:math display="inline" id="im78"><mml:mi mathvariant="script">O</mml:mi></mml:math></inline-formula>. This prevents type collisions during inference and ensures that selected types conform to the semantic taxonomy. Moreover, to handle fine-grained hierarchies, CCD performs ontology-aware projection of span embeddings into the type space using <xref ref-type="disp-formula" rid="eq35">Equation 35</xref>:</p>
<disp-formula id="eq35"><label>(35)</label>
<mml:math display="block" id="M35"><mml:mrow><mml:msub><mml:mi>s</mml:mi><mml:mrow><mml:mi>s</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mtext>softmax</mml:mtext><mml:mo stretchy="false">(</mml:mo><mml:mtext>&#x3a9;</mml:mtext><mml:mo>&#xb7;</mml:mo><mml:msub><mml:mover accent="true"><mml:mi>r</mml:mi><mml:mo>&#x2dc;</mml:mo></mml:mover><mml:mrow><mml:mi>s</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mo>,</mml:mo></mml:mrow></mml:math>
</disp-formula>
<p>which refines the type probabilities by aligning span features with schema-induced semantics.</p>
<p>To enforce biological plausibility and semantic consistency, our constraint propagation framework leverages domain-specific biomedical ontologies. In particular, we integrate structured knowledge from the Unified Medical Language System (UMLS), MeSH, and IPD-IMGT/HLA. These ontologies cover hierarchical and relational information for a wide range of immunology-related concepts such as cytokines, immune cells, diseases, and HLA alleles. During preprocessing, entity labels from the training datasets are first aligned to concept identifiers in these ontologies using synonym expansion and lexical normalization. Based on this alignment, we construct a type compatibility matrix and a constraint propagation graph, where edges represent biologically valid co-occurrence and nesting relations derived from the ontology structure. These include subclass hierarchies (T-helper cell is a subtype of T cell), mutually exclusive categories, and permissible parent-child embeddings (IL-6 within cytokine). At decoding time, these constraints are enforced by penalizing span configurations that violate known ontological dependencies and by filtering out contradictory label pairs. This design not only improves span-level coherence but also enables the model to leverage structured domain knowledge in a weakly supervised setting, making it more robust to label noise and entity ambiguity.</p>
</sec>
</sec>
</sec>
<sec id="s4">
<label>4</label>
<title>Experimental setup</title>
<sec id="s4_1">
<label>4.1</label>
<title>Dataset</title>
<p>To evaluate the effectiveness of our proposed NER framework, we selected several immunology-related biomedical datasets. The chosen datasets are specifically relevant to the immunology domain and contain a rich set of entities such as immune cells, cytokines, diseases, biomarkers, and therapeutic agents. These datasets are carefully curated to reflect the complexities and challenges of immunological texts. The datasets used in our experiments are as follows: NCBI Disease Corpus Dogan et&#xa0;al. (<xref ref-type="bibr" rid="B34">34</xref>) contains 793 PubMed abstracts annotated with over 6,800 disease mentions. It focuses on disease name recognition and normalization, and includes a substantial number of immune-related diseases such as lupus, psoriasis, and rheumatoid arthritis. Each entity is linked to concepts in the MEDIC vocabulary. SNPPhenA Dehghani et&#xa0;al. (<xref ref-type="bibr" rid="B35">35</xref>) is a manually curated dataset of SNP-phenotype associations, including mentions of genetic variants, genes, and clinical traits. These annotations enable studying the genetic basis of immune response variability and autoimmune disorders. HLA-SPREAD Dholakia et&#xa0;al. (<xref ref-type="bibr" rid="B36">36</xref>) is a large-scale resource focusing on HLA alleles and their associations with diseases, drugs, and adverse immune reactions. The dataset is derived from over 20,000 PubMed abstracts and normalized to external ontologies such as IPD IMGT/HLA and UMLS. It is highly relevant to tasks involving immune compatibility and drug sensitivity. Collectively, these datasets offer a robust and representative benchmark for evaluating biomedical NER systems with a focus on immunology-specific terminology, nested structures, and domain adaptation challenges.</p>
<p>These three datasets were selected to provide a diverse yet relevant set of immune-related entities for testing our method. They include a variety of immunology-related terms, ensuring that our model&#x2019;s performance can be evaluated in a realistic biomedical context. All datasets were preprocessed in a similar manner to ensure consistency across the experiments.</p>
</sec>
<sec id="s4_2">
<label>4.2</label>
<title>Experimental setup</title>
<p>To evaluate the effectiveness and robustness of the proposed NER framework, we employed BioBERT as the backbone encoder owing to its superior capability in processing biomedical texts. The model was fine-tuned on three immunology-specific datasets: NCBI Disease, SNPPhenA, and HLA-SPREAD, which collectively encompass diverse entity types such as immune cells, cytokines, diseases, genes, and HLA alleles. Each dataset was divided into training (70%), validation (10%), and test (20%) subsets using stratified sampling to maintain balanced entity distributions. All datasets were preprocessed with BioBERT&#x2019;s WordPiece tokenizer to ensure consistency with the pretrained model vocabulary.</p>
<p>The framework adopts a span-based labeling scheme that aligns with the structural characteristics of immunological texts, allowing the model to handle nested and overlapping entities effectively. Input sequences were truncated or padded to a maximum length of 512 tokens. <italic>AdamW</italic> optimizer was employed with a learning rate of 3 &#xd7; 10<sup>&#x2212;5</sup>, weight decay of 0.01, and batch size of 16. A dropout rate of 0.1 was applied after each transformer layer to prevent overfitting. Training was conducted for up to 30 epochs, with early stopping triggered when the validation F1-score failed to improve over five consecutive epochs.</p>
<p>All experiments were executed on a single NVIDIA A100 GPU under mixed-precision (FP16) mode to improve computational efficiency. For evaluation, we adopted standard Precision, Recall, F1-score, and Entity-level Accuracy metrics. Each experiment was repeated three times with different random seeds, and the averaged scores were reported to ensure statistical robustness and reproducibility.</p>
<p>This experimental configuration provides a fair and controlled environment for assessing the model&#x2019;s performance across multiple immunology-oriented datasets, ensuring that improvements are attributable to the proposed SpanStructEncoder and Contextual Constraint Decoding (CCD) modules rather than dataset-specific biases or random variation.</p>
</sec>
<sec id="s4_3">
<label>4.3</label>
<title>Comparison with SOTA methods</title>
<p><xref ref-type="table" rid="T1"><bold>Tables&#xa0;1</bold></xref>, <xref ref-type="table" rid="T2"><bold>2</bold></xref> present the comparative performance of the proposed framework against several recent biomedical NER models, including BioGPT, BioLinkBERT, and SciFive, evaluated on three immunology-oriented datasets: NCBI Disease, SNPPhenA, and HLA-SPREAD.</p>
<table-wrap id="T1" position="float">
<label>Table&#xa0;1</label>
<caption>
<p>Performance comparison of biomedical NER models on immunology-specific datasets (F1-score).</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" align="left">Model</th>
<th valign="middle" align="center">NCBI (F1)</th>
<th valign="middle" align="center">SNPPhenA (F1)</th>
<th valign="middle" align="center">HLA-SPREAD (F1)</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="left">BioGPT</td>
<td valign="middle" align="center">84.23</td>
<td valign="middle" align="center">81.95</td>
<td valign="middle" align="center">83.17</td>
</tr>
<tr>
<td valign="middle" align="left">BioLinkBERT</td>
<td valign="middle" align="center">85.90</td>
<td valign="middle" align="center">83.46</td>
<td valign="middle" align="center">84.72</td>
</tr>
<tr>
<td valign="middle" align="left">SciFive</td>
<td valign="middle" align="center">83.77</td>
<td valign="middle" align="center">81.03</td>
<td valign="middle" align="center">82.62</td>
</tr>
<tr>
<td valign="middle" align="left"><bold>Ours (SpanStructEncoder + CCD)</bold></td>
<td valign="middle" align="center"><bold>88.72</bold></td>
<td valign="middle" align="center"><bold>86.91</bold></td>
<td valign="middle" align="center"><bold>87.44</bold></td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>F1-scores are reported on the NCBI Disease, SNPPhenA, and HLA-SPREAD datasets.</p></fn>
<fn>
<p>Bold indicates the values achieved by our proposed method.</p></fn>
</table-wrap-foot>
</table-wrap>
<table-wrap id="T2" position="float">
<label>Table&#xa0;2</label>
<caption>
<p>Precision comparison of biomedical NER models on immunology-specific datasets.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" align="left">Model</th>
<th valign="middle" align="center">NCBI (P)</th>
<th valign="middle" align="center">SNPPhenA (P)</th>
<th valign="middle" align="center">HLA-SPREAD (P)</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="left">BioGPT</td>
<td valign="middle" align="center">82.81</td>
<td valign="middle" align="center">80.24</td>
<td valign="middle" align="center">81.90</td>
</tr>
<tr>
<td valign="middle" align="left">BioLinkBERT</td>
<td valign="middle" align="center">84.34</td>
<td valign="middle" align="center">82.57</td>
<td valign="middle" align="center">83.51</td>
</tr>
<tr>
<td valign="middle" align="left">SciFive</td>
<td valign="middle" align="center">82.95</td>
<td valign="middle" align="center">79.89</td>
<td valign="middle" align="center">81.03</td>
</tr>
<tr>
<td valign="middle" align="left"><bold>Ours (SpanStructEncoder + CCD)</bold></td>
<td valign="middle" align="center"><bold>87.65</bold></td>
<td valign="middle" align="center"><bold>85.78</bold></td>
<td valign="middle" align="center"><bold>86.11</bold></td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Precision (P) values are reported on NCBI Disease, SNPPhenA, and HLA-SPREAD datasets.</p></fn>
<fn>
<p>Bold indicates the values achieved by our proposed method.</p></fn>
</table-wrap-foot>
</table-wrap>
<p>As shown in <xref ref-type="table" rid="T1"><bold>Table&#xa0;1</bold></xref>, our proposed model (SpanStructEncoder + CCD) consistently achieved the highest F1-scores across all datasets, outperforming the best-performing baseline, BioLinkBERT, by 2.82, 3.45, and 2.72 points on NCBI Disease, SNPPhenA, and HLA-SPREAD respectively. This improvement demonstrates the model&#x2019;s superior capability in capturing complex entity structures and handling nested or overlapping mentions prevalent in immunological texts.</p>
<p>The Precision results summarized in <xref ref-type="table" rid="T2"><bold>Table&#xa0;2</bold></xref> further validate these findings. The proposed framework achieved 87.65, 85.78, and 86.11 on the three datasets, respectively&#x2014;surpassing all baseline models. The higher precision indicates that the model effectively reduces false positives by leveraging its constraint aware decoding mechanism, which enforces type-level compatibility and ontology-based consistency during prediction.</p>
<p>Collectively, these results confirm that the integration of structured span representation and Contextual Constraint Decoding (CCD) substantially enhances recognition accuracy in immunology-related NER tasks. The model not only captures intricate biomedical relationships but also maintains strong generalization performance across heterogeneous datasets, establishing a new benchmark for domain-specific entity recognition in biomedical NLP.</p>
<p>The improvement is particularly notable in precision, indicating the model&#x2019;s ability to avoid false positives while capturing complex and overlapping biomedical entities. This performance gain can be attributed to our structured span representation and constraint-aware decoding strategy. These results confirm that integrating domain-specific structure and contextual constraints significantly enhances NER accuracy in the immunology domain.</p>
</sec>
<sec id="s4_4">
<label>4.4</label>
<title>Ablation study</title>
<p>To understand the contribution of each component in our framework, we performed an ablation study on the three immunology datasets. We evaluated the impact of removing key modules: the Structural Attention Graph, Constraint-Based Span Selection, and Graph-Guided Context Propagation. The results are summarized in <xref ref-type="table" rid="T3"><bold>Table&#xa0;3</bold></xref>. Removing the Structural Attention Graph caused the most significant performance drop across all datasets, indicating its importance in modeling nested and overlapping entities. Without Constraint-Based Span Selection, the F1 scores decreased consistently, showing that global consistency and type compatibility constraints are critical for reducing false positives. Excluding Graph-Guided Context Propagation led to smaller but noticeable declines, suggesting its role in refining predictions through semantic context.</p>
<table-wrap id="T3" position="float">
<label>Table&#xa0;3</label>
<caption>
<p>Ablation results (F1 scores) on immunology-related datasets.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" align="left">Model variant</th>
<th valign="middle" align="center">NCBI</th>
<th valign="middle" align="center">SNPPhenA</th>
<th valign="middle" align="center">HLA-SPREAD</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="left">Full Model</td>
<td valign="middle" align="center"><bold>88.72</bold></td>
<td valign="middle" align="center"><bold>86.91</bold></td>
<td valign="middle" align="center"><bold>87.44</bold></td>
</tr>
<tr>
<td valign="middle" align="left">w/o Structural Attention Graph</td>
<td valign="middle" align="center">85.24</td>
<td valign="middle" align="center">83.72</td>
<td valign="middle" align="center">84.06</td>
</tr>
<tr>
<td valign="middle" align="left">w/o Constraint-Based Span Selection</td>
<td valign="middle" align="center">86.40</td>
<td valign="middle" align="center">84.95</td>
<td valign="middle" align="center">85.18</td>
</tr>
<tr>
<td valign="middle" align="left">w/o Graph-Guided Context Propagation</td>
<td valign="middle" align="center">87.01</td>
<td valign="middle" align="center">85.32</td>
<td valign="middle" align="center">86.05</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Bold indicates the values achieved by our proposed method.</p></fn>
</table-wrap-foot>
</table-wrap>
<p>These findings confirm that each module contributes to the overall performance of the model. The combination of structured span encoding and constraint-aware decoding not only improves recognition accuracy but also enhances generalization to complex biomedical entities. The consistent drop in F1 scores across all ablation settings demonstrates the necessity of integrating structural and contextual information for robust named entity recognition in immunology.</p>
<p>To assess the contribution of each component in our model, we performed an ablation study on biomedical datasets using standard NER metrics. As shown in <xref ref-type="table" rid="T4"><bold>Table&#xa0;4</bold></xref>, removing the dual-channel attention led to the largest drop in F1-score, indicating its key role in contextual representation. Excluding lexical features and constraint-based decoding also caused performance declines, confirming their complementary value. The full model achieved the highest F1-score, demonstrating that the integration of all modules is crucial for optimal performance in biomedical named entity recognition.</p>
<table-wrap id="T4" position="float">
<label>Table&#xa0;4</label>
<caption>
<p>Ablation study results on biomedical datasets using Precision, Recall, and F1-score.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="left">Model variant</th>
<th valign="middle" align="center">Precision</th>
<th valign="middle" align="center">Recall</th>
<th valign="middle" align="center">F1-score</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="left">Full Model (Ours)</td>
<td valign="middle" align="center">88.5</td>
<td valign="middle" align="center">86.9</td>
<td valign="middle" align="center">87.7</td>
</tr>
<tr>
<td valign="middle" align="left">w/o Dual Attention</td>
<td valign="middle" align="center">85.3</td>
<td valign="middle" align="center">83.1</td>
<td valign="middle" align="center">84.2</td>
</tr>
<tr>
<td valign="middle" align="left">w/o Lexical Features</td>
<td valign="middle" align="center">86.2</td>
<td valign="middle" align="center">84.0</td>
<td valign="middle" align="center">85.1</td>
</tr>
<tr>
<td valign="middle" align="left">w/o Constraint Decoding</td>
<td valign="middle" align="center">87.0</td>
<td valign="middle" align="center">85.1</td>
<td valign="middle" align="center">86.0</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
</sec>
<sec id="s5" sec-type="conclusions">
<label>5</label>
<title>Conclusions and future work</title>
<p>In this study, we focus on improving Named Entity Recognition (NER) within the domain of immunology and immune-mediated disorders, a field characterized by deeply nested and context-sensitive terminology. To address the limitations of existing approaches, such as their inability to accurately detect overlapping or nested entities and their poor adaptability to domain-specific language, we propose a novel NER framework composed of two key components: SpanStructEncoder and Contextual Constraint Decoding (CCD). SpanStructEncoder leverages hierarchical span representation and graph-based relational modeling to enhance the semantic capture of both entity and span-level dependencies. Meanwhile, CCD applies ontologically informed, context-aware constraints to the decoding phase, improving precision especially in weakly labeled datasets. Together, these modules enable robust extraction of complex biomedical entities. Our evaluation on immunology-specific datasets reveals that our approach significantly surpasses existing models, particularly in handling ambiguous and nested entities, ultimately facilitating more accurate biomedical text mining and aiding downstream tasks such as knowledge graph construction.</p>
<p>There are two notable limitations to consider. While the use of structured constraints boosts accuracy, it introduces computational overhead that could hinder real-time or large-scale deployment. Optimizing the efficiency of CCD or designing lighter variants may be essential for practical integration. Although our model adapts well to immunology, its generalizability to other biomedical subfields remains limited by the reliance on ontology-specific constraints. Future work could explore meta-learning or adaptive constraint frameworks to enhance cross-domain transferability. This work presents a strong foundation for more semantically aware NER systems and opens new avenues for domain-adaptive information extraction in biomedical research.</p>
<p>Despite the promising performance demonstrated by our model, several limitations must be acknowledged. First, although we utilized widely accepted biomedical benchmark datasets such as NCBI Disease and MedMentions, these corpora are relatively small in scale and curated under idealized conditions. Real-world hospital data, which often includes noisy, unstructured, and incomplete medical texts, were not included due to data privacy constraints. This gap limits the immediate clinical applicability of our findings. Future work should emphasize cross-domain biomedical corpora (e.g., clinical notes vs. medical literature) to evaluate robustness more meaningfully. Second, our current work lacks validation through clinical deployment or feedback from domain experts such as immunologists or medical practitioners. Such user studies are critical for assessing practical utility, interpretability, and trustworthiness of the model outputs in real-world decision-making contexts. We plan to address these gaps in future work by collaborating with medical institutions to obtain real-world data and organize expert-in-the-loop validation experiments. Addressing these limitations will be key to transitioning from research prototypes to clinically valuable systems.</p>
<p>To facilitate reproducibility and foster research in biomedical NLP for immunology, we release all resources used in this study, including annotated datasets, preprocessing scripts, model implementation code, and pretrained checkpoints. The repository contains detailed documentation and versioning of all experimental components. This resource is publicly available at: <ext-link ext-link-type="uri" xlink:href="https://snippets.cacher.io/snippet/ac40a72aa8e988cc76b8">https://snippets.cacher.io/snippet/ac40a72aa8e988cc76b8</ext-link>.</p>
</sec>
</body>
<back>
<sec id="s6" sec-type="data-availability">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/supplementary material. Further inquiries can be directed to the corresponding author.</p></sec>
<sec id="s7" sec-type="author-contributions">
<title>Author contributions</title>
<p>SC: Conceptualization, Methodology, Software, Writing &#x2013; original draft. JC: Validation, Formal analysis, Investigation, Data curation, Writing &#x2013; original draft. MS: Writing &#x2013; original draft, Supervision, Funding acquisition. YW: Writing &#x2013; original draft, Writing &#x2013; review and editing, Visualization, Supervision.</p></sec>
<sec id="s9" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>The author(s) declared that this work was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p></sec>
<sec id="s10" sec-type="ai-statement">
<title>Generative AI statement</title>
<p>The author(s) declared that generative AI was not used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p></sec>
<sec id="s11" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p></sec>
<ref-list>
<title>References</title>
<ref id="B1">
<label>1</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Li</surname> <given-names>J</given-names></name>
<name><surname>Dan</surname> <given-names>K</given-names></name>
<name><surname>Ai</surname> <given-names>J</given-names></name>
</person-group>. 
<article-title>Machine learning in the prediction of immunotherapy response and prognosis of melanoma: a systematic review and meta-analysis</article-title>. <source>Front Immunol</source>. (<year>2024</year>) <volume>15</volume>:<elocation-id>1281940</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fimmu.2024.1281940</pub-id>, PMID: <pub-id pub-id-type="pmid">38835779</pub-id>
</mixed-citation>
</ref>
<ref id="B2">
<label>2</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Sahin</surname> <given-names>TK</given-names></name>
<name><surname>Ayasun</surname> <given-names>R</given-names></name>
<name><surname>Rizzo</surname> <given-names>A</given-names></name>
<name><surname>Guven</surname> <given-names>DC</given-names></name>
</person-group>. 
<article-title>Prognostic value of neutrophil-to-eosinophil ratio (NER) in cancer: a systematic review and meta-analysis</article-title>. <source>Cancers</source>. (<year>2024</year>) <volume>16</volume>:<fpage>3689</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/cancers16213689</pub-id>, PMID: <pub-id pub-id-type="pmid">39518127</pub-id>
</mixed-citation>
</ref>
<ref id="B3">
<label>3</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Jian</surname> <given-names>F</given-names></name>
<name><surname>Cai</surname> <given-names>H</given-names></name>
<name><surname>Chen</surname> <given-names>Q</given-names></name>
<name><surname>Pan</surname> <given-names>X</given-names></name>
<name><surname>Feng</surname> <given-names>W</given-names></name>
<name><surname>Yuan</surname> <given-names>Y</given-names></name>
</person-group>. 
<article-title>OnmiMHC: a machine learning solution for ucec tumor vaccine development through enhanced peptide-MHC binding prediction</article-title>. <source>Front Immunol</source>. (<year>2025</year>) <volume>16</volume>:<elocation-id>1550252</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fimmu.2025.1550252</pub-id>, PMID: <pub-id pub-id-type="pmid">40092998</pub-id>
</mixed-citation>
</ref>
<ref id="B4">
<label>4</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Mi</surname> <given-names>B</given-names></name>
<name><surname>Yi</surname> <given-names>F</given-names></name>
</person-group>. 
<article-title>A review: development of named entity recognition (NER) technology for aeronautical information intelligence</article-title>. <source>Artif Intell Rev</source>. (<year>2022</year>). doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s10462-022-10197-2</pub-id>
</mixed-citation>
</ref>
<ref id="B5">
<label>5</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Weber</surname> <given-names>L</given-names></name>
<name><surname>M&#xfc;nchmeyer</surname> <given-names>J</given-names></name>
<name><surname>Rockt&#xe4;schel</surname> <given-names>T</given-names></name>
<name><surname>Habibi</surname> <given-names>M</given-names></name>
<name><surname>Leser</surname> <given-names>U</given-names></name>
</person-group>. 
<article-title>HUNER: improving biomedical NER with pretraining</article-title>. <source>Bioinformatics</source>. (<year>2020</year>) <volume>36</volume>:<fpage>295</fpage>&#x2013;<lpage>302</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/bioinformatics/btz528</pub-id>, PMID: <pub-id pub-id-type="pmid">31243432</pub-id>
</mixed-citation>
</ref>
<ref id="B6">
<label>6</label>
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name><surname>Hernandez-Lareda</surname> <given-names>F</given-names></name>
<name><surname>Auccahuasi</surname> <given-names>W</given-names></name>
</person-group>. (<year>2024</year>). 
<article-title>Implementation of a customized named entity recognition (NER) model in document categorization</article-title>, in: <conf-name>2024 3rd International Conference on Automation, Computing and Renewable Systems (ICACRS)</conf-name>.
</mixed-citation>
</ref>
<ref id="B7">
<label>7</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Khouya</surname> <given-names>N</given-names></name>
<name><surname>Retbi</surname> <given-names>A</given-names></name>
<name><surname>Bennani</surname> <given-names>S</given-names></name>
</person-group>. 
<article-title>Enriching ontology with named entity recognition (NER) integration</article-title>. (<year>2024</year>). <italic>ACR</italic>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/978-3-031-56950-0_13</pub-id>
</mixed-citation>
</ref>
<ref id="B8">
<label>8</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Bade</surname> <given-names>G</given-names></name>
<name><surname>Kolesnikova</surname> <given-names>O</given-names></name>
<name><surname>Oropeza</surname> <given-names>J</given-names></name>
</person-group>. 
<article-title>The role of named entity recognition (NER): Survey</article-title>. <source>Int J Comput Organ Trends</source>. (<year>2024</year>). doi:&#xa0;<pub-id pub-id-type="doi">10.14445/22492593/IJCOT-V14I3P301</pub-id>
</mixed-citation>
</ref>
<ref id="B9">
<label>9</label>
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name><surname>Yossy</surname> <given-names>E</given-names></name>
<name><surname>Suhartono</surname> <given-names>D</given-names></name>
<name><surname>Trisetyarso</surname> <given-names>A</given-names></name>
<name><surname>Budiharto</surname> <given-names>W</given-names></name>
</person-group>. (<year>2023</year>). 
<article-title>Question classification of university admission using named-entity recognition (NER)</article-title>, in: <conf-name>International Conference on Information Technology, Computer, and Electrical Engineering</conf-name>, Available online at: <uri xlink:href="https://ieeexplore.ieee.org/abstract/document/10276823/">https://ieeexplore.ieee.org/abstract/document/10276823/</uri>.
</mixed-citation>
</ref>
<ref id="B10">
<label>10</label>
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name><surname>Zhang</surname> <given-names>Z</given-names></name>
<name><surname>Hu</surname> <given-names>M</given-names></name>
<name><surname>Zhao</surname> <given-names>S</given-names></name>
<name><surname>Huang</surname> <given-names>M</given-names></name>
<name><surname>Wang</surname> <given-names>H</given-names></name>
<name><surname>Liu</surname> <given-names>L</given-names></name>
<etal/>
</person-group>. (<year>2023</year>). 
<article-title>E-NER: Evidential deep learning for trustworthy named entity recognition</article-title>, in: <conf-name>Annual Meeting of the Association for Computational Linguistics</conf-name>, Available online at: <uri xlink:href="https://arxiv.org/abs/2305.17854">https://arxiv.org/abs/2305.17854</uri>.
</mixed-citation>
</ref>
<ref id="B11">
<label>11</label>
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name><surname>Ushio</surname> <given-names>A</given-names></name>
<name><surname>Camacho-Collados</surname> <given-names>J</given-names></name>
</person-group>. (<year>2022</year>). 
<article-title>T-NER: An all-round python library for transformer-based named entity recognition</article-title>, in: <conf-name>Conference of the European Chapter of the Association for Computational Linguistics</conf-name>, Available online at: <uri xlink:href="https://aclanthology.org/2021.eacl-demos.7/">https://aclanthology.org/2021.eacl-demos.7/</uri>.
</mixed-citation>
</ref>
<ref id="B12">
<label>12</label>
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name><surname>Ray</surname> <given-names>AT</given-names></name>
<name><surname>Pinon-Fischer</surname> <given-names>OJ</given-names></name>
<name><surname>Mavris</surname> <given-names>D</given-names></name>
<name><surname>White</surname> <given-names>RT</given-names></name>
<name><surname>Cole</surname> <given-names>BF</given-names></name>
</person-group>. (<year>2023</year>). 
<article-title>aeroBERT-NER: Named-entity recognition for aerospace requirements engineering using BERT</article-title>, in: <conf-name>AIAA SCITECH 2023 Forum</conf-name>, doi:&#xa0;<pub-id pub-id-type="doi">10.2514/6.2023-2583</pub-id>.
</mixed-citation>
</ref>
<ref id="B13">
<label>13</label>
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name><surname>Chen</surname> <given-names>B</given-names></name>
<name><surname>Xu</surname> <given-names>G</given-names></name>
<name><surname>Wang</surname> <given-names>X</given-names></name>
<name><surname>Xie</surname> <given-names>P</given-names></name>
<name><surname>Zhang</surname> <given-names>M</given-names></name>
<name><surname>Huang</surname> <given-names>F</given-names></name>
</person-group>. (<year>2022</year>). 
<article-title>AISHELL-NER: Named entity recognition from Chinese speech</article-title>, in: <conf-name>IEEE International Conference on Acoustics, Speech, and Signal Processing</conf-name>, Available online at: <uri xlink:href="https://ieeexplore.ieee.org/abstract/document/9746955/">https://ieeexplore.ieee.org/abstract/document/9746955/</uri>.
</mixed-citation>
</ref>
<ref id="B14">
<label>14</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Au</surname> <given-names>TWT</given-names></name>
<name><surname>Cox</surname> <given-names>I</given-names></name>
<name><surname>Lampos</surname> <given-names>V</given-names></name>
</person-group>. 
<article-title>E-NER &#x2014; an annotated named entity recognition corpus of legal text</article-title>. (<year>2022</year>). <italic>NLLP</italic>. Available online at: <uri xlink:href="https://arxiv.org/abs/2212.09306">https://arxiv.org/abs/2212.09306</uri>.
</mixed-citation>
</ref>
<ref id="B15">
<label>15</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Yu</surname> <given-names>J</given-names></name>
<name><surname>Ji</surname> <given-names>B</given-names></name>
<name><surname>Li</surname> <given-names>S</given-names></name>
<name><surname>Ma</surname> <given-names>J</given-names></name>
<name><surname>Liu</surname> <given-names>H</given-names></name>
<name><surname>Xu</surname> <given-names>H</given-names></name>
</person-group>. 
<article-title>S-NER: A concise and efficient span-based model for named entity recognition</article-title>. <source>Ital Natl Conf Sensors</source>. (<year>2022</year>). doi:&#xa0;<pub-id pub-id-type="doi">10.3390/s22082852</pub-id>, PMID: <pub-id pub-id-type="pmid">35458837</pub-id>
</mixed-citation>
</ref>
<ref id="B16">
<label>16</label>
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name><surname>Li</surname> <given-names>J</given-names></name>
<name><surname>Meng</surname> <given-names>K</given-names></name>
</person-group>. (<year>2021</year>). 
<article-title>MFE-NER: Multi-feature fusion embedding for Chinese named entity recognition</article-title>, in: <conf-name>China National Conference on Chinese Computational Linguistics</conf-name>, doi:&#xa0;<pub-id pub-id-type="doi">10.1007/978-981-97-8367-0_12</pub-id>
</mixed-citation>
</ref>
<ref id="B17">
<label>17</label>
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name><surname>Zhang</surname> <given-names>Z</given-names></name>
<name><surname>Zhao</surname> <given-names>Y</given-names></name>
<name><surname>Gao</surname> <given-names>H</given-names></name>
<name><surname>Hu</surname> <given-names>M</given-names></name>
</person-group>. (<year>2024</year>). 
<article-title>LinkNER: Linking local named entity recognition models to large language models using uncertainty</article-title>, in: <conf-name>The Web Conference</conf-name>, doi:&#xa0;<pub-id pub-id-type="doi">10.1145/3589334.3645414</pub-id>
</mixed-citation>
</ref>
<ref id="B18">
<label>18</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Taher</surname> <given-names>E</given-names></name>
<name><surname>Hoseini</surname> <given-names>SA</given-names></name>
<name><surname>Shamsfard</surname> <given-names>M</given-names></name>
</person-group>. 
<article-title>Beheshti-NER: Persian named entity recognition using BERT</article-title>. (<year>2020</year>). <italic>NSURL</italic>. Available online at: <uri xlink:href="https://aclanthology.org/2019.nsurl-1.6.pdf">https://aclanthology.org/2019.nsurl-1.6.pdf</uri>.
</mixed-citation>
</ref>
<ref id="B19">
<label>19</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Zheng</surname> <given-names>J</given-names></name>
<name><surname>Chen</surname> <given-names>H</given-names></name>
<name><surname>Ma</surname> <given-names>Q</given-names></name>
</person-group>. 
<article-title>Cross-domain named entity recognition via graph matching</article-title>. <source>Findings</source>. (<year>2024</year>). Available online at: <uri xlink:href="https://aclanthology.org/2022.findings-acl.210/">https://aclanthology.org/2022.findings-acl.210/</uri>.
</mixed-citation>
</ref>
<ref id="B20">
<label>20</label>
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name><surname>Shen</surname> <given-names>Y</given-names></name>
<name><surname>Song</surname> <given-names>K</given-names></name>
<name><surname>Tan</surname> <given-names>X</given-names></name>
<name><surname>Li</surname> <given-names>D</given-names></name>
<name><surname>Lu</surname> <given-names>W</given-names></name>
<name><surname>Zhuang</surname> <given-names>Y</given-names></name>
</person-group>. (<year>2023</year>). 
<article-title>DiffusionNER: Boundary diffusion for named entity recognition</article-title>, in: <conf-name>Annual Meeting of the Association for Computational Linguistics</conf-name>, Available online at: <uri xlink:href="https://arxiv.org/abs/2305.13298">https://arxiv.org/abs/2305.13298</uri>.
</mixed-citation>
</ref>
<ref id="B21">
<label>21</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Hu</surname> <given-names>Y</given-names></name>
<name><surname>Ameer</surname> <given-names>I</given-names></name>
<name><surname>Zuo</surname> <given-names>X</given-names></name>
<name><surname>Peng</surname> <given-names>X</given-names></name>
<name><surname>Zhou</surname> <given-names>Y</given-names></name>
<name><surname>Li</surname> <given-names>Z</given-names></name>
<etal/>
</person-group>. 
<article-title>Improving large language models for clinical named entity recognition via prompt engineering</article-title>. <source>J Am Med Inf Assoc</source>. (<year>2023</year>). Available online at: <uri xlink:href="https://scholar.google.com/scholar?hl=zh-CN&amp;as_sdt=0%2C5&amp;q=+Improving+large+language+models+for+clinical+named+entity+recognition+via+prompt+engineering&amp;btnG=">https://scholar.google.com/scholar?hl=zh-CN&amp;as_sdt=0%2C5&amp;q=+Improving+large+language+models+for+clinical+named+entity+recognition+via+prompt+engineering&amp;btnG=</uri>.
</mixed-citation>
</ref>
<ref id="B22">
<label>22</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Jarrar</surname> <given-names>M</given-names></name>
<name><surname>Hamad</surname> <given-names>N</given-names></name>
<name><surname>Khalilia</surname> <given-names>M</given-names></name>
<name><surname>Talafha</surname> <given-names>B</given-names></name>
<name><surname>Elmadany</surname> <given-names>A</given-names></name>
<name><surname>Abdul-Mageed</surname> <given-names>M</given-names></name>
</person-group>. 
<article-title>WojoodNER 2024: The second Arabic named entity recognition shared task</article-title>. (<year>2024</year>). <italic>ARABICNLP</italic>. doi:&#xa0;<pub-id pub-id-type="doi">10.18653/v1/2024.arabicnlp-1</pub-id>
</mixed-citation>
</ref>
<ref id="B23">
<label>23</label>
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name><surname>Zhou</surname> <given-names>W</given-names></name>
<name><surname>Zhang</surname> <given-names>S</given-names></name>
<name><surname>Gu</surname> <given-names>Y</given-names></name>
<name><surname>Chen</surname> <given-names>M</given-names></name>
<name><surname>Poon</surname> <given-names>H</given-names></name>
</person-group>. (<year>2023</year>). 
<article-title>UniversalNER: Targeted distillation from large language models for open named entity recognition</article-title>, in: <conf-name>International Conference on Learning Representations</conf-name>, Available online at: <uri xlink:href="https://arxiv.org/abs/2308.03279">https://arxiv.org/abs/2308.03279</uri>.
</mixed-citation>
</ref>
<ref id="B24">
<label>24</label>
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name><surname>Zaratiana</surname> <given-names>U</given-names></name>
<name><surname>Tomeh</surname> <given-names>N</given-names></name>
<name><surname>Holat</surname> <given-names>P</given-names></name>
<name><surname>Charnois</surname> <given-names>T</given-names></name>
</person-group>. (<year>2023</year>). 
<article-title>GLiNER: Generalist model for named entity recognition using bidirectional transformer</article-title>, in: <conf-name>North American Chapter of the Association for Computational Linguistics</conf-name>, Available online at: <uri xlink:href="https://aclanthology.org/2024.naacl-long.300/">https://aclanthology.org/2024.naacl-long.300/</uri>.
</mixed-citation>
</ref>
<ref id="B25">
<label>25</label>
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name><surname>Ding</surname> <given-names>N</given-names></name>
<name><surname>Xu</surname> <given-names>G</given-names></name>
<name><surname>Chen</surname> <given-names>Y</given-names></name>
<name><surname>Wang</surname> <given-names>X</given-names></name>
<name><surname>Han</surname> <given-names>X</given-names></name>
<name><surname>Xie</surname> <given-names>P</given-names></name>
<etal/>
</person-group>. (<year>2021</year>). 
<article-title>Few-NERD: A few-shot named entity recognition dataset</article-title>, in: <conf-name>Annual Meeting of the Association for Computational Linguistics</conf-name>, Available online at: <uri xlink:href="https://aclanthology.org/2021.acl-long.248/">https://aclanthology.org/2021.acl-long.248/</uri>.
</mixed-citation>
</ref>
<ref id="B26">
<label>26</label>
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name><surname>Shen</surname> <given-names>Y</given-names></name>
<name><surname>Tan</surname> <given-names>Z</given-names></name>
<name><surname>Wu</surname> <given-names>S</given-names></name>
<name><surname>Zhang</surname> <given-names>W</given-names></name>
<name><surname>Zhang</surname> <given-names>R</given-names></name>
<name><surname>Xi</surname> <given-names>Y</given-names></name>
<etal/>
</person-group>. (<year>2023</year>). 
<article-title>Promptner: Prompt locating and typing for named entity recognition</article-title>, in: <conf-name>Annual Meeting of the Association for Computational Linguistics</conf-name>, Available online at: <uri xlink:href="https://arxiv.org/abs/2305.17104">https://arxiv.org/abs/2305.17104</uri>.
</mixed-citation>
</ref>
<ref id="B27">
<label>27</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Durango</surname> <given-names>MC</given-names></name>
<name><surname>Torres-Silva</surname> <given-names>EA</given-names></name>
<name><surname>Orozco-Duque</surname> <given-names>A</given-names></name>
</person-group>. 
<article-title>Named entity recognition in electronic health records: A methodological review</article-title>. <source>Healthcare Inf Res</source>. (<year>2023</year>). doi:&#xa0;<pub-id pub-id-type="doi">10.4258/hir.2023.29.4.286</pub-id>, PMID: <pub-id pub-id-type="pmid">37964451</pub-id>
</mixed-citation>
</ref>
<ref id="B28">
<label>28</label>
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name><surname>Chen</surname> <given-names>J</given-names></name>
<name><surname>Lu</surname> <given-names>Y</given-names></name>
<name><surname>Lin</surname> <given-names>H</given-names></name>
<name><surname>Lou</surname> <given-names>J</given-names></name>
<name><surname>Jia</surname> <given-names>W</given-names></name>
<name><surname>Dai</surname> <given-names>D</given-names></name>
<etal/>
</person-group>. (<year>2023</year>). 
<article-title>Learning in-context learning for named entity recognition</article-title>, in: <conf-name>Annual Meeting of the Association for Computational Linguistics</conf-name>, Available online at: <uri xlink:href="https://arxiv.org/abs/2305.11038">https://arxiv.org/abs/2305.11038</uri>.
</mixed-citation>
</ref>
<ref id="B29">
<label>29</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Qu</surname> <given-names>X</given-names></name>
<name><surname>Gu</surname> <given-names>Y</given-names></name>
<name><surname>Xia</surname> <given-names>Q</given-names></name>
<name><surname>Li</surname> <given-names>Z</given-names></name>
<name><surname>Wang</surname> <given-names>Z</given-names></name>
<name><surname>Huai</surname> <given-names>B</given-names></name>
</person-group>. 
<article-title>A survey on Arabic named entity recognition: Past, recent advances, and future trends</article-title>. <source>IEEE Trans Knowledge Data Eng</source>. (<year>2023</year>). doi:&#xa0;<pub-id pub-id-type="doi">10.1109/TKDE.2023.3303136</pub-id>
</mixed-citation>
</ref>
<ref id="B30">
<label>30</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Jarrar</surname> <given-names>M</given-names></name>
<name><surname>Abdul-Mageed</surname> <given-names>M</given-names></name>
<name><surname>Khalilia</surname> <given-names>M</given-names></name>
<name><surname>Talafha</surname> <given-names>B</given-names></name>
<name><surname>Elmadany</surname> <given-names>A</given-names></name>
<name><surname>Hamad</surname> <given-names>N</given-names></name>
<etal/>
</person-group>. 
<article-title>WojoodNER 2023: The first Arabic named entity recognition shared task</article-title>. (<year>2023</year>). ARABICNLP. doi:&#xa0;<pub-id pub-id-type="doi">10.18653/v1/2023.arabicnlp-1</pub-id>
</mixed-citation>
</ref>
<ref id="B31">
<label>31</label>
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name><surname>Darji</surname> <given-names>H</given-names></name>
<name><surname>Mitrovi&#x107;</surname> <given-names>J</given-names></name>
<name><surname>Granitzer</surname> <given-names>M</given-names></name>
</person-group>. (<year>2023</year>). 
<article-title>German BERT model for legal named entity recognition</article-title>, in: <conf-name>International Conference on Agents and Artificial Intelligence</conf-name>, Available online at: <uri xlink:href="https://arxiv.org/abs/2303.05388">https://arxiv.org/abs/2303.05388</uri>.
</mixed-citation>
</ref>
<ref id="B32">
<label>32</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Varad&#xe9;</surname> <given-names>J</given-names></name>
<name><surname>Magad&#xe1;n</surname> <given-names>S</given-names></name>
<name><surname>Gonz&#xe1;lez-Fern&#xe1;ndez</surname> <given-names>&#xc1;</given-names></name>
</person-group>. 
<article-title>Human immunology and immunotherapy: main achievements and challenges</article-title>. <source>Cell Mol Immunol</source>. (<year>2021</year>) <volume>18</volume>:<page-range>805&#x2013;28</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41423-020-00530-6</pub-id>, PMID: <pub-id pub-id-type="pmid">32879472</pub-id>
</mixed-citation>
</ref>
<ref id="B33">
<label>33</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Cui</surname> <given-names>L</given-names></name>
<name><surname>Wu</surname> <given-names>Y</given-names></name>
<name><surname>Liu</surname> <given-names>J</given-names></name>
<name><surname>Yang</surname> <given-names>S</given-names></name>
<name><surname>Zhang</surname> <given-names>Y</given-names></name>
</person-group>. 
<article-title>Template-based named entity recognition using BART</article-title>. <source>Findings</source>. (<year>2021</year>). doi:&#xa0;<pub-id pub-id-type="doi">10.18653/v1/2021.findings-acl.161</pub-id>
</mixed-citation>
</ref>
<ref id="B34">
<label>34</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Dogan</surname> <given-names>RI</given-names></name>
<name><surname>Leaman</surname> <given-names>R</given-names></name>
<name><surname>Lu</surname> <given-names>Z</given-names></name>
</person-group>. 
<article-title>NCBI disease corpus: a resource for disease name recognition and normalization</article-title>. <source>J Biomed Inf</source>. (<year>2014</year>). Available online at: <uri xlink:href="https://www.sciencedirect.com/science/article/pii/S1532046413001974">https://www.sciencedirect.com/science/article/pii/S1532046413001974</uri>, PMID: <pub-id pub-id-type="pmid">24393765</pub-id>
</mixed-citation>
</ref>
<ref id="B35">
<label>35</label>
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name><surname>Dehghani</surname> <given-names>M</given-names></name>
<name><surname>Bokharaeian</surname> <given-names>B</given-names></name>
<name><surname>Yazdanparast</surname> <given-names>Z</given-names></name>
</person-group>. 
<article-title>BioBERT-based SNP-trait associations extraction from biomedical literature</article-title>. In: <source>ICCKE 2023</source> (<year>2023</year>). Available online at: <uri xlink:href="https://ieeexplore.ieee.org/abstract/document/10326231/">https://ieeexplore.ieee.org/abstract/document/10326231/</uri>.
</mixed-citation>
</ref>
<ref id="B36">
<label>36</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Dholakia</surname> <given-names>D</given-names></name>
<name><surname>Kalra</surname> <given-names>A</given-names></name>
<name><surname>Misir</surname> <given-names>BB</given-names></name>
<name><surname>Kanga</surname> <given-names>U</given-names></name>
<name><surname>Mukerji</surname> <given-names>M</given-names></name>
</person-group>. 
<article-title>HLA-spread: a natural language processing based resource for curating HLA association from pubmed abstracts</article-title>. <source>BMC Genomics</source>. (<year>2022</year>). doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s12864-021-08239-0</pub-id>, PMID: <pub-id pub-id-type="pmid">34991484</pub-id>
</mixed-citation>
</ref>
</ref-list>
<fn-group>
<fn id="n1" fn-type="custom" custom-type="edited-by">
<p>Edited by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/754493">Simon Mitchell</ext-link>, Brighton and Sussex Medical School, United Kingdom</p></fn>
<fn id="n2" fn-type="custom" custom-type="reviewed-by">
<p>Reviewed by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/907025">Hoang Duc Nguyen</ext-link>, Ho Chi Minh City University of Science, Vietnam</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2082234">Hind Alamro</ext-link>, King Abdullah University of Science and Technology, Saudi Arabia</p></fn>
</fn-group>
</back>
</article>