<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="2.3" xml:lang="en">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Plant Sci.</journal-id>
<journal-title>Frontiers in Plant Science</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Plant Sci.</abbrev-journal-title>
<issn pub-type="epub">1664-462X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fpls.2025.1668642</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Plant Science</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Soft prompt-tuning for plant pest and disease classification from colloquial descriptions</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Liu</surname>
<given-names>Xinlu</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/3139167/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Li</surname>
<given-names>Xinbing</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="author-notes" rid="fn001">
<sup>*</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Zhu</surname>
<given-names>Yi</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2540459/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>Engineering Design and Research Institute Co., Ltd, Yangzhou University</institution>, <addr-line>Yangzhou</addr-line>,&#xa0;<country>China</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>School of Electrical, Energy and Power Engineering, Yangzhou University</institution>, <addr-line>Yangzhou</addr-line>,&#xa0;<country>China</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>School of Information Engineering, Yangzhou University</institution>, <addr-line>Yangzhou</addr-line>,&#xa0;<country>China</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>Edited by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2163165/overview">Fan Yang</ext-link>, Jiangsu Normal University, China</p>
</fn>
<fn fn-type="edited-by">
<p>Reviewed by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3163308/overview">Hongming Zhang</ext-link>, Northwest A and F University, China</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3163685/overview">Xuchao Guo</ext-link>, Shandong Agricultural University, China</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3164936/overview">Junqi Ding</ext-link>, Zhejiang Police Vocational Academy, China</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3165179/overview">Qi Wang</ext-link>, Guizhou University, China</p>
</fn>
<fn fn-type="corresp" id="fn001">
<p>*Correspondence: Xinbing Li, <email xlink:href="mailto:xbli@yzu.edu.cn">xbli@yzu.edu.cn</email>
</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>29</day>
<month>09</month>
<year>2025</year>
</pub-date>
<pub-date pub-type="collection">
<year>2025</year>
</pub-date>
<volume>16</volume>
<elocation-id>1668642</elocation-id>
<history>
<date date-type="received">
<day>18</day>
<month>07</month>
<year>2025</year>
</date>
<date date-type="accepted">
<day>08</day>
<month>09</month>
<year>2025</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2025 Liu, Li and Zhu.</copyright-statement>
<copyright-year>2025</copyright-year>
<copyright-holder>Liu, Li and Zhu</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>The precise identification of plant pests and diseases plays a crucial role in preserving crop health and optimizing agricultural productivity. In practice, however, farmers frequently report symptoms in informal, everyday language. Traditional intelligent farming assistants are built upon domain-specific classification frameworks that depend on formal terminologies and structured symptom inputs, leading to subpar performance when faced with natural, unstructured farmer descriptions. To address this issue, we propose an innovative approach that classifies plant pests and diseases from colloquial symptom reports by leveraging soft prompt-tuning. Initially, we utilize Pretrained Language Models (PLMs) to conduct named entity recognition and retrieve domain-specific knowledge to enrich the input. Notably, this knowledge enrichment process introduces a kind of semantic alignment between the colloquial input and the acquired knowledge, enabling the model to better align informal expressions with formal agricultural concepts. Next, we apply a soft prompt-tuning strategy coupled with an external knowledge enhanced verbalizer for the classification task. The experimental results demonstrate that the proposed method outperforms baseline approaches, including state-of-the-art (SOTA) large language models (LLMs), in classifying plant pests and diseases from informal farmer descriptions. These results highlight the potential of prompt-tuning methods in bridging the gap between informal descriptions and expert knowledge, offering practical implications for the development of more accessible and intelligent agricultural support systems.</p>
</abstract>
<kwd-group>
<kwd>plant pests and diseases classification</kwd>
<kwd>colloquial descriptions</kwd>
<kwd>soft prompt-tuning</kwd>
<kwd>verbalizer</kwd>
<kwd>natural language processing</kwd>
</kwd-group>
<counts>
<fig-count count="4"/>
<table-count count="9"/>
<equation-count count="17"/>
<ref-count count="46"/>
<page-count count="18"/>
<word-count count="10373"/>
</counts>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-in-acceptance</meta-name>
<meta-value>Sustainable and Intelligent Phytoprotection</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec id="s1" sec-type="intro">
<label>1</label>
<title>Introduction</title>
<p>Plant pests and diseases are among the most pressing challenges in modern agriculture, threatening crop health, reducing yields, and causing substantial economic losses worldwide (<xref ref-type="bibr" rid="B5">Donatelli et&#xa0;al., 2017</xref>; <xref ref-type="bibr" rid="B19">Liu and Wang, 2021</xref>). Effective diagnosis and timely intervention are essential to mitigate these threats, particularly in rural and smallholder farming communities where expert support is often limited (<xref ref-type="bibr" rid="B21">Nayagam et&#xa0;al., 2023</xref>).</p>
<p>In these real-world agricultural settings, farmers typically report plant symptoms based on their direct observations and personal experiences rather than using standardized scientific terminology (<xref ref-type="bibr" rid="B24">Rodriguez-Garcia et&#xa0;al., 2021</xref>). These descriptions are highly colloquial, reflecting local linguistic habits and intuitive interpretations of visible symptoms. For instance, a farmer might describe an infection as &#x201c;The leaves are yellow and have red dots, with spider-web-like threads on the back&#x201d;, whereas a technical expert would label the condition as &#x201c;yellowing with red spider mite infestation.&#x201d; Similarly, the phrase &#x201c;The rice has grown white fuzz&#x201d; might correspond to &#x201c;powdery mildew&#x201d; in agronomic terms. Such linguistic mismatches create a substantial barrier between user-reported information and formal agricultural knowledge systems.</p>
<p>Despite the rise of intelligent agricultural assistants powered by natural language processing and image classification technologies, most of these systems are designed around structured, expert-level inputs and rely heavily on terminological consistency (<xref ref-type="bibr" rid="B32">Toscano-Miranda et&#xa0;al., 2022</xref>). Current approaches typically require users to select symptoms from predefined categories or input disease names and signs that align closely with entries in agricultural knowledge bases (<xref ref-type="bibr" rid="B34">Wang et&#xa0;al., 2024a</xref>). While this design performs adequately in controlled environments or when operated by trained personnel, it fails to accommodate the informal, diverse, and highly variable language used by farmers in natural dialogue (<xref ref-type="bibr" rid="B15">Li and Wang, 2024</xref>). As a result, these systems often misclassify or fail to recognize pests and diseases when presented with unstructured, colloquial input. Moreover, the colloquial descriptions are often very short and ambiguous, which exacerbates the challenge of accurate classification. Some recent studies in short text classification have attempted to tackle similar issues of data sparsity and semantic ambiguity by using character-level attention mechanisms combined with feature selection (<xref ref-type="bibr" rid="B42">Zhu et&#xa0;al., 2020</xref>), or by leveraging prompt-learning with external knowledge expansion (<xref ref-type="bibr" rid="B44">Zhu et&#xa0;al., 2024</xref>). However, these methods generally do not explicitly integrate agricultural domain knowledge nor address the unique linguistic patterns of farmer-reported symptoms, limiting their applicability in this context.</p>
<p>To bridge this gap between colloquial farmer descriptions and formal pest and disease classification, in this paper, we introduce an innovative approach for plant pest and disease classification based on colloquial descriptions by leveraging soft prompt-tuning. Unlike conventional fine-tuning methods that require extensive re-training of model parameters on domain-specific datasets, soft prompt-tuning introduces lightweight, continuous prompt vectors that guide the model&#x2019;s attention toward relevant linguistic patterns without modifying the core model architecture. Specifically, our method first leverages the AgriBERT based on PLMs for named entity recognition to extract key agricultural entities from the obfuscated text, and the agricultural knowledge graph is introduced to query domain-specific knowledge related to the entities. Then, the user-provided fuzzy description and the retrieved knowledge are concatenated and fed into the soft prompt-tuning model. The external verbalizer further enriches the model&#x2019;s understanding by mapping informal expressions to corresponding technical terms using structured agricultural knowledge, allowing the model to interpret and classify colloquial symptom descriptions more accurately. By leveraging the generalization capabilities of PLMs and integrating domain knowledge through the constructed verbalizer, our method effectively aligns natural language descriptions with standardized pest and disease categories. Comprehensive experiments conducted on two datasets demonstrate that our method outperforms the SOTA baselines including LLMs. In summary, the primary contributions of our work are outlined below:</p>
<list list-type="order">
<list-item>
<p>We identify and address a critical gap in plant pest and disease classification by focusing on the challenge of interpreting colloquial, non-standard symptom descriptions provided in real-world scenarios, which are often overlooked by existing intelligent agricultural systems designed around formal terminology.</p>
</list-item>
<list-item>
<p>We introduce an innovative classification approach based on soft prompt-tuning, enhanced with an external knowledge extension verbalizer, which effectively bridges informal linguistic input and domain-specific agricultural knowledge without requiring extra fine-tuning.</p>
</list-item>
<list-item>
<p>We construct and evaluate our method on two real-world datasets, demonstrating superior classification accuracy and robustness compared to the SOTA baselines including LLMs, thus highlighting the practical potential of our method for improving intelligent agricultural diagnostics in real-world scenarios.</p>
</list-item>
</list>
</sec>
<sec id="s2">
<label>2</label>
<title>Related work</title>
<sec id="s2_1">
<label>2.1</label>
<title>Plant pests and diseases classification</title>
<p>Plant diseases and pests are significant factors determining both the yield and quality of crops, which can be addressed by means of artificial intelligence (<xref ref-type="bibr" rid="B30">Spence et&#xa0;al., 2020</xref>). These diseases and pests represent a form of natural disasters that disrupt the healthy growth of plants, potentially leading to plant mortality throughout the entire development stage, from seed formation to seedling growth (<xref ref-type="bibr" rid="B19">Liu and Wang, 2021</xref>).</p>
<p>Traditional approaches to plant pest and disease classification have predominantly relied on manual inspections and specialized knowledge, which are labor-intensive, time-consuming, and prone to human mistakes and biases (<xref ref-type="bibr" rid="B38">Xing and Lee, 2022</xref>). With the rise of machine learning and computer vision, automated image-based classification methods have gained widespread attention for their potential to improve efficiency and accuracy (<xref ref-type="bibr" rid="B4">Domingues et&#xa0;al., 2022</xref>). For example, Shoaib et&#xa0;al. proposed advanced deep learning models for plant disease detection, highlighting the effectiveness of Convolutional Neural Networks (CNNs) in learning hierarchical features from images (<xref ref-type="bibr" rid="B28">Shoaib et&#xa0;al., 2023</xref>). Some classical architectures such as AlexNet, VGGNet, ResNet, and Inception have been employed to classify diseases in various crops, including tomato, rice, maize, and citrus (<xref ref-type="bibr" rid="B31">Sumaya et&#xa0;al., 2024</xref>). For instance, Yueteng et&#xa0;al. demonstrated that an improved ResNet architecture enhances recognition accuracy in complex plant disease datasets (<xref ref-type="bibr" rid="B40">Yueteng et&#xa0;al., 2021</xref>). Furthermore, traditional machine learning models like Support Vector Machines (SVM), k-Nearest Neighbors (k-NN), and Random Forests have been deployed, often in conjunction with manually extracted features such as color, texture, and shape descriptors. For example, Kale et&#xa0;al. analyzed crop disease detection using these classifiers and found that while effective under certain conditions, they often struggle with generalizability across diverse environmental conditions and are limited when dealing with visually similar symptoms among different diseases (<xref ref-type="bibr" rid="B12">Kale and Shitole, 2021</xref>).</p>
<p>Recently, to overcome the limitations of single-modal approaches, there have already been some efforts on exploring multi-modal learning frameworks for plant pest and disease classification, which integrate heterogeneous data sources, such as images, textual descriptions, sensor data, and environmental metadata (<xref ref-type="bibr" rid="B39">Yang et&#xa0;al., 2021</xref>). This paradigm aims to enhance the robustness and contextual awareness of classification systems (<xref ref-type="bibr" rid="B17">Liu et&#xa0;al., 2025a</xref>). For example, Wei et&#xa0;al. proposed a multi-modal transformer architecture for citrus pests and diseases classification, where both image and text features are encoded and aligned through a cross-attention mechanism, enabling improved retrieval and identification performance (<xref ref-type="bibr" rid="B36">Wei et&#xa0;al., 2023</xref>). Similarly, Duan et&#xa0;al. introduced a multimodal system combining RGB images, text data, and environmental cues to facilitate pest detection and classification, demonstrating superior performance over image-only models, especially in complex agricultural scenarios (<xref ref-type="bibr" rid="B7">Duan et&#xa0;al., 2023</xref>). Wang et&#xa0;al. proposed Agri-LLaVA, an advanced multimodal assistant enriched with domain knowledge, designed specifically for managing agricultural pests and diseases. Agri-LLaVA is trained on an extensive multimodal dataset, containing more than 221 varieties of pests and diseases, amounting to roughly 400,000 data samples. By integrating domain-specific knowledge into its training process, Agri-LLaVA demonstrates superior performance in both multimodal agricultural dialogue and visual comprehension, offering innovative solutions to tackle pest and disease challenges in agriculture (<xref ref-type="bibr" rid="B35">Wang et&#xa0;al., 2024b</xref>). 
These approaches leverage the complementarity of modalities, while images provide morphological cues, textual and contextual data supply semantic and environmental understanding, which proves to be useful for fine-grained and field-based classification tasks.</p>
<p>Recently, to overcome the limitations of single-modal approaches, there have already been some efforts on exploring multi-modal learning frameworks for plant pest and disease classification, which integrate heterogeneous data sources, such as images, textual descriptions, sensor data, and environmental metadata (<xref ref-type="bibr" rid="B39">Yang et&#xa0;al., 2021</xref>). This paradigm aims to enhance the robustness and contextual awareness of classification systems (<xref ref-type="bibr" rid="B17">Liu et&#xa0;al., 2025a</xref>). For example, Wei et&#xa0;al. proposed a multi-modal transformer architecture for citrus pests and diseases classification, where both image and text features are encoded and aligned through a cross-attention mechanism, enabling improved retrieval and identification performance (<xref ref-type="bibr" rid="B36">Wei et&#xa0;al., 2023</xref>). Similarly, Duan et&#xa0;al. introduced a multimodal system combining RGB images, text data, and environmental cues to facilitate pest detection and classification, demonstrating superior performance over image-only models, especially in complex agricultural scenarios (<xref ref-type="bibr" rid="B7">Duan et&#xa0;al., 2023</xref>). Wang et&#xa0;al. proposed Agri-LLaVA, an advanced multimodal assistant enriched with domain knowledge, designed specifically for managing agricultural pests and diseases. Agri-LLaVA is trained on an extensive multimodal dataset, containing more than 221 varieties of pests and diseases, amounting to roughly 400,000 data samples. By integrating domain-specific knowledge into its training process, Agri-LLaVA demonstrates superior performance in both multimodal agricultural dialogue and visual comprehension, offering innovative solutions to tackle pest and disease challenges in agriculture (<xref ref-type="bibr" rid="B35">Wang et&#xa0;al., 2024b</xref>). In addition, Zhao et&#xa0;al. 
introduced PlanText, a gradually masked guidance framework to align image phenotypes with trait descriptions for plant disease texts, further highlighting the potential of integrating visual and textual modalities in plant health analysis (<xref ref-type="bibr" rid="B41">Zhao et&#xa0;al., 2024</xref>). Meanwhile, Dong et&#xa0;al. developed PlantPAD, a large-scale image phenomics platform for plant science, which provides high-quality resources for training and validating plant disease classification systems (<xref ref-type="bibr" rid="B6">Dong et&#xa0;al., 2024</xref>). These approaches leverage the complementarity of modalities, while images provide morphological cues, textual and contextual data supply semantic and environmental understanding, which proves to be useful for fine-grained and field-based classification tasks.</p>
<p>Although the above-mentioned multi-modal approaches have shown promise in plant pests and diseases classification, most existing methods primarily treat non-visual modalities as auxiliary inputs to enhance image-based features. This image-centric design often overlooks the independent value and discriminative power of other modalities, particularly textual data. In real-world agricultural scenarios, textual descriptions are typically colloquial, non-standard, and context-dependent, posing significant challenges to conventional multi-modal fusion strategies. While some studies have explored robust textual encoding techniques to handle noisy or weakly structured inputs (<xref ref-type="bibr" rid="B45">Zhu et&#xa0;al., 2023</xref>), these characteristics nevertheless result in a persistent semantic gap that current models struggle to bridge, thereby limiting their robustness and generalizability. To address these limitations, in this paper, we propose a novel approach to improve the model&#x2019;s capacity to understand and utilize natural language expressions effectively for more accurate and practical plant disease classification.</p>
<p>While multimodal and text-based approaches have achieved progress, existing models still face significant challenges when processing colloquial, non-standard user inputs. Farmers&#x2019; symptom descriptions are often short, vague, and expressed in everyday language, which are inconsistent with the professional terminologies used in agricultural knowledge bases. For example, models trained on standardized datasets struggle to align colloquial expressions with technical disease terms, leading to misclassification or failure to recognize symptoms. Moreover, the semantic ambiguity and variability of colloquial text introduce additional noise, weakening the model&#x2019;s ability to capture fine-grained distinctions across different disease categories. These limitations further underscore the necessity of developing methods that can effectively bridge colloquial language with domain-specific knowledge, which is precisely the problem our study seeks to address.</p>
</sec>
<sec id="s2_2">
<label>2.2</label>
<title>Prompt-tuning</title>
<p>Prompt-tuning has surfaced as an efficient and effective method for adjusting Pre-trained Language Models (PLMs) to downstream tasks without requiring full model fine-tuning (<xref ref-type="bibr" rid="B20">Liu et&#xa0;al., 2021</xref>). This paradigm reformulates downstream tasks as cloze-style objectives, which is particularly attractive in resource-constrained settings due to its efficiency and ability to preserve general language knowledge encoded in PLMs. The evolution of prompt-tuning includes both discrete and soft prompt methods. Early work in manual prompt design relied on human intuition to craft natural language prompts that could guide PLMs toward the desired behavior, including relation extraction (<xref ref-type="bibr" rid="B9">Han et&#xa0;al., 2021</xref>), knowledge probing (<xref ref-type="bibr" rid="B23">Petroni et&#xa0;al., 2019</xref>), and text classification (<xref ref-type="bibr" rid="B10">Hu et&#xa0;al., 2021</xref>). For example, Han et&#xa0;al. introduced a prompt-tuning model with rules for many-class classification tasks, encoding prior knowledge into prompt-tuning via logic rules and proposing manually designed sub-prompts to construct task-specific prompts (<xref ref-type="bibr" rid="B9">Han et&#xa0;al., 2021</xref>).</p>
<p>However, the manually created prompt proved to be inflexible and suboptimal, leading to the development of automated prompt generation strategies (<xref ref-type="bibr" rid="B14">Li and Liang, 2021</xref>). In soft prompt-tuning, continuous embeddings serve as prompts and are optimized while keeping the PLM&#x2019;s weights frozen. For instance, Shin et&#xa0;al. developed the AUTOPROMPT method for generating prompts across various NLP downstream tasks (<xref ref-type="bibr" rid="B27">Shin et&#xa0;al., 2020</xref>). In the method, an auto-prompt consisted of the input sentence and the set of trigger tokens. These tokens remain consistent across all inputs and are determined via a gradient-based search mechanism. Wu et&#xa0;al. proposed an information-theoretic approach that framed soft prompt-tuning as optimizing the mutual information between the prompts and other model parameters (<xref ref-type="bibr" rid="B37">Wu et&#xa0;al., 2023</xref>). The technique involved two loss functions to achieve proper prompt initialization and extract relevant task-specific information from downstream tasks. Zhu et&#xa0;al. proposed a soft prompt-tuning method for short text stream classification (<xref ref-type="bibr" rid="B43">Zhu et&#xa0;al., 2025</xref>), which builds the verbalizer using internal knowledge rather than retrieving from external knowledge bases, further optimizing it through additional tailored strategies. Considering the advantages of soft prompt in contrast to manually crafted prompts, in this paper, we introduce the soft prompt-tuning method for colloquial descriptions in plant pest and disease classification.</p>
</sec>
</sec>
<sec id="s3">
<label>3</label>
<title>Methodology</title>
<sec id="s3_1">
<label>3.1</label>
<title>Overall architecture</title>
<p>As shown in <xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1</bold>
</xref>, the proposed method first leverages the AgriBERT (<xref ref-type="bibr" rid="B1">Chen et&#xa0;al., 2024</xref>) for named entity recognition model to extract key agricultural entities from the obfuscated text, with relevant attribute information retrieved from the AgriKG (<xref ref-type="bibr" rid="B2">Chen et&#xa0;al., 2019</xref>) agricultural knowledge graph to effectively supplement domain knowledge. Then, the user-provided fuzzy description and the retrieved knowledge are concatenated and fed into the soft prompt-tuning model. For the prompt-tuning method, the external verbalizer further enriches the model&#x2019;s understanding by mapping informal expressions to corresponding technical terms using structured agricultural knowledge, allowing the model to interpret and classify colloquial symptom descriptions more accurately. By leveraging the generalization capabilities of PLMs and integrating domain knowledge through the constructed verbalizer, our approach notably enhances both the precision and interpretability of pest and disease predictions.</p>
<fig id="f1" position="float">
<label>Figure&#xa0;1</label>
<caption>
<p>Illustration of the proposed method combining AgriBERT-based entity recognition and AgriKG retrieval, enhanced by prompt learning with soft templates and extended verbalizers for improved pest and disease prediction.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-16-1668642-g001.tif">
<alt-text content-type="machine-generated">Flowchart illustrating a text classification model for identifying species like the Red Spider Mite. It incorporates components such as a knowledge graph, PLM Embedding Layer, PLM Transformer, and Verbalizer. Inputs go through a knowledge expansion phase via AgriKG and AgriBERT, leading to classification. The output is matched to descriptions like &#x201c;The leaves are yellow and have red dots, with spider-web-like threads on the back.&#x201d;</alt-text>
</graphic>
</fig>
</sec>
<sec id="s3_2">
<label>3.2</label>
<title>Knowledge enhancement</title>
<p>First, we utilize the AgriBERT named entity recognition model, specifically trained for agricultural texts, to extract relevant entities from the input fuzzy agricultural text. This model captures contextual information through a multi-layer bidirectional self-attention mechanism and incorporates a global pointer mechanism for entity localization.</p>
<p>For the input fuzzy text <inline-formula>
<mml:math display="inline" id="im1">
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mrow>
<mml:mi>f</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>z</mml:mi>
<mml:mi>z</mml:mi>
<mml:mi>y</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>t</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>t</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>t</mml:mi>
<mml:mi>n</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, where <italic>t<sub>i</sub>
</italic> represents the <italic>i</italic>-th word in the text, the AgriBERT model outputs a set of entity labels <inline-formula>
<mml:math display="inline" id="im2">
<mml:mrow>
<mml:mi>E</mml:mi>
<mml:mo>=</mml:mo>
<mml:mrow>
<mml:mo>{</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>e</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>e</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>e</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo>}</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, where each entity <italic>e<sub>i</sub>
</italic>includes the entity type and its position within the text. However, this positional information is still relatively coarse and cannot guarantee precise boundary detection. We use the global pointer mechanism <italic>P</italic>(<inline-formula>
<mml:math display="inline" id="im3">
<mml:mrow>
<mml:msub>
<mml:mi>e</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>) to represent the specific position of entity <inline-formula>
<mml:math display="inline" id="im4">
<mml:mrow>
<mml:msub>
<mml:mi>e</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> in the text, as described by the following formula (<xref ref-type="disp-formula" rid="eq1">Equation 1</xref>):</p>
<disp-formula id="eq1">
<label>(1)</label>
<mml:math display="block" id="M1">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>e</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>=</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>r</mml:mi>
<mml:msub>
<mml:mi>t</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mi>e</mml:mi>
<mml:mi>n</mml:mi>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where <italic>start<sub>i</sub>
</italic> and <italic>end<sub>i</sub>
</italic> represent the start and end positions of entity <italic>e<sub>i</sub>
</italic>, respectively. Thus, <inline-formula>
<mml:math display="inline" id="im5">
<mml:mrow>
<mml:msub>
<mml:mi>e</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> provides the semantic label, while <italic>P</italic>(<inline-formula>
<mml:math display="inline" id="im6">
<mml:mrow>
<mml:msub>
<mml:mi>e</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>) precisely anchors the boundary, thereby enhancing the model&#x2019;s robustness in handling overlapping or ambiguous entities.</p>
<p>After extracting the entities, we leverage AgriKG, a publicly available agricultural knowledge graph, to query domain-specific knowledge related to the entities. By querying AgriKG, we obtain the corresponding relevant knowledge <italic>K<sub>i</sub>
</italic>for each entity <inline-formula>
<mml:math display="inline" id="im7">
<mml:mrow>
<mml:msub>
<mml:mi>e</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, which contains various attributes related to the entity. Let <italic>K<sub>i</sub>
</italic> represent the set of knowledge fragments obtained from AgriKG. We organize these knowledge fragments as follows (<xref ref-type="disp-formula" rid="eq2">Equation 2</xref>):</p>
<disp-formula id="eq2">
<label>(2)</label>
<mml:math display="block" id="M2">
<mml:mrow>
<mml:msub>
<mml:mi>K</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mrow>
<mml:mo>{</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>k</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>k</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>k</mml:mi>
<mml:mi>p</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo>}</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where each knowledge fragment <inline-formula>
<mml:math display="inline" id="im8">
<mml:mrow>
<mml:msub>
<mml:mi>k</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> provides specific information relevant to entity <inline-formula>
<mml:math display="inline" id="im9">
<mml:mrow>
<mml:msub>
<mml:mi>e</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and contains domain-related knowledge in agriculture.</p>
<p>Next, we concatenate the user-provided fuzzy description <inline-formula>
<mml:math display="inline" id="im10">
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mrow>
<mml:mi>f</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>z</mml:mi>
<mml:mi>z</mml:mi>
<mml:mi>y</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>t</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>t</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>t</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> and the relevant knowledge fragments <italic>K<sub>i</sub> </italic>retrieved from AgriKG. The fuzzy text <inline-formula>
<mml:math display="inline" id="im11">
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mrow>
<mml:mi>f</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>z</mml:mi>
<mml:mi>z</mml:mi>
<mml:mi>y</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> represents the words in the non-expert language provided by the user. We concatenate the user&#x2019;s description <inline-formula>
<mml:math display="inline" id="im12">
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mrow>
<mml:mi>f</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>z</mml:mi>
<mml:mi>z</mml:mi>
<mml:mi>y</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> with the knowledge fragments from AgriKG in the following format (<xref ref-type="disp-formula" rid="eq3">Equation 3</xref>):</p>
<disp-formula id="eq3">
<label>(3)</label>
<mml:math display="block" id="M3">
<mml:mrow>
<mml:mtext>Enhanced&#xa0;Description</mml:mtext>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mrow>
<mml:mi>f</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>z</mml:mi>
<mml:mi>z</mml:mi>
<mml:mi>y</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:mrow>
<mml:mtext>SEP</mml:mtext>
</mml:mrow>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>K</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where [SEP] is a separator used to distinguish the original description from the knowledge fragments. The concatenated text contains both the user&#x2019;s non-expert description and the supplemental domain knowledge, thereby enhancing the professionalism and completeness of the text.</p>
<p>To further illustrate this process, we provide two running examples in <xref ref-type="table" rid="T1">
<bold>Table&#xa0;1</bold>
</xref>. Each input is first parsed by AgriBERT-NER to extract entities, then enriched with compact knowledge snippets from AgriKG, and finally concatenated into the enhanced description. As shown in the table, the pipeline effectively aligns colloquial farmer expressions with formal agronomic terminology, leading to accurate classification results.</p>
<table-wrap id="T1" position="float">
<label>Table&#xa0;1</label>
<caption>
<p>Running examples illustrating how the proposed framework processes colloquial farmer descriptions.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="left">Colloquial Input</th>
<th valign="middle" align="left">NER (Entities)</th>
<th valign="middle" align="left">KG Snippets</th>
<th valign="middle" align="left">Enhanced Description</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="left">The whole field can look like it&#x2019;s been burned from far away because so many leaves have turned brown and died.</td>
<td valign="middle" align="left">field, burned, leaves, brown, died</td>
<td valign="middle" align="left">leaf necrosis, burnt appearance, rice blast lesion</td>
<td valign="middle" align="left">Colloquial input followed by [SEP] and knowledge snippets:<break/>&#x201c;leaf necrosis; burnt appearance; rice blast lesion&#x201d;</td>
</tr>
<tr>
<td valign="middle" align="left">The spots on the leaves are a dry, tan, or light brown color and are always surrounded by a bright yellow ring.</td>
<td valign="middle" align="left">spots, leaves, dry, tan/light brown, yellow ring</td>
<td valign="middle" align="left">leaf spot symptom, necrotic lesion, halo chlorosis</td>
<td valign="middle" align="left">Colloquial input followed by [SEP] and knowledge snippets:<break/>&#x201c;leaf spot symptom; necrotic lesion; halo chlorosis&#x201d;</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Each input is first parsed by AgriBERT-NER to extract entities, then enriched with knowledge snippets from the agricultural KG, and finally transformed into an enhanced description.</p>
</fn>
</table-wrap-foot>
</table-wrap>
</sec>
<sec id="s3_3">
<label>3.3</label>
<title>Soft template construction</title>
<p>We adopt AgriBERT as the backbone PLM. This model is specifically trained on agricultural text tasks, enabling strong capabilities in agricultural terminology recognition and semantic representation. Essentially, AgriBERT follows the BERT architecture, consisting of 12 Transformer encoder layers, each with a 768-dimensional hidden representation and 12 self-attention heads.</p>
<p>In contrast to prompt-tuning methods relying on manually crafted templates, our method utilizes soft templates learned within a continuously optimized prompt space. When integrated with the enhanced description <inline-formula>
<mml:math display="inline" id="im13">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> described earlier, this approach enables more adaptive text recognition by the model, and can be formulated as (<xref ref-type="disp-formula" rid="eq4">Equation 4</xref>):</p>
<disp-formula id="eq4">
<label>(4)</label>
<mml:math display="block" id="M4">
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>m</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mrow>
<mml:mo>{</mml:mo>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>u</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>u</mml:mi>
<mml:mi>n</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
<mml:mo>,</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>A</mml:mi>
<mml:mi>S</mml:mi>
<mml:mi>K</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo>}</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where <italic>x<sub>en</sub>
</italic> represents the enhanced description obtained by concatenating the fuzzy text with the knowledge fragments introduced in Section 3.2, <inline-formula>
<mml:math display="inline" id="im14">
<mml:mrow>
<mml:msub>
<mml:mi>u</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> denotes the <inline-formula>
<mml:math display="inline" id="im15">
<mml:mrow>
<mml:msup>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mi>h</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> learnable token. The prompt <inline-formula>
<mml:math display="inline" id="im16">
<mml:mi>T</mml:mi>
</mml:math>
</inline-formula> is subsequently passed through the encoder of a PLM to generate hidden states <inline-formula>
<mml:math display="inline" id="im17">
<mml:mrow>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mi>n</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mtext>MASK</mml:mtext>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>. Accordingly, the soft prompt is formulated as (<xref ref-type="disp-formula" rid="eq5">Equation 5</xref>):</p>
<disp-formula id="eq5">
<label>(5)</label>
<mml:math display="block" id="M5">
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>m</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mrow>
<mml:mo>{</mml:mo>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mi>n</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">]</mml:mo>
<mml:mo>,</mml:mo>
<mml:mo stretchy="false">[</mml:mo>
</mml:mrow>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mtext>mask</mml:mtext>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo>}</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
<p>To further enhance the model&#x2019;s ability to capture temporal and contextual information in non-standard expressions, we integrate a two-layer bidirectional LSTM encoder head into the prompt-learning framework. The input size, hidden size, and embedding size are all set to 768 to maintain consistency between forward and backward information flows. During inter-layer representation transfer, each LSTM layer employs the standard hidden-state propagation mechanism to preserve information progression. The final output is obtained by concatenating the hidden states from both directions, which is then fed into the classifier for prediction. This process can be formulated as (<xref ref-type="disp-formula" rid="eq6">Equation 6</xref>):</p>
<disp-formula id="eq6">
<label>(6)</label>
<mml:math display="block" id="M6">
<mml:mrow>
<mml:msubsup>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
<mml:mo>'</mml:mo>
</mml:msubsup>
<mml:mo>=</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="true">&#x2192;</mml:mo>
</mml:mover>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mo>,</mml:mo>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mover accent="true">
<mml:mrow>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="true">&#x2190;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>=</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mtext>LSTM</mml:mtext>
</mml:mrow>
<mml:mo stretchy="true">&#x2192;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mover accent="true">
<mml:mrow>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="true">&#x2192;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>,</mml:mo>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mover accent="true">
<mml:mrow>
<mml:mtext>LSTM</mml:mtext>
</mml:mrow>
<mml:mo stretchy="true">&#x2190;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mover accent="true">
<mml:mrow>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>+</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="true">&#x2190;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where <italic>h<sub>i</sub>
</italic> denotes the hidden state input at the <italic>i</italic>-th position of the input sequence, derived from the encoder of AgriBERT. Specifically, it represents the contextualized embedding obtained by feeding the soft prompt together with the textual input into the Transformer architecture. The final fused representation <inline-formula>
<mml:math display="inline" id="im18">
<mml:msubsup>
<mml:mrow>
<mml:mi>h</mml:mi>
</mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>'</mml:mo>
</mml:msubsup>
</mml:math>
</inline-formula> is formed by concatenating the hidden states from the forward and backward directions of the BiLSTM, thereby capturing bidirectional contextual information.</p>
<p>Ultimately, the model improves its performance and output quality by determining the values of its variables that minimize the loss function, as illustrated in (<xref ref-type="disp-formula" rid="eq7">Equation 7</xref>).</p>
<disp-formula id="eq7">
<label>(7)</label>
<mml:math display="block" id="M7">
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mo>=</mml:mo>
<mml:mtext>arg&#xa0;</mml:mtext>
<mml:munder>
<mml:mrow>
<mml:mi>min</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msubsup>
<mml:mi>h</mml:mi>
<mml:mi>i</mml:mi>
<mml:mo>'</mml:mo>
</mml:msubsup>
</mml:mrow>
</mml:munder>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mi>L</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mi>M</mml:mi>
<mml:mi>A</mml:mi>
<mml:mi>S</mml:mi>
<mml:mi>K</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
<p>To adjust the model parameters, we adopt the cross-entropy loss, which quantifies the divergence between predicted outputs and ground-truth labels. The objective function is formulated as (<xref ref-type="disp-formula" rid="eq8">Equation 8</xref>):</p>
<disp-formula id="eq8">
<label>(8)</label>
<mml:math display="block" id="M8">
<mml:mrow>
<mml:mi>&#x2112;</mml:mi>
<mml:mo>=</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mi>N</mml:mi>
</mml:mfrac>
<mml:mtext>log&#xa0;</mml:mtext>
<mml:mi>p</mml:mi>
<mml:mo stretchy="false">(</mml:mo>
<mml:msup>
<mml:mi>y</mml:mi>
<mml:mo>&#x2217;</mml:mo>
</mml:msup>
<mml:mo>|</mml:mo>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">)</mml:mo>
<mml:mo>+</mml:mo>
<mml:mi>&#x3b1;</mml:mi>
<mml:mo>|</mml:mo>
<mml:mi>&#x3b8;</mml:mi>
<mml:msup>
<mml:mo>|</mml:mo>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where N denotes the number of training instances, <inline-formula>
<mml:math display="inline" id="im19">
<mml:mrow>
<mml:msup>
<mml:mi>y</mml:mi>
<mml:mo>*</mml:mo>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> is the gold label, and <inline-formula>
<mml:math display="inline" id="im20">
<mml:mrow>
<mml:mo>|</mml:mo>
<mml:mi>&#x3b8;</mml:mi>
<mml:msup>
<mml:mo>|</mml:mo>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> represents the L2 penalty on parameters <inline-formula>
<mml:math display="inline" id="im21">
<mml:mi>&#x3b8;</mml:mi>
</mml:math>
</inline-formula>. The penalty term helps alleviate overfitting by restricting parameter magnitudes, and the coefficient <italic>&#x3b1;</italic> balances the impact of regularization in the overall loss.</p>
</sec>
<sec id="s3_4">
<label>3.4</label>
<title>Verbalizer construction</title>
<p>Prompt-tuning studies (<xref ref-type="bibr" rid="B26">Schick and Schutze, 2020</xref>) have shown that aligning label words with their target categories <italic>y</italic> helps reduce the mismatch between textual input and label representation. This process, referred to as automatic label word selection (<xref ref-type="bibr" rid="B8">Gao et&#xa0;al., 2020</xref>) or verbalization (<xref ref-type="bibr" rid="B25">Schick et&#xa0;al., 2020</xref>), can be formally expressed as (<xref ref-type="disp-formula" rid="eq9">Equation 9</xref>):</p>
<disp-formula id="eq9">
<label>(9)</label>
<mml:math display="block" id="M9">
<mml:mrow>
<mml:mo>{</mml:mo>
<mml:mi>v</mml:mi>
<mml:mo>|</mml:mo>
<mml:msub>
<mml:mi>v</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>v</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>v</mml:mi>
<mml:mi>N</mml:mi>
</mml:msub>
<mml:mo>}</mml:mo>
<mml:mover>
<mml:mo>&#x2192;</mml:mo>
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>g</mml:mi>
</mml:mrow>
</mml:mover>
<mml:mi>y</mml:mi>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math display="inline" id="im22">
<mml:mrow>
<mml:msub>
<mml:mi>v</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> represents a word in the verbalizer. In our method, we build the verbalizer by leveraging words extracted from an external knowledge graph. This method expands semantic diversity while promoting greater robustness and generalizability of the verbalizer.</p>
<p>To retrieve mapping words associated with the target categories from a knowledge graph, we employ Related Words<xref ref-type="fn" rid="fn1">
<sup>1</sup>
</xref> as our external source. This knowledge graph aggregates multiple resources, such as word vectors, ConceptNet (<xref ref-type="bibr" rid="B29">Speer et&#xa0;al., 2017</xref>), and WordNet (<xref ref-type="bibr" rid="B22">Pedersen et&#xa0;al., 2004</xref>), allowing us to extract an initial set of words <inline-formula>
<mml:math display="inline" id="im23">
<mml:mi>v</mml:mi>
</mml:math>
</inline-formula> for each category label <italic>y</italic>, thereby constructing the base verbalizer. Given the vast amount of text and the possibility of noise or irrelevant content, we implement three optimization strategies to refine the extracted words. These strategies are designed to capture various facets of the expanded word characteristics, aiming to uncover the underlying intent of the original text. The specific methods are outlined below:</p>
<p>FastText Similarity: A commonly employed method for improving verbalizer construction consists of evaluating the semantic similarity between category labels and their extended label terms. This approach uses the FastText embedding model to generate vector representations and calculate the cosine similarity between category label terms and their expanded equivalents. Let <inline-formula>
<mml:math display="inline" id="im24">
<mml:mrow>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mi>y</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math display="inline" id="im25">
<mml:mrow>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mi>v</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> denote the embeddings of a category label <inline-formula>
<mml:math display="inline" id="im26">
<mml:mi>y</mml:mi>
</mml:math>
</inline-formula> and an extended label term <inline-formula>
<mml:math display="inline" id="im27">
<mml:mi>v</mml:mi>
</mml:math>
</inline-formula>, respectively. The cosine similarity is expressed as (<xref ref-type="disp-formula" rid="eq10">Equation 10</xref>):</p>
<disp-formula id="eq10">
<label>(10)</label>
<mml:math display="block" id="M10">
<mml:mrow>
<mml:mtext>cos&#xa0;</mml:mtext>
<mml:mo stretchy="false">(</mml:mo>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mi>y</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mi>v</mml:mi>
</mml:msub>
<mml:mo stretchy="false">)</mml:mo>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>g</mml:mi>
</mml:msubsup>
<mml:mrow>
<mml:msubsup>
<mml:mi>E</mml:mi>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msubsup>
<mml:msubsup>
<mml:mi>E</mml:mi>
<mml:mi>v</mml:mi>
<mml:mi>i</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:mstyle>
</mml:mrow>
<mml:mrow>
<mml:msqrt>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>g</mml:mi>
</mml:msubsup>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:msubsup>
<mml:mi>E</mml:mi>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msubsup>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:mstyle>
</mml:mrow>
</mml:msqrt>
<mml:mo>&#xd7;</mml:mo>
<mml:msqrt>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>g</mml:mi>
</mml:msubsup>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:msubsup>
<mml:mi>E</mml:mi>
<mml:mi>v</mml:mi>
<mml:mi>i</mml:mi>
</mml:msubsup>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:mstyle>
</mml:mrow>
</mml:msqrt>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math display="inline" id="im28">
<mml:mi>g</mml:mi>
</mml:math>
</inline-formula> refers to the dimension of the word embedding, while <inline-formula>
<mml:math display="inline" id="im29">
<mml:mrow>
<mml:msubsup>
<mml:mi>E</mml:mi>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> indicates the <inline-formula>
<mml:math display="inline" id="im30">
<mml:mrow>
<mml:msup>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mi>h</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> component of the vector <inline-formula>
<mml:math display="inline" id="im31">
<mml:mrow>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mi>y</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
<p>Notably, to ensure relevance, only the top <italic>N</italic> expanded words with the highest cosine similarity to the category label are preserved, while those with low similarity are excluded.</p>
<p>Probability Prediction: Leveraging contextual cues and prompt templates to estimate the likelihood of masked tokens is a crucial technique in refining the verbalizer. This is achieved using a PLM (e.g., BERT), which outputs the probability distribution over potential words filling the [MASK] position.</p>
<p>More concretely, given a prompt template <italic>T</italic>, the model masks certain words in the input and computes <italic>p</italic>(<italic>T</italic>[<italic>MASK</italic>]), the probability distribution over possible replacements. This distribution reflects the strength of association between each candidate word and the target category.</p>
<p>We apply BERT to obtain this distribution and select the top <italic>N</italic> most probable terms to expand the label word set.</p>
<p>Context Information: In order to enrich the label word set and effectively utilize the surrounding context of masked tokens, we propose an expansion strategy based on context windows. Rather than relying on conventional N-gram models, our approach leverages non-autoregressive PLMs like BERT to capture contextual dependencies. Given that BERT cannot directly estimate full-sentence generation probabilities, we address this constraint through the application of a symmetric sliding window approach.</p>
<p>Assuming a window size of <inline-formula>
<mml:math display="inline" id="im32">
<mml:mi>c</mml:mi>
</mml:math>
</inline-formula>, the context centered around the [MASK] token can be represented as (<xref ref-type="disp-formula" rid="eq11">Equation 11</xref>):</p>
<disp-formula id="eq11">
<label>(11)</label>
<mml:math display="block" id="M11">
<mml:mrow>
<mml:mi>W</mml:mi>
<mml:mo>=</mml:mo>
<mml:mrow>
<mml:mo>{</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:mtext>MASK</mml:mtext>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo>}</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
<p>Within this framework, each word <inline-formula>
<mml:math display="inline" id="im33">
<mml:mrow>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> in the window is sequentially masked and input into the BERT model for calculating the loss associated with predicting the masked word (<xref ref-type="disp-formula" rid="eq12">Equation 12</xref>):</p>
<disp-formula id="eq12">
<label>(12)</label>
<mml:math display="block" id="M12">
<mml:mrow>
<mml:mi>L</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>=</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mstyle displaystyle="true">
<mml:munder>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>v</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:mi>V</mml:mi>
</mml:mrow>
</mml:munder>
</mml:mstyle>
<mml:mn>1</mml:mn>
<mml:mrow>
<mml:mo>{</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>v</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo>}</mml:mo>
</mml:mrow>
<mml:mo>&#xd7;</mml:mo>
<mml:mtext>log&#xa0;</mml:mtext>
<mml:mi>p</mml:mi>
<mml:mo stretchy="false">(</mml:mo>
<mml:msub>
<mml:mi>v</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>|</mml:mo>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math display="inline" id="im34">
<mml:mi>V</mml:mi>
</mml:math>
</inline-formula> denotes the vocabulary set, <inline-formula>
<mml:math display="inline" id="im35">
<mml:mn>1</mml:mn>
</mml:math>
</inline-formula> is the indicator function, and <inline-formula>
<mml:math display="inline" id="im36">
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo stretchy="false">(</mml:mo>
<mml:msub>
<mml:mi>v</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>|</mml:mo>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> represents the predicted probability distribution of BERT conditioned on <inline-formula>
<mml:math display="inline" id="im37">
<mml:mi>W</mml:mi>
</mml:math>
</inline-formula> with <inline-formula>
<mml:math display="inline" id="im38">
<mml:mrow>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> excluded.</p>
<p>In the experiments, label word candidates are sorted according to their sequence loss <italic>L</italic>(<italic>W</italic>), and those with higher loss values are discarded. Only the words with the lowest losses are preserved. A fixed window size of <inline-formula>
<mml:math display="inline" id="im39">
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>5</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> is used, and for consistency, the <inline-formula>
<mml:math display="inline" id="im40">
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>15</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> words identified by each of the three strategies are selected to construct the expanded label word set.</p>
<p>The combination of FastText Similarity, Probability Prediction, and Context Information enables a multi-faceted enhancement of the verbalizer, thereby substantially improving the model&#x2019;s semantic understanding of category labels.</p>
</sec>
<sec id="s3_5">
<label>3.5</label>
<title>Final detection</title>
<p>Once the external knowledge-based verbalizer has been refined using the three proposed strategies, we compute the prediction score using a weighted average of the label word scores. In particular, the final prediction <inline-formula>
<mml:math display="inline" id="im41">
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>^</mml:mo>
</mml:mover>
</mml:math>
</inline-formula> is obtained by aggregating the scores of all candidate categories according to their respective word weights. These weights are calculated based on the contribution of each word, as formulated below (<xref ref-type="disp-formula" rid="eq13">Equation 13</xref>):</p>
<disp-formula id="eq13">
<label>(13)</label>
<mml:math display="block" id="M13">
<mml:mrow>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>^</mml:mo>
</mml:mover>
<mml:mo>=</mml:mo>
<mml:mtext>arg&#xa0;</mml:mtext>
<mml:munder>
<mml:mrow>
<mml:mi>max</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>y</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mi>Y</mml:mi>
</mml:mrow>
</mml:munder>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mrow>
<mml:mrow>
<mml:mo>|</mml:mo>
<mml:msub>
<mml:mi>V</mml:mi>
<mml:mi>y</mml:mi>
</mml:msub>
<mml:mo>|</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mfrac>
<mml:mstyle displaystyle="true">
<mml:munder>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>v</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:msub>
<mml:mi>V</mml:mi>
<mml:mi>y</mml:mi>
</mml:msub>
</mml:mrow>
</mml:munder>
</mml:mstyle>
<mml:mi>p</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:mrow>
<mml:mtext>MASK</mml:mtext>
</mml:mrow>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
<mml:mo>=</mml:mo>
<mml:mi>v</mml:mi>
<mml:mo>|</mml:mo>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
<p>Here, <inline-formula>
<mml:math display="inline" id="im42">
<mml:msub>
<mml:mi>V</mml:mi>
<mml:mi>y</mml:mi>
</mml:msub>
</mml:math>
</inline-formula> refers to the collection of label words linked to the category <inline-formula>
<mml:math display="inline" id="im43">
<mml:mi>y</mml:mi>
</mml:math>
</inline-formula>, while <inline-formula>
<mml:math display="inline" id="im44">
<mml:mrow>
<mml:mrow>
<mml:mo>|</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>V</mml:mi>
<mml:mi>y</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo>|</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> indicates the size of this set. The probability function <inline-formula>
<mml:math display="inline" id="im45">
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:mrow>
<mml:mtext>MASK</mml:mtext>
</mml:mrow>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
<mml:mo>=</mml:mo>
<mml:mtext>v</mml:mtext>
<mml:mo>|</mml:mo>
<mml:msub>
<mml:mtext>x</mml:mtext>
<mml:mrow>
<mml:mtext>en</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> evaluates how likely the label word <inline-formula>
<mml:math display="inline" id="im46">
<mml:mi>v</mml:mi>
</mml:math>
</inline-formula> is, conditioned on the enhanced description <inline-formula>
<mml:math display="inline" id="im47">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
</sec>
</sec>
<sec id="s4">
<label>4</label>
<title>Experiments</title>
<sec id="s4_1">
<label>4.1</label>
<title>Data setting</title>
<p>In this study, we use two benchmark English datasets: the PlantWild dataset and the GojiPest dataset.</p>
<p>PlantWild: PlantWild is a large-scale dataset for wild plant disease recognition, covering multiple healthy plant categories and plant disease categories. It contains over 50,000 images, with each plant disease category accompanied by rich textual descriptions. In this study, we primarily use the textual descriptions from this dataset, which provide detailed explanations of the fine-grained features of various plant diseases, helping the model identify subtle differences between them.</p>
<p>GojiPest: GojiPest is a cross-modal image-text dataset focused on goji plant pests and diseases. It supports tasks such as image collection, text creation, data augmentation, classification, and image-text pairing. The dataset includes images and textual descriptions for various common goji pests and diseases. Similar to the PlantWild dataset, we only use the textual descriptions from this dataset in our study, focusing on utilizing the descriptive information for enhancing the understanding and classification of goji plant pests and diseases.</p>
</sec>
<sec id="s4_2">
<label>4.2</label>
<title>Baseline methods</title>
<p>In order to assess the effectiveness of our approach, we conducted comparisons with SOTA methods.</p>
<p>Stacked Denoising Autoencoders (SDA) (<xref ref-type="bibr" rid="B46">Zhu et&#xa0;al., 2019</xref>): SDA is a conventional unsupervised deep learning model that generates detailed feature representations for both the training and test datasets via an autoencoder. Subsequently, a classifier is trained on labeled data from the training set to carry out classification tasks on the test data.</p>
<p>TextCNN (<xref ref-type="bibr" rid="B13">Kim, 2014</xref>): A deep learning network that incorporates a convolutional layer to extract contextual features from labeled training data. Once trained, the model is applied to execute classification tasks on the test set.</p>
<p>BERT (<xref ref-type="bibr" rid="B3">Devlin et&#xa0;al., 2018</xref>): BERT (Bidirectional Encoder Representations from Transformers) is based on the Transformer framework. It reformulates tasks into cloze-style (fill-in-the-blank) questions, making it a robust baseline approach for a variety of NLP tasks.</p>
<p>AgriBERT (<xref ref-type="bibr" rid="B1">Chen et&#xa0;al., 2024</xref>): AgriBERT is a pre-trained language model specifically designed for agricultural domain texts. It is trained on a large corpus of agricultural literature and technical reports, making it more adept at understanding agricultural terminologies and contexts compared to general-domain PLMs like BERT.</p>
<p>Prompt Learning (PL) (<xref ref-type="bibr" rid="B20">Liu et&#xa0;al., 2021</xref>): This method integrates input data from each chunk into a pre-designed template, using only the category name to build the verbalizer in regular prompt-tuning. For consistency, the templates in PL align with those used in our experiments.</p>
<p>P-tuning (<xref ref-type="bibr" rid="B18">Liu et&#xa0;al., 2022</xref>): P-tuning is a method for soft prompt-tuning that involves learning continuous prompts by embedding trainable variables into the input representations, instead of using hand-crafted templates.</p>
<p>Mistral (<xref ref-type="bibr" rid="B11">Jiang et&#xa0;al., 2023</xref>): Mistral is an emerging large-scale language model created by the Mistral AI team. It is particularly known for its high computational efficiency and robust generative capabilities, outperforming similar models, especially in multimodal tasks.</p>
<p>LLaMA3 (<xref ref-type="bibr" rid="B33">Touvron et&#xa0;al., 2023</xref>): LLaMA3 is an efficient, large-scale language model developed by Meta, designed to function well in low-resource environments. It reduces the number of parameters to minimize computational costs while maintaining solid performance in natural language reasoning and generation tasks.</p>
<p>SimSTC (<xref ref-type="bibr" rid="B16">Liu et&#xa0;al., 2025b</xref>): A straightforward framework for graph contrastive learning applied to short text classification. The method performs graph learning on various component graphs related to text, generating multi-view text embeddings, upon which contrastive learning is directly applied.</p>
</sec>
<sec id="s4_3">
<label>4.3</label>
<title>Experiment settings</title>
<p>In this experiment, to generate vague descriptions, we constructed an &#x201c;expert-non-expert&#x201d; parallel corpus and fine-tuned a large language model using LoRA technology, enabling it to convert specialized terms into more accessible expressions. Specifically, we first collected approximately 1,200 expert-level symptom descriptions from agricultural manuals and online agricultural knowledge bases. Each description was then paraphrased by GPT-4 into a farmer-style colloquial version, resulting in an expert&#x2013;non-expert pair. Based on this corpus, we fine-tuned LLaMA-8B with LoRA, using a configuration of rank = 8, <italic>&#x3b1;</italic> = 16, learning rate = 2e-4, and 5 training epochs on dual NVIDIA A6000 GPUs. To ensure quality, we manually filtered the generated sentences to remove incomplete or duplicate expressions. Furthermore, three agricultural experts validated a random sample of 300 pairs, achieving an agreement rate above 90%. This process ensured that the constructed parallel corpus is reliable and suitable for subsequent fuzzification.</p>
<p>We then used the fine-tuned model to apply fuzzification to the text in the dataset. Next, we applied our method to enhance the fuzzified text, resulting in the final dataset for subsequent experiments. The dataset was separated into subsets for training, testing, and validation, with 70% of the data assigned to training, 20% designated for testing, and the remaining 10% kept for validation.</p>
<p>For methods based on deep neural networks and fine-tuned pre-trained language models (e.g., SDA, TextCNN, SimSTC, BERT and AgriBERT), the full training dataset was utilized, as these models necessitate large quantities of data for effective learning. Furthermore, we applied the hyperparameters specified in the original papers to maintain consistency and achieve optimal performance. For the prompt-tuning approaches (PL and P-tuning), a 20-shot configuration was implemented for both methods. To ensure fairness, we kept the parameter settings identical across these approaches: dropout rate was set to 0.5, learning rate to 3e-5, batch size to 32, and weight decay to 1e-5. The hyperparameters, including batch size and learning rate, were determined through repeated empirical experiments to achieve optimal performance. The models were trained for 5 epochs to guarantee thorough training and stable outcomes, with the Adam optimizer employed for parameter tuning. For large language models (e.g., LLaMA3 and Mistral), classification was performed directly by formulating prompts in a question-answer format. Unlike traditional models, these systems do not rely on conventional training techniques but instead fine-tune the prompts specifically for classification tasks.</p>
<p>The effectiveness of our methods is evaluated using the following four key metrics, as shown in <xref ref-type="disp-formula" rid="eq14">Equations 14</xref>&#x2013;<xref ref-type="disp-formula" rid="eq17">17</xref> below.</p>
<p>Accuracy (Acc): The proportion of correctly predicted samples compared to the total number of samples.</p>
<disp-formula id="eq14">
<label>(14)</label>
<mml:math display="block" id="M14">
<mml:mrow>
<mml:mtext>Accuracy</mml:mtext>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi>T</mml:mi>
<mml:mi>N</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi>T</mml:mi>
<mml:mi>N</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
<p>Precision (Pre): The ratio of correctly predicted positive samples among all samples predicted as positive.</p>
<disp-formula id="eq15">
<label>(15)</label>
<mml:math display="block" id="M15">
<mml:mrow>
<mml:mtext>Precision</mml:mtext>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
<p>Recall (Rec): The ratio of correctly predicted positive samples to the total number of actual positive samples.</p>
<disp-formula id="eq16">
<label>(16)</label>
<mml:math display="block" id="M16">
<mml:mrow>
<mml:mtext>Recall</mml:mtext>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
<p>F1 Score (F1-S): The harmonic mean of Precision and Recall, used as a comprehensive measure of classification performance. A higher F1 score indicates better overall performance.</p>
<disp-formula id="eq17">
<label>(17)</label>
<mml:math display="block" id="M17">
<mml:mrow>
<mml:mtext>F</mml:mtext>
<mml:mn>1</mml:mn>
<mml:mtext>&#xa0;Score</mml:mtext>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:mo>&#xb7;</mml:mo>
<mml:mtext>Precision</mml:mtext>
<mml:mo>&#xb7;</mml:mo>
<mml:mtext>Recall</mml:mtext>
</mml:mrow>
<mml:mrow>
<mml:mtext>Precision</mml:mtext>
<mml:mo>+</mml:mo>
<mml:mtext>Recall</mml:mtext>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
<p>All experiments were conducted on a server equipped with an NVIDIA A100 GPU, a 64-core AMD EPYC 7763 processor, and 512 GB of memory. The experiments were performed using Python 3.9.16 and PyTorch 1.12.0 with CUDA support.</p>
</sec>
<sec id="s4_4">
<label>4.4</label>
<title>Main results</title>
<p>
<xref ref-type="table" rid="T2">
<bold>Table&#xa0;2</bold>
</xref> presents the performance results of our method and baseline models across two datasets (PlantWild and GojiPest). Based on these experimental results, the following insights have been observed:</p>
<table-wrap id="T2" position="float">
<label>Table&#xa0;2</label>
<caption>
<p>The experimental results on two datasets using four different evaluation metrics.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" rowspan="2" align="center">Datasets</th>
<th valign="middle" rowspan="2" align="center">Task</th>
<th valign="middle" rowspan="2" align="center">Metrics</th>
<th valign="middle" colspan="10" align="center">Methods</th>
</tr>
<tr>
<th valign="middle" align="center">SDA</th>
<th valign="middle" align="center">TextCNN</th>
<th valign="middle" align="center">BERT</th>
<th valign="middle" align="center">AgriBERT</th>
<th valign="middle" align="center">PL</th>
<th valign="middle" align="center">P-tuning</th>
<th valign="middle" align="center">Mistral</th>
<th valign="middle" align="center">LLaMA3</th>
<th valign="middle" align="center">SimSTC</th>
<th valign="middle" align="center">Ours</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" rowspan="16" align="center">PlantWild</td>
<td valign="middle" rowspan="4" align="center">Apple</td>
<td valign="middle" align="center">Acc</td>
<td valign="middle" align="center">66.67</td>
<td valign="middle" align="center">71.15</td>
<td valign="middle" align="center">60.79</td>
<td valign="middle" align="center">82.69</td>
<td valign="middle" align="center">80.25</td>
<td valign="middle" align="center">81.73</td>
<td valign="middle" align="center">81.73</td>
<td valign="middle" align="center">82.69</td>
<td valign="middle" align="center">87.89</td>
<td valign="middle" align="center">
<bold>95.19</bold>
</td>
</tr>
<tr>
<td valign="middle" align="center">Pre</td>
<td valign="middle" align="center">65.02</td>
<td valign="middle" align="center">59.92</td>
<td valign="middle" align="center">63.17</td>
<td valign="middle" align="center">83.81</td>
<td valign="middle" align="center">79.63</td>
<td valign="middle" align="center">81.25</td>
<td valign="middle" align="center">87.85</td>
<td valign="middle" align="center">82.72</td>
<td valign="middle" align="center">87.65</td>
<td valign="middle" align="center">
<bold>95.97</bold>
</td>
</tr>
<tr>
<td valign="middle" align="center">Rec</td>
<td valign="middle" align="center">66.67</td>
<td valign="middle" align="center">71.15</td>
<td valign="middle" align="center">60.79</td>
<td valign="middle" align="center">84.81</td>
<td valign="middle" align="center">80.79</td>
<td valign="middle" align="center">81.43</td>
<td valign="middle" align="center">73.97</td>
<td valign="middle" align="center">76.80</td>
<td valign="middle" align="center">87.89</td>
<td valign="middle" align="center">
<bold>95.20</bold>
</td>
</tr>
<tr>
<td valign="middle" align="center">F1</td>
<td valign="middle" align="center">65.83</td>
<td valign="middle" align="center">63.16</td>
<td valign="middle" align="center">60.80</td>
<td valign="middle" align="center">81.76</td>
<td valign="middle" align="center">80.04</td>
<td valign="middle" align="center">80.70</td>
<td valign="middle" align="center">80.31</td>
<td valign="middle" align="center">79.65</td>
<td valign="middle" align="center">87.31</td>
<td valign="middle" align="center">
<bold>95.32</bold>
</td>
</tr>
<tr>
<td valign="middle" rowspan="4" align="center">Corn</td>
<td valign="middle" align="center">Acc</td>
<td valign="middle" align="center">68.91</td>
<td valign="middle" align="center">65.55</td>
<td valign="middle" align="center">55.21</td>
<td valign="middle" align="center">74.79</td>
<td valign="middle" align="center">59.49</td>
<td valign="middle" align="center">61.35</td>
<td valign="middle" align="center">67.23</td>
<td valign="middle" align="center">71.43</td>
<td valign="middle" align="center">80.00</td>
<td valign="middle" align="center">
<bold>80.67</bold>
</td>
</tr>
<tr>
<td valign="middle" align="center">Pre</td>
<td valign="middle" align="center">69.04</td>
<td valign="middle" align="center">52.32</td>
<td valign="middle" align="center">56.95</td>
<td valign="middle" align="center">77.26</td>
<td valign="middle" align="center">59.30</td>
<td valign="middle" align="center">60.01</td>
<td valign="middle" align="center">80.26</td>
<td valign="middle" align="center">79.45</td>
<td valign="middle" align="center">79.26</td>
<td valign="middle" align="center">
<bold>82.78</bold>
</td>
</tr>
<tr>
<td valign="middle" align="center">Rec</td>
<td valign="middle" align="center">68.91</td>
<td valign="middle" align="center">65.55</td>
<td valign="middle" align="center">55.21</td>
<td valign="middle" align="center">74.89</td>
<td valign="middle" align="center">60.98</td>
<td valign="middle" align="center">61.67</td>
<td valign="middle" align="center">67.50</td>
<td valign="middle" align="center">71.67</td>
<td valign="middle" align="center">78.00</td>
<td valign="middle" align="center">
<bold>80.83</bold>
</td>
</tr>
<tr>
<td valign="middle" align="center">F1</td>
<td valign="middle" align="center">68.97</td>
<td valign="middle" align="center">57.33</td>
<td valign="middle" align="center">55.27</td>
<td valign="middle" align="center">75.11</td>
<td valign="middle" align="center">50.67</td>
<td valign="middle" align="center">59.50</td>
<td valign="middle" align="center">73.32</td>
<td valign="middle" align="center">75.36</td>
<td valign="middle" align="center">76.29</td>
<td valign="middle" align="center">
<bold>80.66</bold>
</td>
</tr>
<tr>
<td valign="middle" rowspan="4" align="center">Cucumber</td>
<td valign="middle" align="center">Acc</td>
<td valign="middle" align="center">66.95</td>
<td valign="middle" align="center">79.66</td>
<td valign="middle" align="center">58.34</td>
<td valign="middle" align="center">68.64</td>
<td valign="middle" align="center">63.28</td>
<td valign="middle" align="center">66.95</td>
<td valign="middle" align="center">
<bold>88.14</bold>
</td>
<td valign="middle" align="center">84.75</td>
<td valign="middle" align="center">78.87</td>
<td valign="middle" align="center">83.05</td>
</tr>
<tr>
<td valign="middle" align="center">Pre</td>
<td valign="middle" align="center">68.44</td>
<td valign="middle" align="center">88.15</td>
<td valign="middle" align="center">61.38</td>
<td valign="middle" align="center">81.01</td>
<td valign="middle" align="center">62.97</td>
<td valign="middle" align="center">66.20</td>
<td valign="middle" align="center">
<bold>91.64</bold>
</td>
<td valign="middle" align="center">90.63</td>
<td valign="middle" align="center">78.83</td>
<td valign="middle" align="center">82.82</td>
</tr>
<tr>
<td valign="middle" align="center">Rec</td>
<td valign="middle" align="center">66.95</td>
<td valign="middle" align="center">79.66</td>
<td valign="middle" align="center">58.34</td>
<td valign="middle" align="center">68.33</td>
<td valign="middle" align="center">65.39</td>
<td valign="middle" align="center">67.13</td>
<td valign="middle" align="center">
<bold>87.93</bold>
</td>
<td valign="middle" align="center">84.48</td>
<td valign="middle" align="center">79.87</td>
<td valign="middle" align="center">82.99</td>
</tr>
<tr>
<td valign="middle" align="center">F1</td>
<td valign="middle" align="center">67.69</td>
<td valign="middle" align="center">75.96</td>
<td valign="middle" align="center">60.97</td>
<td valign="middle" align="center">66.50</td>
<td valign="middle" align="center">63.47</td>
<td valign="middle" align="center">65.95</td>
<td valign="middle" align="center">
<bold>89.75</bold>
</td>
<td valign="middle" align="center">87.45</td>
<td valign="middle" align="center">80.60</td>
<td valign="middle" align="center">82.66</td>
</tr>
<tr>
<td valign="middle" rowspan="4" align="center">Tomato</td>
<td valign="middle" align="center">Acc</td>
<td valign="middle" align="center">65.55</td>
<td valign="middle" align="center">72.27</td>
<td valign="middle" align="center">53.53</td>
<td valign="middle" align="center">70.59</td>
<td valign="middle" align="center">58.95</td>
<td valign="middle" align="center">61.26</td>
<td valign="middle" align="center">68.91</td>
<td valign="middle" align="center">57.98</td>
<td valign="middle" align="center">72.50</td>
<td valign="middle" align="center">
<bold>73.11</bold>
</td>
</tr>
<tr>
<td valign="middle" align="center">Pre</td>
<td valign="middle" align="center">67.64</td>
<td valign="middle" align="center">
<bold>81.80</bold>
</td>
<td valign="middle" align="center">51.93</td>
<td valign="middle" align="center">72.13</td>
<td valign="middle" align="center">58.21</td>
<td valign="middle" align="center">67.27</td>
<td valign="middle" align="center">67.05</td>
<td valign="middle" align="center">72.69</td>
<td valign="middle" align="center">75.00</td>
<td valign="middle" align="center">73.06</td>
</tr>
<tr>
<td valign="middle" align="center">Rec</td>
<td valign="middle" align="center">65.55</td>
<td valign="middle" align="center">72.27</td>
<td valign="middle" align="center">53.53</td>
<td valign="middle" align="center">70.57</td>
<td valign="middle" align="center">60.09</td>
<td valign="middle" align="center">61.15</td>
<td valign="middle" align="center">68.99</td>
<td valign="middle" align="center">57.70</td>
<td valign="middle" align="center">72.50</td>
<td valign="middle" align="center">
<bold>73.30</bold>
</td>
</tr>
<tr>
<td valign="middle" align="center">F1</td>
<td valign="middle" align="center">66.57</td>
<td valign="middle" align="center">64.01</td>
<td valign="middle" align="center">52.82</td>
<td valign="middle" align="center">68.60</td>
<td valign="middle" align="center">58.14</td>
<td valign="middle" align="center">64.06</td>
<td valign="middle" align="center">68.01</td>
<td valign="middle" align="center">64.34</td>
<td valign="middle" align="center">71.33</td>
<td valign="middle" align="center">
<bold>72.74</bold>
</td>
</tr>
<tr>
<td valign="middle" rowspan="8" align="center">GojiPest</td>
<td valign="middle" rowspan="4" align="center">Insect1</td>
<td valign="middle" align="center">Acc</td>
<td valign="middle" align="center">71.91</td>
<td valign="middle" align="center">75.26</td>
<td valign="middle" align="center">59.36</td>
<td valign="middle" align="center">75.11</td>
<td valign="middle" align="center">82.44</td>
<td valign="middle" align="center">90.08</td>
<td valign="middle" align="center">64.96</td>
<td valign="middle" align="center">60.23</td>
<td valign="middle" align="center">81.90</td>
<td valign="middle" align="center">
<bold>95.11</bold>
</td>
</tr>
<tr>
<td valign="middle" align="center">Pre</td>
<td valign="middle" align="center">79.14</td>
<td valign="middle" align="center">75.53</td>
<td valign="middle" align="center">60.12</td>
<td valign="middle" align="center">73.88</td>
<td valign="middle" align="center">82.58</td>
<td valign="middle" align="center">84.91</td>
<td valign="middle" align="center">73.94</td>
<td valign="middle" align="center">65.78</td>
<td valign="middle" align="center">71.20</td>
<td valign="middle" align="center">
<bold>90.61</bold>
</td>
</tr>
<tr>
<td valign="middle" align="center">Rec</td>
<td valign="middle" align="center">71.91</td>
<td valign="middle" align="center">75.26</td>
<td valign="middle" align="center">59.36</td>
<td valign="middle" align="center">79.31</td>
<td valign="middle" align="center">85.71</td>
<td valign="middle" align="center">89.71</td>
<td valign="middle" align="center">66.15</td>
<td valign="middle" align="center">68.32</td>
<td valign="middle" align="center">81.90</td>
<td valign="middle" align="center">
<bold>96.14</bold>
</td>
</tr>
<tr>
<td valign="middle" align="center">F1</td>
<td valign="middle" align="center">75.35</td>
<td valign="middle" align="center">74.68</td>
<td valign="middle" align="center">59.87</td>
<td valign="middle" align="center">75.64</td>
<td valign="middle" align="center">82.45</td>
<td valign="middle" align="center">86.12</td>
<td valign="middle" align="center">70.03</td>
<td valign="middle" align="center">67.03</td>
<td valign="middle" align="center">71.02</td>
<td valign="middle" align="center">
<bold>91.31</bold>
</td>
</tr>
<tr>
<td valign="middle" rowspan="4" align="center">Insect2</td>
<td valign="middle" align="center">Acc</td>
<td valign="middle" align="center">74.83</td>
<td valign="middle" align="center">74.50</td>
<td valign="middle" align="center">55.82</td>
<td valign="middle" align="center">72.28</td>
<td valign="middle" align="center">84.05</td>
<td valign="middle" align="center">90.65</td>
<td valign="middle" align="center">66.78</td>
<td valign="middle" align="center">67.02</td>
<td valign="middle" align="center">92.01</td>
<td valign="middle" align="center">
<bold>94.95</bold>
</td>
</tr>
<tr>
<td valign="middle" align="center">Pre</td>
<td valign="middle" align="center">75.45</td>
<td valign="middle" align="center">75.46</td>
<td valign="middle" align="center">59.65</td>
<td valign="middle" align="center">73.46</td>
<td valign="middle" align="center">84.50</td>
<td valign="middle" align="center">90.44</td>
<td valign="middle" align="center">59.86</td>
<td valign="middle" align="center">67.52</td>
<td valign="middle" align="center">92.65</td>
<td valign="middle" align="center">
<bold>93.94</bold>
</td>
</tr>
<tr>
<td valign="middle" align="center">Rec</td>
<td valign="middle" align="center">74.83</td>
<td valign="middle" align="center">74.50</td>
<td valign="middle" align="center">55.82</td>
<td valign="middle" align="center">74.31</td>
<td valign="middle" align="center">85.29</td>
<td valign="middle" align="center">91.04</td>
<td valign="middle" align="center">54.05</td>
<td valign="middle" align="center">68.30</td>
<td valign="middle" align="center">92.01</td>
<td valign="middle" align="center">
<bold>94.96</bold>
</td>
</tr>
<tr>
<td valign="middle" align="center">F1</td>
<td valign="middle" align="center">75.14</td>
<td valign="middle" align="center">74.67</td>
<td valign="middle" align="center">57.36</td>
<td valign="middle" align="center">73.88</td>
<td valign="middle" align="center">83.94</td>
<td valign="middle" align="center">89.78</td>
<td valign="middle" align="center">56.81</td>
<td valign="middle" align="center">67.91</td>
<td valign="middle" align="center">91.41</td>
<td valign="middle" align="center">
<bold>92.95</bold>
</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Bold values indicate better performance.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<list list-type="order">
<list-item>
<p>Our approach consistently surpasses all baseline models across various evaluation metrics. Specifically, for the Apple subtask, it attains much higher accuracy and F1 score, showcasing the effectiveness of our method in few-shot learning scenarios. This result suggests that our method is highly effective at leveraging limited labeled data to achieve superior classification performance compared to other models.</p>
</list-item>
<list-item>
<p>Although we used a 15-shot learning setup, which inherently limits the model&#x2019;s performance due to the small number of training samples, our results show that prompt-based learning significantly improves performance in few-shot scenarios. When compared to traditional deep learning methods like TextCNN and SDA, our approach achieves higher accuracy and F1 scores across multiple subtasks, thus confirming the advantages of prompt learning in handling few-shot tasks.</p>
</list-item>
<list-item>
<p>Pre-trained language models like BERT and AgriBERT perform well on agricultural texts but face real-world limitations. Our soft prompt-tuning with a knowledge-enhanced verbalizer outperforms them on colloquial datasets. It effectively handles non-standardized farmer descriptions. Unlike full fine-tuning, it updates fewer parameters. This yields higher efficiency and better transferability in few-shot or resource-limited settings.</p>
</list-item>
<list-item>
<p>While LLMs such as LLaMA3 and Mistral are powerful, their performance is less stable when applied to short-text tasks, such as plant disease and pest description classification. These models show variability when handling noisy or perturbed inputs, leading to fluctuations in performance metrics. In contrast, our method demonstrates consistent and robust performance, particularly in tasks involving ambiguous or noisy descriptions, underscoring the stability and reliability of our approach.</p>
</list-item>
<list-item>
<p>When compared to traditional deep learning models, prompt-based learning models outperform them, especially in the Cucumber subtask, where our method significantly surpassed TextCNN. This suggests that prompt-based learning is better suited to the challenges posed by few-shot learning, allowing the model to more effectively process short-text descriptions and achieve higher classification accuracy.</p>
</list-item>
<list-item>
<p>The observed performance differences across the various tasks indicate that prompt-based learning models are adaptable to a range of task characteristics. In particular, our method excels in tasks that involve more complex or detailed descriptions, further demonstrating its ability to generalize effectively across diverse inputs.</p>
</list-item>
<list-item>
<p>Visualized confusion matrices from representative subsets further corroborate our method&#x2019;s effectiveness. On GojiPest Insect1, the model achieved relatively high per-class accuracy, with &#x201c;chihuo&#x201d; and &#x201c;daqingyechan&#x201d; reaching 96.4% and 95.6%, respectively. On PlantWild Apple, the model cleanly separates disease types with all classes above 93% (e.g., &#x201c;black rot&#x201d; 93.3%, &#x201c;mosaic virus&#x201d; 96.7%). These results confirm that our approach achieves uniformly high discrimination and robust generalization across categories, as shown in <xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2</bold>
</xref>.</p>
</list-item>
</list>
<fig id="f2" position="float">
<label>Figure&#xa0;2</label>
<caption>
<p>
<bold>(A)</bold> shows the confusion matrix for the GojiPest Insect1 dataset and <bold>(B)</bold> shows the confusion matrix for the PlantWild Apple dataset.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-16-1668642-g002.tif">
<alt-text content-type="machine-generated">Two confusion matrices for classification tasks. (A) GojiPest_Insect1 matrix shows high accuracy for categories like &#x201c;chihuo&#x201d; and &#x201c;daqingyechan,&#x201d; with accuracy percentages ranging from 88.1% to 96.4%. (B) PlantWild_Apple matrix displays high accuracy for &#x201c;black rot&#x201d; and &#x201c;mosaic virus,&#x201d; with percentages between 94.1% and 96.7%. Both matrices feature a gradient color scale indicating frequency.</alt-text>
</graphic>
</fig>
<p>In conclusion, our method consistently outperforms baseline models across all subtasks, especially in few-shot learning and in handling noisy short-text descriptions. This highlights the advantages of prompt-based learning and knowledge expansion. Future work could explore integrating domain-specific knowledge to optimize prompt templates further and improve the model&#x2019;s generalization abilities for real-world applications.</p>
</sec>
<sec id="s4_5">
<label>4.5</label>
<title>The comparative study of large models</title>
<p>To comprehensively evaluate the robustness of our proposed method, we conducted additional experiments along three dimensions: comparisons with fine-tuned large language models, parameter-efficient fine-tuning, and advanced prompting strategies. Specifically, we systematically compared our text-based approach with fine-tuned Qwen, LLaMA with LoRA-based fine-tuning, and Qwen variants equipped with Chain-of-Thought (CoT) reasoning and Self-Consistency (SC) voting under the same few-shot setting.</p>
<p>The experimental results, as shown in <xref ref-type="table" rid="T3">
<bold>Table&#xa0;3</bold>
</xref>, reveal three major findings: (1) Directly using LLMs to analyze agricultural text lacks in-depth reasoning, resulting in performance inferior to methods that incorporate CoT and SC; (2) In scenarios with limited data, LoRA struggles to fully capture the diversity and complexity of the task, making it difficult to generalize effectively to different agricultural text scenarios. This lack of sufficient data support during fine-tuning leads to unstable model performance, with significant drops in certain tasks. (3) Advanced prompting strategies (CoT + SC) bring improvements in some scenarios, but their overall performance is unstable and remains lower than the experimental results of our method.</p>
<table-wrap id="T3" position="float">
<label>Table&#xa0;3</label>
<caption>
<p>Performance comparison of five large language model methods across six tasks.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" rowspan="2" align="center">Datasets</th>
<th valign="middle" rowspan="2" align="center">Task</th>
<th valign="middle" rowspan="2" align="center">Metrics</th>
<th valign="middle" colspan="5" align="center">Methods</th>
</tr>
<tr>
<th valign="middle" align="center">Qwen</th>
<th valign="middle" align="center">QwenCoT</th>
<th valign="middle" align="center">Qwen-SC</th>
<th valign="middle" align="center">LLaMALoRA</th>
<th valign="middle" align="center">Ours</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" rowspan="15" align="center">PlantWild</td>
<td valign="middle" rowspan="4" align="center">Apple</td>
<td valign="middle" align="center">Acc</td>
<td valign="middle" align="center">92.23</td>
<td valign="middle" align="center">85.44</td>
<td valign="middle" align="center">75.96</td>
<td valign="middle" align="center">77.12</td>
<td valign="middle" align="center">
<bold>95.19</bold>
</td>
</tr>
<tr>
<td valign="middle" align="center">Pre</td>
<td valign="middle" align="center">75.85</td>
<td valign="middle" align="center">71.20</td>
<td valign="middle" align="center">73.70</td>
<td valign="middle" align="center">76.89</td>
<td valign="middle" align="center">
<bold>95.97</bold>
</td>
</tr>
<tr>
<td valign="middle" align="center">Rec</td>
<td valign="middle" align="center">72.97</td>
<td valign="middle" align="center">66.29</td>
<td valign="middle" align="center">75.96</td>
<td valign="middle" align="center">76.77</td>
<td valign="middle" align="center">
<bold>95.20</bold>
</td>
</tr>
<tr>
<td valign="middle" align="center">F1</td>
<td valign="middle" align="center">74.38</td>
<td valign="middle" align="center">68.66</td>
<td valign="middle" align="center">74.66</td>
<td valign="middle" align="center">76.83</td>
<td valign="middle" align="center">
<bold>95.32</bold>
</td>
</tr>
<tr>
<td valign="middle" rowspan="4" align="center">Corn</td>
<td valign="middle" align="center">Acc</td>
<td valign="middle" align="center">74.58</td>
<td valign="middle" align="center">74.75</td>
<td valign="middle" align="center">72.44</td>
<td valign="middle" align="center">77.82</td>
<td valign="middle" align="center">
<bold>80.67</bold>
</td>
</tr>
<tr>
<td valign="middle" align="center">Pre</td>
<td valign="middle" align="center">
<bold>86.26</bold>
</td>
<td valign="middle" align="center">75.21</td>
<td valign="middle" align="center">72.94</td>
<td valign="middle" align="center">66.51</td>
<td valign="middle" align="center">
<bold>82.78</bold>
</td>
</tr>
<tr>
<td valign="middle" align="center">Rec</td>
<td valign="middle" align="center">75.00</td>
<td valign="middle" align="center">75.00</td>
<td valign="middle" align="center">72.44</td>
<td valign="middle" align="center">62.66</td>
<td valign="middle" align="center">
<bold>80.83</bold>
</td>
</tr>
<tr>
<td valign="middle" align="center">F1</td>
<td valign="middle" align="center">80.24</td>
<td valign="middle" align="center">75.11</td>
<td valign="middle" align="center">72.38</td>
<td valign="middle" align="center">64.52</td>
<td valign="middle" align="center">
<bold>80.66</bold>
</td>
</tr>
<tr>
<td valign="middle" rowspan="4" align="center">Cucumber</td>
<td valign="middle" align="center">Acc</td>
<td valign="middle" align="center">79.49</td>
<td valign="middle" align="center">
<bold>88.03</bold>
</td>
<td valign="middle" align="center">73.90</td>
<td valign="middle" align="center">77.29</td>
<td valign="middle" align="center">
<bold>83.05</bold>
</td>
</tr>
<tr>
<td valign="middle" align="center">Pre</td>
<td valign="middle" align="center">64.29</td>
<td valign="middle" align="center">72.88</td>
<td valign="middle" align="center">76.99</td>
<td valign="middle" align="center">80.58</td>
<td valign="middle" align="center">
<bold>82.82</bold>
</td>
</tr>
<tr>
<td valign="middle" align="center">Rec</td>
<td valign="middle" align="center">63.47</td>
<td valign="middle" align="center">70.34</td>
<td valign="middle" align="center">73.90</td>
<td valign="middle" align="center">77.07</td>
<td valign="middle" align="center">
<bold>82.99</bold>
</td>
</tr>
<tr>
<td valign="middle" align="center">F1</td>
<td valign="middle" align="center">63.87</td>
<td valign="middle" align="center">71.59</td>
<td valign="middle" align="center">70.84</td>
<td valign="middle" align="center">78.79</td>
<td valign="middle" align="center">
<bold>82.66</bold>
</td>
</tr>
<tr>
<td valign="middle" rowspan="4" align="center">Tomato</td>
<td valign="middle" align="center">Acc</td>
<td valign="middle" align="center">66.27</td>
<td valign="middle" align="center">69.66</td>
<td valign="middle" align="center">70.67</td>
<td valign="middle" align="center">66.39</td>
<td valign="middle" align="center">
<bold>73.11</bold>
</td>
</tr>
<tr>
<td valign="middle" align="center">Pre</td>
<td valign="middle" align="center">76.48</td>
<td valign="middle" align="center">60.89</td>
<td valign="middle" align="center">
<bold>79.06</bold>
</td>
<td valign="middle" align="center">69.80</td>
<td valign="middle" align="center">
<bold>73.06</bold>
</td>
</tr>
<tr>
<td valign="middle" align="center">Rec</td>
<td valign="middle" align="center">66.61</td>
<td valign="middle" align="center">53.95</td>
<td valign="middle" align="center">70.67</td>
<td valign="middle" align="center">66.26</td>
<td valign="middle" align="center">
<bold>73.30</bold>
</td>
</tr>
<tr>
<td valign="middle" rowspan="9" align="center">GojiPest</td>
<td valign="middle" align="center">F1</td>
<td valign="middle" align="center">71.25</td>
<td valign="middle" align="center">57.24</td>
<td valign="middle" align="center">67.34</td>
<td valign="middle" align="center">67.99</td>
<td valign="middle" align="center">
<bold>72.74</bold>
</td>
</tr>
<tr>
<td valign="middle" rowspan="4" align="center">Insect1</td>
<td valign="middle" align="center">Acc</td>
<td valign="middle" align="center">73.44</td>
<td valign="middle" align="center">74.39</td>
<td valign="middle" align="center">70.44</td>
<td valign="middle" align="center">62.52</td>
<td valign="middle" align="center">
<bold>95.11</bold>
</td>
</tr>
<tr>
<td valign="middle" align="center">Pre</td>
<td valign="middle" align="center">83.14</td>
<td valign="middle" align="center">58.19</td>
<td valign="middle" align="center">65.61</td>
<td valign="middle" align="center">63.36</td>
<td valign="middle" align="center">
<bold>90.61</bold>
</td>
</tr>
<tr>
<td valign="middle" align="center">Rec</td>
<td valign="middle" align="center">74.20</td>
<td valign="middle" align="center">63.16</td>
<td valign="middle" align="center">70.44</td>
<td valign="middle" align="center">62.42</td>
<td valign="middle" align="center">
<bold>96.14</bold>
</td>
</tr>
<tr>
<td valign="middle" align="center">F1</td>
<td valign="middle" align="center">78.41</td>
<td valign="middle" align="center">60.57</td>
<td valign="middle" align="center">64.52</td>
<td valign="middle" align="center">62.88</td>
<td valign="middle" align="center">
<bold>91.31</bold>
</td>
</tr>
<tr>
<td valign="middle" rowspan="4" align="center">Insect2</td>
<td valign="middle" align="center">Acc</td>
<td valign="middle" align="center">72.88</td>
<td valign="middle" align="center">77.91</td>
<td valign="middle" align="center">74.93</td>
<td valign="middle" align="center">62.94</td>
<td valign="middle" align="center">
<bold>94.95</bold>
</td>
</tr>
<tr>
<td valign="middle" align="center">Pre</td>
<td valign="middle" align="center">73.81</td>
<td valign="middle" align="center">75.21</td>
<td valign="middle" align="center">74.47</td>
<td valign="middle" align="center">68.00</td>
<td valign="middle" align="center">
<bold>93.94</bold>
</td>
</tr>
<tr>
<td valign="middle" align="center">Rec</td>
<td valign="middle" align="center">53.00</td>
<td valign="middle" align="center">46.24</td>
<td valign="middle" align="center">74.93</td>
<td valign="middle" align="center">60.99</td>
<td valign="middle" align="center">
<bold>94.96</bold>
</td>
</tr>
<tr>
<td valign="middle" align="center">F1</td>
<td valign="middle" align="center">61.70</td>
<td valign="middle" align="center">57.27</td>
<td valign="middle" align="center">68.99</td>
<td valign="middle" align="center">69.42</td>
<td valign="middle" align="center">
<bold>92.95</bold>
</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Accuracy (%), Precision (%), Recall (%), and F1 (%) are reported.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<p>In addition, an in-depth error analysis shows that LLMs tend to underperform on datasets containing short, ambiguous, and highly colloquial expressions, such as the Tomato dataset. This inconsistency can be attributed to their sensitivity to informal expressions and their limited ability to generalize across heterogeneous agricultural data. In contrast, our method leverages knowledge-enhanced soft prompt-tuning to explicitly bridge colloquial farmer descriptions with formal agricultural terminology, thereby achieving more stable and reliable performance even under noisy and diverse input conditions.</p>
</sec>
<sec id="s4_6">
<label>4.6</label>
<title>Ablation study</title>
<sec id="s4_6_1">
<label>4.6.1</label>
<title>Ablation study on the verbalizer</title>
<p>To accurately evaluate the contribution of the external knowledge-enhanced verbalizer, we conducted ablation experiments to systematically analyze the impact of different construction strategies on model performance. The experiment compared four configurations: a baseline model using only category names as label words, a simple knowledge enhancement method based on synonym expansion, ablated variants employing knowledge graph retrieval but with one of the optimization strategies removed (including FastText similarity filtering, context information ranking, or probability prediction), and the full model integrating all three optimization strategies.</p>
<p>The experimental results, as shown in <xref ref-type="table" rid="T4">
<bold>Table&#xa0;4</bold>
</xref>, demonstrate three key findings: (1) External knowledge is essential, with our full method significantly outperforming the category-name-only baseline (e.g., improving accuracy from 71.15% to 95.19% in the Apple task); (2) Structured knowledge from knowledge graphs proves substantially more effective than simple synonym expansion, which often introduces noise; (3) All three optimization strategies&#x2014;FastText similarity, probability prediction, and context information&#x2014;are necessary, as ablating any consistently degrades performance, confirming their joint role in noise filtering and high-quality label word selection.</p>
<table-wrap id="T4" position="float">
<label>Table&#xa0;4</label>
<caption>
<p>The ablation study results across two datasets using four different evaluation metrics.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" rowspan="2" align="center">Datasets</th>
<th valign="middle" rowspan="2" align="center">Task</th>
<th valign="middle" rowspan="2" align="center">Metrics</th>
<th valign="middle" colspan="6" align="center">Methods</th>
</tr>
<tr>
<th valign="middle" align="center">Label.</th>
<th valign="middle" align="center">Synonym</th>
<th valign="middle" align="center">-Prob.</th>
<th valign="middle" align="center">-FastText.</th>
<th valign="middle" align="center">-Context.</th>
<th valign="middle" align="center">Full(Ours)</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" rowspan="16" align="center">PlantWild</td>
<td valign="middle" rowspan="4" align="center">Apple</td>
<td valign="middle" align="center">Acc</td>
<td valign="middle" align="center">71.15</td>
<td valign="middle" align="center">81.73</td>
<td valign="middle" align="center">86.54</td>
<td valign="middle" align="center">85.58</td>
<td valign="middle" align="center">88.46</td>
<td valign="middle" align="center">
<bold>95.19</bold>
</td>
</tr>
<tr>
<td valign="middle" align="center">Pre</td>
<td valign="middle" align="center">69.84</td>
<td valign="middle" align="center">82.63</td>
<td valign="middle" align="center">85.54</td>
<td valign="middle" align="center">84.53</td>
<td valign="middle" align="center">87.66</td>
<td valign="middle" align="center">
<bold>95.97</bold>
</td>
</tr>
<tr>
<td valign="middle" align="center">Rec</td>
<td valign="middle" align="center">70.33</td>
<td valign="middle" align="center">81.69</td>
<td valign="middle" align="center">86.41</td>
<td valign="middle" align="center">84.86</td>
<td valign="middle" align="center">88.54</td>
<td valign="middle" align="center">
<bold>95.20</bold>
</td>
</tr>
<tr>
<td valign="middle" align="center">F1</td>
<td valign="middle" align="center">69.18</td>
<td valign="middle" align="center">80.13</td>
<td valign="middle" align="center">85.71</td>
<td valign="middle" align="center">84.27</td>
<td valign="middle" align="center">87.99</td>
<td valign="middle" align="center">
<bold>95.32</bold>
</td>
</tr>
<tr>
<td valign="middle" rowspan="4" align="center">Corn</td>
<td valign="middle" align="center">Acc</td>
<td valign="middle" align="center">71.43</td>
<td valign="middle" align="center">72.27</td>
<td valign="middle" align="center">75.63</td>
<td valign="middle" align="center">73.11</td>
<td valign="middle" align="center">78.15</td>
<td valign="middle" align="center">
<bold>80.67</bold>
</td>
</tr>
<tr>
<td valign="middle" align="center">Pre</td>
<td valign="middle" align="center">72.69</td>
<td valign="middle" align="center">76.19</td>
<td valign="middle" align="center">75.52</td>
<td valign="middle" align="center">72.54</td>
<td valign="middle" align="center">78.02</td>
<td valign="middle" align="center">
<bold>82.78</bold>
</td>
</tr>
<tr>
<td valign="middle" align="center">Rec</td>
<td valign="middle" align="center">71.61</td>
<td valign="middle" align="center">72.47</td>
<td valign="middle" align="center">75.80</td>
<td valign="middle" align="center">73.25</td>
<td valign="middle" align="center">78.30</td>
<td valign="middle" align="center">
<bold>80.83</bold>
</td>
</tr>
<tr>
<td valign="middle" align="center">F1</td>
<td valign="middle" align="center">71.70</td>
<td valign="middle" align="center">68.28</td>
<td valign="middle" align="center">74.43</td>
<td valign="middle" align="center">71.75</td>
<td valign="middle" align="center">77.90</td>
<td valign="middle" align="center">
<bold>80.66</bold>
</td>
</tr>
<tr>
<td valign="middle" rowspan="4" align="center">Cucumber</td>
<td valign="middle" align="center">Acc</td>
<td valign="middle" align="center">74.58</td>
<td valign="middle" align="center">77.12</td>
<td valign="middle" align="center">77.97</td>
<td valign="middle" align="center">80.51</td>
<td valign="middle" align="center">78.81</td>
<td valign="middle" align="center">
<bold>83.05</bold>
</td>
</tr>
<tr>
<td valign="middle" align="center">Pre</td>
<td valign="middle" align="center">75.67</td>
<td valign="middle" align="center">78.31</td>
<td valign="middle" align="center">78.41</td>
<td valign="middle" align="center">80.40</td>
<td valign="middle" align="center">79.55</td>
<td valign="middle" align="center">
<bold>82.82</bold>
</td>
</tr>
<tr>
<td valign="middle" align="center">Rec</td>
<td valign="middle" align="center">74.43</td>
<td valign="middle" align="center">76.87</td>
<td valign="middle" align="center">77.82</td>
<td valign="middle" align="center">80.37</td>
<td valign="middle" align="center">78.59</td>
<td valign="middle" align="center">
<bold>82.99</bold>
</td>
</tr>
<tr>
<td valign="middle" align="center">F1</td>
<td valign="middle" align="center">74.59</td>
<td valign="middle" align="center">75.32</td>
<td valign="middle" align="center">77.67</td>
<td valign="middle" align="center">79.92</td>
<td valign="middle" align="center">78.59</td>
<td valign="middle" align="center">
<bold>82.66</bold>
</td>
</tr>
<tr>
<td valign="middle" rowspan="4" align="center">Tomato</td>
<td valign="middle" align="center">Acc</td>
<td valign="middle" align="center">61.34</td>
<td valign="middle" align="center">65.55</td>
<td valign="middle" align="center">71.43</td>
<td valign="middle" align="center">70.59</td>
<td valign="middle" align="center">66.39</td>
<td valign="middle" align="center">
<bold>73.11</bold>
</td>
</tr>
<tr>
<td valign="middle" align="center">Pre</td>
<td valign="middle" align="center">59.68</td>
<td valign="middle" align="center">65.27</td>
<td valign="middle" align="center">73.15</td>
<td valign="middle" align="center">72.00</td>
<td valign="middle" align="center">67.79</td>
<td valign="middle" align="center">
<bold>73.06</bold>
</td>
</tr>
<tr>
<td valign="middle" align="center">Rec</td>
<td valign="middle" align="center">61.47</td>
<td valign="middle" align="center">65.66</td>
<td valign="middle" align="center">71.52</td>
<td valign="middle" align="center">70.78</td>
<td valign="middle" align="center">66.64</td>
<td valign="middle" align="center">
<bold>73.30</bold>
</td>
</tr>
<tr>
<td valign="middle" align="center">F1</td>
<td valign="middle" align="center">59.90</td>
<td valign="middle" align="center">64.09</td>
<td valign="middle" align="center">71.70</td>
<td valign="middle" align="center">70.70</td>
<td valign="middle" align="center">63.16</td>
<td valign="middle" align="center">
<bold>72.74</bold>
</td>
</tr>
<tr>
<td valign="middle" rowspan="8" align="center">Insect Pest</td>
<td valign="middle" rowspan="4" align="center">Insect1</td>
<td valign="middle" align="center">Acc</td>
<td valign="middle" align="center">61.45</td>
<td valign="middle" align="center">71.68</td>
<td valign="middle" align="center">83.89</td>
<td valign="middle" align="center">80.69</td>
<td valign="middle" align="center">81.83</td>
<td valign="middle" align="center">
<bold>95.11</bold>
</td>
</tr>
<tr>
<td valign="middle" align="center">Pre</td>
<td valign="middle" align="center">56.41</td>
<td valign="middle" align="center">67.39</td>
<td valign="middle" align="center">79.26</td>
<td valign="middle" align="center">75.68</td>
<td valign="middle" align="center">76.72</td>
<td valign="middle" align="center">
<bold>90.61</bold>
</td>
</tr>
<tr>
<td valign="middle" align="center">Rec</td>
<td valign="middle" align="center">58.07</td>
<td valign="middle" align="center">72.97</td>
<td valign="middle" align="center">85.25</td>
<td valign="middle" align="center">81.69</td>
<td valign="middle" align="center">82.12</td>
<td valign="middle" align="center">
<bold>96.14</bold>
</td>
</tr>
<tr>
<td valign="middle" align="center">F1</td>
<td valign="middle" align="center">55.42</td>
<td valign="middle" align="center">68.00</td>
<td valign="middle" align="center">81.21</td>
<td valign="middle" align="center">76.94</td>
<td valign="middle" align="center">78.30</td>
<td valign="middle" align="center">
<bold>91.31</bold>
</td>
</tr>
<tr>
<td valign="middle" rowspan="4" align="center">Insect2</td>
<td valign="middle" align="center">Acc</td>
<td valign="middle" align="center">63.98</td>
<td valign="middle" align="center">72.32</td>
<td valign="middle" align="center">80.67</td>
<td valign="middle" align="center">80.47</td>
<td valign="middle" align="center">83.94</td>
<td valign="middle" align="center">
<bold>94.95</bold>
</td>
</tr>
<tr>
<td valign="middle" align="center">Pre</td>
<td valign="middle" align="center">64.08</td>
<td valign="middle" align="center">75.52</td>
<td valign="middle" align="center">81.54</td>
<td valign="middle" align="center">80.90</td>
<td valign="middle" align="center">84.25</td>
<td valign="middle" align="center">
<bold>93.94</bold>
</td>
</tr>
<tr>
<td valign="middle" align="center">Rec</td>
<td valign="middle" align="center">62.98</td>
<td valign="middle" align="center">73.61</td>
<td valign="middle" align="center">81.08</td>
<td valign="middle" align="center">81.23</td>
<td valign="middle" align="center">83.66</td>
<td valign="middle" align="center">
<bold>94.96</bold>
</td>
</tr>
<tr>
<td valign="middle" align="center">F1</td>
<td valign="middle" align="center">62.62</td>
<td valign="middle" align="center">72.69</td>
<td valign="middle" align="center">80.75</td>
<td valign="middle" align="center">80.58</td>
<td valign="middle" align="center">83.80</td>
<td valign="middle" align="center">
<bold>92.95</bold>
</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Bold values indicate the best performance. Label. (Original Label Words), Synonym (Synonym Expansion), -Prob. (without Probability Prediction), -FastText. (without FastText Similarity), -Context. (without Context Information).</p>
</fn>
</table-wrap-foot>
</table-wrap>
<p>To further illustrate the effectiveness of the knowledge-enhanced verbalizer, we provide a visualization of the label word sets associated with different categories under the proposed optimization strategies. <xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3</bold>
</xref> presents representative results from two datasets (PlantWild Apple and GojiPest Insect1). Each row corresponds to a category label, and the words in the row denote the candidate verbalizers. The symbols, which are denoted as square, circle, and star, represent filtering decisions made by removing one of the strategies, namely probability prediction (-Prob.), FastText similarity (-FastText.), and context information (-Context.), respectively. Since we take the union of the three strategies, a word is filtered out only when it is removed by all three, which results in the light-colored blocks, while the dark-colored blocks indicate the retained high-quality label words. As shown in the figure, the full model effectively preserves more informative and semantically relevant label words, while noisy or irrelevant words are gradually removed through the joint optimization strategies. This visualization provides clear evidence of how the proposed verbalizer construction improves both the richness and quality of label word sets, thereby enhancing the overall model performance.</p>
<fig id="f3" position="float">
<label>Figure&#xa0;3</label>
<caption>
<p>Visualization of the knowledge-enhanced verbalizer on PlantWild Apple and GojiPest Insect1.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-16-1668642-g003.tif">
<alt-text content-type="machine-generated">Heatmap visualization showing factors related to plant diseases and pests. The top section, labeled &#x201c;PlantWild_Apple&#x201d;, includes keywords like &#x201c;agricultural&#x201d;, &#x201c;humidity&#x201d;, &#x201c;diseases&#x201d;, and &#x201c;fungal&#x201d; associated with issues like &#x201c;black rot&#x201d;, &#x201c;mosaic virus&#x201d;, &#x201c;rust&#x201d;, and &#x201c;scab&#x201d;. The bottom section, labeled &#x201c;GojiPest_Insect1&#x201d;, encompasses terms such as &#x201c;agricultural&#x201d;, &#x201c;climate&#x201d;, &#x201c;harvest&#x201d;, and &#x201c;pests&#x201d; associated with various pests like &#x201c;chihuo&#x201d;, &#x201c;daqingyechan&#x201d;, &#x201c;funichong&#x201d;, and &#x201c;heimangchun&#x201d;. Each term is marked with visual symbols for impact representation.</alt-text>
</graphic>
</fig>
</sec>
<sec id="s4_6_2">
<label>4.6.2</label>
<title>Influence of input data description types</title>
<p>To complement the architectural ablation study and to validate the practical motivation of our work, we further compare the impact of different types of input data on model performance, specifically examining &#x201c;Original&#x201d;, &#x201c;Vague&#x201d;, and &#x201c;Expand&#x201d; (our method) datasets. The &#x201c;Original&#x201d; refers to the accurate and detailed descriptions from the dataset, while the &#x201c;Vague&#x201d; consists of intentionally blurred text, simulating descriptions that might be provided by non-experts. The &#x201c;Expand&#x201d; represents the vague descriptions that are further processed using our method, which leverages prompt-based learning to enhance and refine the textual information, improving its accuracy and specificity.</p>
<p>The experimental results, as presented in <xref ref-type="table" rid="T5">
<bold>Table&#xa0;5</bold>
</xref>, demonstrate that the model exhibits a general decline in performance across all tasks when vague descriptions are used. For instance, in the Corn task, the accuracy drops from 79.83% (Original) to 69.75% (Vague), reflecting the negative impact of vague descriptions on model performance. Similarly, in the Apple and Tomato tasks, the use of vague descriptions results in a noticeable decline in performance. This suggests that vague text introduces additional uncertainty, limiting the model&#x2019;s ability to classify accurately.</p>
<table-wrap id="T5" position="float">
<label>Table&#xa0;5</label>
<caption>
<p>The results for different types of data across two datasets.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" rowspan="2" align="center">Datasets</th>
<th valign="middle" rowspan="2" align="center" colspan="2">Task</th>
<th valign="middle" rowspan="2" align="center">Metrics</th>
<th valign="middle" colspan="3" align="center">Methods</th>
</tr>
<tr>
<th valign="middle" align="center">Original</th>
<th valign="middle" align="center">Vague</th>
<th valign="middle" align="center">Expand(Ours)</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" rowspan="16" align="center">PlantWild</td>
<td valign="middle" rowspan="4" colspan="2" align="center">Apple</td>
<td valign="middle" align="center">Acc</td>
<td valign="middle" align="center">88.08</td>
<td valign="middle" align="center">80.46</td>
<td valign="middle" align="center">
<bold>95.19</bold>
</td>
</tr>
<tr>
<td valign="middle" align="center">Pre</td>
<td valign="middle" align="center">88.44</td>
<td valign="middle" align="center">89.69</td>
<td valign="middle" align="center">
<bold>95.97</bold>
</td>
</tr>
<tr>
<td valign="middle" align="center">Rec</td>
<td valign="middle" align="center">87.06</td>
<td valign="middle" align="center">87.90</td>
<td valign="middle" align="center">
<bold>95.20</bold>
</td>
</tr>
<tr>
<td valign="middle" align="center">F1</td>
<td valign="middle" align="center">87.74</td>
<td valign="middle" align="center">88.43</td>
<td valign="middle" align="center">
<bold>95.32</bold>
</td>
</tr>
<tr>
<td valign="middle" rowspan="4" colspan="2" align="center">Corn</td>
<td valign="middle" align="center">Acc</td>
<td valign="middle" align="center">79.83</td>
<td valign="middle" align="center">69.75</td>
<td valign="middle" align="center">
<bold>80.67</bold>
</td>
</tr>
<tr>
<td valign="middle" align="center">Pre</td>
<td valign="middle" align="center">81.99</td>
<td valign="middle" align="center">71.60</td>
<td valign="middle" align="center">
<bold>82.78</bold>
</td>
</tr>
<tr>
<td valign="middle" align="center">Rec</td>
<td valign="middle" align="center">80.00</td>
<td valign="middle" align="center">69.68</td>
<td valign="middle" align="center">
<bold>80.83</bold>
</td>
</tr>
<tr>
<td valign="middle" align="center">F1</td>
<td valign="middle" align="center">78.47</td>
<td valign="middle" align="center">69.72</td>
<td valign="middle" align="center">
<bold>80.66</bold>
</td>
</tr>
<tr>
<td valign="middle" rowspan="4" colspan="2" align="center">Cucumber</td>
<td valign="middle" align="center">Acc</td>
<td valign="middle" align="center">79.62</td>
<td valign="middle" align="center">74.58</td>
<td valign="middle" align="center">
<bold>83.05</bold>
</td>
</tr>
<tr>
<td valign="middle" align="center">Pre</td>
<td valign="middle" align="center">78.64</td>
<td valign="middle" align="center">74.55</td>
<td valign="middle" align="center">
<bold>82.82</bold>
</td>
</tr>
<tr>
<td valign="middle" align="center">Rec</td>
<td valign="middle" align="center">80.09</td>
<td valign="middle" align="center">74.60</td>
<td valign="middle" align="center">
<bold>82.99</bold>
</td>
</tr>
<tr>
<td valign="middle" align="center">F1</td>
<td valign="middle" align="center">79.36</td>
<td valign="middle" align="center">74.28</td>
<td valign="middle" align="center">
<bold>82.66</bold>
</td>
</tr>
<tr>
<td valign="middle" rowspan="4" colspan="2" align="center">Tomato</td>
<td valign="middle" align="center">Acc</td>
<td valign="middle" align="center">70.67</td>
<td valign="middle" align="center">67.23</td>
<td valign="middle" align="center">
<bold>73.11</bold>
</td>
</tr>
<tr>
<td valign="middle" align="center">Pre</td>
<td valign="middle" align="center">71.08</td>
<td valign="middle" align="center">71.79</td>
<td valign="middle" align="center">
<bold>73.06</bold>
</td>
</tr>
<tr>
<td valign="middle" align="center">Rec</td>
<td valign="middle" align="center">70.83</td>
<td valign="middle" align="center">67.44</td>
<td valign="middle" align="center">
<bold>73.30</bold>
</td>
</tr>
<tr>
<td valign="middle" align="center">F1</td>
<td valign="middle" align="center">70.95</td>
<td valign="middle" align="center">67.58</td>
<td valign="middle" align="center">
<bold>72.74</bold>
</td>
</tr>
<tr>
<td valign="middle" rowspan="8" align="center">Insect Pest</td>
<td valign="middle" rowspan="4" colspan="2" align="center">Insect1</td>
<td valign="middle" align="center">Acc</td>
<td valign="middle" align="center">87.86</td>
<td valign="middle" align="center">72.44</td>
<td valign="middle" align="center">
<bold>95.11</bold>
</td>
</tr>
<tr>
<td valign="middle" align="center">Pre</td>
<td valign="middle" align="center">85.77</td>
<td valign="middle" align="center">72.69</td>
<td valign="middle" align="center">
<bold>90.61</bold>
</td>
</tr>
<tr>
<td valign="middle" align="center">Rec</td>
<td valign="middle" align="center">88.31</td>
<td valign="middle" align="center">74.84</td>
<td valign="middle" align="center">
<bold>96.14</bold>
</td>
</tr>
<tr>
<td valign="middle" align="center">F1</td>
<td valign="middle" align="center">87.02</td>
<td valign="middle" align="center">73.75</td>
<td valign="middle" align="center">
<bold>91.31</bold>
</td>
</tr>
<tr>
<td valign="middle" rowspan="4" colspan="2" align="center">Insect2</td>
<td valign="middle" align="center">Acc</td>
<td valign="middle" align="center">86.08</td>
<td valign="middle" align="center">72.97</td>
<td valign="middle" align="center">
<bold>94.95</bold>
</td>
</tr>
<tr>
<td valign="middle" align="center">Pre</td>
<td valign="middle" align="center">84.76</td>
<td valign="middle" align="center">72.55</td>
<td valign="middle" align="center">
<bold>93.94</bold>
</td>
</tr>
<tr>
<td valign="middle" align="center">Rec</td>
<td valign="middle" align="center">85.92</td>
<td valign="middle" align="center">73.06</td>
<td valign="middle" align="center">
<bold>94.96</bold>
</td>
</tr>
<tr>
<td valign="middle" align="center">F1</td>
<td valign="middle" align="center">85.34</td>
<td valign="middle" align="center">72.80</td>
<td valign="middle" align="center">
<bold>92.95</bold>
</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>The bolder ones mean better.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<p>However, when &#x201c;Expand&#x201d; data is used, where vague descriptions are further processed, the model performance improves significantly. For example, in the Apple task, accuracy increases from 88.08 (Original) to 95.19 (Expand). Similarly, for the Corn, Cucumber, and Tomato tasks, applying our method leads to significant improvements in accuracy. This demonstrates that our expansion method effectively recovers and enhances the information in vague descriptions, improving the model&#x2019;s handling of such inputs. In some tasks, the performance even surpasses that of the original data. This highlights the efficacy of our approach in enhancing classification performance, particularly in tasks such as pest detection, where ambiguous descriptions pose extra difficulties.</p>
</sec>
</sec>
<sec id="s4_7">
<label>4.7</label>
<title>Influence of the templates</title>
<p>In this experiment, the design of templates was pivotal in influencing the model&#x2019;s performance. To evaluate the effect of various hand-crafted and soft templates on classification tasks related to plant disease and pest descriptions, we created and tested several templates, the specifics of which are outlined in <xref ref-type="table" rid="T6">
<bold>Table&#xa0;6</bold>
</xref>. These tasks involve complex short-text descriptions, requiring the model to extract meaningful features and information based on the guidance provided by the templates.</p>
<table-wrap id="T6" position="float">
<label>Table&#xa0;6</label>
<caption>
<p>The different templates on two datasets.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="center">Hard/Soft</th>
<th valign="middle" align="center">id</th>
<th valign="middle" align="center">Template</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" rowspan="4" align="center">Manual</td>
<td valign="middle" align="center">0</td>
<td valign="middle" align="center">A {"mask"} condition: {"placeholder": "text-a"}</td>
</tr>
<tr>
<td valign="middle" align="center">1</td>
<td valign="middle" align="center">{"placeholder": "text-a"} The type is {"mask"}</td>
</tr>
<tr>
<td valign="middle" align="center">2</td>
<td valign="middle" align="center">{"placeholder": "text-a"} The issue is classified as {"mask"}</td>
</tr>
<tr>
<td valign="middle" align="center">3</td>
<td valign="middle" align="center">{"placeholder": "text-a"} A {"mask"} disease</td>
</tr>
<tr>
<td valign="middle" align="center">Soft</td>
<td valign="middle" align="center">0</td>
<td valign="middle" align="center">{"soft": "&lt; soft&gt;"}{ "mask": "&lt; mask &gt;"}{ "soft": "&lt; soft&gt;"}{ "placeholder": "text-a"}</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>&#x201c;Hard&#x201d; refers to hand-crafted templates.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<p>The experimental results, displayed in <xref ref-type="table" rid="T7">
<bold>Table&#xa0;7</bold>
</xref>, demonstrate that specific hand-crafted templates successfully direct the model in grasping the essential elements of the tasks, especially in tasks like Insect2, where the model effectively identifies the key features of pest descriptions. However, as the complexity of the tasks and the diversity of the datasets increase, the limitations of using a single, fixed template become evident. For instance, vague or incomplete descriptions may hinder hand-crafted templates from fully leveraging the potential of the data. Consequently, we introduced soft template generation in our approach to improve the model&#x2019;s ability to process uncertain and ambiguous text. The experimental results demonstrate that, even with limited training data, soft templates can be precisely adapted to the data, thereby creating an optimal theoretical prompt that considerably boosts classification accuracy.</p>
<table-wrap id="T7" position="float">
<label>Table&#xa0;7</label>
<caption>
<p>The 15-shot results of accuracy with different templates on two datasets.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" rowspan="2" align="center">Datasets</th>
<th valign="middle" rowspan="2" align="center">Task</th>
<th valign="middle" colspan="5" align="center">Template</th>
<th valign="middle" rowspan="2" align="center">Ours</th>
</tr>
<tr>
<th valign="middle" align="center">0</th>
<th valign="middle" align="center">1</th>
<th valign="middle" align="center">2</th>
<th valign="middle" align="center">3</th>
<th valign="middle" align="center">Avg</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" rowspan="4" align="center">PlantWild</td>
<td valign="middle" align="center">Apple</td>
<td valign="middle" align="center">88.46</td>
<td valign="middle" align="center">75.00</td>
<td valign="middle" align="center">89.42</td>
<td valign="middle" align="center">76.92</td>
<td valign="middle" align="center">82.45</td>
<td valign="middle" align="center">
<bold>95.19</bold>
</td>
</tr>
<tr>
<td valign="middle" align="center">Corn</td>
<td valign="middle" align="center">73.11</td>
<td valign="middle" align="center">66.39</td>
<td valign="middle" align="center">74.79</td>
<td valign="middle" align="center">68.91</td>
<td valign="middle" align="center">70.80</td>
<td valign="middle" align="center">
<bold>80.67</bold>
</td>
</tr>
<tr>
<td valign="middle" align="center">Cucumber</td>
<td valign="middle" align="center">78.81</td>
<td valign="middle" align="center">72.88</td>
<td valign="middle" align="center">79.66</td>
<td valign="middle" align="center">72.03</td>
<td valign="middle" align="center">75.85</td>
<td valign="middle" align="center">
<bold>83.05</bold>
</td>
</tr>
<tr>
<td valign="middle" align="center">Tomato</td>
<td valign="middle" align="center">70.59</td>
<td valign="middle" align="center">54.62</td>
<td valign="middle" align="center">63.03</td>
<td valign="middle" align="center">57.98</td>
<td valign="middle" align="center">61.56</td>
<td valign="middle" align="center">
<bold>73.11</bold>
</td>
</tr>
<tr>
<td valign="middle" rowspan="2" align="center">Insect Pest</td>
<td valign="middle" align="center">Insect1</td>
<td valign="middle" align="center">92.17</td>
<td valign="middle" align="center">89.54</td>
<td valign="middle" align="center">91.22</td>
<td valign="middle" align="center">84.43</td>
<td valign="middle" align="center">89.34</td>
<td valign="middle" align="center">
<bold>95.11</bold>
</td>
</tr>
<tr>
<td valign="middle" align="center">Insect2</td>
<td valign="middle" align="center">91.50</td>
<td valign="middle" align="center">85.40</td>
<td valign="middle" align="center">87.02</td>
<td valign="middle" align="center">80.71</td>
<td valign="middle" align="center">86.16</td>
<td valign="middle" align="center">91.31</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>The bold values indicate the highest accuracy achieved for each task.</p>
</fn>
</table-wrap-foot>
</table-wrap>
</sec>
<sec id="s4_8">
<label>4.8</label>
<title>Parameter sensitivity</title>
<p>We conducted further experiments to evaluate the effect of different hyperparameters, such as learning rate and batch size, on the experimental results. The findings are presented in <xref ref-type="fig" rid="f4">
<bold>Figure&#xa0;4</bold>
</xref>. The learning rate governs the magnitude of parameter adjustments during model training. Based on the experimental results, the best performance across most tasks was obtained with a learning rate of 3e-5. This indicates that, within a specific range, a higher learning rate can expedite model convergence and assist the model in adapting to variations in the data. However, for some tasks, setting the learning rate too high can lead to instability during training, emphasizing the importance of fine-tuning the learning rate based on the specific requirements of each task.</p>
<fig id="f4" position="float">
<label>Figure&#xa0;4</label>
<caption>
<p>Analysis of model parameter sensitivity on three datasets. <bold>(A)</bold> Effects of different learning rates on the accuracy across three datasets (Insect1, Insect2, Apple, Corn, Cucumber, Tomato). <bold>(B)</bold> Effects of different batch sizes on the accuracy across the same three datasets. Different colors represent different categories: orange for Insect1, green for Insect2, blue for Apple, red for Corn, purple for Cucumber, and brown for Tomato.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-16-1668642-g004.tif">
<alt-text content-type="machine-generated">Graph A depicts the effect of learning rates on accuracy across datasets labeled Insect1, Insect2, Apple, Corn, Cucumber, and Tomato. Graph B shows the effect of batch sizes on the same datasets. Both graphs display accuracy percentages on the y-axis with respective variables on the x-axis.</alt-text>
</graphic>
</fig>
<p>In addition to learning rate, batch size is another critical hyperparameter that influences training dynamics, memory usage, and model convergence. Our findings show that the model performed optimally with a batch size of 32. This suggests that, within an appropriate range, smaller batch sizes can facilitate faster convergence, helping the model adapt quickly to training data, especially for tasks with more complex patterns. On the other hand, larger batch sizes contribute to more stable training and enhanced performance in certain tasks. These results indicate that different datasets and tasks may require distinct batch sizes to achieve the best performance.</p>
<p>Furthermore, we assessed the impact of training epochs on model performance, as shown in <xref ref-type="table" rid="T8">
<bold>Table&#xa0;8</bold>
</xref>.</p>
<table-wrap id="T8" position="float">
<label>Table&#xa0;8</label>
<caption>
<p>The effect of different training epochs on the accuracy of two datasets.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" rowspan="2" align="center">Datasets</th>
<th valign="middle" rowspan="2" align="center">Task</th>
<th valign="middle" colspan="4" align="center">Epochs</th>
</tr>
<tr>
<th valign="middle" align="center">5</th>
<th valign="middle" align="center">10</th>
<th valign="middle" align="center">15</th>
<th valign="middle" align="center">20</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" rowspan="4" align="center">PlantWild</td>
<td valign="middle" align="center">Apple</td>
<td valign="middle" align="center">90.38</td>
<td valign="middle" align="center">93.27</td>
<td valign="middle" align="center">95.19</td>
<td valign="middle" align="center">94.23</td>
</tr>
<tr>
<td valign="middle" align="center">Corn</td>
<td valign="middle" align="center">68.07</td>
<td valign="middle" align="center">71.43</td>
<td valign="middle" align="center">80.67</td>
<td valign="middle" align="center">78.99</td>
</tr>
<tr>
<td valign="middle" align="center">Cucumber</td>
<td valign="middle" align="center">73.73</td>
<td valign="middle" align="center">75.42</td>
<td valign="middle" align="center">83.05</td>
<td valign="middle" align="center">75.42</td>
</tr>
<tr>
<td valign="middle" align="center">Tomato</td>
<td valign="middle" align="center">66.39</td>
<td valign="middle" align="center">71.43</td>
<td valign="middle" align="center">73.11</td>
<td valign="middle" align="center">72.27</td>
</tr>
<tr>
<td valign="middle" rowspan="2" align="center">Insect Pest</td>
<td valign="middle" align="center">Insect1</td>
<td valign="middle" align="center">88.40</td>
<td valign="middle" align="center">92.06</td>
<td valign="middle" align="center">95.11</td>
<td valign="middle" align="center">90.08</td>
</tr>
<tr>
<td valign="middle" align="center">Insect2</td>
<td valign="middle" align="center">86.43</td>
<td valign="middle" align="center">91.37</td>
<td valign="middle" align="center">94.95</td>
<td valign="middle" align="center">88.32</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>The results indicate that while model performance improves with more epochs, excessive training can lead to diminishing returns. Specifically, in the Apple task from the PlantWild dataset, the model achieved an optimal accuracy of 95.19% after 15 epochs, with a slight decrease to 94.23% at 20 epochs. Similar trends were observed for the Corn and Cucumber tasks, where the best performance was also achieved at 15 epochs, with no significant gain at 20 epochs. These findings suggest that a moderate number of epochs promotes model convergence and generalization, while too many epochs may lead to overfitting. This pattern was further confirmed in the Insect Pest dataset, where 15 epochs also yielded the best results in both Insect1 and Insect2 tasks. In conclusion, selecting an appropriate number of epochs is crucial for optimizing model performance, with 15 epochs proving to be ideal for most tasks.</p>
</sec>
<sec id="s4_9">
<label>4.9</label>
<title>Validation on a colloquial description dataset</title>
<p>In this section, we constructed a small-sample colloquial description dataset by collecting Q&amp;A pairs from the agricultural platform &#x201c;Ask Extension<xref ref-type="fn" rid="fn2">
<sup>2</sup>
</xref>&#x201d; and carefully curating them through filtering and fine-grained annotation. The dataset contains 1,000 samples, covering five categories of plant diseases and pests: Maize Leaf Spot, Rice Blast, Rice Planthopper, and Wheat Powdery Mildew.</p>
<p>On this dataset, we conducted validation experiments using fine-tuned PLMs (AgriBERT), prompt-based learning methods (PL and P-Tuning), the LLM Qwen, as well as our proposed method. The experimental settings and parameters were kept consistent with those in the main experiments. The results are reported in <xref ref-type="table" rid="T9">
<bold>Table&#xa0;9</bold>
</xref>. It can be observed that our method outperforms all other approaches across Accuracy, Precision, Recall, and F1, achieving an F1 Score of 79.70%, which is significantly higher than the second-best method, AgriBERT. In contrast, PL and P-Tuning perform poorly on this small-sample colloquial dataset, indicating the limited generalization capability of traditional prompt-based learning. Although Qwen performs better than the prompt-based learning methods, its adaptation to colloquial data is still insufficient without fine-tuning. Overall, our method demonstrates clear advantages in handling colloquial and standard expressions in classification tasks, providing more accurate and stable predictions across all sample categories.</p>
<table-wrap id="T9" position="float">
<label>Table&#xa0;9</label>
<caption>
<p>Performance of different methods on the newly constructed colloquial dataset.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="center">Metrics</th>
<th valign="middle" align="center">Agribert</th>
<th valign="middle" align="center">PL</th>
<th valign="middle" align="center">P-Tuning</th>
<th valign="middle" align="center">Qwen</th>
<th valign="middle" align="center">Ours</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="center">Acc</td>
<td valign="middle" align="center">75.50</td>
<td valign="middle" align="center">63.00</td>
<td valign="middle" align="center">66.00</td>
<td valign="middle" align="center">64.00</td>
<td valign="middle" align="center">79.50</td>
</tr>
<tr>
<td valign="middle" align="center">Pre</td>
<td valign="middle" align="center">76.22</td>
<td valign="middle" align="center">56.28</td>
<td valign="middle" align="center">59.21</td>
<td valign="middle" align="center">67.13</td>
<td valign="middle" align="center">80.16</td>
</tr>
<tr>
<td valign="middle" align="center">Rec</td>
<td valign="middle" align="center">75.50</td>
<td valign="middle" align="center">63.00</td>
<td valign="middle" align="center">66.00</td>
<td valign="middle" align="center">64.00</td>
<td valign="middle" align="center">79.50</td>
</tr>
<tr>
<td valign="middle" align="center">F1</td>
<td valign="middle" align="center">75.17</td>
<td valign="middle" align="center">58.56</td>
<td valign="middle" align="center">61.25</td>
<td valign="middle" align="center">63.62</td>
<td valign="middle" align="center">79.70</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
</sec>
<sec id="s5" sec-type="conclusions">
<label>5</label>
<title>Conclusions and future work</title>
<p>In this paper, we proposed a plant pest and disease classification method based on colloquial descriptions by leveraging soft prompt-tuning, which combined AgriBERT-based entity recognition and AgriKG retrieval for knowledge enhancement of input. Then, a soft prompt-tuning method with an external knowledge extension verbalizer is employed for detection. The experimental findings validate that our method outperforms baseline models, including state-of-the-art large language models (LLMs), in detection performance.</p>
<p>In future work, we plan to expand our research in two main directions. Firstly, we will investigate more effective strategies for verbalizer construction, including advanced approaches for generation, filtering, and integration. Secondly, we intend to explore multi-modal methods, including computer vision, to derive more robust representations, which can further advance the performance of plant pests and diseases classification. In particular, integrating our model with image description models such as PlanText and leveraging databases like PlantPAD could enable an end-to-end agricultural assistance system that combines descriptive queries with visual observations for more reliable diagnostic recommendations.</p>
</sec>
</body>
<back>
<sec id="s6" sec-type="data-availability">
<title>Data availability statement</title>
<p>Publicly available datasets were analyzed in this study. This data can be found here: <uri xlink:href="https://www.agridata.cn/data.html#/datadetail?id=289614">https://www.agridata.cn/data.html#/datadetail?id=289614</uri>; <uri xlink:href="https://github.com/tqwei05/MVPDR">https://github.com/tqwei05/MVPDR</uri>.</p>
</sec>
<sec id="s7" sec-type="author-contributions">
<title>Author contributions</title>
<p>XLL: Software, Writing &#x2013; original draft, Data curation, Formal Analysis, Visualization, Methodology, Validation, Investigation, Resources, Writing &#x2013; review &amp; editing, Conceptualization, Supervision, Funding acquisition, Project administration. XBL: Conceptualization, Resources, Supervision, Validation, Data curation, Project administration, Writing &#x2013; review &amp; editing, Investigation, Methodology, Funding acquisition, Software, Formal Analysis, Visualization. YZ: Writing &#x2013; review &amp; editing, Software, Project administration, Methodology, Supervision, Visualization, Investigation, Conceptualization, Formal Analysis, Funding acquisition, Validation, Resources, Data curation.</p>
</sec>
<sec id="s8" sec-type="funding-information">
<title>Funding</title>
<p>The author(s) declare financial support was received for the research and/or publication of this article. This research is partially supported by the Key Research and Development Program of Jiangsu Province in China (BE2023315), Yangzhou Science and Technology Plan Project City School Cooperation Special Project (YZ2023199), Open Project Program of Joint International Research Laboratory of Agriculture and Agri-Product Safety (JILAR-KF202104).</p>
</sec>
<sec id="s9" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>Author XLL was employed by Engineering Design and Research Institute Co., Ltd.</p>
<p>The remaining authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="s10" sec-type="ai-statement">
<title>Generative AI statement</title>
<p>The author(s) declare that no Generative AI was used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p>
</sec>
<sec id="s11" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<fn-group>
<fn id="fn1">
<label>1</label>
<p>
<ext-link ext-link-type="uri" xlink:href="https://relatedwords.org">https://relatedwords.org</ext-link>
</p>
</fn>
<fn id="fn2">
<label>2</label>
<p>
<ext-link ext-link-type="uri" xlink:href="https://ask.extension.org/">https://ask.extension.org/</ext-link>
</p>
</fn>
</fn-group>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Chen</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Zhao</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>Y.</given-names>
</name>
</person-group> (<year>2024</year>). &#x201c;<article-title>Agribert: A joint entity relation extraction model based on agricultural text</article-title>,&#x201d; in <source>International Conference on Knowledge Science, Engineering and Management</source> (<publisher-loc>Singapore</publisher-loc>: <publisher-name>Springer</publisher-name>), <fpage>254</fpage>&#x2013;<lpage>266</lpage>.</citation></ref>
<ref id="B2">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Chen</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Kuang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Cheng</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Zheng</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Gao</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Zhou</surname> <given-names>A.</given-names>
</name>
</person-group> (<year>2019</year>). &#x201c;<article-title>Agrikg: An agricultural knowledge graph and its applications</article-title>,&#x201d; in <source>Database Systems for Advanced Applications: DASFAA 2019 International Workshops: BDMS, BDQM, and GDMA, Chiang Mai, Thailand, April 22&#x2013;25, 2019, Proceedings 24</source> (<publisher-loc>Cham</publisher-loc>: <publisher-name>Springer</publisher-name>), <fpage>533</fpage>&#x2013;<lpage>537</lpage>.</citation></ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Devlin</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Chang</surname> <given-names>M.-W.</given-names>
</name>
<name>
<surname>Lee</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Toutanova</surname> <given-names>K.</given-names>
</name>
</person-group> (<year>2018</year>). &#x201c;<article-title>Bert: Pre-training of deep bidirectional transformers for language understanding</article-title>.&#x201d; in <source>Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)</source>, <fpage>4171</fpage>&#x2013;<lpage>4186</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.18653/v1/N19-1423</pub-id>
</citation></ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Domingues</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Brandao</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Ferreira</surname> <given-names>J. C.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Machine learning for detection and prediction of crop diseases and pests: A comprehensive survey</article-title>. <source>Agriculture</source> <volume>12</volume>, <fpage>1350</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/agriculture12091350</pub-id>
</citation></ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Donatelli</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Magarey</surname> <given-names>R. D.</given-names>
</name>
<name>
<surname>Bregaglio</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Willocquet</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Whish</surname> <given-names>J. P.</given-names>
</name>
<name>
<surname>Savary</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Modelling the impacts of pests and diseases on agricultural systems</article-title>. <source>Agric. Syst.</source> <volume>155</volume>, <fpage>213</fpage>&#x2013;<lpage>224</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.agsy.2017.01.019</pub-id>, PMID: <pub-id pub-id-type="pmid">28701814</pub-id></citation></ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dong</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Zhao</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>Q.</given-names>
</name>
<name>
<surname>Wu</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Huang</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Wu</surname> <given-names>X.</given-names>
</name>
<etal/>
</person-group>. (<year>2024</year>). <article-title>Plantpad: a platform for large-scale image phenomics analysis of disease in plant science</article-title>. <source>Nucleic Acids Res.</source> <volume>52</volume>, <fpage>D1556</fpage>&#x2013;<lpage>D1568</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/nar/gkad917</pub-id>, PMID: <pub-id pub-id-type="pmid">37897364</pub-id></citation></ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Duan</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Ding</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Kim</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>A multimodal approach for advanced pest detection and classification</article-title>. arXiv preprint arXiv:2312.10948. doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arXiv.2312.10948</pub-id>
</citation></ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gao</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Fisch</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>D.</given-names>
</name>
</person-group> (<year>2020</year>). &#x201c;<article-title>Making pre-trained language models better few-shot learners</article-title>.&#x201d; in <source>Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers)</source>, <fpage>3816</fpage>&#x2013;<lpage>3830</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.18653/v1/2021.acl-long.295</pub-id>
</citation></ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Han</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Zhao</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Ding</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Sun</surname> <given-names>M.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Ptr: Prompt tuning with rules for text classification</article-title>. <source>AI Open</source> <volume>3</volume>, <fpage>182</fpage>&#x2013;<lpage>192</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.aiopen.2022.11.003</pub-id>
</citation></ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hu</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Ding</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Sun</surname> <given-names>M.</given-names>
</name>
</person-group> (<year>2021</year>). &#x201c;<article-title>Knowledgeable prompt-tuning: Incorporating knowledge into prompt verbalizer for text classification</article-title>.&#x201d; in <source>Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</source>, <fpage>2225</fpage>&#x2013;<lpage>2240</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.18653/v1/2022.acl-long.158</pub-id>
</citation></ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jiang</surname> <given-names>A. Q.</given-names>
</name>
<name>
<surname>Sablayrolles</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Mensch</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Bamford</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Chaplot</surname> <given-names>D. S.</given-names>
</name>
<name>
<surname>Casas</surname> <given-names>D.</given-names>
</name>
<etal/>
</person-group>. (<year>2023</year>). <article-title>Mistral 7b</article-title>. arXiv preprint arXiv:2310.06825. doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arXiv.2310.06825</pub-id>
</citation></ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kale</surname> <given-names>M. R.</given-names>
</name>
<name>
<surname>Shitole</surname> <given-names>M. S.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Analysis of crop disease detection with svm, knn and random forest classification</article-title>. <source>Inf. Technol. Industry</source> <volume>9</volume>, <fpage>364</fpage>&#x2013;<lpage>372</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.17762/itii.v9i1.140</pub-id>
</citation></ref>
<ref id="B13">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Kim</surname> <given-names>Y.</given-names>
</name>
</person-group> (<year>2014</year>). &#x201c;<article-title>Convolutional neural networks for sentence classification</article-title>,&#x201d; in <conf-name> Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing (EMNLP). (Doha, Qatar: Association for Computational Linguistics), </conf-name>. <fpage>1746</fpage>&#x2013;<lpage>1751</lpage>.</citation></ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname> <given-names>X. L.</given-names>
</name>
<name>
<surname>Liang</surname> <given-names>P.</given-names>
</name>
</person-group> (<year>2021</year>). &#x201c;<article-title>Prefix-tuning: Optimizing continuous prompts for generation</article-title>.&#x201d; in <source>Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing</source> (Volume 1: Long Papers), <fpage>4582</fpage>&#x2013;<lpage>4597</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.18653/v1/2021.acl-long.353</pub-id>
</citation></ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>M.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Pest and disease management in agricultural production with artificial intelligence: Innovative applications and development trends</article-title>. <source>Adv. Resour. Res.</source> <volume>4</volume>, <fpage>381</fpage>&#x2013;<lpage>401</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.50908/arr.4.3_381</pub-id>
</citation></ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Giunchiglia</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Huang</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Feng</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Guan</surname> <given-names>R.</given-names>
</name>
</person-group> (<year>2025</year>b). <article-title>A simple graph contrastive learning framework for short text classification</article-title>. <source>Proc. AAAI Conf. Artif. Intell</source>. <volume>39</volume>, <fpage>19015</fpage>&#x2013;<lpage>19023</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1609/aaai.v39i18.34093</pub-id>
</citation></ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Guo</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Zhu</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>L.</given-names>
</name>
</person-group> (<year>2025</year>a). <article-title>A text-speech multimodal chinese named entity recognition model for crop diseases and pests</article-title>. <source>Sci. Rep.</source> <volume>15</volume>, <fpage>5429</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41598-025-88874-9</pub-id>, PMID: <pub-id pub-id-type="pmid">39948134</pub-id></citation></ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Ji</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Fu</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Tam</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Du</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>Z.</given-names>
</name>
<etal/>
</person-group>. (<year>2022</year>). &#x201c;<article-title>P-tuning: Prompt tuning can be comparable to fine-tuning across scales and tasks</article-title>,&#x201d; in <source>Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)</source>, (<publisher-name>Dublin, Ireland: Association for Computational Linguistics</publisher-name>). <fpage>61</fpage>&#x2013;<lpage>68</lpage>.</citation></ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>X.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Plant diseases and pests detection based on deep learning: a review</article-title>. <source>Plant Methods</source> <volume>17</volume>, <fpage>1</fpage>&#x2013;<lpage>18</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s13007-021-00722-9</pub-id>, PMID: <pub-id pub-id-type="pmid">33627131</pub-id></citation></ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Yuan</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Fu</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Jiang</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Hayashi</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Neubig</surname> <given-names>G.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Pre-train, prompt, and predict: A systematic survey of prompting methods in natural language processing</article-title>. <source>ACM Computing Surveys</source> <volume>55</volume>, <fpage>1</fpage>&#x2013;<lpage>35</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1145/3560815</pub-id>
</citation></ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Nayagam</surname> <given-names>M. G.</given-names>
</name>
<name>
<surname>Vijayalakshmi</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Somasundaram</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Mukunthan</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Yogaraja</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Partheeban</surname> <given-names>P.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Control of pests and diseases in plants using iot technology</article-title>. <source>Measurement: Sens.</source> <volume>26</volume>, <fpage>100713</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.measen.2023.100713</pub-id>
</citation></ref>
<ref id="B22">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Pedersen</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Patwardhan</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Michelizzi</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>2004</year>). &#x201c;<article-title>WordNet::Similarity - measuring the relatedness of concepts</article-title>,&#x201d; in <source>Demonstration Papers at HLT-NAACL 2004</source> (<publisher-name>Association for Computational Linguistics</publisher-name>, <publisher-loc>Boston, Massachusetts, USA</publisher-loc>), <fpage>38</fpage>&#x2013;<lpage>41</lpage>.</citation></ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Petroni</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Rockt&#xe4;schel</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Lewis</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Bakhtin</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Wu</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Miller</surname> <given-names>A. H.</given-names>
</name>
<etal/>
</person-group>. (<year>2019</year>). &#x201c;<article-title>Language models as knowledge bases</article-title>?&#x201d; in <source>Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)</source>, <fpage>2463</fpage>&#x2013;<lpage>2473</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.18653/v1/D19-1250</pub-id>
</citation></ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rodriguez-Garcia</surname> <given-names>M. A.</given-names>
</name>
<name>
<surname>Garc&#xed;a-S&#xe1;nchez</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Valencia-Garcia</surname> <given-names>R.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Knowledge-based system for crop pests and diseases recognition</article-title>. <source>Electronics</source> <volume>10</volume>, <fpage>905</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/electronics10080905</pub-id>
</citation></ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Schick</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Schmid</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Sch&#xfc;tze</surname> <given-names>H.</given-names>
</name>
</person-group> (<year>2020</year>). &#x201c;<article-title>Automatically identifying words that can serve as labels for few-shot text classification</article-title>.&#x201d; in <source>Proceedings of the 28th International Conference on Computational Linguistics (COLING)</source>, <fpage>5569</fpage>&#x2013;<lpage>5578</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.18653/v1/2020.coling-main.488</pub-id>
</citation></ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Schick</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Sch&#xfc;tze</surname> <given-names>H.</given-names>
</name>
</person-group> (<year>2020</year>). &#x201c;<article-title>Exploiting cloze questions for few shot text classification and natural language inference</article-title>.&#x201d; in <source>Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume</source>, <fpage>255</fpage>&#x2013;<lpage>269</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.18653/v1/2021.eacl-main.20</pub-id>
</citation></ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shin</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Razeghi</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Logan</surname> <given-names>R. L.</given-names>
<suffix>IV</suffix>
</name>
<name>
<surname>Wallace</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Singh</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2020</year>). &#x201c;<article-title>Autoprompt: Eliciting knowledge from language models with automatically generated prompts</article-title>.&#x201d; in <source>Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)</source>, <fpage>4222</fpage>&#x2013;<lpage>4235</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.18653/v1/2020.emnlp-main.346</pub-id>
</citation></ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shoaib</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Shah</surname> <given-names>B.</given-names>
</name>
<name>
<surname>El-Sappagh</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Ali</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Ullah</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Alenezi</surname> <given-names>F.</given-names>
</name>
<etal/>
</person-group>. (<year>2023</year>). <article-title>An advanced deep learning models-based plant disease detection: A review of recent research</article-title>. <source>Front. Plant Sci.</source> <volume>14</volume>, <elocation-id>1158933</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fpls.2023.1158933</pub-id>, PMID: <pub-id pub-id-type="pmid">37025141</pub-id></citation></ref>
<ref id="B29">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Speer</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Chin</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Havasi</surname> <given-names>C.</given-names>
</name>
</person-group> (<year>2017</year>). &#x201c;<article-title>Conceptnet 5.5: an open multilingual graph of general knowledge</article-title>,&#x201d; in <conf-name>Thirty-First AAAI Conference on Artificial Intelligence &#x2013; Special Track on Cognitive Systems</conf-name>, (<publisher-loc>Hilton San Francisco, San Francisco, California, USA</publisher-loc>: <publisher-name> AAAI Press</publisher-name>). <fpage>4444</fpage>&#x2013;<lpage>4451</lpage>.</citation></ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Spence</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Hill</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Morris</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>How the global threat of pests and diseases impacts plants, people, and the planet</article-title>. <source>Plants People Planet</source> <volume>2</volume>, <fpage>5</fpage>&#x2013;<lpage>13</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1002/ppp3.10088</pub-id>
</citation></ref>
<ref id="B31">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Sumaya</surname> <given-names>A. I.</given-names>
</name>
<name>
<surname>Forhad</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Al Rafi</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Rahman</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Bhuyan</surname> <given-names>M. H.</given-names>
</name>
<name>
<surname>Tareq</surname> <given-names>Q.</given-names>
</name>
</person-group> (<year>2024</year>). &#x201c;<article-title>Comparative analysis of alexnet, googlenet, vgg19, resnet50, and resnet101 for improved plant disease detection through convolutional neural networks</article-title>,&#x201d; in <conf-name>2024 2nd International Conference on Artificial Intelligence, Blockchain, and Internet of Things (AIBThings), (Mt Pleasant, MI, USA: IEEE),</conf-name> <fpage>1</fpage>&#x2013;<lpage>6</lpage>.</citation></ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Toscano-Miranda</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Toro</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Aguilar</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Caro</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Marulanda</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Trebilcok</surname> <given-names>A.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Artificial-intelligence and sensing techniques for the management of insect pests and diseases in cotton: a systematic literature review</article-title>. <source>J. Agric. Sci.</source> <volume>160</volume>, <fpage>16</fpage>&#x2013;<lpage>31</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1017/S002185962200017X</pub-id>
</citation></ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Touvron</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Lavril</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Izacard</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Martinet</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Lachaux</surname> <given-names>M.-A.</given-names>
</name>
<name>
<surname>Lacroix</surname> <given-names>T.</given-names>
</name>
<etal/>
</person-group>. (<year>2023</year>). <article-title>Llama: Open and efficient foundation language models</article-title>. arXiv preprint arXiv:2302.13971. doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arXiv.2302.13971</pub-id>
</citation></ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Gao</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Rao</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>A.</given-names>
</name>
<name>
<surname>He</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Jiao</surname> <given-names>J.</given-names>
</name>
<etal/>
</person-group>. (<year>2024</year>a). <article-title>Named entity recognition (ner) for chinese agricultural diseases and pests based on discourse topic and attention mechanism</article-title>. <source>Evol. Intell.</source> <volume>17</volume>, <fpage>457</fpage>&#x2013;<lpage>466</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s12065-022-00727-w</pub-id>
</citation></ref>
<ref id="B35">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Jin</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Leonardis</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Zheng</surname> <given-names>F.</given-names>
</name>
</person-group> (<year>2024</year>b). <article-title>Agri-llava: Knowledge-infused large multimodal assistant on agricultural pests and diseases</article-title>. arXiv preprint arXiv:2412.02158. doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arXiv.2412.02158</pub-id>
</citation></ref>
<ref id="B36">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Wei</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>X.</given-names>
</name>
<name>
<surname>An</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>2023</year>). &#x201c;<article-title>Citrus diseases and pests image-text retrieval based on multi-modal transformer</article-title>,&#x201d; in <conf-name>2023 International Conference on High Performance Big Data and Intelligent Systems (HDIS)</conf-name>, (<publisher-name>IEEE</publisher-name>), <fpage>66</fpage>&#x2013;<lpage>70</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arXiv.2306.04933</pub-id>
</citation></ref>
<ref id="B37">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wu</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Yu</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Song</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Zhao</surname> <given-names>H.</given-names>
</name>
<etal/>
</person-group>. (<year>2023</year>). <article-title>Infoprompt: Information-theoretic soft prompt tuning for natural language understanding</article-title>. <source>Adv. Neural Inf. Process. Syst.</source> <volume>36</volume>, <fpage>61060</fpage>&#x2013;<lpage>61084</lpage>.</citation></ref>
<ref id="B38">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xing</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Lee</surname> <given-names>H. J.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Crop pests and diseases recognition using danet with tldp</article-title>. <source>Comput. Electron. Agric.</source> <volume>199</volume>, <fpage>107144</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.compag.2022.107144</pub-id>
</citation></ref>
<ref id="B39">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yang</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Hu</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Lan</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Deng</surname> <given-names>X.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Citrus huanglongbing detection based on multi-modal feature fusion learning</article-title>. <source>Front. Plant Sci.</source> <volume>12</volume>, <elocation-id>809506</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fpls.2021.809506</pub-id>, PMID: <pub-id pub-id-type="pmid">35027917</pub-id></citation></ref>
<ref id="B40">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yueteng</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Xueyan</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Yandong</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Fengjun</surname> <given-names>C.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Recognition of plant leaf diseases and insect pests based on improved resnet</article-title>. <source>J. Chin. Agric. Mechanization</source> <volume>42</volume>, <fpage>175</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.13733/j.jcam.issn.20955553.2021.12.26</pub-id>
</citation></ref>
<ref id="B41">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhao</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Wu</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Xiao</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Jiang</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Yu</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>Y.</given-names>
</name>
<etal/>
</person-group>. (<year>2024</year>). <article-title>Plantext: Gradually masked guidance to align image phenotypes with trait descriptions for plant disease texts</article-title>. <source>Plant Phenom.</source> <volume>6</volume>, <fpage>0272</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.34133/plantphenomics.0272</pub-id>, PMID: <pub-id pub-id-type="pmid">39600967</pub-id></citation></ref>
<ref id="B42">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhu</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Yue</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Qiang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Yuan</surname> <given-names>Y.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>A hybrid classification method via character embedding in chinese short text with few words</article-title>. <source>IEEE Access</source> <volume>8</volume>, <fpage>92120</fpage>&#x2013;<lpage>92128</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/ACCESS.2020.2994450</pub-id>
</citation></ref>
<ref id="B43">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhu</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Qiang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Yuan</surname> <given-names>Y.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>Soft prompt-tuning with self-resource verbalizer for short text streams</article-title>. <source>Eng. Appl. Artif. Intell.</source> <volume>139</volume>, <fpage>109589</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.engappai.2024.109589</pub-id>
</citation></ref>
<ref id="B44">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhu</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Qiang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Wu</surname> <given-names>X.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Prompt-learning for short text classification</article-title>. <source>IEEE Trans. Knowledge Data Eng.</source> <volume>36</volume>, <fpage>5328</fpage>&#x2013;<lpage>5339</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/TKDE.2023.3332787</pub-id>
</citation></ref>
<ref id="B45">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhu</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Wu</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Qiang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Yuan</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>Y.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Representation learning via an integrated autoencoder for unsupervised domain adaptation</article-title>. <source>Front. Comput. Sci.</source> <volume>17</volume>, <fpage>175334</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s11704-022-1349-5</pub-id>
</citation></ref>
<ref id="B46">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhu</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Wu</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Hu</surname> <given-names>X.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Transfer learning with deep manifold regularized auto-encoders</article-title>. <source>Neurocomputing</source> <volume>369</volume>, <fpage>145</fpage>&#x2013;<lpage>154</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.neucom.2019.08.078</pub-id>
</citation></ref>
</ref-list>
</back>
</article>