<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Artif. Intell.</journal-id>
<journal-title>Frontiers in Artificial Intelligence</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Artif. Intell.</abbrev-journal-title>
<issn pub-type="epub">2624-8212</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/frai.2025.1663891</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Artificial Intelligence</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>A Chinese question and answer system for liver cancer based on knowledge graph and large language model</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" equal-contrib="yes">
<name><surname>Wu</surname> <given-names>Haoqi</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="author-notes" rid="fn002"><sup>&#x02020;</sup></xref>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
</contrib>
<contrib contrib-type="author" equal-contrib="yes">
<name><surname>Zhang</surname> <given-names>Min</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="author-notes" rid="fn002"><sup>&#x02020;</sup></xref>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Wang</surname> <given-names>Hailing</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x0002A;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/2262686/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Jiang</surname> <given-names>Xiaoyan</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Gao</surname> <given-names>Yongbin</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Huang</surname> <given-names>Rong</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Fang</surname> <given-names>Zhijun</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Hu</surname> <given-names>Xiaojun</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/1934275/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Fan</surname> <given-names>Yingfang</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<xref ref-type="corresp" rid="c002"><sup>&#x0002A;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/647827/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
</contrib-group>
<aff id="aff1"><sup>1</sup><institution>School of Electronic and Electrical Engineering, Shanghai University Of Engineering Science</institution>, <addr-line>Shanghai</addr-line>, <country>China</country></aff>
<aff id="aff2"><sup>2</sup><institution>Department of Endocrinology, Ninth Hospital of Xi&#x00027;an</institution>, <addr-line>Xi&#x00027;an</addr-line>, <country>China</country></aff>
<aff id="aff3"><sup>3</sup><institution>The Hepatobiliary Surgery, Third Affiliated Hospital, Southern Medical University</institution>, <addr-line>Guangzhou</addr-line>, <country>China</country></aff>
<author-notes>
<fn fn-type="edited-by"><p>Edited by: Yunlong Mi, Central South University, China</p></fn>
<fn fn-type="edited-by"><p>Reviewed by: Shitang Ma, West Anhui University, China</p>
<p>Wei Jun Dan Ong, National University Health System, Singapore</p>
<p>Yidan Sun, Washington University in St. Louis, United States</p></fn>
<corresp id="c001">&#x0002A;Correspondence: Hailing Wang <email>wanghailing&#x00040;sues.edu.cn</email></corresp>
<corresp id="c002">Yingfang Fan <email>fanyf068700&#x00040;sina.com</email></corresp>
<fn fn-type="equal" id="fn002"><p>&#x02020;These authors have contributed equally to this work</p></fn></author-notes>
<pub-date pub-type="epub">
<day>10</day>
<month>10</month>
<year>2025</year>
</pub-date>
<pub-date pub-type="collection">
<year>2025</year>
</pub-date>
<volume>8</volume>
<elocation-id>1663891</elocation-id>
<history>
<date date-type="received">
<day>11</day>
<month>07</month>
<year>2025</year>
</date>
<date date-type="accepted">
<day>19</day>
<month>09</month>
<year>2025</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x000A9; 2025 Wu, Zhang, Wang, Jiang, Gao, Huang, Fang, Hu and Fan.</copyright-statement>
<copyright-year>2025</copyright-year>
<copyright-holder>Wu, Zhang, Wang, Jiang, Gao, Huang, Fang, Hu and Fan</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p></license>
</permissions>
<abstract>
<sec>
<title>Introduction</title>
<p>The liver cancer question-and-answer (Q&#x00026;A) system is primarily intended to help patients access disease-related information more conveniently. However, there is currently no Q&#x00026;A system specifically developed for liver cancer. Additionally, most existing Q&#x00026;A systems lack real clinical data and have limited capability in understanding Chinese questions.</p></sec>
<sec>
<title>Methods</title>
<p>This paper proposes a Chinese liver cancer question-answering system based on knowledge graphs and Large Language Models (LLMs). To unify information from diverse sources, the system employs a knowledge graph to store entities and inter-entity relationships extracted from patients&#x00027; clinical electronic medical records and the professional medical website xywy.com, which serves as the foundation for the system&#x00027;s responses. Specifically, ChatGLM3.5 is utilized to extract entity information from questions, while BERT is applied to understand users&#x00027; intent. Subsequently, the system retrieves corresponding information from the knowledge graph. Finally, the retrieved information is integrated, and a natural language response is generated as the answer to the question.</p></sec>
<sec>
<title>Results</title>
<p>The experimental results indicate that in terms of intent classification, our system achieves a precision of 92.34%, representing an improvement of 1.38% over the BERT model and 4.32% over the GEBERT model. In terms of response relevance, the system&#x00027;s outputs are more aligned with patients&#x00027; daily speech patterns and exhibit higher relevance to the target questions.</p></sec>
<sec>
<title>Discussion</title>
<p>In conclusion, the improved method significantly enhances the usefulness and reliability of the liver cancer Q&#x00026;A system.</p></sec></abstract>
<kwd-group>
<kwd>large language model</kwd>
<kwd>question and answer system</kwd>
<kwd>liver cancer</kwd>
<kwd>knowledge graph</kwd>
<kwd>data integration</kwd>
</kwd-group>
<counts>
<fig-count count="3"/>
<table-count count="7"/>
<equation-count count="2"/>
<ref-count count="26"/>
<page-count count="12"/>
<word-count count="8231"/>
</counts>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Machine Learning and Artificial Intelligence</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="s1">
<title>1 Introduction</title>
<p>Liver cancer, a malignant tumor that severely endangers the lives and health of Chinese people, presents a worrying picture in terms of incidence and mortality. According to data released by the National Cancer Center of China, the number of new liver cancer cases ranks fourth in the country, while the number of deaths caused by this malignant tumor ranks second nationwide (<xref ref-type="bibr" rid="B4">Chinese Society of Liver Cancer, 2025</xref>). Therefore, providing more effective medical services to these patients so that they can quickly retrieve information about the disease has become a critical issue. With the popularity of the Internet, people increasingly rely on web search to seek answers to their medical questions. In this context, personalized Q&#x00026;A systems have emerged as a more effective method for retrieving health-related information (<xref ref-type="bibr" rid="B21">Luo et al., 2022</xref>). These systems aim to offer more convenient access to medical knowledge, with key technologies involving knowledge storage and question parsing. However, although liver cancer has a high mortality rate in China, existing medical Q&#x00026;A systems, such as general models based on electronic health records (<xref ref-type="bibr" rid="B25">Yang X. et al., 2022</xref>) and open-domain medical reasoning models (<xref ref-type="bibr" rid="B19">Li&#x000E9;vin et al., 2024</xref>), are not designed for the clinical information of liver cancer and therefore cannot provide these patients with satisfactory Q&#x00026;A services.</p>
<p>Personalized medical Q&#x00026;A systems (<xref ref-type="bibr" rid="B23">Oduro-Afriyie and Jamil, 2023</xref>; <xref ref-type="bibr" rid="B6">Cui et al., 2017</xref>) have gradually become part of people&#x00027;s daily lives as they provide tailored responses based on the patient&#x00027;s specific conditions, thereby improving patient outcomes. Several techniques, including machine learning, natural language processing, and data mining, have been utilized to enhance system performance (<xref ref-type="bibr" rid="B3">Chen et al., 2022</xref>; <xref ref-type="bibr" rid="B20">Lu et al., 2022</xref>). However, these methods often require retraining to adapt to new data and usage environments, limiting their ability to cope with dynamically changing information.</p>
<p>Traditional Q&#x00026;A systems rely heavily on publicly available unstructured data, increasing the complexity of answer retrieval. The introduction of knowledge graphs provides a structured approach to querying large amounts of data efficiently, thereby improving answer retrieval speed in Q&#x00026;A systems (<xref ref-type="bibr" rid="B16">Li et al., 2020</xref>). Knowledge graph-based Q&#x00026;A systems have gained significant attention for their ability to facilitate data access (<xref ref-type="bibr" rid="B15">Lan et al., 2021</xref>). Although personalized knowledge graphs have been validated for their value in chronic diseases (<xref ref-type="bibr" rid="B11">Gentile et al., 2019</xref>), thyroid diseases (<xref ref-type="bibr" rid="B2">Chai, 2020</xref>), and herbal medicine information (<xref ref-type="bibr" rid="B26">Yang Y. et al., 2022</xref>), there remains a significant gap in the field of liver cancer. In addition, existing medical Q&#x00026;A systems primarily use publicly available data and seldom incorporate private information such as electronic medical records (<xref ref-type="bibr" rid="B23">Oduro-Afriyie and Jamil, 2023</xref>). This limits their ability to provide personalized recommendations. Integrating patients&#x00027; medical records into knowledge graphs while ensuring data security and preventing privacy breaches remains a significant challenge.</p>
<p>Knowledge graphs have been widely applied in various domains and have proven effective in medical Q&#x00026;A systems (<xref ref-type="bibr" rid="B17">Li et al., 2023</xref>, <xref ref-type="bibr" rid="B18">2024</xref>). However, most existing knowledge graphs are constructed using publicly available data, neglecting valuable patient-specific information such as medical history (<xref ref-type="bibr" rid="B9">Dutt et al., 2022</xref>; <xref ref-type="bibr" rid="B11">Gentile et al., 2019</xref>; <xref ref-type="bibr" rid="B13">Gyrard et al., 2018</xref>; <xref ref-type="bibr" rid="B24">Shirai et al., 2021</xref>). Moreover, responses generated from these systems tend to be mechanistic and lack fluency. To address these shortcomings, our proposed system integrates both public and private medical data to offer personalized healthcare Q&#x00026;A services. Furthermore, by leveraging natural language processing techniques and large language models (LLMs), we enhance the readability and naturalness of the generated responses.</p>
<p>With the widespread adoption of LLMs in various fields, they have demonstrated strong generalization abilities and have been applied to diverse research areas. Compared with traditional machine learning methods, LLMs offer better interpretability and increasingly advanced natural language processing capabilities. In recent years, LLMs such as GPT-3 (<xref ref-type="bibr" rid="B1">Brown et al., 2020</xref>), BERT (<xref ref-type="bibr" rid="B7">Devlin et al., 2019</xref>), and ChatGLM3.5 (<xref ref-type="bibr" rid="B8">Du et al., 2022</xref>) have achieved impressive results in healthcare applications, including medical Q&#x00026;A (<xref ref-type="bibr" rid="B5">Chowdhery et al., 2024</xref>; <xref ref-type="bibr" rid="B12">Guu et al., 2020</xref>; <xref ref-type="bibr" rid="B19">Li&#x000E9;vin et al., 2024</xref>; <xref ref-type="bibr" rid="B25">Yang X. et al., 2022</xref>). However, although current medical question-and-answer systems perform excellently in general disease Q&#x00026;A, due to the uniqueness of different diseases, the lack of an accurate understanding of individual patients&#x00027; conditions in general medical question-and-answer systems may lead to inaccurate or misleading suggestions (<xref ref-type="bibr" rid="B22">Maynez et al., 2020</xref>). Despite their advantages in extracting knowledge and providing insightful responses, LLMs lack awareness of individual patient conditions, which can lead to inaccurate or misleading recommendations (<xref ref-type="bibr" rid="B22">Maynez et al., 2020</xref>). Additionally, directly providing patient data to LLMs poses significant privacy risks. Our proposed system addresses these challenges by incorporating LLMs&#x00027; natural language processing capabilities while integrating electronic medical records in a privacy-preserving manner.</p>
<p>To overcome the limitations of existing methods, this paper proposes a Chinese Q&#x00026;A system for liver cancer patients that combines large language models (LLMs) with knowledge graphs. The system utilizes electronic health records and various disease-related data as knowledge sources while anonymizing personal identity information to prevent data leakage. The system uses LLMs to extract entity information from user queries and employs BERT models to analyze user intent, thereby enhancing its ability to understand user queries. As a result, it can more accurately retrieve relevant information from the knowledge graph. Finally, leveraging the advanced language processing capabilities of ChatGLM3.5, the system integrates the retrieved information to improve the coherence and fluency of responses. This system provides liver cancer patients with an efficient tool to consult about their condition and receive simple, reliable advice on treatment and lifestyle adjustments.</p></sec>
<sec sec-type="materials and methods" id="s2">
<title>2 Materials and methods</title>
<p>In this section, we introduce the construction of a liver cancer knowledge graph and the architecture of a Chinese liver cancer Q&#x00026;A system. The overall structure of the Chinese liver cancer Q&#x00026;A system is shown in <xref ref-type="fig" rid="F1">Figure 1</xref>. It is mainly driven by four functions: liver cancer knowledge graph, question parsing, query processing and natural language response generation.</p>
<fig position="float" id="F1">
<label>Figure 1</label>
<caption><p>Overall architecture of personalized Q&#x00026;A system based on the knowledge graph.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-08-1663891-g0001.tif">
<alt-text>Flowchart illustrating a question-answering system. A question is parsed to extract entities and intent, identifying entity ID 1576418_1 and intent as surgery. The system processes the query using data sources from xywy.com and PEMS, which are preprocessed into a knowledge graph. Natural language response generation is done by ChatGLM, leading to a final answer.</alt-text>
</graphic>
</fig>
<sec>
<title>2.1 Knowledge graph construction</title>
<p>This section describes the data sources for the Liver Cancer Knowledge Graph, and how to process the data for knowledge graph construction.</p>
<sec>
<title>2.1.1 Data sources</title>
<p>Data from two main sources is integrated: patient electronic medical records (PEMRs) provided by Zhujiang Hospital of Southern Medical University, and disease information extracted from xywy.com.</p>
<p>From 2010 to 2022, Zhujiang Hospital of Southern Medical University provided 144 PEMRs of HCC that were confirmed by surgery or puncture pathology. The PEMRs contain the patient&#x00027;s personal information, the patient&#x00027;s admission record (patient&#x00027;s personal information, complaints, doctor&#x00027;s examination results, etc.), the disease course record (illness, examination records, etc.), the surgery record (surgery time, blood transfusion, name of the surgery, etc.), and the discharge summary (the patient&#x00027;s overall situation from admission to discharge). The PEMRs are a complete and accurate record of the patient&#x00027;s illness, and care must be taken to safeguard the patient&#x00027;s privacy and prevent their private information from being released during processing.</p>
<p>The description of disease information on xywy.com involves many aspects, including basic information about the disease (name of the disease, causes of its onset, clinical manifestations, etc.), diagnosis and examination (diagnostic methods such as laboratory tests, imaging tests; examination items such as blood tests, urine tests, etc.), treatment and rehabilitation (treatment methods such as medication, surgical treatments, etc.), rehabilitation recommendations (dietary adjustments, lifestyle changes, etc.), and prevention and health care (disease prevention measures and health care knowledge).</p></sec>
<sec>
<title>2.1.2 Data preprocessing</title>
<p>Raw unstructured medical records and semi-structured website data are converted into structured form for knowledge graph construction using different approaches, respectively.</p>
<list list-type="simple">
<list-item><p>1. PEMRs preprocessing</p></list-item></list>
<p>Patients&#x00027; electronic medical records are stored as unstructured text; to store them in the form of a knowledge graph, they need to be processed into a specific structured form. In order to improve the processing speed and quality, we construct a named entity recognition dataset based on the PEMRs for model training, so that the PEMRs can be processed in batches.</p>
<p>The entity annotation tool YEDDA is utilized to annotate eight types of entities in the PEMRs. Among them, ID is used to uniquely label the patient; body is the patient&#x00027;s self-reported physical condition; bodyexam is the patient&#x00027;s physical examination result; symptom is the patient&#x00027;s symptom; disease is the patient&#x00027;s acquired disease; result is the test result of the examination item; surgery is the patient&#x00027;s surgical record; and cure is the patient&#x00027;s disease treatment. Using this method, a total of 44 PEMRs of patients were processed and labeled in BIO format. They were divided into a training set, a validation set and a test set according to the ratio of 8:1:1, which serve as the basis for training and evaluation of the named entity recognition model. Thus, batch processing of PEMRs was realized.</p>
<p>We use RoBERTa&#x0002B;BiLSTM&#x0002B;CRF to construct the named entity recognition model. The model&#x00027;s hyperparameters are set as follows: the batch size is 40, the number of epochs is 20, the learning rate is 10<sup>&#x02212;5</sup>, the LSTM hidden layer size is 128, and the maximum sentence length is 50. RoBERTa is a pre-trained model similar to BERT. It uses a dynamic masking mechanism and, especially in the Chinese context, employs word segmentation processing, which helps the model capture more word-level information. In the BiLSTM-CRF model, BiLSTM is used to extract high-level feature representations of the text, while the CRF layer is responsible for selecting the most appropriate labels for the whole sequence based on these features. This combination takes full advantage of BiLSTM&#x00027;s strength in capturing long-distance dependencies and CRF&#x00027;s power in sequence modeling, making the BiLSTM-CRF model excel in a variety of NER tasks.</p>
<p>The named entity recognition model enables rapid completion of the processing of the remaining patients&#x00027; electronic medical records. After extracting the data information, we perform ID anonymization through a hash function (SHA-256) to map the original patient ID to an irreversible string, ensuring that the anonymized ID cannot be reversed to the original identifier while maintaining uniqueness for distinguishing different patients. The processed records, with anonymized IDs, are stored in JSON format for subsequent knowledge graph construction.</p>
<p>In handling patients&#x00027; electronic medical records, we strictly adhere to the ethical guidelines for medical research. All patients included in the study signed an informed consent form upon admission, agreeing to the use of their clinical data for relevant clinical research. For data involving patients&#x00027; personal information, all such data undergoes anonymization before being used in knowledge graph construction; electronic medical record materials are used exclusively for this study and will never be utilized for any purposes unrelated to the research.</p>
<list list-type="simple">
<list-item><p>2. Public website database preprocessing</p></list-item>
</list>
<p>Disease data on <ext-link ext-link-type="uri" xlink:href="http://xywy.com">http://xywy.com</ext-link> is semi-structured text, and the medical data is collected using crawler technology. Through in-depth analysis of the HTML text information in the crawled webpages, entity information such as medicine, symptom and disease is extracted and processed into structured data. Medical text involves a large number of proper nouns, which requires further segmentation of the text content. Taking the content corresponding to the complications of &#x0201C;cold&#x0201D; as an example, the corresponding text before word segmentation is &#x0201C;rhinitis, otitis media, tonsillitis&#x0201D;. This text must be segmented again to extract each single noun, for which we use the bidirectional maximum matching word segmentation algorithm, a method that combines forward maximum matching and backward maximum matching to segment text: it first performs forward maximum matching from the start of the text, then backward maximum matching from the end, and determines the optimal segmentation result by comparing and integrating the two results based on predefined rules (<xref ref-type="bibr" rid="B10">Gai et al., 2014</xref>). The processed data is stored as structured data in JSON format for subsequent knowledge graph construction, and contains 7 categories of data: examination, department, disease, drug, food, manufacturer and symptom.</p>
<list list-type="simple">
<list-item><p>3. Data specification</p></list-item></list>
<p>In addition, there may be ambiguities such as irregular or inaccurate naming in the data from electronic medical records and websites. To eliminate such ambiguities, this study will also use the Common Clinical Medical Terms (2023 Edition). It is a professional medical reference book designed to provide accurate and systematic medical terminology references for medical professionals, students, and workers in related medical and health fields. This book contains a large number of medical terms, covering various branches of clinical medicine, such as internal medicine, surgery, obstetrics and gynecology, pediatrics, emergency medicine, preventive medicine, etc.</p></sec>
<sec>
<title>2.1.3 Knowledge graph</title>
<p>Based on the processed data in JSON format, we extract triple information and import it into the graph database Neo4j. To ensure the comprehensiveness and accuracy of the knowledge graph, we perform knowledge fusion to address issues such as non-standard entity naming and mismatches between electronic medical records (EMRs) and website data. Specifically, we use the TF-IDF algorithm to calculate the cosine similarity between entities in EMRs and those from the website, thereby achieving entity alignment. For entities that cannot be directly matched (e.g., &#x0201C;gallbladder stones with chronic cholecystitis&#x0201D; in EMRs), we conduct word segmentation on them according to Common Clinical Medical Terms (2023 Edition), splitting them into standard entities (e.g., &#x0201C;gallbladder stones&#x0201D; and &#x0201C;chronic cholecystitis&#x0201D;) for normalization. After alignment, we remove duplicate nodes and establish new relationships to integrate entities from these two sources.</p>
<p>The final knowledge graph encompasses 13 entity types and 17 inter-entity relationships (<xref ref-type="table" rid="T1">Table 1</xref>). Relationship types include:</p>
<table-wrap position="float" id="T1">
<label>Table 1</label>
<caption><p>Knowledge graph data information.</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left"><bold>Entity type</bold></th>
<th valign="top" align="center"><bold>Number</bold></th>
<th valign="top" align="left"><bold>Relationship type</bold></th>
<th valign="top" align="center"><bold>Number</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">ID</td>
<td valign="top" align="center">144</td>
<td valign="top" align="left">Recommand_eat</td>
<td valign="top" align="center">40,221</td>
</tr> <tr>
<td valign="top" align="left">Disease</td>
<td valign="top" align="center">9052</td>
<td valign="top" align="left">No_eat</td>
<td valign="top" align="center">22,247</td>
</tr> <tr>
<td valign="top" align="left">Check</td>
<td valign="top" align="center">3353</td>
<td valign="top" align="left">Do_eat</td>
<td valign="top" align="center">22,238</td>
</tr> <tr>
<td valign="top" align="left">Body</td>
<td valign="top" align="center">147</td>
<td valign="top" align="left">Belongs_to</td>
<td valign="top" align="center">8,844</td>
</tr> <tr>
<td valign="top" align="left">Bodyexam</td>
<td valign="top" align="center">114</td>
<td valign="top" align="left">Common_drug</td>
<td valign="top" align="center">14,649</td>
</tr> <tr>
<td valign="top" align="left">Cure</td>
<td valign="top" align="center">177</td>
<td valign="top" align="left">Drugs_of</td>
<td valign="top" align="center">17,315</td>
</tr> <tr>
<td valign="top" align="left">Department</td>
<td valign="top" align="center">54</td>
<td valign="top" align="left">Recommand_drug</td>
<td valign="top" align="center">59,467</td>
</tr> <tr>
<td valign="top" align="left">Drug</td>
<td valign="top" align="center">3828</td>
<td valign="top" align="left">Has_symptom</td>
<td valign="top" align="center">5,998</td>
</tr> <tr>
<td valign="top" align="left">Food</td>
<td valign="top" align="center">4870</td>
<td valign="top" align="left">Need_check</td>
<td valign="top" align="center">39,422</td>
</tr> <tr>
<td valign="top" align="left">Producer</td>
<td valign="top" align="center">17201</td>
<td valign="top" align="left">Acompany_with</td>
<td valign="top" align="center">12,029</td>
</tr> <tr>
<td valign="top" align="left">Result</td>
<td valign="top" align="center">324</td>
<td valign="top" align="left">Rel_body</td>
<td valign="top" align="center">240</td>
</tr> <tr>
<td valign="top" align="left">Surgery</td>
<td valign="top" align="center">341</td>
<td valign="top" align="left">Rel_symptom</td>
<td valign="top" align="center">585</td>
</tr> <tr>
<td valign="top" align="left">Symptom</td>
<td valign="top" align="center">6464</td>
<td valign="top" align="left">Rel_bodyexam</td>
<td valign="top" align="center">1,536</td>
</tr>
 <tr>
<td/>
<td/>
<td valign="top" align="left">Rel_disease</td>
<td valign="top" align="center">694</td>
</tr>
 <tr>
<td/>
<td/>
<td valign="top" align="left">Rel_result</td>
<td valign="top" align="center">377</td>
</tr>
 <tr>
<td/>
<td/>
<td valign="top" align="left">Rel_surgery</td>
<td valign="top" align="center">752</td>
</tr>
 <tr>
<td/>
<td/>
<td valign="top" align="left">Rel_cure</td>
<td valign="top" align="center">288</td>
</tr>
<tr>
<td valign="top" align="left">Total number of entities</td>
<td valign="top" align="center">46,069</td>
<td valign="top" align="left">Total number of relationships</td>
<td valign="top" align="center">246,904</td>
</tr></tbody>
</table>
</table-wrap>
<p>(1) Disease-food: recommand_eat (recommended foods to eat), no_eat (foods to avoid), do_eat (suitable foods);</p>
<p>(2) Disease-related: belongs_to (affiliated department), common_drug (commonly used drugs), drugs_of (related drugs), recommand_drug (recommended drugs), has_symptom (symptom manifestations), need_check (required examinations), acompany_with (accompanying symptoms);</p>
<p>(3) Patient records: rel_body (self-reported condition), rel_symptom (symptom-related), rel_bodyexam (physical examination results), rel_disease (related diseases), rel_result (test results), rel_surgery (surgery-related), rel_cure (treatment methods).</p></sec></sec>
<sec>
<title>2.2 Knowledge graph and LLM based Q&#x00026;A system</title>
<sec>
<title>2.2.1 Question parsing</title>
<list list-type="simple">
<list-item><p>1. Extract Entity</p></list-item></list>
<p>Entity extraction is the key technology for implementing a personalized Q&#x00026;A system. Determining the central entity in the question sentence is the core step to realize the query, and the answer to the question is directly or indirectly related to the central entity in the question sentence. Therefore, after determining the central entity contained in the question sentence, the answer to the question can be queried according to the connection relationship between the entities.</p>
<p>This paper designs a named entity recognition module based on ChatGLM, which extracts entities from questions through an explicit function call mechanism: we predefine an external entity extraction function and pass it to ChatGLM. Among them, the tools parameter specifically specifies the types of entities to be extracted (as shown in <xref ref-type="table" rid="T2">Table 2</xref>) and their constraints through description which defines the detailed characteristics of entities and type which defines entity category labels. When processing a question, ChatGLM will call this predefined function according to instructions and output the extracted entities in a structured format that strictly conforms to the specified entity types and constraints. Compared with the extraction method directly based on prompt words, it can enforce standardized output. In addition, the explicit constraint definitions in the function will guide the model to focus on the description and type of entities, reducing the misrecognition of irrelevant entities.</p>
<table-wrap position="float" id="T2">
<label>Table 2</label>
<caption><p>Tools parameter settings.</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left"><bold>Name</bold></th>
<th valign="top" align="left"><bold>Description</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Get_people_answer</td>
<td valign="top" align="left">Answer a variety of medical-related questions based on Patient questions</td>
</tr> <tr>
<td valign="top" align="left">Id</td>
<td valign="top" align="left">Patient id number</td>
</tr> <tr>
<td valign="top" align="left">Body</td>
<td valign="top" align="left">Patient&#x00027;s physical condition or complaint</td>
</tr> <tr>
<td valign="top" align="left">Bodyexam</td>
<td valign="top" align="left">Patient checkup information</td>
</tr> <tr>
<td valign="top" align="left">Check</td>
<td valign="top" align="left">Patient checks or diseases requiring checks</td>
</tr> <tr>
<td valign="top" align="left">Cure</td>
<td valign="top" align="left">Patient cure or cure for a disease</td>
</tr> <tr>
<td valign="top" align="left">Disease</td>
<td valign="top" align="left">Name of the disease or disease that the patient has</td>
</tr> <tr>
<td valign="top" align="left">Result</td>
<td valign="top" align="left">Patient results</td>
</tr> <tr>
<td valign="top" align="left">Surgery</td>
<td valign="top" align="left">Patient surgery records</td>
</tr> <tr>
<td valign="top" align="left">Department</td>
<td valign="top" align="left">Department of medicine</td>
</tr> <tr>
<td valign="top" align="left">Drug</td>
<td valign="top" align="left">Commonly used drugs</td>
</tr> <tr>
<td valign="top" align="left">Food</td>
<td valign="top" align="left">Name of food</td>
</tr> <tr>
<td valign="top" align="left">Symptom</td>
<td valign="top" align="left">Symptoms associated with the patient or disease</td>
</tr>
<tr>
<td valign="top" align="left">Deny</td>
<td valign="top" align="left">Negative word</td>
</tr></tbody>
</table>
</table-wrap>
<p>Take the question &#x0201C;What disease does 1576418_1 have?&#x0201D; as an example, as shown in <xref ref-type="fig" rid="F2">Figure 2</xref>. After the user inputs the question, it is matched against the descriptions of the preset parameters in the tools passed to ChatGLM. &#x0201C;1576418_1&#x0201D; is matched with the parameter &#x0201C;id&#x0201D;, described as the patient id; &#x0201C;What disease&#x0201D; matches the parameter &#x0201C;disease&#x0201D;, described as the name of the patient&#x00027;s disease. Thus, the content of the question that matches a parameter description is extracted. Extracting entities in this way identifies not only entities with actual meanings in the question but also interrogative words that are associated with a parameter description.</p>
<list list-type="simple">
<list-item><p>2. Intent Identification</p></list-item></list>
<fig position="float" id="F2">
<label>Figure 2</label>
<caption><p>Named entity extraction.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-08-1663891-g0002.tif">
<alt-text>Flowchart depicting the process of using GLM tools. The input &#x0201C;1576418_1&#x05F97;&#x04E86;&#x04EC0;&#x04E48;&#x075C5;&#x0201D; is processed through GLM with a function called &#x0201C;get_people_answer&#x0201D;. The parameters include a type &#x0201C;object&#x0201D; and properties for extracting entities. The output is {&#x00022;disease&#x00022;:&#x00022;True&#x00022;, &#x00022;id&#x00022;:&#x00022;1576418_1&#x00022;}.</alt-text>
</graphic>
</fig>
<p>This system is a medical Q&#x00026;A system whose knowledge graph stores only knowledge about medical treatment. Therefore, to improve the understanding of the user&#x00027;s intent and to limit the scope of questions answered by ChatGLM, the system adds a BERT model that identifies the user&#x00027;s intention while filtering out questions that are not related to medical care.</p>
<p>The training process is shown in <xref ref-type="fig" rid="F3">Figure 3</xref>: the training data is input into BERT, and the output of BERT is then fed into a fully connected layer to obtain the intent classification result. When the number of labels in the dataset changes, training only needs to update the parameters of the fully connected layer. Typically, when BERT is used for intent recognition, the classification vector H output by BERT is passed to a simple softmax-based classifier to predict the probability of the category label L:</p>
<disp-formula id="E1"><label>(1)</label><mml:math id="M1"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>P</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>L</mml:mi><mml:mo>|</mml:mo><mml:mi>H</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd><mml:mtd><mml:mo>=</mml:mo></mml:mtd><mml:mtd><mml:mi>s</mml:mi><mml:mi>o</mml:mi><mml:mi>f</mml:mi><mml:mi>t</mml:mi><mml:mi>m</mml:mi><mml:mi>a</mml:mi><mml:mi>x</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>W</mml:mi><mml:mi>H</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<fig position="float" id="F3">
<label>Figure 3</label>
<caption><p>Intent identification module.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-08-1663891-g0003.tif">
<alt-text>Flowchart showing a language processing model for disease identification. A question is input at the bottom, passing through an embedding layer, then a BERT layer with transformers, to a fully connected layer. The output at the top is labeled &#x0201C;id_disease&#x0201D; under the Intent section.</alt-text>
</graphic>
</fig>
<p>W is the parameter matrix for the classification task, which is ultimately fine-tuned by maximizing the logarithmic probability of labels for all parameters in BERT and W. Modify it to get the probability of each label using the fully connected layer:</p>
<disp-formula id="E2"><label>(2)</label><mml:math id="M2"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>P</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>L</mml:mi><mml:mo>|</mml:mo><mml:mi>H</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd><mml:mtd><mml:mo>=</mml:mo></mml:mtd><mml:mtd><mml:mi>F</mml:mi><mml:mi>C</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>H</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>The output dimension is the number of label categories; in this case it is a multi-class classification task in which the label with the highest probability is selected as the classification result. In the medical Q&#x00026;A domain, the intent is the patient&#x00027;s intention. Understanding the intention narrows the scope of possible answers and, together with the result of named entity extraction, determines whether a professional answer can be given.</p>
<p>To quickly construct diverse question types under different intents, we leverage ChatGLM3.5 to achieve rapid generation of sentence types. Specifically, we design prompts to guide the model in generating the required corpus. For example: &#x0201C;Assume you are an experienced deep learning trainer and now want to build a batch of datasets for intent recognition classification. The patient&#x00027;s questions should be as varied and colloquial as possible, with sentences of varying lengths and avoiding repetitive structures. Specific medical entities involved in the corpus templates should be replaced with placeholders; for instance, disease names are replaced with [disease]. List the questions directly in items, try to include as many as possible, with at least 100 questions. Each sentence must end with a question mark. Example: What is [disease]?&#x0201D; Using the above method, we can quickly obtain intent-based question templates (as shown in <xref ref-type="table" rid="T3">Table 3</xref>). These intent categories correspond to the 25 inter-entity relationship types in the knowledge graph, covering multiple dimensions such as food, drugs, symptom prevention, treatment cycle, susceptible groups, disease-related examination items, affiliated departments, and complications. They match the relationship types in the predefined template library of the query processing module, providing a rich and contextually relevant corpus foundation for the training of the intent recognition model.</p>
<table-wrap position="float" id="T3">
<label>Table 3</label>
<caption><p>Medical Intent Form.</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left"><bold>Intent name</bold></th>
<th valign="top" align="left"><bold>Example</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Food</td>
<td valign="top" align="left">I have [disease], can I drink [food]?</td>
</tr> <tr>
<td valign="top" align="left">Medicine</td>
<td valign="top" align="left">Please recommend some effective medicines for [disease]?</td>
</tr> <tr>
<td valign="top" align="left">Symptom Prevention</td>
<td valign="top" align="left">How to prevent the occurrence of [disease]?</td>
</tr> <tr>
<td valign="top" align="left">Treatment Cycle</td>
<td valign="top" align="left">If I have [disease], how long do I need treatment?</td>
</tr>
<tr>
<td valign="top" align="left">Affiliated Department</td>
<td valign="top" align="left">Which department does [disease] belong to?</td>
</tr></tbody>
</table>
</table-wrap>
</sec>
<sec>
<title>2.2.2 Query processing</title>
<p>After the question parsing module has analyzed the user intent of the patient&#x00027;s input question, a query statement must be constructed based on that intent in order to search the constructed knowledge graph for the answer. This system constructs the query statement by template matching and then searches for the answer in the knowledge graph. The specific process is as follows:</p>
<list list-type="order">
<list-item><p>Match the corresponding Cypher query statement template from a predefined template library. This template library contains 25 templates, categorized by inter-entity relationships and entity attributes. For example, for the &#x0201C;Disease-Food&#x0201D; entity pair and the &#x0201C;recommand_eat&#x0201D; relationship, the template is MATCH (m:disease)-[r:recommand_eat]-&gt;(n:food) where m.name = &#x0201C;Entity&#x0201D; return m.name, r.name, n.name;</p></list-item>
<list-item><p>Use the extracted entity names to populate the placeholders in the template, turning it into a complete query statement;</p></list-item>
<list-item><p>Retrieve the answer to the question through the Neo4j query interface;</p></list-item>
<list-item><p>Integrate the node information in the graph through answer templates and return it to ChatGLM for further inference and analysis. If no matching data is found in the knowledge graph, no data will be returned to ChatGLM.</p></list-item>
</list>
<p>The system writes different templates to map user interrogations into query statements for the graph database based on predefined intents and entities. The query statement construction and answer generation is performed once for each intent and entity individually, before finally returning all the generated answers to the ChatGLM.</p></sec>
<sec>
<title>2.2.3 Natural language response generation</title>
<p>The content returned in Section 2.2.2 is a set A from the knowledge graph entities and relations that represent the concepts and actions used to answer the user&#x00027;s question. We view A as a set of scrambled phrases that make up the answer text, and our goal is to compose the answer text by reconstructing them. This takes into account the connectivity between the words. For example, consider the question text &#x0201C;What did Ming do after he woke up?&#x0201D; and set A &#x0201C;eat breakfast, wash up.&#x0201D; Based on the results of the query, human beings rearrange them and add missing associative words, subjects, etc. such as &#x0201C;he, first, after,&#x0201D; so as to form a complete statement &#x0201C;he washed up first and then ate breakfast.&#x0201D; In line with this approach, ChatGLM is guided to operate through prompt settings, enabling it to match the content in set A with appropriate natural language expressions based on the semantics of patients&#x00027; questions. Meanwhile, since ChatGLM cannot obtain knowledge graph information beyond set A, if it is allowed to call its own knowledge base to search for answers, it may generate complex and diverse results that deviate from the limitations of the knowledge graph. Therefore, specific constraints are added to the prompts to prohibit the model from using any knowledge outside set A, so as to ensure that the final answer is generated entirely based on the information in set A.</p>
<p>Under this mechanism, ChatGLM is restricted to the role of a natural language generator. It only focuses on the information returned by the knowledge graph and reorganizes scattered phrases into complete expressions that conform to the habits of daily language. When the knowledge graph returns no information (that is, set A is empty), ChatGLM will directly prompt the user with &#x0201C;No relevant information found&#x0201D; to ensure the authenticity and reliability of the answer.</p></sec></sec></sec>
<sec id="s3">
<title>3 Results and discussion</title>
<p>This section assesses the validity of the proposed system in two parts: (1) performance evaluation; (2) different model answering effects.</p>
<sec>
<title>3.1 Performance evaluation</title>
<p>To validate the effectiveness of the question parsing module, i.e., to recognize the intention expressed by the patient in the input discourse and the entity information in it, examples of different types of intentions are needed, each of which contains a different type of questioning. In order to quickly construct the types of questioning under different intentions, we let ChatGLM3.5 construct a batch of datasets for performing intention recognition classification by setting specific prompts.</p>
<p>We compared our method with BERT and gemini-1.0-pro&#x0002B;BERT (GEBERT). The same parameter metrics are used in different methods to evaluate their effectiveness in intent recognition. From the results in <xref ref-type="table" rid="T4">Table 4</xref>, the method proposed in this paper is improved in all the metrics, indicating that it can understand the user&#x00027;s intention more effectively. This is because the method proposed in this paper combines the ability to understand the semantic information of BERT and the entity extraction ability of ChatGLM, which enables it to understand the patient&#x00027;s intention more accurately.</p>
<table-wrap position="float" id="T4">
<label>Table 4</label>
<caption><p>Results of comparative experiments.</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th/>
<th valign="top" align="left"><bold>BERT</bold></th>
<th valign="top" align="left"><bold>GEBERT</bold></th>
<th valign="top" align="left"><bold>Ours</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Precision</td>
<td valign="top" align="left">0.9096</td>
<td valign="top" align="left">0.8802</td>
<td valign="top" align="left"><bold>0.9234</bold></td>
</tr> <tr>
<td valign="top" align="left">Recall</td>
<td valign="top" align="left">0.9056</td>
<td valign="top" align="left">0.8542</td>
<td valign="top" align="left"><bold>0.9158</bold></td>
</tr>
<tr>
<td valign="top" align="left">F1</td>
<td valign="top" align="left">0.9048</td>
<td valign="top" align="left">0.8553</td>
<td valign="top" align="left"><bold>0.9196</bold></td>
</tr></tbody>
</table>
<table-wrap-foot>
<p>Bold values indicate the best results.</p>
</table-wrap-foot>
</table-wrap>
<p>To objectively verify the question-answering performance of the system, we had the system answer single-choice questions related to liver cancer expertise to conduct an objective evaluation of its performance. We compared our proposed method with ChatGLM and gemini-1.0-pro&#x0002B;BERT (GEBERT). In this experiment, 100 single-choice questions with different difficulty levels were designed, and the accuracy of the question settings was verified by professional doctors. These questions were classified into 34 single-hop questions (SQ), 33 double-hop questions (DQ), and 33 multi-hop questions (MQ).</p>
<p>The classification criteria are as follows: Single-hop questions (SQ) involve direct retrieval of a single entity or relationship (e.g., &#x0201C;Which of the following is a common symptom of liver cancer?&#x0201D;); Double-hop questions (DQ) require the integration of multiple related entities or one reasoning jump (e.g., &#x0201C;Which examinations are typically required for diagnosing early liver cancer?&#x0201D;); Multi-hop questions (MQ) involve multi-step reasoning, complex relationship chains, or high-level knowledge in the domain (e.g., &#x0201C;What is the mechanism by which drug X inhibits the proliferation of liver cancer cells?&#x0201D;).</p>
<p>Different models were allowed to answer these questions to calculate scores to evaluate the objective performance of the system. The results of the experiment are shown in <xref ref-type="table" rid="T5">Table 5</xref>, where the numbers in parentheses indicate the number of questions answered correctly and the accuracy rate is the number of questions answered correctly divided by the total number of questions.</p>
<table-wrap position="float" id="T5">
<label>Table 5</label>
<caption><p>Scoring of multiple choice questions.</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th/>
<th valign="top" align="left" colspan="4"><bold>Accuracy</bold></th>
</tr>
</thead>
<tbody>
<tr style="background-color:#727779;color:#ffffff">
<td/>
<td valign="top" align="left"><bold>SQ</bold></td>
<td valign="top" align="left"><bold>DQ</bold></td>
<td valign="top" align="left"><bold>MQ</bold></td>
<td valign="top" align="left"><bold>ALL</bold></td>
</tr>
 <tr>
<td valign="top" align="left">ChatGLM</td>
<td valign="top" align="left">0.76</td>
<td valign="top" align="left">0.64</td>
<td valign="top" align="left">0.55</td>
<td valign="top" align="left">0.65</td>
</tr>
 <tr>
<td/>
<td valign="top" align="left">(26)</td>
<td valign="top" align="left">(21)</td>
<td valign="top" align="left">(18)</td>
<td valign="top" align="left">(65)</td>
</tr> <tr>
<td valign="top" align="left">GEBERT</td>
<td valign="top" align="left">0.79</td>
<td valign="top" align="left">0.76</td>
<td valign="top" align="left">0.61</td>
<td valign="top" align="left">0.72</td>
</tr>
 <tr>
<td/>
<td valign="top" align="left">(27)</td>
<td valign="top" align="left">(25)</td>
<td valign="top" align="left">(20)</td>
<td valign="top" align="left">(72)</td>
</tr> <tr>
<td valign="top" align="left">Ours</td>
<td valign="top" align="left">0.85</td>
<td valign="top" align="left">0.73</td>
<td valign="top" align="left">0.64</td>
<td valign="top" align="left">0.74</td>
</tr>
 <tr>
<td/>
<td valign="top" align="left">(29)</td>
<td valign="top" align="left">(24)</td>
<td valign="top" align="left">(21)</td>
<td valign="top" align="left">(74)</td>
</tr></tbody>
</table>
</table-wrap>
<p>As shown in <xref ref-type="table" rid="T5">Table 5</xref>, the response accuracy of the models gradually decreases as the difficulty of the questions increases. The experimental results indicate that ChatGLM performs relatively weakly overall, with an accuracy of 0.55 on multi-hop questions (MQ), showing poor performance in handling more difficult problems. GEBERT performs slightly better than ChatGLM and has relatively good stability. Our method achieves the highest accuracy on single-hop (SQ) and multi-hop (MQ) questions. Although its accuracy on double-hop questions (DQ) is slightly lower than that of GEBERT, the proposed method still maintains a high accuracy rate, with an overall accuracy of 0.74, which is better than both ChatGLM and GEBERT. In summary, our method outperforms the other two models in terms of overall answering accuracy, demonstrating its effectiveness and advantages in question-answering tasks.</p>
<p>In order to validate the functionality of different modules of the system, ablation experiments are conducted here to validate the functionality of two modules, namely, question parsing and natural language generation. For the question parsing module, its role is to categorize the questions and extract the relevant information from the database, which is required to analyze the performance of the module using the problem dataset. For the natural language generation module, the essence of which is to generate more complete questions and answers from the database, the merits of the answer text cannot be judged by metrics, which are analyzed separately through Section 3.2. For the query processing module, the main function is to extract knowledge from the database and transfer it to the natural language generation module, testing only the effect of not including the database.</p>
<p>As shown in <xref ref-type="table" rid="T6">Table 6</xref>, after removing the question parsing module, although the system can still analyze questions based on prompt statements, its precision decreases by 3.24% and the F1 score decreases by 2.32%. Its performance is significantly lower than that of the system with the module retained, which verifies the necessity and effectiveness of the question parsing module. Removing the database has no impact on the answering of single-hop questions; however, its impact on double-hop questions and multi-hop questions is relatively obvious. The accuracy of double-hop questions decreases by 8.22%, and that of multi-hop questions decreases by 18.75%. This indicates that injecting relevant knowledge through the database can assist large language models in improving the answering accuracy of complex reasoning tasks, thereby verifying the rationality of adding the database.</p>
<table-wrap position="float" id="T6">
<label>Table 6</label>
<caption><p>Results of ablation experiment.</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th/>
<th valign="top" align="left" colspan="3"><bold>Accuracy</bold></th>
</tr>
</thead>
<tbody>
<tr style="background-color:#727779;color:#ffffff">
<td/>
<td valign="top" align="left"><bold>Precision</bold></td>
<td valign="top" align="left"><bold>Recall</bold></td>
<td valign="top" align="left"><bold>F1</bold></td>
</tr> <tr>
<td valign="top" align="left">Ours</td>
<td valign="top" align="left">0.9234</td>
<td valign="top" align="left">0.9158</td>
<td valign="top" align="left">0.9196</td>
</tr> <tr>
<td valign="top" align="left">Ours w/o question parsing</td>
<td valign="top" align="left">0.8935</td>
<td valign="top" align="left">0.9013</td>
<td valign="top" align="left">0.8983</td>
</tr> <tr>
<td/>
<td valign="top" align="left">SQ</td>
<td valign="top" align="left">DQ</td>
<td valign="top" align="left">MQ</td>
</tr> <tr>
<td valign="top" align="left">Ours</td>
<td valign="top" align="left">0.85</td>
<td valign="top" align="left">0.73</td>
<td valign="top" align="left">0.64</td>
</tr> <tr>
<td valign="top" align="left">Ours w/o database</td>
<td valign="top" align="left">0.85</td>
<td valign="top" align="left">0.67</td>
<td valign="top" align="left">0.52</td>
</tr>
 <tr>
<td/>
<td valign="top" align="left">(29)</td>
<td valign="top" align="left">(24)</td>
<td valign="top" align="left">(21)</td>
</tr></tbody>
</table>
</table-wrap>
</sec>
<sec>
<title>3.2 Different model answering effects</title>
<p>In this paper, some natural question sentences are constructed to test the answering effect of the system. After a question is input, we check whether the system returns the expected answer, which also further tests the effect of the question parsing module. Some of the test results are shown in <xref ref-type="table" rid="T7">Table 7</xref>.</p>
<table-wrap position="float" id="T7">
<label>Table 7</label>
<caption><p>Personalized Q&#x00026;A system effect test.</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left">(Q1) User:</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">1576418_1 &#x05F97;&#x04E86;&#x04EC0;&#x04E48;&#x075C5;&#x0FF1F;</td>
</tr>
<tr>
<td valign="top" align="left">(Diseases for 1576418_1?)</td>
</tr> <tr>
<td valign="top" align="left">(A1)</td>
</tr>
<tr>
<td valign="top" align="left">BERT: 1576418_1&#x075C5;&#x04EBA;&#x07684;&#x075BE;&#x075C5;&#x06709;:&#x0809D;&#x056CA;&#x080BF;&#x0FF1B;&#x05DE6;&#x0809D;&#x0539F;&#x053D1;&#x06027;&#x0809D;&#x0764C;&#x0FF1B;&#x053F3;&#x04FA7;&#x080F8;&#x08154;&#x079EF;&#x06DB2;&#x0FF1B;&#x04E59;&#x0809D;&#x075C5;&#x06BD2;&#x0643A;&#x05E26;&#x08005;&#x0FF1B;&#x0809D;&#x0810F;&#x05360;&#x04F4D;&#x0FF1B;&#x0809D;&#x0708E;&#x0540E;&#x0809D;&#x0786C;&#x05316;</td>
</tr>
<tr>
<td valign="top" align="left">(1576418_1 Patient&#x00027;s diseases include: liver cyst; primary hepatocellular carcinoma of the left liver; pleural effusion on the right side; hepatitis B virus carrier; liver occupancy; cirrhosis of the liver after hepatitis)</td>
</tr>
<tr>
<td valign="top" align="left">GEBERT: &#x0809D;&#x056CA;&#x080BF;&#x0FF1B;&#x05DE6;&#x0809D;&#x0539F;&#x053D1;&#x06027;&#x0809D;&#x0764C;&#x0FF1B;&#x053F3;&#x04FA7;&#x080F8;&#x08154;&#x079EF;&#x06DB2;&#x0FF1B;&#x04E59;&#x0809D;&#x075C5;&#x06BD2;&#x0643A;&#x05E26;&#x08005;&#x0FF1B;&#x0809D;&#x0810F;&#x05360;&#x04F4D;&#x0FF1B;&#x0809D;&#x0708E;&#x0540E;&#x0809D;&#x0786C;&#x05316;</td>
</tr>
<tr>
<td valign="top" align="left">(liver cyst; primary hepatocellular carcinoma of the left liver; pleural effusion on the right side; hepatitis B virus carrier; liver occupancy; cirrhosis of the liver after hepatitis)</td>
</tr>
<tr>
<td valign="top" align="left">Ours: &#x075C5;&#x04EBA;1576418_1&#x07684;&#x08BCA;&#x065AD;&#x05305;&#x062EC;&#x04EE5;&#x04E0B;&#x075BE;&#x075C5;:&#x0809D;&#x056CA;&#x080BF;,&#x0809D;&#x0708E;&#x0540E;&#x0809D;&#x0786C;&#x05316;,&#x053F3;&#x04FA7;&#x080F8;&#x08154;&#x079EF;&#x06DB2;,&#x05DE6;&#x0809D;&#x0539F;&#x053D1;&#x06027;&#x0809D;&#x0764C;,&#x04E59;&#x0809D;&#x075C5;&#x06BD2;&#x0643A;&#x05E26;&#x08005;,&#x0809D;&#x0810F;&#x05360;&#x04F4D;&#x03002;&#x08FD9;&#x04E9B;&#x075BE;&#x075C5;&#x09700;&#x08981;&#x07EFC;&#x05408;&#x08BC4;&#x04F30;&#x0548C;&#x06CBB;&#x07597;&#x03002;</td>
</tr>
<tr>
<td valign="top" align="left">(Patient 1576418_1&#x00027;s diagnoses include the following diseases: hepatic cyst, post hepatitis cirrhosis, right pleural effusion, primary hepatocellular carcinoma of the left liver, hepatitis B carrier, and liver occupancy. These disorders require comprehensive evaluation and treatment.)</td>
</tr> <tr>
<td valign="top" align="left">(Q2) User:</td>
</tr>
<tr>
<td valign="top" align="left">1576418_1&#x07684;&#x0624B;&#x0672F;&#x08BB0;&#x05F55;?</td>
</tr>
<tr>
<td valign="top" align="left">(Surgery record for 1576418_1?)</td>
</tr> <tr>
<td valign="top" align="left">(A2)</td>
</tr>
<tr>
<td valign="top" align="left">BERT: 1576418_1&#x075C5;&#x04EBA;&#x07684;&#x0624B;&#x0672F;&#x08BB0;&#x05F55;&#x06709;:&#x08179;&#x08154;&#x0955C;&#x04E0B;&#x063A2;&#x067E5;&#x04E2D;&#x08F6C;&#x05F00;&#x08179;&#x05DE6;&#x0809D;&#x05916;&#x053F6;&#x0809D;&#x0764C;&#x05207;&#x09664;&#x0672F;&#x0FF1B;&#x08F93;&#x08840;&#x091CF;:0ml&#x0FF1B;15:30&#x0FF1B;12:17</td>
</tr>
<tr>
<td valign="top" align="left">(Patient 1576418_1&#x00027;s surgical records include: laparoscopic exploration intermediate to open left extrahepatic lobe hepatectomy for hepatocellular carcinoma; blood transfusion: 0 ml; 15:30; 12:17)</td>
</tr>
<tr>
<td valign="top" align="left">GEBERT: &#x0624B;&#x0672F;&#x065F6;&#x095F4;:12:17-15:30,&#x0624B;&#x0672F;&#x0540D;&#x079F0;:&#x08179;&#x08154;&#x0955C;&#x04E0B;&#x063A2;&#x067E5;&#x04E2D;&#x08F6C;&#x05F00;&#x08179;&#x05DE6;&#x0809D;&#x05916;&#x053F6;&#x0809D;&#x0764C;&#x05207;&#x09664;&#x0672F;,&#x08F93;&#x08840;&#x091CF;:0ml</td>
</tr>
<tr>
<td valign="top" align="left">(Surgery time: 12:17-15:30, surgery name: Laparoscopic exploratory intermediate open left hepatic exophytic hepatectomy for hepatocellular carcinoma, blood transfusion: 0 ml)</td>
</tr>
<tr>
<td valign="top" align="left">Ours: &#x05C0A;&#x0656C;&#x07684;&#x060A3;&#x08005;,&#x05173;&#x04E8E;&#x060A8;&#x08BE2;&#x095EE;&#x07684;&#x0533B;&#x07597;&#x08BB0;&#x05F55;,&#x04EE5;&#x04E0B;&#x0662F; 1576418_1 &#x075C5;&#x04EBA;&#x07684;&#x0624B;&#x0672F;&#x08BB0;&#x05F55;&#x06458;&#x08981;:&#x0624B;&#x0672F;&#x07C7B;&#x0578B;:&#x08179;&#x08154;&#x0955C;&#x04E0B;&#x063A2;&#x067E5;&#x04E2D;&#x08F6C;&#x05F00;&#x08179;&#x05DE6;&#x0809D;&#x05916;&#x053F6;&#x0809D;&#x0764C;&#x05207;&#x09664;&#x0672F;,&#x08F93;&#x08840;&#x091CF;:0 &#x06BEB;&#x05347;,&#x0624B;&#x0672F;&#x05F00;&#x059CB;&#x065F6;&#x095F4;:12:17,&#x0624B;&#x0672F;&#x07ED3;&#x0675F;&#x065F6;&#x095F4;:15:30&#x03002;&#x05982;&#x0679C;&#x060A8;&#x08FD8;&#x06709;&#x04EFB;&#x04F55;&#x095EE;&#x09898;&#x06216;&#x09700;&#x08981;&#x08FDB;&#x04E00;&#x06B65;&#x07684;&#x0533B;&#x07597;&#x04FE1;&#x0606F;,&#x08BF7;&#x08054;&#x07CFB;&#x060A8;&#x07684;&#x0533B;&#x07597;&#x0670D;&#x052A1;&#x063D0;&#x04F9B;&#x08005;&#x03002;</td>
</tr>
<tr>
<td valign="top" align="left">(Dear Patient, regarding your enquiry about the medical record, the following is a summary of the surgical record for patient 1576418_1: Surgery type: laparoscopic exploration converted to open resection of hepatocellular carcinoma of the left lateral lobe of the liver, blood transfusion: 0 ml, start time of the surgery: 12:17, end time of the surgery: 15:30. If you have any questions or need further medical information, please contact your healthcare provider.)</td>
</tr> <tr>
<td valign="top" align="left">(Q3) User:</td>
</tr>
<tr>
<td valign="top" align="left">1576418_1&#x06240;&#x05F97;&#x075BE;&#x075C5;&#x06709;&#x04EC0;&#x04E48;&#x05E76;&#x053D1;&#x075C7;</td>
</tr>
<tr>
<td valign="top" align="left">(What are the complications of the disease 1576418_1 has?)</td>
</tr>
<tr>
<td valign="top" align="left">(A3)</td>
</tr>
<tr>
<td valign="top" align="left">BERT: 1576418_1&#x075C5;&#x04EBA;&#x07684;&#x075BE;&#x075C5;&#x06709;:&#x0809D;&#x056CA;&#x080BF;&#x0FF1B;&#x05DE6;&#x0809D;&#x0539F;&#x053D1;&#x06027;&#x0809D;&#x0764C;&#x0FF1B;&#x053F3;&#x04FA7;&#x080F8;&#x08154;&#x079EF;&#x06DB2;&#x0FF1B;&#x04E59;&#x0809D;&#x075C5;&#x06BD2;&#x0643A;&#x05E26;&#x08005;&#x0FF1B;&#x0809D;&#x0810F;&#x05360;&#x04F4D;&#x0FF1B;&#x0809D;&#x0708E;&#x0540E;&#x0809D;&#x0786C;&#x05316;</td>
</tr>
<tr>
<td valign="top" align="left">(1576418_1 Patient&#x00027;s diseases include: liver cyst; primary hepatocellular carcinoma of the left liver; pleural effusion on the right side; hepatitis B virus carrier; liver occupancy; cirrhosis of the liver after hepatitis)</td>
</tr>
<tr>
<td valign="top" align="left">GEBERT: &#x07ECF;&#x08FC7;&#x04FE1;&#x0606F;&#x067E5;&#x08BE2;,&#x060A8;&#x07684;&#x075BE;&#x075C5;&#x04FE1;&#x0606F;&#x05982;&#x04E0B;:&#x0809D;&#x056CA;&#x080BF;,&#x05DE6;&#x0809D;&#x0539F;&#x053D1;&#x06027;&#x0809D;&#x0764C;,&#x053F3;&#x04FA7;&#x080F8;&#x08154;&#x079EF;&#x06DB2;,&#x04E59;&#x0809D;&#x075C5;&#x06BD2;&#x0643A;&#x05E26;&#x08005;,&#x0809D;&#x0810F;&#x05360;&#x04F4D;,&#x0809D;&#x0708E;&#x0540E;&#x0809D;&#x0786C;&#x05316;&#x03002;&#x09700;&#x08981;&#x06CE8;&#x0610F;&#x07684;&#x0662F;,&#x06211;&#x067E5;&#x08BE2;&#x05230;&#x07684;&#x04FE1;&#x0606F;&#x04E2D;&#x06CA1;&#x06709;&#x063D0;&#x053CA;&#x060A8;&#x06240;&#x060A3;&#x075BE;&#x075C5;&#x07684;&#x05E76;&#x053D1;&#x075C7;&#x03002;</td>
</tr>
<tr>
<td valign="top" align="left">(As a result of the information query, your disease information is as follows: liver cyst, primary hepatocellular carcinoma of the left liver, pleural effusion on the right side, hepatitis B virus carrier, liver occupancy, and cirrhosis of the liver after hepatitis. It is important to note that the information I have queried does not mention the complications of your disease.)</td>
</tr>
<tr>
<td valign="top" align="left">Ours: &#x075C5;&#x060A3;1576418_1&#x06240;&#x060A3;&#x07684;&#x075BE;&#x075C5;&#x05305;&#x062EC;&#x0809D;&#x056CA;&#x080BF;&#x03001;&#x0809D;&#x0708E;&#x0540E;&#x0809D;&#x0786C;&#x05316;&#x03001;&#x053F3;&#x04FA7;&#x080F8;&#x08154;&#x079EF;&#x06DB2;&#x03001;&#x05DE6;&#x0809D;&#x0539F;&#x053D1;&#x06027;&#x0809D;&#x0764C;&#x03001;&#x04E59;&#x0809D;&#x075C5;&#x06BD2;&#x0643A;&#x05E26;&#x04EE5;&#x053CA;&#x0809D;&#x0810F;&#x05360;&#x04F4D;&#x03002;&#x08FD9;&#x04E9B;&#x075C5;&#x060C5;&#x053EF;&#x080FD;&#x05BFC;&#x081F4;&#x07684;&#x05E76;&#x053D1;&#x075C7;&#x05305;&#x062EC;&#x04F46;&#x04E0D;&#x09650;&#x04E8E;&#x04EE5;&#x04E0B;&#x051E0;&#x079CD;:&#x08179;&#x06C34;,&#x06D88;&#x05316;&#x09053;&#x051FA;&#x08840;,&#x0809D;&#x06027;&#x08111;&#x075C5;,&#x0809D;&#x080BE;&#x07EFC;&#x05408;&#x05F81;,&#x0809D;&#x080BA;&#x07EFC;&#x05408;&#x05F81;,&#x066F4;&#x05BB9;&#x06613;&#x0611F;&#x067D3;&#x05176;&#x04ED6;&#x075C5;&#x06BD2;&#x06216;&#x07EC6;&#x083CC;,&#x05BB9;&#x06613;&#x075B2;&#x052B3;&#x03001;&#x09EC4;&#x075B8;&#x0FF08;&#x076AE;&#x080A4;&#x03001;&#x0773C;&#x0775B;&#x03001;&#x05C3F;&#x06DB2;&#x053D8;&#x09EC4;&#x0FF09;&#x07B49;,&#x0813E;&#x0529F;&#x080FD;&#x04EA2;&#x08FDB;&#x03002;&#x08BF7;&#x06CE8;&#x0610F;,&#x04EE5;&#x04E0A;&#x04FE1;&#x0606F;&#x04EC5;&#x04F9B;&#x053C2;&#x08003;,&#x05177;&#x04F53;&#x07684;&#x06CBB;&#x07597;&#x065B9;&#x06848;&#x0548C;&#x05E76;&#x053D1;&#x075C7;&#x07684;&#x098CE;&#x09669;&#x09700;&#x08981;&#x07ED3;&#x05408;&#x060A3;&#x08005;&#x07684;&#x05177;&#x04F53;&#x060C5;&#x051B5;&#x0548C;&#x0533B;&#x0751F;&#x07684;&#x04E13;&#x04E1A;&#x05EFA;&#x08BAE;&#x03002;&#x05EFA;&#x08BAE;&#x075C5;&#x060A3;&#x053CA;&#x065F6;&#x05C31;&#x0533B;,&#x063A5;&#x053D7;&#x04E13;&#x04E1A;&#x07684;&#x08BCA;&#x065AD;&#x0548C;&#x06CBB;&#x07597;&#x03002;</td>
</tr>
<tr>
<td valign="top" align="left">(Diseases suffered by patient 1576418_1 include hepatic cysts, post hepatitis cirrhosis, right pleural effusion, primary hepatocellular carcinoma of the left liver, hepatitis B virus carriage, and liver occupancy. Complications that may result from these conditions include, but are not limited to, the following: ascites, gastrointestinal bleeding, hepatic encephalopathy, hepatorenal syndrome, hepatopulmonary syndrome, greater susceptibility to other viral or bacterial infections, easy fatigue, jaundice (yellowing of the skin, eyes, and urine), etc., and hypersplenism. Please note that the above information is for reference only; the specific treatment plan and the risk of complications need to be determined in light of the patient&#x00027;s specific situation and the doctor&#x00027;s professional advice. Patients are advised to seek prompt medical attention for professional diagnosis and treatment.)</td>
</tr></tbody>
</table>
</table-wrap>
<p>From the answering results of Q1 in <xref ref-type="table" rid="T7">Table 7</xref>, the answers of the three models do not differ greatly, and the reply information is relatively complete. From the results of Q2, the BERT model only mechanically replies to the patient with the information queried from the knowledge graph, while GEBERT and our model are able to further analyse the connections between the data, with our model&#x00027;s response being more in line with natural language conventions. Q3 involves two chained query relationships, &#x0201C;Patient-Disease&#x0201D; and &#x0201C;Disease-Complication,&#x0201D; which require that, after querying the disease information, the knowledge graph be called again to query the complications corresponding to the disease; however, the system is set up to complete the query only once. Therefore, the BERT model only replies with the disease information, and the GEBERT model replies on the basis of this information in a way that is more in line with the habits of natural language. Our model gives some examples of complications corresponding to the diseases, which can serve as a reference for the patient, and its answer is more complete.</p></sec></sec>
<sec sec-type="discussion" id="s4">
<title>4 Discussion</title>
<p>This study developed a Chinese question-answering system for liver cancer based on a knowledge graph and large language model (LLM). A knowledge graph was constructed by integrating data from hospital electronic medical records and information from professional medical websites. The system employs ChatGLM&#x00027;s function-calling capability combined with the BERT model to achieve question parsing and accurate querying, with ChatGLM ultimately generating natural language responses. Experimental results demonstrate that the system achieves a question parsing accuracy of 92.34% and an overall accuracy of 74% in liver cancer-related multiple-choice tests, outperforming comparative models. Case studies further indicate that the system&#x00027;s responses are more comprehensive and clinically relevant.</p>
<p>The data utilized in this study primarily originates from electronic medical records of liver cancer patients at Zhujiang Hospital of Southern Medical University and public information from <ext-link ext-link-type="uri" xlink:href="http://xywy.com">http://xywy.com</ext-link>. This dual-source approach ensures data consistency and reliability during the initial development phase. By integrating and analyzing large volumes of real clinical data and public medical information, the system is endowed with a robust foundation of medical knowledge and patient cases, enabling efficient identification and accurate answering of liver cancer-related questions.</p>
<p>Electronic medical records from different hospitals vary in structure, terminology standards, and data completeness, which may affect the accuracy of entity recognition when the system is applied across hospitals. In the future, we will further incorporate electronic medical records from multiple sources, continuously expand the data scale, and comprehensively enhance the model&#x00027;s adaptability to various types of data to ensure that the system can perform stably and excellently in more application scenarios.</p>
<p>Question parsing, as the core component of the system, relies on ChatGLM&#x00027;s function calls for entity extraction and BERT for intent recognition, both of which are tightly coupled with the entity-relationship schema of the knowledge graph. This deep integration offers two key advantages: first, multi-technical synergy enables information complementarity. ChatGLM&#x00027;s function calls ensure precise entity identification, while BERT facilitates rapid intent parsing, significantly enhancing the accuracy and efficiency of semantic understanding. Second, the structured nature of the knowledge graph allows parsing results to be directly mapped to a professional knowledge network, ensuring evidence-based answers and mitigating the &#x0201C;hallucination&#x0201D; issue common in LLM-generated responses.</p>
<p>However, this model also has two limitations. (1) The entity schema is restricted, only supporting predefined entity types, making it difficult to handle emerging concepts or complex composite entities, which affects query completeness. (2) Knowledge inference is insufficient; directly returning no results when knowledge is missing impairs usability. Future optimizations will include introducing a dynamic entity schema expansion module to adapt to new concepts, and designing a probabilistic inference mechanism to quantitatively annotate knowledge gaps, with clear prompts of uncertainty when limited inference is allowed.</p>
<p>System performance was validated using automated metrics and comparisons with baseline models. Additionally, the designed single-hop, multi-hop, and complex reasoning multiple-choice questions were reviewed by hepatobiliary surgeons to ensure clinical relevance (<xref ref-type="bibr" rid="B14">Heyi et al., 2023</xref>). However, clinical validation is incomplete, lacking comparisons with authoritative clinical practice guidelines and evaluations by clinicians. In terms of user research, only automated metrics were considered, with no empirical usability testing. Future efforts will refine the evaluation framework by quantitatively aligning with clinical guidelines and conducting user studies involving liver cancer patients and healthcare providers to collect feedback on the system&#x00027;s practicality, usability, and credibility.</p>
<p>The system framework exhibits cross-domain adaptability. To extend it to other fields, three core steps are required: first, knowledge graph reconstruction, replacing the liver cancer specific graph with a domain specific one; second, retraining the intent recognition model using domain specific questions to fine tune the BERT intent classifier for accurate domain specific intent parsing; third, adapting LLM function parameters by updating ChatGLM&#x00027;s function calling parameters and defining domain specific entities and their descriptions to ensure precise entity extraction. This modular design enables efficient cross-domain migration, offering intelligent question answering support for a broader range of clinical scenarios.</p></sec>
<sec sec-type="conclusions" id="s5">
<title>5 Conclusions</title>
<p>In this paper, a liver cancer Q&#x00026;A system is designed to make full use of medical open resources and patient electronic medical records to provide data support for the Q&#x00026;A system. In order to fully analyse the patient&#x00027;s intention to provide more effective answers, the deep combination of medical knowledge graph with LLMs is explored in the question parsing section by using LLMs to extract the entities in the question and BERT to identify the user&#x00027;s intention. The experimental results show that the liver cancer Q&#x00026;A system with the introduction of knowledge graphs and LLMs is able to answer patients&#x00027; questions more comprehensively and effectively.</p>
<p>The liver cancer Q&#x00026;A system combining LLMs and a knowledge graph is an exploratory project, and many parts still deserve improvement. The system has so far been implemented only for liver cancer, and its application to other diseases and fields still needs to be further explored.</p></sec>
</body>
<back>
<sec sec-type="data-availability" id="s6">
<title>Data availability statement</title>
<p>The raw data supporting the conclusions of this article will be made available by the authors, without undue reservation.</p>
</sec>
<sec sec-type="ethics-statement" id="s7">
<title>Ethics statement</title>
<p>This study has obtained approval from the Ethics Committee of Zhujiang Hospital, Southern Medical University, with the ethical approval number 2022-KY-027-01. Written informed consent was obtained from the individual(s) for the publication of any potentially identifiable images or data included in this article.</p>
</sec>
<sec sec-type="author-contributions" id="s8">
<title>Author contributions</title>
<p>HWu: Writing &#x02013; original draft, Methodology, Software. MZ: Writing &#x02013; original draft, Conceptualization, Data curation, Investigation. HWa: Writing &#x02013; review &#x00026; editing, Supervision. XJ: Funding acquisition, Writing &#x02013; review &#x00026; editing, Methodology. YG: Writing &#x02013; review &#x00026; editing, Project administration, Methodology. RH: Data curation, Writing &#x02013; review &#x00026; editing, Investigation. ZF: Writing &#x02013; review &#x00026; editing, Conceptualization, Project administration, Supervision. XH: Data curation, Resources, Writing &#x02013; review &#x00026; editing. YF: Data curation, Funding acquisition, Resources, Writing &#x02013; review &#x00026; editing.</p>
</sec>
<sec sec-type="funding-information" id="s9">
<title>Funding</title>
<p>The author(s) declare that financial support was received for the research and/or publication of this article. This research was funded by the Guangzhou Provincial Science and Technology Bureau, grant number 202206010093, and the Shanghai Natural Science Foundation General Project, grant number 25ZR1401148.</p>
</sec>
<sec sec-type="COI-statement" id="conf1">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="ai-statement" id="s10">
<title>Generative AI statement</title>
<p>The author(s) declare that no Gen AI was used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p></sec>
<sec sec-type="disclaimer" id="s11">
<title>Publisher&#x00027;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Brown</surname> <given-names>T. B.</given-names></name> <name><surname>Mann</surname> <given-names>B.</given-names></name> <name><surname>Ryder</surname> <given-names>N.</given-names></name> <name><surname>Subbiah</surname> <given-names>M.</given-names></name></person-group> (<year>2020</year>). <article-title>&#x0201C;Language models are few-shot learners,&#x0201D;</article-title> in <source>Proceedings of the 34th International Conference on Neural Information Processing Systems</source> (<publisher-loc>Red Hook, NY, USA</publisher-loc>: <publisher-name>Curran Associates Inc.</publisher-name>), <fpage>1877</fpage>&#x02013;<lpage>1901</lpage>.</citation>
</ref>
<ref id="B2">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chai</surname> <given-names>X.</given-names></name></person-group> (<year>2020</year>). <article-title>Diagnosis method of thyroid disease combining knowledge graph and deep learning</article-title>. <source>IEEE Access</source> <volume>8</volume>, <fpage>149787</fpage>&#x02013;<lpage>149795</lpage>. <pub-id pub-id-type="doi">10.1109/ACCESS.2020.3016676</pub-id></citation>
</ref>
<ref id="B3">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chen</surname> <given-names>X.</given-names></name> <name><surname>Shi</surname> <given-names>P.</given-names></name> <name><surname>Pu</surname> <given-names>S.</given-names></name></person-group> (<year>2022</year>). <article-title>Data-pooling reinforcement learning for personalized healthcare intervention</article-title>. <source>arXiv preprint arXiv:2211.08998</source>. <pub-id pub-id-type="doi">10.48550/arXiv.2211.08998</pub-id></citation>
</ref>
<ref id="B4">
<citation citation-type="journal"><person-group person-group-type="author"><collab>Chinese Society of Liver Cancer and Chinese Anti-Cancer Association</collab></person-group>. (<year>2025</year>). <article-title>Chinese expert consensus on multidisciplinary treatment of liver cancer (2025)</article-title>. <source>J. Clin. Hepatol</source>. <volume>41</volume>, <fpage>1279</fpage>&#x02013;<lpage>1286</lpage>. <pub-id pub-id-type="doi">10.12449/JCH250709</pub-id></citation>
</ref>
<ref id="B5">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chowdhery</surname> <given-names>A.</given-names></name> <name><surname>Narang</surname> <given-names>S.</given-names></name> <name><surname>Devlin</surname> <given-names>J.</given-names></name></person-group> (<year>2024</year>). <article-title>Palm: scaling language modeling with pathways</article-title>. <source>J. Mach. Learn. Res</source>. <volume>24</volume>, <fpage>11324</fpage>&#x02013;<lpage>11436</lpage>. <pub-id pub-id-type="doi">10.48550/arXiv.2204.02311</pub-id></citation>
</ref>
<ref id="B6">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Cui</surname> <given-names>W.</given-names></name> <name><surname>Xiao</surname> <given-names>Y.</given-names></name> <name><surname>Wang</surname> <given-names>H.</given-names></name> <name><surname>Song</surname> <given-names>Y.</given-names></name> <name><surname>Hwang</surname> <given-names>S.-w.</given-names></name> <name><surname>Wang</surname> <given-names>W.</given-names></name></person-group> (<year>2017</year>). <article-title>Kbqa: learning question answering over qa corpora and knowledge bases</article-title>. <source>Proc. VLDB Endow</source>. <volume>10</volume>, <fpage>565</fpage>&#x02013;<lpage>576</lpage>. <pub-id pub-id-type="doi">10.14778/3055540.3055549</pub-id></citation>
</ref>
<ref id="B7">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Devlin</surname> <given-names>J.</given-names></name> <name><surname>Chang</surname> <given-names>M.-W.</given-names></name> <name><surname>Lee</surname> <given-names>K.</given-names></name> <name><surname>Toutanova</surname> <given-names>K.</given-names></name></person-group> (<year>2019</year>). <article-title>&#x0201C;Bert: pre-training of deep bidirectional transformers for language understanding,&#x0201D;</article-title> in <source>Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)</source> (<publisher-loc>Minneapolis, Minnesota</publisher-loc>: <publisher-name>Association for Computational Linguistics</publisher-name>), <fpage>4171</fpage>&#x02013;<lpage>4186</lpage>.</citation>
</ref>
<ref id="B8">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Du</surname> <given-names>Z.</given-names></name> <name><surname>Qian</surname> <given-names>Y.</given-names></name> <name><surname>Liu</surname> <given-names>X.</given-names></name> <name><surname>Ding</surname> <given-names>M.</given-names></name> <name><surname>Qiu</surname> <given-names>J.</given-names></name> <name><surname>Yang</surname> <given-names>Z.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>&#x0201C;GLM: general language model pretraining with autoregressive blank infilling,&#x0201D;</article-title> in <source>Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</source> (<publisher-loc>Dublin, Ireland</publisher-loc>: <publisher-name>Association for Computational Linguistics</publisher-name>), <fpage>320</fpage>&#x02013;<lpage>335</lpage>. <pub-id pub-id-type="doi">10.18653/v1/2022.acl-long.26</pub-id></citation>
</ref>
<ref id="B9">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Dutt</surname> <given-names>R.</given-names></name> <name><surname>Bhattacharjee</surname> <given-names>K.</given-names></name> <name><surname>Gangadharaiah</surname> <given-names>R.</given-names></name> <name><surname>Roth</surname> <given-names>D.</given-names></name> <name><surname>Rose</surname> <given-names>C.</given-names></name></person-group> (<year>2022</year>). <article-title>&#x0201C;PerKGQA: question answering over personalized knowledge graphs,&#x0201D;</article-title> in <source>Findings of the Association for Computational Linguistics: NAACL 2022</source> (<publisher-loc>Seattle, United States</publisher-loc>: <publisher-name>Association for Computational Linguistics</publisher-name>), <fpage>253</fpage>&#x02013;<lpage>268</lpage>. <pub-id pub-id-type="doi">10.18653/v1/2022.findings-naacl.19</pub-id></citation>
</ref>
<ref id="B10">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Gai</surname> <given-names>R. L.</given-names></name> <name><surname>Gao</surname> <given-names>F.</given-names></name> <name><surname>Duan</surname> <given-names>L. M.</given-names></name> <name><surname>Sun</surname> <given-names>X. H.</given-names></name> <name><surname>Li</surname> <given-names>H. Z.</given-names></name></person-group> (<year>2014</year>). <article-title>&#x0201C;Bidirectional maximal matching word segmentation algorithm with rules,&#x0201D;</article-title> in <source>Progress in Applied Sciences, Engineering and Technology, volume 926 of Advanced Materials Research</source> (<publisher-loc>B&#x000E4;ch</publisher-loc>: <publisher-name>Trans Tech Publications Ltd</publisher-name>), <fpage>3368</fpage>&#x02013;<lpage>3372</lpage>. <pub-id pub-id-type="doi">10.4028/www.scientific.net/AMR.926-930.3368</pub-id></citation>
</ref>
<ref id="B11">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Gentile</surname> <given-names>A. L.</given-names></name> <name><surname>Gruhl</surname> <given-names>D.</given-names></name> <name><surname>Ristoski</surname> <given-names>P.</given-names></name> <name><surname>Welch</surname> <given-names>S.</given-names></name></person-group> (<year>2019</year>). <article-title>&#x0201C;Personalized knowledge graphs for the pharmaceutical domain,&#x0201D;</article-title> in <source>The Semantic Web &#x02013; ISWC 2019</source> (<publisher-loc>Cham</publisher-loc>: <publisher-name>Springer International Publishing</publisher-name>), <fpage>400</fpage>&#x02013;<lpage>417</lpage>. <pub-id pub-id-type="doi">10.1007/978-3-030-30796-7_25</pub-id></citation>
</ref>
<ref id="B12">
<citation citation-type="web"><person-group person-group-type="author"><name><surname>Guu</surname> <given-names>K.</given-names></name> <name><surname>Lee</surname> <given-names>K.</given-names></name> <name><surname>Tung</surname> <given-names>Z.</given-names></name> <name><surname>Pasupat</surname> <given-names>P.</given-names></name> <name><surname>Chang</surname> <given-names>M.-W.</given-names></name></person-group> (<year>2020</year>). <article-title>&#x0201C;Realm: retrieval-augmented language model pre-training,&#x0201D;</article-title> in <source>Proceedings of the 37th International Conference on Machine Learning</source> (<publisher-loc>New York, NY</publisher-loc>: <publisher-name>PMLR</publisher-name>), <fpage>3929</fpage>&#x02013;<lpage>3938</lpage>. Available online at: <ext-link ext-link-type="uri" xlink:href="https://JMLR.org">https://JMLR.org</ext-link></citation>
</ref>
<ref id="B13">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gyrard</surname> <given-names>A.</given-names></name> <name><surname>Gaur</surname> <given-names>M.</given-names></name> <name><surname>Shekarpour</surname> <given-names>S.</given-names></name> <name><surname>Thirunarayan</surname> <given-names>K.</given-names></name> <name><surname>Sheth</surname> <given-names>A.</given-names></name></person-group> (<year>2018</year>). <article-title>Personalized Health Knowledge Graph</article-title>. <source>CEUR Workshop Proc</source>. <volume>2317</volume>:<fpage>5</fpage>.</citation>
</ref>
<ref id="B14">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Heyi</surname> <given-names>Z.</given-names></name> <name><surname>Xin</surname> <given-names>W.</given-names></name> <name><surname>Lifan</surname> <given-names>H.</given-names></name> <name><surname>Zhao</surname> <given-names>L.</given-names></name> <name><surname>Zirui</surname> <given-names>C.</given-names></name></person-group> (<year>2023</year>). <article-title>Research on question answering system on the joint of knowledge graph and large language models</article-title>. <source>J. Front. Comput. Sci. Technol</source>. <volume>17</volume>, <fpage>1</fpage>&#x02013;<lpage>14</lpage>. <pub-id pub-id-type="doi">10.3778/j.issn.1673-9418.2308070</pub-id></citation>
</ref>
<ref id="B15">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Lan</surname> <given-names>Y.</given-names></name> <name><surname>He</surname> <given-names>G.</given-names></name> <name><surname>Jiang</surname> <given-names>J.</given-names></name> <name><surname>Jiang</surname> <given-names>J.</given-names></name> <name><surname>Zhao</surname> <given-names>W. X.</given-names></name> <name><surname>Wen</surname> <given-names>J.-R.</given-names></name></person-group> (<year>2021</year>). <article-title>&#x0201C;A survey on complex knowledge base question answering: Methods, challenges and solutions,&#x0201D;</article-title> in <source>Proceedings of the Thirtieth International Joint Conference on Artificial Intelligence Survey Track</source> (<publisher-loc>Montreal, QC</publisher-loc>: <publisher-name>International Joint Conferences on Artificial Intelligence (IJCAI)</publisher-name>). <pub-id pub-id-type="doi">10.24963/ijcai.2021/611</pub-id></citation>
</ref>
<ref id="B16">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>M.</given-names></name> <name><surname>Lu</surname> <given-names>X.</given-names></name> <name><surname>Chen</surname> <given-names>L.</given-names></name> <name><surname>Wang</surname> <given-names>J.</given-names></name></person-group> (<year>2020</year>). <article-title>Knowledge map construction for question and answer archives</article-title>. <source>Expert Syst. Appl</source>. <volume>141</volume>:<fpage>112923</fpage>. <pub-id pub-id-type="doi">10.1016/j.eswa.2019.112923</pub-id></citation>
</ref>
<ref id="B17">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>T.</given-names></name> <name><surname>Ma</surname> <given-names>X.</given-names></name> <name><surname>Zhuang</surname> <given-names>A.</given-names></name> <name><surname>Gu</surname> <given-names>Y.</given-names></name> <name><surname>Su</surname> <given-names>Y.</given-names></name> <name><surname>Chen</surname> <given-names>W.</given-names></name></person-group> (<year>2023</year>). <article-title>&#x0201C;Few-shot in-context learning on knowledge base question answering,&#x0201D;</article-title> in <source>Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics</source> (<publisher-loc>Toronto, Canada</publisher-loc>: <publisher-name>Association for Computational Linguistics</publisher-name>), <fpage>6966</fpage>&#x02013;<lpage>6980</lpage>. <pub-id pub-id-type="doi">10.18653/v1/2023.acl-long.385</pub-id></citation>
</ref>
<ref id="B18">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>Y.</given-names></name> <name><surname>Zhang</surname> <given-names>R.</given-names></name> <name><surname>Liu</surname> <given-names>J.</given-names></name> <name><surname>Liu</surname> <given-names>G.</given-names></name></person-group> (<year>2024</year>). <article-title>An enhanced prompt-based llm reasoning scheme via knowledge graph-integrated collaboration</article-title>. <source>arXiv preprint arXiv:2402.04978</source>. <pub-id pub-id-type="doi">10.1007/978-3-031-72344-5_17</pub-id></citation>
</ref>
<ref id="B19">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Li&#x000E9;vin</surname> <given-names>V.</given-names></name> <name><surname>Hother</surname> <given-names>C. E.</given-names></name> <name><surname>Motzfeldt</surname> <given-names>A. G.</given-names></name> <name><surname>Winther</surname> <given-names>O.</given-names></name></person-group> (<year>2024</year>). <article-title>Can large language models reason about medical questions?</article-title> <source>Patterns</source> <volume>5</volume>:<fpage>100943</fpage>. <pub-id pub-id-type="doi">10.1016/j.patter.2024.100943</pub-id><pub-id pub-id-type="pmid">38487804</pub-id></citation></ref>
<ref id="B20">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lu</surname> <given-names>W.</given-names></name> <name><surname>Wang</surname> <given-names>J.</given-names></name> <name><surname>Chen</surname> <given-names>Y.</given-names></name> <name><surname>Qin</surname> <given-names>X.</given-names></name> <name><surname>Xu</surname> <given-names>R.</given-names></name> <name><surname>Dimitriadis</surname> <given-names>D.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>Personalized federated learning with adaptive batchnorm for healthcare</article-title>. <source>IEEE Trans. Big Data</source> <volume>10</volume>. <pub-id pub-id-type="doi">10.1109/TBDATA.2022.3177197</pub-id></citation>
</ref>
<ref id="B21">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Luo</surname> <given-names>Y.</given-names></name> <name><surname>Yang</surname> <given-names>B.</given-names></name> <name><surname>Xu</surname> <given-names>D.</given-names></name> <name><surname>Tian</surname> <given-names>L.</given-names></name></person-group> (<year>2022</year>). <article-title>&#x0201C;A survey: complex knowledge base question answering,&#x0201D;</article-title> in <source>2022 IEEE 2nd International Conference on Information Communication and Software Engineering (ICICSE)</source> (<publisher-loc>Chongqing</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>46</fpage>&#x02013;<lpage>52</lpage>. <pub-id pub-id-type="doi">10.1109/ICICSE55337.2022.9828967</pub-id></citation>
</ref>
<ref id="B22">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Maynez</surname> <given-names>J.</given-names></name> <name><surname>Narayan</surname> <given-names>S.</given-names></name> <name><surname>Bohnet</surname> <given-names>B.</given-names></name> <name><surname>McDonald</surname> <given-names>R.</given-names></name></person-group> (<year>2020</year>). <article-title>&#x0201C;On faithfulness and factuality in abstractive summarization,&#x0201D;</article-title> in <source>Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics</source> (<publisher-loc>Washington, DC</publisher-loc>: <publisher-name>Association for Computational Linguistics</publisher-name>), <fpage>1906</fpage>&#x02013;<lpage>1919</lpage>. <pub-id pub-id-type="doi">10.18653/v1/2020.acl-main.173</pub-id></citation>
</ref>
<ref id="B23">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Oduro-Afriyie</surname> <given-names>J.</given-names></name> <name><surname>Jamil</surname> <given-names>H. M.</given-names></name></person-group> (<year>2023</year>). <article-title>&#x0201C;Enabling the informed patient paradigm with secure and personalized medical question answering,&#x0201D;</article-title> in <source>Proceedings of the 14th ACM International Conference on Bioinformatics, Computational Biology, and Health Informatics</source> (<publisher-loc>New York, NY, USA</publisher-loc>: <publisher-name>Association for Computing Machinery</publisher-name>), <fpage>1</fpage>&#x02013;<lpage>6</lpage>. <pub-id pub-id-type="doi">10.1145/3584371.3613016</pub-id></citation>
</ref>
<ref id="B24">
<citation citation-type="web"><person-group person-group-type="author"><name><surname>Shirai</surname> <given-names>S.</given-names></name> <name><surname>Seneviratne</surname> <given-names>O.</given-names></name> <name><surname>McGuinness</surname> <given-names>D. L.</given-names></name></person-group> (<year>2021</year>). <article-title>Applying personal knowledge graphs to health</article-title>. <source>arXiv preprint</source> arXiv:2104.07587. Available online at: <ext-link ext-link-type="uri" xlink:href="https://arxiv.org/abs/2104.07587">https://arxiv.org/abs/2104.07587</ext-link></citation>
</ref>
<ref id="B25">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yang</surname> <given-names>X.</given-names></name> <name><surname>Chen</surname> <given-names>A.</given-names></name> <name><surname>PourNejatian</surname> <given-names>N.</given-names></name> <name><surname>Shin</surname> <given-names>H. C.</given-names></name> <name><surname>Smith</surname> <given-names>K. E.</given-names></name> <name><surname>Parisien</surname> <given-names>C.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>A large language model for electronic health records</article-title>. <source>NPJ Digit. Med</source>. <volume>5</volume>:<fpage>194</fpage>. <pub-id pub-id-type="doi">10.1038/s41746-022-00742-2</pub-id><pub-id pub-id-type="pmid">36572766</pub-id></citation></ref>
<ref id="B26">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yang</surname> <given-names>Y.</given-names></name> <name><surname>Rao</surname> <given-names>Y.</given-names></name> <name><surname>Yu</surname> <given-names>M.</given-names></name> <name><surname>Kang</surname> <given-names>Y.</given-names></name></person-group> (<year>2022</year>). <article-title>Multi-layer information fusion based on graph convolutional network for knowledge-driven herb recommendation</article-title>. <source>Neural Netw</source>. <volume>146</volume>, <fpage>1</fpage>&#x02013;<lpage>10</lpage>. <pub-id pub-id-type="doi">10.1016/j.neunet.2021.11.010</pub-id><pub-id pub-id-type="pmid">34826774</pub-id></citation></ref>
</ref-list>
</back>
</article>