<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="2.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Oncol.</journal-id>
<journal-title>Frontiers in Oncology</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Oncol.</abbrev-journal-title>
<issn pub-type="epub">2234-943X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fonc.2024.1369765</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Oncology</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Predicting risk factors for Epstein-Barr virus reactivation using Bayesian network analysis: a population-based study of high-risk areas for nasopharyngeal cancer</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Zeng</surname>
<given-names>Zhiwen</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="author-notes" rid="fn003">
<sup>&#x2020;</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2627851"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Lin</surname>
<given-names>Kena</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Li</surname>
<given-names>Xueqi</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Li</surname>
<given-names>Tong</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1509117"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Li</surname>
<given-names>Xiaoman</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Li</surname>
<given-names>Jiayi</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2642120"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Ning</surname>
<given-names>Zule</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Liu</surname>
<given-names>Qinxian</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Xie</surname>
<given-names>Shanghang</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Cao</surname>
<given-names>Sumei</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
<xref ref-type="author-notes" rid="fn001">
<sup>*</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1891329"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Du</surname>
<given-names>Jinlin</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="author-notes" rid="fn001">
<sup>*</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>School of Public Health, Guangdong Medical University</institution>, <addr-line>Dongguan, Guangdong</addr-line>, <country>China</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Department of Cancer Prevention, Sun Yat-sen University Cancer Center</institution>, <addr-line>Guangzhou</addr-line>, <country>China</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>School of Public Health, Sun Yat-sen University</institution>, <addr-line>Guangzhou</addr-line>, <country>China</country>
</aff>
<aff id="aff4">
<sup>4</sup>
<institution>State Key Laboratory of Oncology in South China, Collaborative Innovation Center for Cancer Medicine, and Guangdong Key Laboratory of Nasopharyngeal Carcinoma Diagnosis and Therapy, Sun Yat-Sen University Cancer Center</institution>, <addr-line>Guangzhou</addr-line>, <country>China</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>Edited by: Hussain Gadelkarim Ahmed, Prof. Medical Research Consultancy Center -MRCC, Sudan</p>
</fn>
<fn fn-type="edited-by">
<p>Reviewed by: Ahmed Abdalla Agab Eldour, Kordofan University, South Sudan</p>
<p>Zhenyu Dai, Stanford University, United States</p>
<p>Xiaolong Wang, Temple University, United States</p>
</fn>
<fn fn-type="corresp" id="fn001">
<p>*Correspondence: Sumei Cao, <email xlink:href="mailto:caosm@sysucc.org.cn">caosm@sysucc.org.cn</email>; Jinlin Du, <email xlink:href="mailto:dujinlin@gdmu.edu.cn">dujinlin@gdmu.edu.cn</email>
</p>
</fn>
<fn fn-type="present-address" id="fn003">
<p>&#x2020;Present address: Zhiwen Zeng, Department of Business Management, Longhua District Chronic Disease Control Center, Shenzhen</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>21</day>
<month>01</month>
<year>2025</year>
</pub-date>
<pub-date pub-type="collection">
<year>2024</year>
</pub-date>
<volume>14</volume>
<elocation-id>1369765</elocation-id>
<history>
<date date-type="received">
<day>24</day>
<month>01</month>
<year>2024</year>
</date>
<date date-type="accepted">
<day>20</day>
<month>12</month>
<year>2024</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2025 Zeng, Lin, Li, Li, Li, Li, Ning, Liu, Xie, Cao and Du</copyright-statement>
<copyright-year>2025</copyright-year>
<copyright-holder>Zeng, Lin, Li, Li, Li, Li, Ning, Liu, Xie, Cao and Du</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<sec>
<title>Background and objective</title>
<p>Nasopharyngeal carcinoma (NPC) is a rare disease in most parts of the world, but it is highly prevalent in South China. Epstein-Barr virus (EBV) is one of the major risk factors for NPC. Hence, understanding the factors associated with the reactivation of EBV from the latent stage is crucial for preventing NPC. This study aimed to investigate the risk factors for EBV reactivation associated with NPC in high-prevalence areas in China using a Bayesian network (BN) model combined with structural equation modeling tools.</p>
</sec>
<sec>
<title>Methods</title>
<p>The baseline information for this study was derived from NPC screening data from a population-based prospective cohort in Sihui City, Guangdong Province, China. We divided the data into a training dataset and a test dataset. We then constructed an interaction network-based BN prediction model to explore the risk factors for EBV reactivation, which was compared with a conventional logistic regression model.</p>
</sec>
<sec>
<title>Results</title>
<p>A total of 12,579 participants were included in the analyses, with 1596 participant pairs finally included after the use of a nested case-control study. The results of multivariable logistic regression showed that only being older than 60 years (OR = 1.718, 95% CI = 1.273&#x2013;2.322) and being a current smoker (OR = 1.477, 95% CI = 1.167&#x2013;1.872) were the risk factors for EBV reactivation. The results of the model constructed using BN showed that age and smoking were directly associated with EBV reactivation. In contrast, sex, education level, tea drinking, cooking, and family history of cancer were indirectly associated with EBV reactivation. Further, we predicted the risk of EBV reactivation using Bayesian inference and visualized the BN inference. Model prediction performance was evaluated using the test dataset. The results showed that the BN model slightly outperformed the traditional logistic regression model in all metrics.</p>
</sec>
<sec>
<title>Conclusions</title>
<p>BN not only reflects the complex interaction between factors but also visualizes the prediction results. It has a promising application potential in the risk prediction of EBV reactivation associated with NPC.</p>
</sec>
</abstract>
<kwd-group>
<kwd>Bayesian network</kwd>
<kwd>EBV reactivation</kwd>
<kwd>model construction</kwd>
<kwd>nasopharyngeal carcinoma</kwd>
<kwd>logistic regression</kwd>
</kwd-group>
<contract-num rid="cn001">2020ZDZX1048</contract-num>
<contract-num rid="cn002">82073625</contract-num>
<contract-sponsor id="cn001">Department of Education of Guangdong Province<named-content content-type="fundref-id">10.13039/501100010226</named-content>
</contract-sponsor>
<contract-sponsor id="cn002">National Natural Science Foundation of China<named-content content-type="fundref-id">10.13039/501100001809</named-content>
</contract-sponsor>
<counts>
<fig-count count="3"/>
<table-count count="6"/>
<equation-count count="0"/>
<ref-count count="47"/>
<page-count count="12"/>
<word-count count="5894"/>
</counts>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-in-acceptance</meta-name>
<meta-value>Cancer Epidemiology and Prevention</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec id="s1" sec-type="intro">
<label>1</label>
<title>Introduction</title>
<p>Nasopharyngeal carcinoma (NPC) is a rarely diagnosed disease in most parts of the world, with an age-standardized incidence rate usually lower than 1 case per 100,000 person-years. However, the rates are high in South China, South-East Asia, the Arctic, North Africa, and the Middle East among indigenous groups (<xref ref-type="bibr" rid="B1">1</xref>). Research has shown that the 5-year survival rate of stage I NPC in high-prevalence areas is as high as &#x2265;85%. However, the 5-year survival rate of stage IV patients is only 20%, and the 5-year survival rate after distant metastasis is less than 5% (<xref ref-type="bibr" rid="B2">2</xref>). The proportion of early-stage patients among the patients attending the clinic is less than 30%. Most patients are already in advanced stages at the time of consultation because of the hidden location of the nasopharyngeal cavity and the lack of specificity of the early symptoms of NPC (<xref ref-type="bibr" rid="B3">3</xref>). Therefore, the early detection, diagnosis, and treatment of NPC are now central to its prevention and treatment. The Epstein-Barr virus (EBV) is a ubiquitous B-lymphotropic virus carried latently by almost all humans. It is usually first contracted in childhood, during which it either causes no symptoms or only mild ones (<xref ref-type="bibr" rid="B4">4</xref>). The International Agency for Research on Cancer has shown that EBV is an established cause of several human malignancies, including nasopharyngeal cancer (<xref ref-type="bibr" rid="B5">5</xref>). Approximately 95% of the global population is asymptomatically infected with EBV throughout their lives. However, EBV can be periodically reactivated in response to endogenous and environmental stresses (<xref ref-type="bibr" rid="B6">6</xref>, <xref ref-type="bibr" rid="B7">7</xref>). 
During the transition of EBV from a latent infection state to a lytic replication phase, several latent and lytic gene products are expressed, contributing to epithelial cell genetic damage, immune system perturbation, and angiogenesis in the nasopharynx, thereby increasing the risk of nasopharyngeal carcinoma (<xref ref-type="bibr" rid="B8">8</xref>). Our previous studies have demonstrated that serum immunoglobulin A (IgA) antibodies against EBV nuclear antigen 1 (EBNA1/IgA) and viral capsid antigen (VCA/IgA) serve as serological biomarkers of EBV activation and are effective in predicting nasopharyngeal cancer (<xref ref-type="bibr" rid="B9">9</xref>, <xref ref-type="bibr" rid="B10">10</xref>). Thus, understanding the risk factors for EBV reactivation associated with NPC is essential for preventing nasopharyngeal cancer.</p>
<p>It was previously found through the construction of logistic models that smoking (<xref ref-type="bibr" rid="B11">11</xref>), using solid fuel (<xref ref-type="bibr" rid="B12">12</xref>), and consuming salty food (<xref ref-type="bibr" rid="B12">12</xref>) were associated with EBV reactivation. However, the conventional logistic regression models fail to capture the complex network of interactions between multiple risk factors. Also, the assumption of linear additivity of the model may limit its use, leading to reduced efficacy of regression model tests and regression model failure (<xref ref-type="bibr" rid="B13">13</xref>&#x2013;<xref ref-type="bibr" rid="B15">15</xref>). Furthermore, the model cannot identify direct or indirect risk factors. Accordingly, screening variables or constructing models based on the network structures is significant for analyzing the risk factors for EBV reactivation associated with NPC.</p>
<p>Bayesian networks (BNs), which were proposed by Judea Pearl (<xref ref-type="bibr" rid="B16">16</xref>), can better compensate for the shortcomings of logistic regression models. BNs comprise two components: a directed acyclic graph (DAG) that reflects the complex network of interactions among risk factors and a conditional probability table that depicts the correlation between variables (<xref ref-type="bibr" rid="B17">17</xref>, <xref ref-type="bibr" rid="B18">18</xref>). The BN overcomes the correlation restrictions of the traditional regression models (i.e., the assumption of independence between variables), and it can also infer the probability of an unknown node when the states of other nodes are known. An increasing number of studies have applied BN for predicting risk factors for diseases such as cardiovascular disease (<xref ref-type="bibr" rid="B19">19</xref>), stroke (<xref ref-type="bibr" rid="B20">20</xref>), and colorectal cancer prognosis (<xref ref-type="bibr" rid="B21">21</xref>). However, reports regarding the application of the BN in NPC studies, especially in the context of China, are limited. In addition, no BN modeling studies related to predicting the risk of EBV reactivation have been found. Therefore, considering the problems of the traditional prediction model, we attempted to construct the EBV reactivation prediction model based on the interactive network system and explore the predictive effect of the model, which is of great significance to public health.</p>
<p>Unbalanced datasets can lead to degraded model performance, so balancing the classes of EBV reactivation states is crucial for constructing BN models (<xref ref-type="bibr" rid="B22">22</xref>). Also, we used a nested case&#x2013;control study approach and processed the data by matching 1:1 for several baseline characteristics and ill-defined variables to make the data more comparable and control for confounding bias as much as possible. Besides, in BN structure learning, we applied structural equation modeling tools, enhancing the stability of the constructed structures (<xref ref-type="bibr" rid="B23">23</xref>). In this study, we combined screening data from a high-prevalence area of NPC in South China, aiming to explore the risk factors for EBV reactivation by constructing a structural equation modeling (SEM)-treated BN model using a nested case&#x2013;control study. The purpose was to intervene and reduce the risk of NPC in an early stage and provide a new idea for the prevention and treatment of NPC.</p>
</sec>
<sec id="s2" sec-type="materials|methods">
<label>2</label>
<title>Materials and methods</title>
<sec id="s2_1">
<label>2.1</label>
<title>Data sources and study participants</title>
<p>The baseline information for this study was obtained from a population-based prospective cohort study (South China Chronic Disease Cohort) in Sihui City, Guangdong Province, designed to systematically investigate the risk factors for common noncommunicable diseases in the local population. A total of 12,619 native permanent residents (defined as having lived in Sihui City for at least 6 months) aged 18 years or older were recruited between October 2017 and March 2021 to participate in the baseline study. The study was approved by the Human Ethics Committee of the Sun Yat-sen University Cancer Control Centre (SYSUCC), and written informed consent was obtained from all the participants.</p>
<p>Each participant was required to complete a structured questionnaire after being interviewed by a trained investigator. The questionnaire included basic demographic characteristics (sex, age, education level, marital status, income, etc.), general health status (personal and family history of communicable and noncommunicable diseases, family history of cancer, history of drug use, etc.), smoking and alcohol consumption habits, diet, indoor air pollution, physical activity, female reproductive history, and sleep and mental statuses. The physical examination included measurements of height, weight, body fat, waist and hip circumference, electrocardiogram, and blood pressure, as well as the collection and storage of blood, urine, and mouthwash samples.</p>
</sec>
<sec id="s2_2">
<label>2.2</label>
<title>EBV serological antibody testing</title>
<p>The collected blood samples were centrifuged and transported by a cold chain to the central laboratory of the SYSUCC for testing. The levels of two biomarkers, EBNA1/IgA (Zhongshan Biotechnology, Zhongshan, China) and VCA/IgA (Euroimmun, L&#xfc;beck, Germany), were measured by enzyme-linked immunosorbent assay (ELISA) using commercial kits (<xref ref-type="bibr" rid="B24">24</xref>). The levels of these serum markers were determined using photometric methods following the manufacturer&#x2019;s protocols. EBV antibody levels were standardized by the ratio of the optical density of the sample to the reference control (rOD). According to the ELISA kit standards, the criterion for positivity was <inline-formula>
<mml:math display="inline" id="im1">
<mml:mrow>
<mml:mtext>rOD</mml:mtext>
<mml:mo>&#x2265;</mml:mo>
<mml:mn>0.7</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> for EBNA1/IgA and <inline-formula>
<mml:math display="inline" id="im2">
<mml:mrow>
<mml:mtext>rOD</mml:mtext>
<mml:mo>&#x2265;</mml:mo>
<mml:mn>0.8</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> for VCA/IgA. The risk scores of the NPC were calculated using a risk prediction model: <inline-formula>
<mml:math display="inline" id="im3">
<mml:mrow>
<mml:mtext>Logit</mml:mtext>
<mml:mi>P</mml:mi>
<mml:mi>R</mml:mi>
<mml:mi>O</mml:mi>
<mml:mi>B</mml:mi>
<mml:mo>=</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>3.934</mml:mn>
<mml:mo>+</mml:mo>
<mml:mn>2.203</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:mtext>VCA</mml:mtext>
<mml:mo stretchy="false">/</mml:mo>
<mml:mtext>IgA</mml:mtext>
<mml:mo>+</mml:mo>
<mml:mn>4.797</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:mtext>EBNA</mml:mtext>
<mml:mn>1</mml:mn>
<mml:mo stretchy="false">/</mml:mo>
<mml:mtext>IgA</mml:mtext>
</mml:mrow>
</mml:math>
</inline-formula> (<xref ref-type="bibr" rid="B24">24</xref>). A predefined serological algorithm was used for risk stratification (low risk: <inline-formula>
<mml:math display="inline" id="im4">
<mml:mrow>
<mml:mtext>PROB</mml:mtext>
<mml:mo>&lt;</mml:mo>
<mml:mn>0.65</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>; medium risk: <inline-formula>
<mml:math display="inline" id="im5">
<mml:mrow>
<mml:mn>0.65</mml:mn>
<mml:mo>&#x2264;</mml:mo>
<mml:mtext>PROB</mml:mtext>
<mml:mo>&lt;</mml:mo>
<mml:mn>0.98</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>; high risk: <inline-formula>
<mml:math display="inline" id="im6">
<mml:mrow>
<mml:mtext>PROB</mml:mtext>
<mml:mo>&#x2265;</mml:mo>
<mml:mn>0.98</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>) (<xref ref-type="bibr" rid="B9">9</xref>, <xref ref-type="bibr" rid="B25">25</xref>). Based on this algorithm, the samples classified as medium and high risk were considered positive for EBV reactivation, whereas those with low risk were judged to be negative for EBV reactivation.</p>
</sec>
<sec id="s2_3">
<label>2.3</label>
<title>Nested case&#x2013;control study</title>
<p>We used a nested case&#x2013;control study approach to improve the comparability of the data. The EBV reactivation status associated with NPC was used as the dependent variable, matched 1:1 by marriage and income. A total of 1596 pairs of participants were eventually included in the study. Furthermore, 70% of these data were used as a training set for the constructed model, and the remaining 30% were used as a test set to validate the performance of the constructed model. Finally, the training and test sets comprised 1124 and 472 pairs of participants, respectively.</p>
</sec>
<sec id="s2_4">
<label>2.4</label>
<title>Bayesian network</title>
<p>A BN is a DAG based on probabilistic inference, which consists of a set of nodes representing each variable and arcs showing the relationships between the nodes (<xref ref-type="bibr" rid="B26">26</xref>, <xref ref-type="bibr" rid="B27">27</xref>). BN can represent the complex relationships of variables in a given problem in a network structure, reflecting the dependencies of variables in the problem domain through a network model suitable for representing and reasoning about uncertain knowledge. Because of its acyclic nature, the relationships between variables within a BN are often described by &#x201c;family analogies,&#x201d; where the nodes of interest may have parent and child nodes (<xref ref-type="bibr" rid="B28">28</xref>). When an arc extends from variable A to variable B (A &#x2192; B), this indicates that variable A either has a direct influence on variable B or is a risk factor for it. In this context, variable A is referred to as the parent node of variable B, and variable B is considered the child node of variable A (<xref ref-type="bibr" rid="B20">20</xref>). Learning a BN comprises two steps: structure learning and parameter learning. Structure learning involves determining an appropriate BN topology using a training sample set and combining it with prior knowledge. Parameter learning entails determining the conditional probability densities at each node given the BN topology.</p>
<p>This study used the following structural learning algorithms to construct BNs and compared them: score-based TABU search algorithm (<xref ref-type="bibr" rid="B29">29</xref>), hybrid-based max-min hill-climbing (MMHC) algorithm (<xref ref-type="bibr" rid="B30">30</xref>), and combined TABU and MMHC algorithm. We also attempted to construct averaged models learned using the same algorithms but applying model averaging techniques over an ensemble of 5000 network structures so as to give the model better predictive performance and reduce overfitting (<xref ref-type="bibr" rid="B31">31</xref>). Hence, six BN structures were constructed in this study using the following algorithms: TABU, MMHC, TABU after model averaging (Avg.TABU), MMHC after model averaging (Avg.MMHC), TABU combined with MMHC algorithm (TABU + MMHC), and TABU after model averaging and MMHC after model averaging combined algorithm (Avg.TABU + MMHC). We used Bayesian posterior estimation for parameter learning of the BN.</p>
</sec>
<sec id="s2_5">
<label>2.5</label>
<title>Structural equation modeling</title>
<p>When using different BN algorithms to obtain the causal structure, SEM can stabilize the variability of the results and compare the received models to obtain a more reliable structure (<xref ref-type="bibr" rid="B32">32</xref>). SEM was applied in this study to select the most reliable structure among the BNs constructed by combining algorithms (TABU + MMHC and Avg.TABU + MMHC). An example of a BN constructed by combining the TABU and MMHC algorithms was given. An initial network structure was first constructed, including arcs that appeared in both structures. The initial structure was then fitted to the data using SEM. Next, uncertain arcs (e.g., arcs that appeared in the TABU-based model structure but not in the MMHC-based model structure) were sequentially added to the model, which was then compared using chi-square tests to retain the best structure for model fitting (<xref ref-type="bibr" rid="B33">33</xref>).</p>
</sec>
<sec id="s2_6">
<label>2.6</label>
<title>Definition of variables</title>
<p>The information of interest was collected through questionnaires, and the continuous variables were transformed into categorical variables such as age (&#x2264;40, 41&#x2013;50, 51&#x2013;60, and &gt;60), body mass index (&lt;18.5, 18.5&#x2013;24.9, 25.0&#x2013;29.9, and &#x2265;30 kg/m<sup>2</sup>), and income (&lt;50,000, 50,000&#x2013;80,000, 80,000&#x2013;100,000, and &#x2265;100,000 &#xa5;). Other categorical variables included sex (female and male), education level (primary school or less, secondary school, and college/university or more), marriage (married and unmarried), smoking (nonsmokers, former smokers, and current smokers), preserved foods (no and yes), cooking (no and yes), family history of cancer (no/yes), tea consumption (non-tea drinkers, occasional tea drinkers, and regular tea drinkers), soup consumption (no/yes), and herbal tea consumption (no/yes). Current smokers were defined as those who had smoked 100 cigarettes in their lifetime and were still smoking, former smokers as those who had quit smoking for more than 6 months, and nonsmokers as those who did not meet these criteria. Regular tea drinkers were defined as those who drank tea at least 1 day a week, occasional tea drinkers as those who drank tea at most one to three times a month, and non-tea drinkers as those who did not fulfill these criteria.</p>
</sec>
<sec id="s2_7">
<label>2.7</label>
<title>Statistical analysis</title>
<p>The categorical variables were expressed as percentages. We used univariate logistic regression to initially analyze the potential risk factors for EBV reactivation and multivariate stepwise logistic regression analyses for variables that were statistically significant as independent variables. All statistical analyses were performed using R software (version 4.2.2, R Core Team, Vienna, Austria). <italic>P</italic> values &lt;0.05 indicated statistically significant differences. BN structure learning was implemented using the &#x201c;bnlearn&#x201d; package (<xref ref-type="bibr" rid="B34">34</xref>) in R software, and SEM was implemented using the &#x201c;lavaan&#x201d; package (<xref ref-type="bibr" rid="B33">33</xref>). Tenfold cross-validation was applied to evaluate the prediction performance of BN models constructed using different structural algorithms. Bayesian posterior estimation was used to learn the parameters of the BN model. This study also used Netica software (version 6.09, Norsys Software Corp., BC, Canada) to visualize the BN model and BN inference. Finally, we compared the predictive performance of the logistic model with that of the BN model using confusion matrix calculations.</p>
</sec>
</sec>
<sec id="s3" sec-type="results">
<label>3</label>
<title>Results</title>
<sec id="s3_1">
<label>3.1</label>
<title>Baseline characteristics of the study population</title>
<p>In this study, 23 patients previously diagnosed with NPC, 17 participants with missing essential information, 2 patients with liver and lung malignancies, and 1 participant with duplicate information were excluded, leaving 12,579 participants in the analysis. The sample screening flowchart is shown in <xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1</bold>
</xref>. The baseline characteristics of the study participants are depicted in <xref ref-type="table" rid="T1">
<bold>Table&#xa0;1</bold>
</xref>. The mean age of the participants was 51.67 &#xb1; 9.52 years (range: 20&#x2013;77 years). More female than male participants were recruited into the study, with a male-to-female sex ratio of approximately 2:5 (3724:8855). Most participants (92.14%) were married, and more than half (54.29%) had completed secondary education. Further, 65.82% of the participants were of normal weight, 27.03% were overweight, and approximately 30% (30.55%) had a family history of cancer. Most participants were in the habit of drinking soup (92.35%) and herbal tea (71.47%).</p>
<fig id="f1" position="float">
<label>Figure&#xa0;1</label>
<caption>
<p>The screening process for research samples.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fonc-14-1369765-g001.tif"/>
</fig>
<table-wrap id="T1" position="float">
<label>Table&#xa0;1</label>
<caption>
<p>Characteristics of the participants in this study, conducted in Sihui City, Guangdong Province, China, during 2017&#x2013;2021 (<italic>n</italic> = 12,579).</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" align="left">Characteristic</th>
<th valign="top" align="left">Study participants (<italic>n</italic> = 12,579)</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">
<bold>Age, y</bold>(<inline-formula>
<mml:math display="inline" id="im7">
<mml:mrow>
<mml:mover accent="true">
<mml:mi>X</mml:mi>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
<mml:mo>&#xb1;</mml:mo>
<mml:mi>S</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>)</td>
<td valign="middle" align="left">51.67 &#xb1; 9.52</td>
</tr>
<tr>
<th valign="top" colspan="2" align="left">Age group(<italic>n</italic>,%)</th>
</tr>
<tr>
<td valign="top" align="left">&#x2264;40</td>
<td valign="top" align="left">1650(13.12)</td>
</tr>
<tr>
<td valign="top" align="left">41-</td>
<td valign="top" align="left">4250(33.79)</td>
</tr>
<tr>
<td valign="top" align="left">51-</td>
<td valign="top" align="left">4103(32.62)</td>
</tr>
<tr>
<td valign="top" align="left">&gt;60</td>
<td valign="top" align="left">2576(20.48)</td>
</tr>
<tr>
<th valign="top" colspan="2" align="left">Sex(<italic>n</italic>,%)</th>
</tr>
<tr>
<td valign="top" align="left">Female</td>
<td valign="top" align="left">8855(70.40)</td>
</tr>
<tr>
<td valign="top" align="left">Male</td>
<td valign="top" align="left">3724(29.60)</td>
</tr>
<tr>
<th valign="top" colspan="2" align="left">Class_P(<italic>n</italic>,%)</th>
</tr>
<tr>
<td valign="top" align="left">Low-risk</td>
<td valign="top" align="left">10965(87.17)</td>
</tr>
<tr>
<td valign="top" align="left">Medium-risk</td>
<td valign="top" align="left">1196(9.51)</td>
</tr>
<tr>
<td valign="top" align="left">High-risk</td>
<td valign="top" align="left">418(3.32)</td>
</tr>
<tr>
<th valign="top" colspan="2" align="left">Education level(<italic>n</italic>,%)</th>
</tr>
<tr>
<td valign="top" align="left">Primary school or less</td>
<td valign="top" align="left">3703(29.44)</td>
</tr>
<tr>
<td valign="top" align="left">Secondary school</td>
<td valign="top" align="left">6829(54.29)</td>
</tr>
<tr>
<td valign="top" align="left">College/University or more</td>
<td valign="top" align="left">2042(16.23)</td>
</tr>
<tr>
<td valign="top" align="left">Others</td>
<td valign="top" align="left">5(0.04)</td>
</tr>
<tr>
<th valign="top" colspan="2" align="left">Marital status(<italic>n</italic>,%)</th>
</tr>
<tr>
<td valign="top" align="left">Married</td>
<td valign="top" align="left">11590(92.14)</td>
</tr>
<tr>
<td valign="top" align="left">Unmarried</td>
<td valign="top" align="left">974(7.74)</td>
</tr>
<tr>
<td valign="top" align="left">Others</td>
<td valign="top" align="left">15(0.12)</td>
</tr>
<tr>
<th valign="top" colspan="2" align="left">Cigarette smoking(<italic>n</italic>,%)</th>
</tr>
<tr>
<td valign="top" align="left">Never</td>
<td valign="top" align="left">10419(82.83)</td>
</tr>
<tr>
<td valign="top" align="left">Current</td>
<td valign="top" align="left">1741(13.84)</td>
</tr>
<tr>
<td valign="top" align="left">Former</td>
<td valign="top" align="left">411(3.27)</td>
</tr>
<tr>
<td valign="top" align="left">Others</td>
<td valign="top" align="left">8(0.06)</td>
</tr>
<tr>
<th valign="top" colspan="2" align="left">Preserved food(<italic>n</italic>,%)</th>
</tr>
<tr>
<td valign="top" align="left">No</td>
<td valign="top" align="left">8025(63.80)</td>
</tr>
<tr>
<td valign="top" align="left">Yes</td>
<td valign="top" align="left">4548(36.16)</td>
</tr>
<tr>
<td valign="top" align="left">Others</td>
<td valign="top" align="left">6(0.05)</td>
</tr>
<tr>
<th valign="top" colspan="2" align="left">Cooking(<italic>n</italic>,%)</th>
</tr>
<tr>
<td valign="top" align="left">No</td>
<td valign="top" align="left">1503(11.95)</td>
</tr>
<tr>
<td valign="top" align="left">Yes</td>
<td valign="top" align="left">11055(87.88)</td>
</tr>
<tr>
<td valign="top" align="left">Others</td>
<td valign="top" align="left">21(0.17)</td>
</tr>
<tr>
<th valign="top" colspan="2" align="left">Income(<italic>n</italic>,%)</th>
</tr>
<tr>
<td valign="top" align="left">&lt;50,000</td>
<td valign="top" align="left">2633(20.93)</td>
</tr>
<tr>
<td valign="top" align="left">50,000-80,000</td>
<td valign="top" align="left">3266(25.96)</td>
</tr>
<tr>
<td valign="top" align="left">80,000-100,000</td>
<td valign="top" align="left">3002(23.87)</td>
</tr>
<tr>
<td valign="top" align="left">&#x2265;100,000</td>
<td valign="top" align="left">3664(29.13)</td>
</tr>
<tr>
<td valign="top" align="left">Others</td>
<td valign="top" align="left">14(0.11)</td>
</tr>
<tr>
<th valign="top" colspan="2" align="left">Family history of cancer(<italic>n</italic>,%)</th>
</tr>
<tr>
<td valign="top" align="left">No</td>
<td valign="top" align="left">8299(65.98)</td>
</tr>
<tr>
<td valign="top" align="left">Yes</td>
<td valign="top" align="left">3843(30.55)</td>
</tr>
<tr>
<td valign="top" align="left">Others</td>
<td valign="top" align="left">437(3.47)</td>
</tr>
<tr>
<th valign="top" colspan="2" align="left">Tea drinking(<italic>n</italic>,%)</th>
</tr>
<tr>
<td valign="top" align="left">Never</td>
<td valign="top" align="left">5776(45.92)</td>
</tr>
<tr>
<td valign="top" align="left">Former</td>
<td valign="top" align="left">4386(34.87)</td>
</tr>
<tr>
<td valign="top" align="left">Current</td>
<td valign="top" align="left">2405(19.12)</td>
</tr>
<tr>
<td valign="top" align="left">Others</td>
<td valign="top" align="left">12(0.10)</td>
</tr>
<tr>
<th valign="top" colspan="2" align="left">Herbal tea drinking(<italic>n</italic>,%)</th>
</tr>
<tr>
<td valign="top" align="left">No</td>
<td valign="top" align="left">3567(28.36)</td>
</tr>
<tr>
<td valign="top" align="left">Yes</td>
<td valign="top" align="left">8990(71.47)</td>
</tr>
<tr>
<td valign="top" align="left">Others</td>
<td valign="top" align="left">22(0.17)</td>
</tr>
<tr>
<th valign="top" colspan="2" align="left">Soup drinking(<italic>n</italic>,%)</th>
</tr>
<tr>
<td valign="top" align="left">No</td>
<td valign="top" align="left">933(7.42)</td>
</tr>
<tr>
<td valign="top" align="left">Yes</td>
<td valign="top" align="left">11617(92.35)</td>
</tr>
<tr>
<td valign="top" align="left">Others</td>
<td valign="top" align="left">29(0.23)</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Based on the EBV reactivation status, the study participants were matched 1:1 by marital status and income, yielding 1124 EBV reactivation-positive and 1124 EBV reactivation-negative participants in the training set. <xref ref-type="table" rid="T2">
<bold>Table&#xa0;2</bold>
</xref> presents the baseline characteristics of the study participants after matching. Compared with the EBV reactivation-negative group, the EBV reactivation-positive group had a higher percentage of individuals who were older than 60 years, were male, had a primary school education or less, and were former or current smokers.</p>
<table-wrap id="T2" position="float">
<label>Table&#xa0;2</label>
<caption>
<p>Characteristics of the participants after matching (<italic>n</italic> = 2248).</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" rowspan="2" align="left">Characteristic</th>
<th valign="top" colspan="2" align="left">EBV reactivation status</th>
</tr>
<tr>
<th valign="top" align="left">Negative(<italic>n</italic> = 1124)</th>
<th valign="top" align="left">Positive(<italic>n</italic> = 1124)</th>
</tr>
</thead>
<tbody>
<tr>
<th valign="top" colspan="3" align="left">Age group(<italic>n</italic>,%)</th>
</tr>
<tr>
<td valign="top" align="left">&#x2264;40</td>
<td valign="top" align="left">144(12.81)</td>
<td valign="top" align="left">115(10.23)</td>
</tr>
<tr>
<td valign="top" align="left">41-</td>
<td valign="top" align="left">366(32.56)</td>
<td valign="top" align="left">312(27.76)</td>
</tr>
<tr>
<td valign="top" align="left">51-</td>
<td valign="top" align="left">377(33.54)</td>
<td valign="top" align="left">362(32.21)</td>
</tr>
<tr>
<td valign="top" align="left">&gt;60</td>
<td valign="top" align="left">237(21.09)</td>
<td valign="top" align="left">335(29.80)</td>
</tr>
<tr>
<th valign="top" colspan="3" align="left">Sex(<italic>n</italic>,%)</th>
</tr>
<tr>
<td valign="top" align="left">Female</td>
<td valign="top" align="left">809(71.98)</td>
<td valign="top" align="left">732(65.12)</td>
</tr>
<tr>
<td valign="top" align="left">Male</td>
<td valign="top" align="left">315(28.02)</td>
<td valign="top" align="left">392(34.88)</td>
</tr>
<tr>
<th valign="top" colspan="3" align="left">Education level(<italic>n</italic>,%)</th>
</tr>
<tr>
<td valign="top" align="left">Primary school or less</td>
<td valign="top" align="left">347(30.87)</td>
<td valign="top" align="left">381(33.90)</td>
</tr>
<tr>
<td valign="top" align="left">Secondary school</td>
<td valign="top" align="left">597(53.11)</td>
<td valign="top" align="left">606(53.91)</td>
</tr>
<tr>
<td valign="top" align="left">College/University or more</td>
<td valign="top" align="left">180(16.01)</td>
<td valign="top" align="left">137(12.19)</td>
</tr>
<tr>
<th valign="top" colspan="3" align="left">Cigarette smoking(<italic>n</italic>,%)</th>
</tr>
<tr>
<td valign="top" align="left">Never</td>
<td valign="top" align="left">942(83.81)</td>
<td valign="top" align="left">869(77.31)</td>
</tr>
<tr>
<td valign="top" align="left">Current</td>
<td valign="top" align="left">143(12.72)</td>
<td valign="top" align="left">207(18.42)</td>
</tr>
<tr>
<td valign="top" align="left">Former</td>
<td valign="top" align="left">39(3.47)</td>
<td valign="top" align="left">48(4.27)</td>
</tr>
<tr>
<th valign="top" colspan="3" align="left">Preserved food(<italic>n</italic>,%)</th>
</tr>
<tr>
<td valign="top" align="left">No</td>
<td valign="top" align="left">712(63.35)</td>
<td valign="top" align="left">747(66.46)</td>
</tr>
<tr>
<td valign="top" align="left">Yes</td>
<td valign="top" align="left">412(36.65)</td>
<td valign="top" align="left">377(33.54)</td>
</tr>
<tr>
<th valign="top" colspan="3" align="left">Cooking(<italic>n</italic>,%)</th>
</tr>
<tr>
<td valign="top" align="left">No</td>
<td valign="top" align="left">113(10.05)</td>
<td valign="top" align="left">145(12.90)</td>
</tr>
<tr>
<td valign="top" align="left">Yes</td>
<td valign="top" align="left">1011(89.95)</td>
<td valign="top" align="left">979(87.10)</td>
</tr>
<tr>
<th valign="top" colspan="3" align="left">Family history of cancer(<italic>n</italic>,%)</th>
</tr>
<tr>
<td valign="top" align="left">No</td>
<td valign="top" align="left">788(70.11)</td>
<td valign="top" align="left">791(70.37)</td>
</tr>
<tr>
<td valign="top" align="left">Yes</td>
<td valign="top" align="left">336(29.89)</td>
<td valign="top" align="left">333(29.63)</td>
</tr>
<tr>
<th valign="top" colspan="3" align="left">Tea drinking(<italic>n</italic>,%)</th>
</tr>
<tr>
<td valign="top" align="left">Never</td>
<td valign="top" align="left">536(47.69)</td>
<td valign="top" align="left">500(44.48)</td>
</tr>
<tr>
<td valign="top" align="left">Former</td>
<td valign="top" align="left">379(33.72)</td>
<td valign="top" align="left">374(33.27)</td>
</tr>
<tr>
<td valign="top" align="left">Current</td>
<td valign="top" align="left">209(18.59)</td>
<td valign="top" align="left">250(22.24)</td>
</tr>
<tr>
<th valign="top" colspan="3" align="left">Herbal tea drinking(<italic>n</italic>,%)</th>
</tr>
<tr>
<td valign="top" align="left">No</td>
<td valign="top" align="left">332(29.54)</td>
<td valign="top" align="left">315(28.02)</td>
</tr>
<tr>
<td valign="top" align="left">Yes</td>
<td valign="top" align="left">792(70.46)</td>
<td valign="top" align="left">809(71.98)</td>
</tr>
<tr>
<th valign="top" colspan="3" align="left">Soup drinking(<italic>n</italic>,%)</th>
</tr>
<tr>
<td valign="top" align="left">No</td>
<td valign="top" align="left">91(8.10)</td>
<td valign="top" align="left">75(6.67)</td>
</tr>
<tr>
<td valign="top" align="left">Yes</td>
<td valign="top" align="left">1033(91.90)</td>
<td valign="top" align="left">1049(93.33)</td>
</tr>
<tr>
<th valign="top" colspan="3" align="left">Marital status(<italic>n</italic>,%)</th>
</tr>
<tr>
<td valign="top" align="left">Married</td>
<td valign="top" align="left">1031(91.73)</td>
<td valign="top" align="left">1031(91.73)</td>
</tr>
<tr>
<td valign="top" align="left">Unmarried</td>
<td valign="top" align="left">93(8.27)</td>
<td valign="top" align="left">93(8.27)</td>
</tr>
<tr>
<th valign="top" colspan="3" align="left">Income(<italic>n</italic>,%)</th>
</tr>
<tr>
<td valign="top" align="left">&lt;50,000</td>
<td valign="top" align="left">252(22.42)</td>
<td valign="top" align="left">252(22.42)</td>
</tr>
<tr>
<td valign="top" align="left">50,000-80,000</td>
<td valign="top" align="left">324(28.83)</td>
<td valign="top" align="left">324(28.83)</td>
</tr>
<tr>
<td valign="top" align="left">80,000-100,000</td>
<td valign="top" align="left">257(22.86)</td>
<td valign="top" align="left">257(22.86)</td>
</tr>
<tr>
<td valign="top" align="left">&#x2265;100,000</td>
<td valign="top" align="left">291(25.89)</td>
<td valign="top" align="left">291(25.89)</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s3_2">
<label>3.2</label>
<title>Univariate analysis</title>
<p>We first performed univariate logistic regression analysis to investigate the potential risk factors for EBV reactivation. The results showed that consuming preserved food, having a family history of cancer, and drinking herbal tea and soup had no statistically significant effect on EBV reactivation (<italic>P</italic> &gt; 0.05). However, sex (OR = 1.375, 95% CI <italic>=</italic> 1.150&#x2013;1.645 for men), age (OR = 1.770, 95% CI <italic>=</italic> 1.316&#x2013;2.380 for age more than 60 years), education level (OR = 0.693, 95% CI <italic>=</italic> 0.532&#x2013;0.904 for college/university or more), smoking status (OR = 1.569, 95% CI <italic>=</italic> 1.244&#x2013;1.979 for current smokers), cooking (OR = 0.755, 95% CI <italic>=</italic> 0.581&#x2013;0.980), and tea drinking (OR = 1.282, 95% CI <italic>=</italic> 1.029&#x2013;1.599 for current tea drinkers) had a statistically significant effect on EBV reactivation (<italic>P</italic> &lt; 0.05) (<xref ref-type="table" rid="T3">
<bold>Table&#xa0;3</bold>
</xref>).</p>
<table-wrap id="T3" position="float">
<label>Table&#xa0;3</label>
<caption>
<p>Influencing factors of EBV reactivation status analyzed using univariate logistic regression (<italic>n</italic> = 2248).</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="left">Variable</th>
<th valign="middle" align="left">OR(95% CI)</th>
<th valign="middle" align="left">
<italic>P</italic>
</th>
</tr>
</thead>
<tbody>
<tr>
<th valign="top" colspan="3" align="left">Sex</th>
</tr>
<tr>
<td valign="top" align="left">Female</td>
<td valign="middle" align="left">1.000(reference)</td>
<td valign="middle" align="left"/>
</tr>
<tr>
<td valign="top" align="left">Male</td>
<td valign="middle" align="left">
<bold>1.375(1.150,1.645)</bold>
</td>
<td valign="middle" align="left">
<bold>&lt;0.001</bold>
</td>
</tr>
<tr>
<th valign="top" colspan="3" align="left">Age, y</th>
</tr>
<tr>
<td valign="top" align="left">&#x2264;40</td>
<td valign="middle" align="left">1.000(reference)</td>
<td valign="middle" align="left"/>
</tr>
<tr>
<td valign="top" align="left">41-</td>
<td valign="middle" align="left">1.067(0.800,1.424)</td>
<td valign="middle" align="left">0.657</td>
</tr>
<tr>
<td valign="top" align="left">51-</td>
<td valign="middle" align="left">1.202(0.905,1.598)</td>
<td valign="middle" align="left">0.204</td>
</tr>
<tr>
<td valign="top" align="left">&gt;60</td>
<td valign="middle" align="left">
<bold>1.770(1.316,2.380)</bold>
</td>
<td valign="middle" align="left">
<bold>&lt;0.001</bold>
</td>
</tr>
<tr>
<th valign="top" colspan="3" align="left">Education level</th>
</tr>
<tr>
<td valign="top" align="left">Primary school or less</td>
<td valign="middle" align="left">1.000(reference)</td>
<td valign="middle" align="left"/>
</tr>
<tr>
<td valign="top" align="left">Secondary school</td>
<td valign="middle" align="left">0.924(0.769,1.111)</td>
<td valign="middle" align="left">0.403</td>
</tr>
<tr>
<td valign="top" align="left">College/University or more</td>
<td valign="middle" align="left">
<bold>0.693(0.532,0.904)</bold>
</td>
<td valign="middle" align="left">
<bold>0.007</bold>
</td>
</tr>
<tr>
<th valign="top" colspan="3" align="left">Cigarette smoking</th>
</tr>
<tr>
<td valign="top" align="left">Never</td>
<td valign="middle" align="left">1.000(reference)</td>
<td valign="middle" align="left"/>
</tr>
<tr>
<td valign="top" align="left">Current</td>
<td valign="middle" align="left">
<bold>1.569(1.244,1.979)</bold>
</td>
<td valign="middle" align="left">
<bold>&lt;0.001</bold>
</td>
</tr>
<tr>
<td valign="top" align="left">Former</td>
<td valign="middle" align="left">1.334(0.866,2.056)</td>
<td valign="middle" align="left">0.191</td>
</tr>
<tr>
<th valign="top" colspan="3" align="left">Preserved food</th>
</tr>
<tr>
<td valign="top" align="left">No</td>
<td valign="middle" align="left">1.000(reference)</td>
<td valign="middle" align="left"/>
</tr>
<tr>
<td valign="top" align="left">Yes</td>
<td valign="middle" align="left">0.872(0.733,1.037)</td>
<td valign="middle" align="left">0.122</td>
</tr>
<tr>
<th valign="top" colspan="3" align="left">Cooking</th>
</tr>
<tr>
<td valign="top" align="left">No</td>
<td valign="middle" align="left">1.000(reference)</td>
<td valign="middle" align="left"/>
</tr>
<tr>
<td valign="top" align="left">Yes</td>
<td valign="middle" align="left">
<bold>0.755(0.581,0.980)</bold>
</td>
<td valign="middle" align="left">
<bold>0.035</bold>
</td>
</tr>
<tr>
<th valign="top" colspan="3" align="left">Family history of cancer</th>
</tr>
<tr>
<td valign="top" align="left">No</td>
<td valign="middle" align="left">1.000(reference)</td>
<td valign="middle" align="left"/>
</tr>
<tr>
<td valign="top" align="left">Yes</td>
<td valign="middle" align="left">0.987(0.824,1.183)</td>
<td valign="middle" align="left">0.890</td>
</tr>
<tr>
<th valign="top" colspan="3" align="left">Tea drinking</th>
</tr>
<tr>
<td valign="top" align="left">Never</td>
<td valign="middle" align="left">1.000(reference)</td>
<td valign="middle" align="left"/>
</tr>
<tr>
<td valign="top" align="left">Former</td>
<td valign="middle" align="left">1.058(0.877,1.276)</td>
<td valign="middle" align="left">0.557</td>
</tr>
<tr>
<td valign="top" align="left">Current</td>
<td valign="middle" align="left">
<bold>1.282(1.029,1.599)</bold>
</td>
<td valign="middle" align="left">
<bold>0.027</bold>
</td>
</tr>
<tr>
<th valign="top" colspan="3" align="left">Herbal tea drinking</th>
</tr>
<tr>
<td valign="top" align="left">No</td>
<td valign="middle" align="left">1.000(reference)</td>
<td valign="middle" align="left"/>
</tr>
<tr>
<td valign="top" align="left">Yes</td>
<td valign="middle" align="left">1.077(0.897,1.292)</td>
<td valign="middle" align="left">0.428</td>
</tr>
<tr>
<th valign="top" colspan="3" align="left">Soup drinking</th>
</tr>
<tr>
<td valign="top" align="left">No</td>
<td valign="middle" align="left">1.000(reference)</td>
<td valign="middle" align="left"/>
</tr>
<tr>
<td valign="top" align="left">Yes</td>
<td valign="middle" align="left">1.232(0.897,1.693)</td>
<td valign="middle" align="left">0.198</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Bold means that sex, age, education level, smoking status, cooking, and tea drinking had a statistically significant effect on EBV reactivation (<italic>P</italic> &lt; 0.05).</p>
</fn>
</table-wrap-foot>
</table-wrap>
</sec>
<sec id="s3_3">
<label>3.3</label>
<title>Multivariate analysis</title>
<p>In this study, a multivariate stepwise logistic regression model of the factors influencing EBV reactivation was constructed, with EBV reactivation status as the dependent variable and the variables significantly associated with EBV reactivation in the univariate logistic regression analysis as independent variables (<xref ref-type="table" rid="T4">
<bold>Table&#xa0;4</bold>
</xref>). The final results showed that only being older than 60 years (OR = 1.718, 95% CI <italic>=</italic> 1.273&#x2013;2.322) and being a current smoker (OR = 1.477, 95% CI <italic>=</italic> 1.167&#x2013;1.872) were the risk factors for EBV reactivation. This was consistent with a previous study (<xref ref-type="bibr" rid="B10">10</xref>).</p>
<table-wrap id="T4" position="float">
<label>Table&#xa0;4</label>
<caption>
<p>Influencing factors of EBV reactivation status analyzed using multivariate logistic regression (<italic>n</italic> = 2248).</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="left">Variable</th>
<th valign="middle" align="left">Estimate</th>
<th valign="middle" align="left">
<italic>S.E.</italic>
</th>
<th valign="middle" align="left">
<italic>P</italic>
</th>
<th valign="middle" align="left">OR(95% CI)</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="left">(Intercept)</td>
<td valign="middle" align="left">-0.081</td>
<td valign="middle" align="left">0.173</td>
<td valign="middle" align="left">0.638</td>
<td valign="middle" align="left">0.922(0.657,1.293)</td>
</tr>
<tr>
<td valign="middle" align="left">Age of 41 to 50</td>
<td valign="middle" align="left">0.067</td>
<td valign="middle" align="left">0.148</td>
<td valign="middle" align="left">0.650</td>
<td valign="middle" align="left">1.069(0.801,1.429)</td>
</tr>
<tr>
<td valign="middle" align="left">Age of 51 to 60</td>
<td valign="middle" align="left">0.161</td>
<td valign="middle" align="left">0.146</td>
<td valign="middle" align="left">0.270</td>
<td valign="middle" align="left">1.175(0.883,1.566)</td>
</tr>
<tr>
<td valign="middle" align="left">Age over 60</td>
<td valign="middle" align="left">0.541</td>
<td valign="middle" align="left">0.153</td>
<td valign="middle" align="left">
<bold>&lt;0.001</bold>
</td>
<td valign="middle" align="left">
<bold>1.718(1.273,2.322)</bold>
</td>
</tr>
<tr>
<td valign="middle" align="left">Current-smoker</td>
<td valign="middle" align="left">0.390</td>
<td valign="middle" align="left">0.120</td>
<td valign="middle" align="left">
<bold>&lt;0.001</bold>
</td>
<td valign="middle" align="left">
<bold>1.477(1.167,1.872)</bold>
</td>
</tr>
<tr>
<td valign="middle" align="left">Former-smoker</td>
<td valign="middle" align="left">0.083</td>
<td valign="middle" align="left">0.227</td>
<td valign="middle" align="left">0.713</td>
<td valign="middle" align="left">1.087(0.698,1.701)</td>
</tr>
<tr>
<td valign="middle" align="left">Cooking</td>
<td valign="middle" align="left">-0.217</td>
<td valign="middle" align="left">0.136</td>
<td valign="middle" align="left">0.111</td>
<td valign="middle" align="left">0.805(0.615,1.051)</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Bold means that being older than 60 years and being a current smoker had a statistically significant effect on EBV reactivation (<italic>P</italic> &lt; 0.05).</p>
</fn>
</table-wrap-foot>
</table-wrap>
</sec>
<sec id="s3_4">
<label>3.4</label>
<title>Bayesian network model</title>
<p>In the present study, six BN structures were constructed using different learning algorithms and then compared. The results are presented in <xref ref-type="table" rid="T5">
<bold>Table&#xa0;5</bold>
</xref>. The combination of the average TABU and MMHC structures balanced by SEM was better on all dimensions of assessment compared with the performance of the other structures, achieving the highest recall (i.e., 0.5391) and the lowest prediction error (0.4558). This suggested that balanced structures could be found to provide models with high predictive performance. Consequently, the final choice in this study was to construct the BN with balanced mean TABU and MMHC structures.</p>
<table-wrap id="T5" position="float">
<label>Table&#xa0;5</label>
<caption>
<p>Results of cross-validation using different BN structures.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="left">Algorithm</th>
<th valign="middle" align="left">TABU</th>
<th valign="middle" align="left">MMHC</th>
<th valign="middle" align="left">Avg.<break/>TABU</th>
<th valign="middle" align="left">Avg.<break/>MMHC</th>
<th valign="middle" align="left">TABU+<break/>MMHC</th>
<th valign="middle" align="left">Avg.TABU+<break/>MMHC</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="left">Sensitivity</td>
<td valign="middle" align="left">0.5238</td>
<td valign="middle" align="left">0.4930</td>
<td valign="middle" align="left">0.4857</td>
<td valign="middle" align="left">0.4826</td>
<td valign="middle" align="left">0.5365</td>
<td valign="middle" align="left">0.5391</td>
</tr>
<tr>
<td valign="middle" align="left">Specificity</td>
<td valign="middle" align="left">0.5355</td>
<td valign="middle" align="left">0.4928</td>
<td valign="middle" align="left">0.4848</td>
<td valign="middle" align="left">0.4807</td>
<td valign="middle" align="left">0.5568</td>
<td valign="middle" align="left">0.5658</td>
</tr>
<tr>
<td valign="middle" align="left">Precision</td>
<td valign="middle" align="left">0.6269</td>
<td valign="middle" align="left">0.5004</td>
<td valign="middle" align="left">0.5004</td>
<td valign="middle" align="left">0.5080</td>
<td valign="middle" align="left">0.6485</td>
<td valign="middle" align="left">0.6765</td>
</tr>
<tr>
<td valign="middle" align="left">Recall</td>
<td valign="middle" align="left">0.5238</td>
<td valign="middle" align="left">0.4930</td>
<td valign="middle" align="left">0.4857</td>
<td valign="middle" align="left">0.4826</td>
<td valign="middle" align="left">0.5365</td>
<td valign="middle" align="left">0.5391</td>
</tr>
<tr>
<td valign="middle" align="left">F1</td>
<td valign="middle" align="left">0.5707</td>
<td valign="middle" align="left">0.4967</td>
<td valign="middle" align="left">0.4930</td>
<td valign="middle" align="left">0.4950</td>
<td valign="middle" align="left">0.5872</td>
<td valign="middle" align="left">0.6000</td>
</tr>
<tr>
<td valign="middle" align="left">Prevalence</td>
<td valign="middle" align="left">0.5984</td>
<td valign="middle" align="left">0.5076</td>
<td valign="middle" align="left">0.5151</td>
<td valign="middle" align="left">0.5263</td>
<td valign="middle" align="left">0.6038</td>
<td valign="middle" align="left">0.6275</td>
</tr>
<tr>
<td valign="middle" align="left">Detection Rate</td>
<td valign="middle" align="left">0.3134</td>
<td valign="middle" align="left">0.2502</td>
<td valign="middle" align="left">0.2502</td>
<td valign="middle" align="left">0.2540</td>
<td valign="middle" align="left">0.3240</td>
<td valign="middle" align="left">0.3382</td>
</tr>
<tr>
<td valign="middle" align="left">Detection Prevalence</td>
<td valign="middle" align="left">0.5000</td>
<td valign="middle" align="left">0.5000</td>
<td valign="middle" align="left">0.5000</td>
<td valign="middle" align="left">0.5000</td>
<td valign="middle" align="left">0.5000</td>
<td valign="middle" align="left">0.5000</td>
</tr>
<tr>
<td valign="middle" align="left">Balanced Accuracy</td>
<td valign="middle" align="left">0.5296</td>
<td valign="middle" align="left">0.4929</td>
<td valign="middle" align="left">0.4853</td>
<td valign="middle" align="left">0.4817</td>
<td valign="middle" align="left">0.5467</td>
<td valign="middle" align="left">0.5524</td>
</tr>
<tr>
<td valign="middle" align="left">BIC loss<break/>(SD)</td>
<td valign="middle" align="left">7.1477 (0.0033)</td>
<td valign="middle" align="left">7.1527 (0.0022)</td>
<td valign="middle" align="left">7.1521 (0.0029)</td>
<td valign="middle" align="left">7.1372 (0.0023)</td>
<td valign="middle" align="left">7.1492 (0.0018)</td>
<td valign="middle" align="left">7.1319 (0.0033)</td>
</tr>
<tr>
<td valign="middle" align="left">BDE loss<break/>(SD)</td>
<td valign="middle" align="left">7.1472 (0.0034)</td>
<td valign="middle" align="left">7.1521 (0.0041)</td>
<td valign="middle" align="left">7.1513 (0.0030)</td>
<td valign="middle" align="left">7.1372 (0.0020)</td>
<td valign="middle" align="left">7.1499 (0.0015)</td>
<td valign="middle" align="left">7.1340 (0.0014)</td>
</tr>
<tr>
<td valign="middle" align="left">Prediction loss(SD)</td>
<td valign="middle" align="left">0.4765 (0.0061)</td>
<td valign="middle" align="left">0.5008 (0.0080)</td>
<td valign="middle" align="left">0.5025 (0.0108)</td>
<td valign="middle" align="left">0.5056 (0.0083)</td>
<td valign="middle" align="left">0.4588 (0.0056)</td>
<td valign="middle" align="left">0.4558 (0.0029)</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>
<xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2</bold>
</xref> shows the optimal BN constructed from the balanced average TABU and MMHC structures. The network was constructed from 8 nodes and 10 directed edges, indicating the probabilistic dependencies between connected nodes. The results showed that age and smoking status were the parent nodes of class_P, that is, they had a direct influence on EBV reactivation. However, sex, education level, tea drinking, cooking, and family history of cancer were indirect influencing factors of EBV reactivation. In addition, the model showed that age and sex had a direct effect on education level and sex had a direct effect on cooking status. Furthermore, tea drinking was associated with sex and smoking status.</p>
<fig id="f2" position="float">
<label>Figure&#xa0;2</label>
<caption>
<p>BNs and prior probability of EBV reactivation status constructed using the optimal algorithm.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fonc-14-1369765-g002.tif"/>
</fig>
</sec>
<sec id="s3_5">
<label>3.5</label>
<title>Bayesian inference</title>
<p>The prior probabilities of the variables are illustrated in <xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2</bold>
</xref>. We used Bayesian posterior estimation to learn the parameters of the network model. The resulting probabilistic model was used to analyze the effect of these variables on EBV reactivation by calculating conditional probabilities. For example, if an individual was aged 60 years or older, the probability of EBV reactivation increased from 50% to 58.5% (<xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3A</bold>
</xref>). If a person was positive for EBV reactivation, the probability that their sex was male increased from 31.5% to 34.0%, and the probability that they had never smoked decreased from 80.3% to 77.0% (<xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3B</bold>
</xref>). If a person was older than 60 years and smoked, the probability of being EBV reactivation positive, that is, classified as having a high risk of NPC, was 66.4% (<xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3C</bold>
</xref>). In contrast, a man who smoked and had a primary school education or less had a 62.7% chance of being EBV reactivation positive (<xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3D</bold>
</xref>).</p>
<fig id="f3" position="float">
<label>Figure&#xa0;3</label>
<caption>
<p>Bayesian network conditional probability inference results on EBV reactivation state. <bold>(A)</bold> Conditional probability when the individual was 60 years old and above. <bold>(B)</bold> Conditional probability when an individual was positive for EBV reactivation. <bold>(C)</bold> Conditional probability when an individual was over 60 years old and currently smoked. <bold>(D)</bold> Conditional probabilities for men who currently smoked and had primary school education or less.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fonc-14-1369765-g003.tif"/>
</fig>
</sec>
</sec>
<sec id="s4" sec-type="discussion">
<label>4</label>
<title>Discussion</title>
<p>In previous studies, logistic regression was usually used to analyze NPC-associated risk factors for EBV reactivation, the results of which were better understood and more acceptable to researchers. However, the shortcomings of the regression model are magnified in a network framework (<xref ref-type="bibr" rid="B14">14</xref>). First, the assumption of linear additivity of the model may limit its use. Second, the performance of the regression model is more likely to be affected by covariance between variables in datasets with complex relationships. Third, the regression model cannot capture the complex network of interactions between multiple risk factors. Although this problem can be solved by adding interaction terms, the number of possible interactions increases exponentially with the increase in the number of variables, leading to a complex interaction process and thus reducing the effectiveness of the model test. However, the BN model can compensate for the aforementioned deficiencies of the regression model. As a result, BNs have been increasingly favored by clinical researchers in recent years as a risk assessment tool for large clinical datasets.</p>
<p>The BN model suggests that EBV may be reactivated due to poor daily living habits, unhealthy cooking environments, decreased host immunity, and having specific genes. Smoking directly exposes the nasopharyngeal epithelium to tobacco smoke. It has been suggested that smoking may promote nasopharyngeal carcinogenesis not only through the direct carcinogenic effect of tobacco smoke, but also indirectly by inducing EBV reactivation (<xref ref-type="bibr" rid="B35">35</xref>, <xref ref-type="bibr" rid="B36">36</xref>). The rationale may be that exposure to nicotine promotes NPC cell proliferation and EBV replication and expression of its lytic gene products (<xref ref-type="bibr" rid="B37">37</xref>). Similarly, in a prospective screening study based in southern China, Hu et&#xa0;al. (<xref ref-type="bibr" rid="B38">38</xref>) found that smoking was associated with EBV serum (VCA/IgA, EBNA1/IgA) positivity during a 3- to 5-year follow-up period, and that participants aged 60 to 69 years had a higher risk of EBNA1/IgA positivity. In addition, some studies have found an increased risk of EBV reactivation in people who have used solid fuels continuously for more than 40 years compared with those who do not cook or use cleaner fuels, and that ingestion of preserved foods may exacerbate the effect of solid fuels on the risk of EBV reactivation (<xref ref-type="bibr" rid="B39">39</xref>). During the combustion of solid fuels, incomplete combustion results in the emission of complex mixtures of gaseous and particulate pollutants; these products, such as polycyclic aromatic hydrocarbons (PAHs), have been found to cause immune damage (<xref ref-type="bibr" rid="B40">40</xref>, <xref ref-type="bibr" rid="B41">41</xref>) and lead to changes in the reactivation of defense viruses (<xref ref-type="bibr" rid="B42">42</xref>). 
The nasopharynx is the first site of deposition of solid fuel combustion products, and the pro-inflammatory effect on epithelial cells may disrupt the inflammatory balance and affect the bacterial and viral flora of the nasopharynx (<xref ref-type="bibr" rid="B43">43</xref>).</p>
<p>This study also visually reasoned the constructed BN model using Netica software. Moreover, we compared the predictive performance of the two models using the test set. The results are depicted in <xref ref-type="table" rid="T6">
<bold>Table&#xa0;6</bold>
</xref>. The table demonstrates that the accuracy (56.36%) was identical for the two models. The BN slightly outperformed the traditional logistic regression model in precision, sensitivity, F1 score, and AUC, whereas its specificity was marginally lower (69.07% vs. 69.49%). The BN, being based on the interaction network, not only predicted the direct or indirect risk factors of the disease but also facilitated visual inference. Therefore, the BN model might be considered a novel predictive model in studying NPC-related risk factors for EBV reactivation.</p>
<table-wrap id="T6" position="float">
<label>Table&#xa0;6</label>
<caption>
<p>Evaluation indices of logistic regression and Bayesian network models.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="left">Model<break/>type</th>
<th valign="middle" align="left">Accuracy<break/>(%)</th>
<th valign="middle" align="left">Precision<break/>(%)</th>
<th valign="middle" align="left">Sensitivity<break/>(%)</th>
<th valign="middle" align="left">Specificity<break/>(%)</th>
<th valign="middle" align="left">F1<break/>(%)</th>
<th valign="middle" align="left">AUC</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="left">Logistic regression</td>
<td valign="middle" align="left">56.36</td>
<td valign="middle" align="left">57.18</td>
<td valign="middle" align="left">43.22</td>
<td valign="middle" align="left">69.49</td>
<td valign="middle" align="left">49.23</td>
<td valign="middle" align="left">0.5520</td>
</tr>
<tr>
<td valign="middle" align="left">BN</td>
<td valign="middle" align="left">56.36</td>
<td valign="middle" align="left">58.52</td>
<td valign="middle" align="left">43.64</td>
<td valign="middle" align="left">69.07</td>
<td valign="middle" align="left">50.00</td>
<td valign="middle" align="left">0.5669</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>NPC is a complex disease whose development is associated with genetic susceptibility, EBV infection, and environmental factors (<xref ref-type="bibr" rid="B44">44</xref>&#x2013;<xref ref-type="bibr" rid="B46">46</xref>). Non-keratinizing NPC is the main type of NPC in China, and almost all patients with non-keratinizing NPC have EBV infection. Also, many other factors affect EBV reactivation. However, based on the South China Chronic Disease Cohort Study, we only considered the influence of the living habits of people in high-prevalence areas on EBV reactivation. Besides, the selection of different EBV-related markers might also have influenced the results. Under the hypothesis that antibodies against the remaining EBV proteins may perform better than those already reported, Li et&#xa0;al. (<xref ref-type="bibr" rid="B47">47</xref>) designed a peptide library containing highly sorted B-cell epitopes of EBV based on linear B-cell epitope prediction. The serum antibodies, including IgA, IgG, and Ab, to the peptide fragment P85 expressed by the <italic>BNLF2b</italic> gene differed significantly between cases and controls, with P85-Ab having the best performance (area under the curve = 0.97). This novel NPC screening marker might substantially improve the performance of screening methods for NPC populations and increase the cost-effectiveness of screening, thus shedding new light on our study of the risk factors for NPC-associated EBV reactivation.</p>
<p>To consider intricate interactions between factors, this study applied BN to analyze the risk factors associated with EBV reactivation in NPC and used SEM tools in constructing BN. The results of the study not only revealed the risk factors for EBV reactivation associated with NPC but also identified the direct and indirect influences on EBV reactivation and elucidated their complex interactive network relationships. In addition, the model, with Bayesian visualization of predictive inference, can raise our awareness of nasopharyngeal cancer prevention in our daily life, suggesting that we should maintain good living habits, such as quitting smoking, not staying up late, using ventilated and clean cooking environments, and exercising. This not only has a positive effect on the middle- and high-risk groups to actively participate in follow-up screening, but also helps in clinical decision-making, which can improve the early detection rate of nasopharyngeal cancer, which is of great significance to the prognosis of nasopharyngeal cancer patients, and provides a new way of thinking about the prevention of nasopharyngeal cancer. However, this study had some limitations. First, some of the data in this study were collected through structured questionnaires, and participants were required to recall their lifestyle habits from the past six months or even several years ago, such as eating pickled foods, smoking and drinking, so recall bias was inevitable. Second, the directed edges in BNs did not indicate causal relationships between connected nodes. Instead, they represented probabilistic dependencies. Third, most of the invalid variables were found in the construction of the logistic regression model, which could be partly explained by non-differential misclassification. In addition, we did not find an association between the consumption of salty food and EBV reactivation. 
This lack of association could be attributed to the extended duration of the NPC screening study in Sihui City, which may have led to a gradual shift in local residents&#x2019; dietary habits. Fourth, this study was only based on the investigation in Sihui City, Guangdong Province, which limits the generalizability of the findings. Fifth, this study did not attempt to use other antibody biomarkers of EBV reactivation for comparison.</p>
<p>EBV reactivation is also associated with a number of diseases. The present study is only a preliminary study of the relationship between lifestyle habits and EBV reactivation, and subsequent studies could further incorporate a number of chronic non-communicable diseases (e.g., hypertension, diabetes mellitus, stroke, etc.) to explore the relationship between them and EBV reactivation. Moreover, we plan to conduct multicenter studies with more EBV reactivation antibody biomarkers and more variable parameters in the future to construct more accurate and reliable BN models of EBV reactivation risk factors associated with NPC. We will also try to construct BN prediction models for other diseases in order to build a more mature and stable prediction network to provide methodological references for the prevention of more diseases.</p>
</sec>
<sec id="s5" sec-type="conclusions">
<label>5</label>
<title>Conclusions</title>
<p>This study demonstrated that BN based on SEM balanced mean TABU and MMHC combined algorithm could not only realize the complex network relationship between risk factors and NPC-associated EBV reactivation but also make it possible to predict the risk of EBV reactivation. The findings provided a scientific idea for preventing and treating NPC, contributing to the reduction of its prevalence. The specific findings were as follows: (1) The logistic regression model showed that NPC-related EBV reactivation was significantly associated with age and smoking status, whereas stable environmental factors had no association, suggesting that environmental factors might in turn influence NPC through other mechanisms. (2) The BN model of EBV reactivation was constructed using 8 nodes and 10 directed edges. Age and smoking status were the direct influencing factors for EBV reactivation. In contrast, sex, education level, tea drinking, cooking, and family history of cancer constituted the indirect influencing factors of EBV reactivation. (3) The BN using the SEM balanced average TABU and MMHC combined algorithm could achieve the probabilistic inference of unknown nodes through known nodes and flexibly demonstrated the influence of a risk factor on EBV reactivation. (4) Overall, the BN with SEM balanced average TABU and MMHC combined algorithm could be used as a new model to predict risk factors for NPC-associated EBV reactivation, with a broad prospect in the clinical practice of NPC.</p>
</sec>
</body>
<back>
<sec id="s6" sec-type="data-availability">
<title>Data availability statement</title>
<p>The datasets presented in this article are not readily available because of a confidentiality agreement for data use. Requests to access the datasets should be directed to Director Cao Sumei of the Sun Yat-sen University Cancer Center, <email xlink:href="mailto:caosm@sysucc.org.cn">caosm@sysucc.org.cn</email>.</p>
</sec>
<sec id="s7" sec-type="author-contributions">
<title>Author contributions</title>
<p>ZZ: Data curation, Methodology, Software, Writing &#x2013; original draft, Formal Analysis. KL: Investigation, Writing &#x2013; original draft, Validation. XQL: Investigation, Writing &#x2013; original draft. TL: Methodology, Writing &#x2013; original draft. XML: Investigation, Writing &#x2013; original draft. JL: Investigation, Writing &#x2013; original draft. ZN: Investigation, Writing &#x2013; original draft. QL: Investigation, Writing &#x2013; original draft. SX: Data curation, Resources, Writing &#x2013; original draft. SC: Funding acquisition, Resources, Supervision, Writing &#x2013; review &amp; editing. JD: Funding acquisition, Supervision, Writing &#x2013; review &amp; editing, Project administration.</p>
</sec>
<sec id="s8" sec-type="funding-information">
<title>Funding</title>
<p>The author(s) declare financial support was received for the research, authorship, and/or publication of this article. The work was supported by National Natural Science Foundation of China, Grant/Award Numbers: (82373655, 82073625) and The Key Project of Department of Education of Guangdong Province (2020ZDZX1048).</p>
</sec>
<sec id="s9" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="s10" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<label>1</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chang</surname> <given-names>ET</given-names>
</name>
<name>
<surname>Ye</surname> <given-names>W</given-names>
</name>
<name>
<surname>Zeng</surname> <given-names>YX</given-names>
</name>
<name>
<surname>Adami</surname> <given-names>HO</given-names>
</name>
</person-group>. <article-title>The evolving epidemiology of nasopharyngeal carcinoma</article-title>. <source>Cancer Epidemiol Biomarkers Prev</source>. (<year>2021</year>) <volume>30</volume>:<page-range>1035&#x2013;47</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1158/1055-9965.EPI-20-1702</pub-id>
</citation>
</ref>
<ref id="B2">
<label>2</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Feng</surname> <given-names>RM</given-names>
</name>
<name>
<surname>Zong</surname> <given-names>YN</given-names>
</name>
<name>
<surname>Cao</surname> <given-names>SM</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>RH</given-names>
</name>
</person-group>. <article-title>Current cancer situation in China: good or bad news from the 2018 Global Cancer Statistics?</article-title> <source>Cancer Commun (Lond)</source>. (<year>2019</year>) <volume>39</volume>(<issue>1</issue>):<fpage>22</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s40880-019-0368-6</pub-id>
</citation>
</ref>
<ref id="B3">
<label>3</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname> <given-names>YP</given-names>
</name>
<name>
<surname>Tang</surname> <given-names>LL</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>Q</given-names>
</name>
<name>
<surname>Poh</surname> <given-names>SS</given-names>
</name>
<name>
<surname>Hui</surname> <given-names>EP</given-names>
</name>
<name>
<surname>Chan</surname> <given-names>ATC</given-names>
</name>
<etal/>
</person-group>. <article-title>Induction chemotherapy plus concurrent chemoradiotherapy in endemic nasopharyngeal carcinoma: individual patient data pooled analysis of four randomized trials</article-title>. <source>Clin Cancer Res</source>. (<year>2018</year>) <volume>24</volume>:<page-range>1824&#x2013;33</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1158/1078-0432.CCR-17-2656</pub-id>
</citation>
</ref>
<ref id="B4">
<label>4</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Klein</surname> <given-names>G</given-names>
</name>
<name>
<surname>Klein</surname> <given-names>E</given-names>
</name>
<name>
<surname>Kashuba</surname> <given-names>E</given-names>
</name>
</person-group>. <article-title>Interaction of Epstein-Barr virus (EBV) with human B-lymphocytes</article-title>. <source>Biochem Biophys Res Commun</source>. (<year>2010</year>) <volume>396</volume>:<fpage>67</fpage>&#x2013;<lpage>73</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.bbrc.2010.02.146</pub-id>
</citation>
</ref>
<ref id="B5">
<label>5</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bouvard</surname> <given-names>V</given-names>
</name>
<name>
<surname>Baan</surname> <given-names>R</given-names>
</name>
<name>
<surname>Straif</surname> <given-names>K</given-names>
</name>
<name>
<surname>Grosse</surname> <given-names>Y</given-names>
</name>
<name>
<surname>Secretan</surname> <given-names>B</given-names>
</name>
<name>
<surname>Ghissassi</surname> <given-names>F</given-names>
</name>
<etal/>
</person-group>. <article-title>Biological agents. Volume 100 B. A review of human carcinogens</article-title>. <source>IARC Monogr Eval Carcinog Risks Hum</source>. (<year>2012</year>) <volume>100</volume>(<issue>Pt B</issue>):<page-range>1&#x2013;441</page-range>.</citation>
</ref>
<ref id="B6">
<label>6</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Young</surname> <given-names>LS</given-names>
</name>
<name>
<surname>Yap</surname> <given-names>LF</given-names>
</name>
<name>
<surname>Murray</surname> <given-names>PG</given-names>
</name>
</person-group>. <article-title>Epstein-Barr virus: more than 50 years old and still providing surprises</article-title>. <source>Nat Rev Cancer</source>. (<year>2016</year>) <volume>16</volume>:<fpage>789</fpage>&#x2013;<lpage>802</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/nrc.2016.92</pub-id>
</citation>
</ref>
<ref id="B7">
<label>7</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shannon-Lowe</surname> <given-names>CD</given-names>
</name>
<name>
<surname>Neuhierl</surname> <given-names>B</given-names>
</name>
<name>
<surname>Baldwin</surname> <given-names>G</given-names>
</name>
<name>
<surname>Rickinson</surname> <given-names>AB</given-names>
</name>
<name>
<surname>Delecluse</surname> <given-names>HJ</given-names>
</name>
</person-group>. <article-title>Resting B cells as a transfer vehicle for Epstein-Barr virus infection of epithelial cells</article-title>. <source>Proc Natl Acad Sci U.S.A</source>. (<year>2006</year>) <volume>103</volume>:<page-range>7065&#x2013;70</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1073/pnas.0510512103</pub-id>
</citation>
</ref>
<ref id="B8">
<label>8</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Young</surname> <given-names>LS</given-names>
</name>
<name>
<surname>Rickinson</surname> <given-names>AB</given-names>
</name>
</person-group>. <article-title>Epstein-Barr virus: 40 years on</article-title>. <source>Nat Rev Cancer</source>. (<year>2004</year>) <volume>4</volume>:<page-range>757&#x2013;68</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/nrc1452</pub-id>
</citation>
</ref>
<ref id="B9">
<label>9</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ji</surname> <given-names>MF</given-names>
</name>
<name>
<surname>Sheng</surname> <given-names>W</given-names>
</name>
<name>
<surname>Cheng</surname> <given-names>WM</given-names>
</name>
<name>
<surname>Ng</surname> <given-names>MH</given-names>
</name>
<name>
<surname>Wu</surname> <given-names>BH</given-names>
</name>
<name>
<surname>Yu</surname> <given-names>X</given-names>
</name>
<etal/>
</person-group>. <article-title>Incidence and mortality of nasopharyngeal carcinoma: interim analysis of a cluster randomized controlled screening trial (PRO-NPC-001) in southern China</article-title>. <source>Ann Oncol</source>. (<year>2019</year>) <volume>30</volume>:<page-range>1630&#x2013;7</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/annonc/mdz231</pub-id>
</citation>
</ref>
<ref id="B10">
<label>10</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname> <given-names>Z</given-names>
</name>
<name>
<surname>Ji</surname> <given-names>MF</given-names>
</name>
<name>
<surname>Huang</surname> <given-names>QH</given-names>
</name>
<name>
<surname>Fang</surname> <given-names>F</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>Q</given-names>
</name>
<name>
<surname>Jia</surname> <given-names>WH</given-names>
</name>
<etal/>
</person-group>. <article-title>Two Epstein-Barr virus-related serologic antibody tests in nasopharyngeal carcinoma screening: results from the initial phase of a cluster randomized controlled trial in Southern China</article-title>. <source>Am J Epidemiol</source>. (<year>2013</year>) <volume>177</volume>:<page-range>242&#x2013;50</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/aje/kws404</pub-id>
</citation>
</ref>
<ref id="B11">
<label>11</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname> <given-names>YF</given-names>
</name>
<name>
<surname>Chang</surname> <given-names>ET</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>Q</given-names>
</name>
<name>
<surname>Cai</surname> <given-names>Y</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>Z</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>G</given-names>
</name>
<etal/>
</person-group>. <article-title>Environmental factors for Epstein-Barr virus reactivation in a high-risk area of nasopharyngeal carcinoma: A population-based study</article-title>. <source>Open Forum Infect Dis</source>. (<year>2022</year>) <volume>9</volume>(<issue>5</issue>):<fpage>ofac128</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/ofid/ofac128</pub-id>
</citation>
</ref>
<ref id="B12">
<label>12</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fang</surname> <given-names>CY</given-names>
</name>
<name>
<surname>Huang</surname> <given-names>SY</given-names>
</name>
<name>
<surname>Wu</surname> <given-names>CC</given-names>
</name>
<name>
<surname>Hsu</surname> <given-names>HY</given-names>
</name>
<name>
<surname>Chou</surname> <given-names>SP</given-names>
</name>
<name>
<surname>Tsai</surname> <given-names>CH</given-names>
</name>
<etal/>
</person-group>. <article-title>The synergistic effect of chemical carcinogens enhances Epstein-Barr virus reactivation and tumor progression of nasopharyngeal carcinoma cells</article-title>. <source>PLoS One</source>. (<year>2012</year>) <volume>7</volume>(<issue>9</issue>):<elocation-id>e44810</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1371/journal.pone.0044810</pub-id>
</citation>
</ref>
<ref id="B13">
<label>13</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Taninaga</surname> <given-names>J</given-names>
</name>
<name>
<surname>Nishiyama</surname> <given-names>Y</given-names>
</name>
<name>
<surname>Fujibayashi</surname> <given-names>K</given-names>
</name>
<name>
<surname>Gunji</surname> <given-names>T</given-names>
</name>
<name>
<surname>Sasabe</surname> <given-names>N</given-names>
</name>
<name>
<surname>Iijima</surname> <given-names>K</given-names>
</name>
<etal/>
</person-group>. <article-title>Prediction of future gastric cancer risk using a machine learning algorithm and comprehensive medical check-up data: A case-control study</article-title>. <source>Sci Rep</source>. (<year>2019</year>) <volume>9</volume>:<fpage>12384</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41598-019-48769-y</pub-id>
</citation>
</ref>
<ref id="B14">
<label>14</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>X</given-names>
</name>
<name>
<surname>Yuan</surname> <given-names>Z</given-names>
</name>
<name>
<surname>Ji</surname> <given-names>J</given-names>
</name>
<name>
<surname>Li</surname> <given-names>H</given-names>
</name>
<name>
<surname>Xue</surname> <given-names>F</given-names>
</name>
</person-group>. <article-title>Network or regression-based methods for disease discrimination: a comparison study</article-title>. <source>BMC Med Res Methodol</source>. (<year>2016</year>) <volume>16</volume>:<fpage>100</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s12874-016-0207-2</pub-id>
</citation>
</ref>
<ref id="B15">
<label>15</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>X</given-names>
</name>
<name>
<surname>Xue</surname> <given-names>F</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>H</given-names>
</name>
<name>
<surname>Zhu</surname> <given-names>D</given-names>
</name>
<name>
<surname>Peng</surname> <given-names>B</given-names>
</name>
<name>
<surname>Wiemels</surname> <given-names>JL</given-names>
</name>
<etal/>
</person-group>. <article-title>Integrative Bayesian variable selection with gene-based informative priors for genome-wide association studies</article-title>. <source>BMC Genet</source>. (<year>2014</year>) <volume>15</volume>:<fpage>130</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s12863-014-0130-7</pub-id>
</citation>
</ref>
<ref id="B16">
<label>16</label>
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Pearl</surname> <given-names>J</given-names>
</name>
</person-group>. <source>Graphical Models for Probabilistic and Causal Reasoning</source>. <publisher-loc>Dordrecht, Netherlands</publisher-loc>: <publisher-name>Springer Netherlands</publisher-name> (<year>1997</year>).</citation>
</ref>
<ref id="B17">
<label>17</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Moe</surname> <given-names>SJ</given-names>
</name>
<name>
<surname>Carriger</surname> <given-names>JF</given-names>
</name>
<name>
<surname>Glendell</surname> <given-names>M</given-names>
</name>
</person-group>. <article-title>Increased use of Bayesian network models has improved environmental risk assessments</article-title>. <source>Integr Environ Assess Manag</source>. (<year>2021</year>) <volume>17</volume>:<fpage>53</fpage>&#x2013;<lpage>61</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1002/ieam.v17.1</pub-id>
</citation>
</ref>
<ref id="B18">
<label>18</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Frolova</surname> <given-names>A</given-names>
</name>
<name>
<surname>Wilczynski</surname> <given-names>B</given-names>
</name>
</person-group>. <article-title>Distributed Bayesian networks reconstruction on the whole genome scale</article-title>. <source>PeerJ</source>. (<year>2018</year>) <volume>6</volume>:<elocation-id>e5692</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.7717/peerj.5692</pub-id>
</citation>
</ref>
<ref id="B19">
<label>19</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ordovas</surname> <given-names>JM</given-names>
</name>
<name>
<surname>Rios-Insua</surname> <given-names>D</given-names>
</name>
<name>
<surname>Santos-Lozano</surname> <given-names>A</given-names>
</name>
<name>
<surname>Lucia</surname> <given-names>A</given-names>
</name>
<name>
<surname>Torres</surname> <given-names>A</given-names>
</name>
<name>
<surname>Kosgodagan</surname> <given-names>A</given-names>
</name>
<etal/>
</person-group>. <article-title>A Bayesian network model for predicting cardiovascular risk</article-title>. <source>Comput Methods Programs Biomed</source>. (<year>2023</year>) <volume>231</volume>:<fpage>107405</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.cmpb.2023.107405</pub-id>
</citation>
</ref>
<ref id="B20">
<label>20</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Song</surname> <given-names>W</given-names>
</name>
<name>
<surname>Qiu</surname> <given-names>L</given-names>
</name>
<name>
<surname>Qing</surname> <given-names>J</given-names>
</name>
<name>
<surname>Zhi</surname> <given-names>W</given-names>
</name>
<name>
<surname>Zha</surname> <given-names>Z</given-names>
</name>
<name>
<surname>Hu</surname> <given-names>X</given-names>
</name>
<etal/>
</person-group>. <article-title>Using Bayesian network model with MMHC algorithm to detect risk factors for stroke</article-title>. <source>Math Biosci Eng</source>. (<year>2022</year>) <volume>19</volume>:<page-range>13660&#x2013;74</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.3934/mbe.2022637</pub-id>
</citation>
</ref>
<ref id="B21">
<label>21</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname> <given-names>RK</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>C</given-names>
</name>
<name>
<surname>Du</surname> <given-names>KL</given-names>
</name>
<name>
<surname>Dan</surname> <given-names>H</given-names>
</name>
<name>
<surname>Ding</surname> <given-names>R</given-names>
</name>
<name>
<surname>Cai</surname> <given-names>Z</given-names>
</name>
<etal/>
</person-group>. <article-title>Analysis of prognostic factors of rectal cancer and construction of a prognostic prediction model based on Bayesian network</article-title>. <source>Front Public Health</source>. (<year>2022</year>) <volume>10</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fpubh.2022.842970</pub-id>
</citation>
</ref>
<ref id="B22">
<label>22</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xu</surname> <given-names>Z</given-names>
</name>
<name>
<surname>Shen</surname> <given-names>D</given-names>
</name>
<name>
<surname>Nie</surname> <given-names>T</given-names>
</name>
<name>
<surname>Kou</surname> <given-names>Y</given-names>
</name>
</person-group>. <article-title>A hybrid sampling algorithm combining M-SMOTE and ENN based on Random forest for medical imbalanced data</article-title>. <source>J Biomed Inform</source>. (<year>2020</year>) <volume>107</volume>:<fpage>103465</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.jbi.2020.103465</pub-id>
</citation>
</ref>
<ref id="B23">
<label>23</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ullman</surname> <given-names>JB</given-names>
</name>
</person-group>. <article-title>Structural equation modeling: reviewing the basics and moving forward</article-title>. <source>J Pers Assess</source>. (<year>2006</year>) <volume>87</volume>:<fpage>35</fpage>&#x2013;<lpage>50</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1207/s15327752jpa8701_03</pub-id>
</citation>
</ref>
<ref id="B24">
<label>24</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname> <given-names>Y</given-names>
</name>
<name>
<surname>Huang</surname> <given-names>Q</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>W</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>Q</given-names>
</name>
<name>
<surname>Jia</surname> <given-names>W</given-names>
</name>
<name>
<surname>Chang</surname> <given-names>E</given-names>
</name>
<etal/>
</person-group>. <article-title>Establishment of VCA and EBNA1 IgA-based combination by enzyme-linked immunosorbent assay as preferred screening method for nasopharyngeal carcinoma: a two-stage design with a preliminary performance study and a mass screening in southern China</article-title>. <source>Int J Cancer</source>. (<year>2012</year>) <volume>131</volume>:<page-range>406&#x2013;16</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1002/ijc.v131.2</pub-id>
</citation>
</ref>
<ref id="B25">
<label>25</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname> <given-names>GH</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>Z</given-names>
</name>
<name>
<surname>Ji</surname> <given-names>MF</given-names>
</name>
<name>
<surname>Pfeiffer</surname> <given-names>RM</given-names>
</name>
<name>
<surname>Huang</surname> <given-names>QH</given-names>
</name>
<name>
<surname>Lu</surname> <given-names>YQ</given-names>
</name>
<etal/>
</person-group>. <article-title>Prospective assessment of a nasopharyngeal carcinoma risk score in a population undergoing screening</article-title>. <source>Int J Cancer</source>. (<year>2020</year>) <volume>148</volume>(<issue>10</issue>):<page-range>2398&#x2013;406</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1002/ijc.v148.10</pub-id>
</citation>
</ref>
<ref id="B26">
<label>26</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Heckerman</surname> <given-names>D</given-names>
</name>
<name>
<surname>Geiger</surname> <given-names>D</given-names>
</name>
<name>
<surname>Chickering</surname> <given-names>DM</given-names>
</name>
</person-group>. <article-title>Learning Bayesian networks: The combination of knowledge and statistical data</article-title>. <source>Mach Learn</source>. (<year>1995</year>) <volume>20</volume>:<fpage>197</fpage>&#x2013;<lpage>243</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/BF00994016</pub-id>
</citation>
</ref>
<ref id="B27">
<label>27</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yu</surname> <given-names>J</given-names>
</name>
<name>
<surname>Smith</surname> <given-names>VA</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>PP</given-names>
</name>
<name>
<surname>Hartemink</surname> <given-names>AJ</given-names>
</name>
<name>
<surname>Jarvis</surname> <given-names>ED</given-names>
</name>
</person-group>. <article-title>Advances to Bayesian network inference for generating causal networks from observational biological data</article-title>. <source>Bioinformatics</source>. (<year>2004</year>) <volume>20</volume>:<page-range>3594&#x2013;603</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/bioinformatics/bth448</pub-id>
</citation>
</ref>
<ref id="B28">
<label>28</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Videla Rodriguez</surname> <given-names>EA</given-names>
</name>
<name>
<surname>Mitchell</surname> <given-names>JBO</given-names>
</name>
<name>
<surname>Smith</surname> <given-names>VA</given-names>
</name>
</person-group>. <article-title>A Bayesian network structure learning approach to identify genes associated with stress in spleens of chickens</article-title>. <source>Sci Rep</source>. (<year>2022</year>) <volume>12</volume>:<fpage>7482</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41598-022-11633-7</pub-id>
</citation>
</ref>
<ref id="B29">
<label>29</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Glover</surname> <given-names>F</given-names>
</name>
</person-group>. <article-title>Future paths for integer programming and links to artificial intelligence</article-title>. <source>Comput Oper Res</source>. (<year>1986</year>) <volume>13</volume>:<page-range>533&#x2013;49</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/0305-0548(86)90048-1</pub-id>
</citation>
</ref>
<ref id="B30">
<label>30</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tsamardinos</surname> <given-names>I</given-names>
</name>
<name>
<surname>Brown</surname> <given-names>LE</given-names>
</name>
<name>
<surname>Aliferis</surname> <given-names>CF</given-names>
</name>
</person-group>. <article-title>The max-min hill-climbing Bayesian network structure learning algorithm</article-title>. <source>Mach Learn</source>. (<year>2006</year>) <volume>65</volume>:<fpage>31</fpage>&#x2013;<lpage>78</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s10994-006-6889-7</pub-id>
</citation>
</ref>
<ref id="B31">
<label>31</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Scutari</surname> <given-names>M</given-names>
</name>
<name>
<surname>Denis</surname> <given-names>JB</given-names>
</name>
</person-group>. <article-title>Bayesian networks with examples in R</article-title>. (<year>2014</year>). doi:&#xa0;<pub-id pub-id-type="doi">10.1201/b17065</pub-id>
</citation>
</ref>
<ref id="B32">
<label>32</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kopacheva</surname> <given-names>E</given-names>
</name>
</person-group>. <article-title>Predicting online participation through Bayesian network analysis</article-title>. <source>PLoS One</source>. (<year>2021</year>) <volume>16</volume>:<elocation-id>e0261663</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1371/journal.pone.0261663</pub-id>
</citation>
</ref>
<ref id="B33">
<label>33</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rosseel</surname> <given-names>Y</given-names>
</name>
</person-group>. <article-title>lavaan: an R package for structural equation modeling</article-title>. <source>J Stat Softw</source>. (<year>2012</year>) <volume>48</volume>:<fpage>1</fpage>&#x2013;<lpage>36</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.18637/jss.v048.i02</pub-id>
</citation>
</ref>
<ref id="B34">
<label>34</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Scutari</surname> <given-names>M</given-names>
</name>
</person-group>. <article-title>Learning Bayesian networks with the bnlearn R package</article-title>. <source>J Stat Softw</source>. (<year>2010</year>) <volume>35</volume>(<issue>3</issue>):<page-range>1&#x2013;22</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.18637/jss.v035.i03</pub-id></citation>
</ref>
<ref id="B35">
<label>35</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xu</surname> <given-names>FH</given-names>
</name>
<name>
<surname>Xiong</surname> <given-names>D</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>YF</given-names>
</name>
<name>
<surname>Cao</surname> <given-names>SM</given-names>
</name>
<name>
<surname>Xue</surname> <given-names>WQ</given-names>
</name>
<name>
<surname>Qin</surname> <given-names>HD</given-names>
</name>
<etal/>
</person-group>. <article-title>An epidemiological and molecular study of the relationship between smoking, risk of nasopharyngeal carcinoma, and Epstein-Barr virus activation</article-title>. <source>J Natl Cancer Inst</source>. (<year>2012</year>) <volume>104</volume>:<page-range>1396&#x2013;410</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/jnci/djs320</pub-id>
</citation>
</ref>
<ref id="B36">
<label>36</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>He</surname> <given-names>YQ</given-names>
</name>
<name>
<surname>Xue</surname> <given-names>WQ</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>FH</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>YF</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>JB</given-names>
</name>
<name>
<surname>Yu</surname> <given-names>HL</given-names>
</name>
<etal/>
</person-group>. <article-title>The relationship between environmental factors and the profile of Epstein-Barr virus antibodies in the lytic and latent infection periods in healthy populations from endemic and non-endemic nasopharyngeal carcinoma areas in China</article-title>. <source>EBioMedicine</source>. (<year>2018</year>) <volume>30</volume>:<page-range>184&#x2013;91</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.ebiom.2018.02.019</pub-id>
</citation>
</ref>
<ref id="B37">
<label>37</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shi</surname> <given-names>D</given-names>
</name>
<name>
<surname>Guo</surname> <given-names>W</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>W</given-names>
</name>
<name>
<surname>Fu</surname> <given-names>L</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>J</given-names>
</name>
<name>
<surname>Tian</surname> <given-names>Y</given-names>
</name>
<etal/>
</person-group>. <article-title>Nicotine promotes proliferation of human nasopharyngeal carcinoma cells by regulating &#x3b1;7AChR, ERK, HIF-1&#x3b1; and VEGF/PEDF signaling</article-title>. <source>PLoS One</source>. (<year>2012</year>) <volume>7</volume>:<elocation-id>e43898</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1371/journal.pone.0043898</pub-id>
</citation>
</ref>
<ref id="B38">
<label>38</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hu</surname> <given-names>T</given-names>
</name>
<name>
<surname>Lin</surname> <given-names>CY</given-names>
</name>
<name>
<surname>Xie</surname> <given-names>SH</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>GH</given-names>
</name>
<name>
<surname>Lu</surname> <given-names>YQ</given-names>
</name>
<name>
<surname>Ling</surname> <given-names>W</given-names>
</name>
<etal/>
</person-group>. <article-title>Smoking can increase nasopharyngeal carcinoma risk by repeatedly reactivating Epstein-Barr Virus: An analysis of a prospective study in southern China</article-title>. <source>Cancer Med</source>. (<year>2019</year>) <volume>8</volume>:<page-range>2561&#x2013;71</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1002/cam4.2019.8.issue-5</pub-id>
</citation>
</ref>
<ref id="B39">
<label>39</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname> <given-names>M</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>WJ</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>J</given-names>
</name>
<name>
<surname>Charvat</surname> <given-names>H</given-names>
</name>
<name>
<surname>Xie</surname> <given-names>SH</given-names>
</name>
<name>
<surname>Li</surname> <given-names>T</given-names>
</name>
<etal/>
</person-group>. <article-title>Association between solid fuel use and seropositivity against Epstein-Barr virus in a high-risk area for nasopharyngeal carcinoma</article-title>. <source>Environ Pollut</source>. (<year>2022</year>) <volume>304</volume>:<fpage>119184</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.envpol.2022.119184</pub-id>
</citation>
</ref>
<ref id="B40">
<label>40</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yao</surname> <given-names>Y</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>D</given-names>
</name>
<name>
<surname>Ma</surname> <given-names>H</given-names>
</name>
<name>
<surname>Li</surname> <given-names>C</given-names>
</name>
<name>
<surname>Chang</surname> <given-names>X</given-names>
</name>
<name>
<surname>Low</surname> <given-names>P</given-names>
</name>
<etal/>
</person-group>. <article-title>The impact on T-regulatory cell related immune responses in rural women exposed to polycyclic aromatic hydrocarbons (PAHs) in household air pollution in Gansu, China: A pilot investigation</article-title>. <source>Environ Res</source>. (<year>2019</year>) <volume>173</volume>:<page-range>306&#x2013;17</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.envres.2019.03.053</pub-id>
</citation>
</ref>
<ref id="B41">
<label>41</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>JJ</given-names>
</name>
<name>
<surname>Smith</surname> <given-names>KR</given-names>
</name>
</person-group>. <article-title>Household air pollution from coal and biomass fuels in China: measurements, health impacts, and interventions</article-title>. <source>Environ Health Perspect</source>. (<year>2007</year>) <volume>115</volume>:<page-range>848&#x2013;55</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1289/ehp.9479</pub-id>
</citation>
</ref>
<ref id="B42">
<label>42</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gordon</surname> <given-names>SB</given-names>
</name>
<name>
<surname>Bruce</surname> <given-names>NG</given-names>
</name>
<name>
<surname>Grigg</surname> <given-names>J</given-names>
</name>
<name>
<surname>Hibberd</surname> <given-names>PL</given-names>
</name>
<name>
<surname>Kurmi</surname> <given-names>OP</given-names>
</name>
<name>
<surname>Lam</surname> <given-names>KB</given-names>
</name>
<etal/>
</person-group>. <article-title>Respiratory risks from household air pollution in low and middle income countries</article-title>. <source>Lancet Respir Med</source>. (<year>2014</year>) <volume>2</volume>:<page-range>823&#x2013;60</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/S2213-2600(14)70168-7</pub-id>
</citation>
</ref>
<ref id="B43">
<label>43</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hawley</surname> <given-names>B</given-names>
</name>
<name>
<surname>Volckens</surname> <given-names>J</given-names>
</name>
</person-group>. <article-title>Proinflammatory effects of cookstove emissions on human bronchial epithelial cells</article-title>. <source>Indoor Air</source>. (<year>2013</year>) <volume>23</volume>:<fpage>4</fpage>&#x2013;<lpage>13</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/j.1600-0668.2012.00790.x</pub-id>
</citation>
</ref>
<ref id="B44">
<label>44</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname> <given-names>YP</given-names>
</name>
<name>
<surname>Chan</surname> <given-names>ATC</given-names>
</name>
<name>
<surname>Le</surname> <given-names>QT</given-names>
</name>
<name>
<surname>Blanchard</surname> <given-names>P</given-names>
</name>
<name>
<surname>Sun</surname> <given-names>Y</given-names>
</name>
<name>
<surname>Ma</surname> <given-names>J</given-names>
</name>
</person-group>. <article-title>Nasopharyngeal carcinoma</article-title>. <source>Lancet</source>. (<year>2019</year>) <volume>394</volume>:<fpage>64</fpage>&#x2013;<lpage>80</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/S0140-6736(19)30956-0</pub-id>
</citation>
</ref>
<ref id="B45">
<label>45</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chan</surname> <given-names>KCA</given-names>
</name>
<name>
<surname>Woo</surname> <given-names>JKS</given-names>
</name>
<name>
<surname>King</surname> <given-names>A</given-names>
</name>
<name>
<surname>Zee</surname> <given-names>BCY</given-names>
</name>
<name>
<surname>Lam</surname> <given-names>WKJ</given-names>
</name>
<name>
<surname>Chan</surname> <given-names>SL</given-names>
</name>
<etal/>
</person-group>. <article-title>Analysis of plasma Epstein-Barr virus DNA to screen for nasopharyngeal cancer</article-title>. <source>N Engl J Med</source>. (<year>2017</year>) <volume>377</volume>:<page-range>513&#x2013;22</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1056/NEJMoa1701717</pub-id>
</citation>
</ref>
<ref id="B46">
<label>46</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kanda</surname> <given-names>T</given-names>
</name>
<name>
<surname>Yajima</surname> <given-names>M</given-names>
</name>
<name>
<surname>Ikuta</surname> <given-names>K</given-names>
</name>
</person-group>. <article-title>Epstein-Barr virus strain variation and cancer</article-title>. <source>Cancer Sci</source>. (<year>2019</year>) <volume>110</volume>:<page-range>1132&#x2013;9</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/cas.2019.110.issue-4</pub-id>
</citation>
</ref>
<ref id="B47">
<label>47</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname> <given-names>T</given-names>
</name>
<name>
<surname>Li</surname> <given-names>F</given-names>
</name>
<name>
<surname>Guo</surname> <given-names>X</given-names>
</name>
<name>
<surname>Hong</surname> <given-names>C</given-names>
</name>
<name>
<surname>Yu</surname> <given-names>X</given-names>
</name>
<name>
<surname>Wu</surname> <given-names>B</given-names>
</name>
<etal/>
</person-group>. <article-title>Anti-Epstein-Barr virus BNLF2b for mass screening for nasopharyngeal cancer</article-title>. <source>N Engl J Med</source>. (<year>2023</year>) <volume>389</volume>:<page-range>808&#x2013;19</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1056/NEJMoa2301496</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>