<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.3" xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Genet.</journal-id>
<journal-title>Frontiers in Genetics</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Genet.</abbrev-journal-title>
<issn pub-type="epub">1664-8021</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1087294</article-id>
<article-id pub-id-type="doi">10.3389/fgene.2022.1087294</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Genetics</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>A disease-related essential protein prediction model based on the transfer neural network</article-title>
<alt-title alt-title-type="left-running-head">Chen et al.</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fgene.2022.1087294">10.3389/fgene.2022.1087294</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Chen</surname>
<given-names>Sisi</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Huang</surname>
<given-names>Chiguo</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Wang</surname>
<given-names>Lei</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/664933/overview"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Zhou</surname>
<given-names>Shunxian</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>The First Hospital of Hunan University of Chinese Medicine</institution>, <addr-line>Changsha</addr-line>, <addr-line>Hunan</addr-line>, <country>China</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Big Data Innovation and Entrepreneurship Education Center of Hunan Province</institution>, <institution>Changsha University</institution>, <addr-line>Changsha</addr-line>, <country>China</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>College of Information Science and Engineering</institution>, <institution>Hunan Women&#x2019;s University</institution>, <addr-line>Changsha</addr-line>, <addr-line>Hunan</addr-line>, <country>China</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/153577/overview">Marco S Nobile</ext-link>, Ca&#x2019; Foscari University of Venice, Italy</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/773277/overview">Yuhua Yao</ext-link>, Hainan Normal University, China</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2096530/overview">Chaoyang Pang</ext-link>, Sichuan Normal University, China</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Chiguo Huang, <email>2394181557@qq.com</email>; Lei Wang, <email>wanglei@xtu.edu.cn</email>; Shunxian Zhou, <email>zxs_hd@hnu.edu.cn</email>
</corresp>
<fn fn-type="other">
<p>This article was submitted to Computational Genomics, a section of the journal Frontiers in Genetics</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>04</day>
<month>01</month>
<year>2023</year>
</pub-date>
<pub-date pub-type="collection">
<year>2022</year>
</pub-date>
<volume>13</volume>
<elocation-id>1087294</elocation-id>
<history>
<date date-type="received">
<day>02</day>
<month>11</month>
<year>2022</year>
</date>
<date date-type="accepted">
<day>14</day>
<month>12</month>
<year>2022</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2023 Chen, Huang, Wang and Zhou.</copyright-statement>
<copyright-year>2023</copyright-year>
<copyright-holder>Chen, Huang, Wang and Zhou</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>Essential proteins play important roles in the development and survival of organisms whose mutations are proven to be the drivers of common internal diseases having higher prevalence rates. Due to high costs of traditional biological experiments, an improved Transfer Neural Network (TNN) was designed to extract raw features from multiple biological information of proteins first, and then, based on the newly-constructed Transfer Neural Network, a novel computational model called TNNM was designed to infer essential proteins in this paper. Different from traditional Markov chain, since Transfer Neural Network adopted the gradient descent algorithm to automatically obtain the transition probability matrix, the prediction accuracy of TNNM was greatly improved. Moreover, additional antecedent memory coefficient and bias term were introduced in Transfer Neural Network, which further enhanced both the robustness and the non-linear expression ability of TNNM as well. Finally, in order to evaluate the identification performance of TNNM, intensive experiments have been executed based on two well-known public databases separately, and experimental results show that TNNM can achieve better performance than representative state-of-the-art prediction models in terms of both predictive accuracies and decline rate of accuracies. Therefore, TNNM may play an important role in key protein prediction in the future.</p>
</abstract>
<kwd-group>
<kwd>essential protein</kwd>
<kwd>prediction model</kwd>
<kwd>transfer neural network</kwd>
<kwd>biological information</kwd>
<kwd>internal disease</kwd>
<kwd>protein-protein interaction network</kwd>
</kwd-group>
<contract-num rid="cn001">62272064 61873221</contract-num>
<contract-sponsor id="cn001">National Natural Science Foundation of China<named-content content-type="fundref-id">10.13039/501100001809</named-content>
</contract-sponsor>
</article-meta>
</front>
<body>
<sec id="s1">
<title>1 Introduction</title>
<p>Essential proteins are proteins whose removal leads to cell death or infertility (<xref ref-type="bibr" rid="B1">Acencio and Lemke, 2009</xref>). Identification of essential proteins can help us understand the basic requirements for maintaining life forms. However, considering that it is very costly and time-consuming to identify essential proteins by adopting traditional biological experimental techniques such as gene knock-out (<xref ref-type="bibr" rid="B24">Maliackal et al., 2005</xref>), RNA interference (<xref ref-type="bibr" rid="B12">Jeong et al., 2001</xref>) and conditional knockout (<xref ref-type="bibr" rid="B10">Hahn and Kern, 2005</xref>), more and more computational models have been proposed to infer essential proteins in recent years based on the topological characteristics of Protein-Protein Interaction (PPI) networks, in which proteins are the vertices of PPI networks, while the interactions between proteins constitute the edges. Studies have demonstrated that the topological position of a protein in the protein network has a strong correlation with whether the protein is critical (<xref ref-type="bibr" rid="B12">Jeong et al., 2001</xref>; <xref ref-type="bibr" rid="B26">Maslov and Sneppen, 2002</xref>; <xref ref-type="bibr" rid="B30">Pr&#x17e;ulj et al., 2004</xref>). Based on the topological characteristics of proteins in PPI networks, a series of essential protein recognition methods have been proposed. 
For instance, based on the centrality of PPI networks (<xref ref-type="bibr" rid="B34">Stephenson and Zelen, 1989</xref>; <xref ref-type="bibr" rid="B12">Jeong et al., 2001</xref>), different computational models including CC (Closeness Centrality) (<xref ref-type="bibr" rid="B33">Stefan and Stadler, 2003</xref>), DC (Degree Centrality) (<xref ref-type="bibr" rid="B10">Hahn and Kern, 2005</xref>), BC (Betweenness Centrality) (<xref ref-type="bibr" rid="B37">Wang et al., 2012</xref>), SC (Subgraph Centrality) (<xref ref-type="bibr" rid="B24">Maliackal et al., 2005</xref>) and NC (Neighbor Centrality) (<xref ref-type="bibr" rid="B37">Wang et al., 2012</xref>) have been proposed in succession. In addition, Li M et al. designed a recognition model named LAC (<xref ref-type="bibr" rid="B20">Li et al., 2011</xref>) to detect essential proteins (<xref ref-type="bibr" rid="B19">Li et al., 2015</xref>) based on the local average connectivity of protein nodes in the PPI network. Qi Yi et al. (<xref ref-type="bibr" rid="B31">Qi and Luo, 2016</xref>) introduced a prediction model based on the local interaction density (LID) of protein nodes in the PPI network to infer essential proteins. Chen B et al. (<xref ref-type="bibr" rid="B4">Chen and Wu, 2013</xref>) proposed an essential protein recognition method based on multiple topological features of the PPI network. All of the above methods consider only the topological characteristics of the PPI network when identifying essential proteins; however, since PPI networks contain a large amount of noisy data, the predictive accuracy of these methods is not very satisfactory.</p>
<p>In order to break through the inherent limitations of existing PPI data, in the past few years, people proposed novel models by combining the topological characteristics of PPI networks with biological information of proteins. For example, M Li et al. and Xiwei Tang et al. put forward prediction models called PeC (<xref ref-type="bibr" rid="B21">Li et al., 2012</xref>) and WDC (<xref ref-type="bibr" rid="B35">Tang et al., 2014</xref>) by integrating PPI network and gene expression data of proteins respectively. W Peng et al. designed a prediction model (<xref ref-type="bibr" rid="B29">Peng et al., 2012</xref>) by integrating protein homology information with PPI networks, and a prediction model (<xref ref-type="bibr" rid="B28">Peng et al., 2015</xref>) through combining protein domain information with PPI network, simultaneously. X Zhang et al. (<xref ref-type="bibr" rid="B41">Zhang et al., 2013</xref>) introduced a recognition method called CoEWC by merging topological characteristics of the PPI network with the co-expression characteristics of proteins. BH Zhao et al. designed a prediction model named POEM (<xref ref-type="bibr" rid="B45">Zhao et al., 2014</xref>) by combining gene expression data of proteins with the topological characteristics of PPI networks. J Luo et al. proposed an identification method based on local interaction density of PPI networks and biological characteristics of protein complexes (<xref ref-type="bibr" rid="B13">Jiawei et al., 2015</xref>). Seketoulie Keretsu et al. presented a protein complex recognition model (<xref ref-type="bibr" rid="B19">Li et al., 2015</xref>) based on clustering weighted edges and gene expression profile of proteins. M Li et al. 
designed two essential protein recognition methods by combining PPI networks with subcellular location information and complex centrality of proteins respectively (<xref ref-type="bibr" rid="B15">Keretsu and Sarmah, 2016</xref>; <xref ref-type="bibr" rid="B18">Li et al., 2017</xref>; <xref ref-type="bibr" rid="B5">Chen et al., 2020</xref>). J Luo et al. introduced a prediction method ECC (edge clustering coefficient) based on the complex co expression data of proteins and PPI networks (<xref ref-type="bibr" rid="B23">Luo and Wu, 2015</xref>). Bihai Zhao et al. proposed a model based on multiple biological networks (<xref ref-type="bibr" rid="B43">Zhao et al., 2020a</xref>) and a model based on diffusion distance network (<xref ref-type="bibr" rid="B42">Zhao et al., 2020b</xref>) to predict essential proteins respectively. S. Li et al. designed an iterative method called CVIM (<xref ref-type="bibr" rid="B22">Li et al., 2020</xref>) based on topological and functional characteristics of proteins to predict key proteins. Lei X et al. proposed a necessary protein prediction method AFSOEP (<xref ref-type="bibr" rid="B16">Lei et al., 2018</xref>) to infer protein complexes through AFSO (Artificial Fish Swarm Optimization). BH Zhao et al. designed an iterative method to identify potential essential proteins (<xref ref-type="bibr" rid="B44">Zhao et al., 2019</xref>) based on heterogeneous PPI networks. Dai W et al. proposed a method to discover key genes based on protein-protein interaction network embedding (<xref ref-type="bibr" rid="B7">Dai et al., 2020</xref>). Fengyu Zhang et al. predicted the key gene (<xref ref-type="bibr" rid="B39">Zhang et al., 2019</xref>) by fusing the dynamic PPI network. Chen Z et al. 
proposed an essential protein prediction model NPRI based on heterogeneous network, and established heterogeneous protein domain network (<xref ref-type="bibr" rid="B5">Chen et al., 2020</xref>) according to initial PPI network, protein domain network and gene expression data.</p>
<p>All these above methods show that the identification accuracy of models can be significantly improved by combining biological information of proteins with topological features of PPI networks. However, through analyzing results achieved by these existing methods, it is not difficult to find that the prediction accuracies of these algorithms decline fast with the increasing of predicted essential proteins. Hence, inspired by recognition models based on the Markov chain and the Transfer algorithm, we designed a new neural network called TNN in this manuscript, based on which, a novel model named TNNM was proposed to predict essential proteins. TNN can be divided into three parts, namely, probability transfer matrix, antecedent output and bias term. In addition, in order to evaluate the performance of TNNM, we compared it with existing representative models such as IC (<xref ref-type="bibr" rid="B34">Stephenson and Zelen, 1989</xref>), DC (<xref ref-type="bibr" rid="B10">Hahn and Kern, 2005</xref>), SC (<xref ref-type="bibr" rid="B24">Maliackal et al., 2005</xref>), NC (<xref ref-type="bibr" rid="B37">Wang et al., 2012</xref>), PeC (<xref ref-type="bibr" rid="B21">Li et al., 2012</xref>), ION (<xref ref-type="bibr" rid="B29">Peng et al., 2012</xref>), CoEWC (<xref ref-type="bibr" rid="B41">Zhang et al., 2013</xref>), POEM (<xref ref-type="bibr" rid="B45">Zhao et al., 2014</xref>), CVIM (<xref ref-type="bibr" rid="B22">Li et al., 2020</xref>), NPRI (<xref ref-type="bibr" rid="B5">Chen et al., 2020</xref>) and RWHN (<xref ref-type="bibr" rid="B45">Zhao et al., 2014</xref>) separately. Experimental results show that TNNM is far superior to these traditional models in terms of both predictive accuracy and decline rate of accuracy.</p>
<p>The rest of this paper is organized as follows: The experimental data and specific steps are described in Section 2. In Section 3, the influence of parameters and comparison with other methods are shown. Section 4 describes the shortcomings of the model and future improvement goals. Finally, a summary is made in Section 5.</p>
</sec>
<sec sec-type="materials|methods" id="s2">
<title>2 Method and materials</title>
<p>The flow chart of TNNM is shown in <xref ref-type="fig" rid="F1">Figure 1</xref>. Through observing <xref ref-type="fig" rid="F1">Figure 1</xref>, it is easy to see that TNNM consists of the following three major parts. Firstly, based on prior knowledge, topological features and biological features of each protein will be extracted from PPI networks, gene expression data, subcellular localization and ortholog data of proteins separately. And then, the Transfer Neural Network (TNN) will be designed. Finally, through adopting TNN, the prediction model TNNM will be constructed to infer essential proteins based on these newly extracted features.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>Flowchart of TNNM.</p>
</caption>
<graphic xlink:href="fgene-13-1087294-g001.tif"/>
</fig>
<sec id="s2-1">
<title>2.1 Experimental data</title>
<p>In order to evaluate the prediction performance of TNNM, during experiments, we first downloaded known PPIs from different benchmark databases such as DIP 2010 (<xref ref-type="bibr" rid="B38">Xenarios et al., 2002</xref>) and Gavin (<xref ref-type="bibr" rid="B9">Gavin et al., 2006</xref>) respectively. After preprocessing, a dataset containing 5093 proteins and 24,743 known PPIs was finally obtained from the DIP2010 database, and a dataset containing 1855 proteins and 7669 known PPIs was obtained from the Gavin database. In addition, based on databases including MIPS (<xref ref-type="bibr" rid="B27">Mewes et al., 2006</xref>), SGD (<xref ref-type="bibr" rid="B6">Cherry et al., 1998</xref>), DEG (<xref ref-type="bibr" rid="B40">Zhang and Lin, 2009</xref>) and SGDP (<xref ref-type="bibr" rid="B32">StanfordMedicine, 2012</xref>), a benchmark dataset containing 1285 essential proteins was constructed, based on which, 1167 and 714 essential proteins were screened from the DIP2010 and Gavin databases respectively. Moreover, based on the dataset provided by Tu BP et al. (<xref ref-type="bibr" rid="B36">Tu et al., 2005</xref>), a dataset containing the gene expression data of 6776 proteins was obtained, which consists of the gene expression level data of proteins in the continuous metabolic cycle. Simultaneously, the homologous information of proteins was downloaded from the Inparanoid database (seventh edition), including paired comparison (<xref ref-type="bibr" rid="B8">Gabriel et al., 2010</xref>) between 100 whole genomes, and the number of times that proteins have homologous information in the reference organism. 
Finally, we downloaded the dataset containing subcellular localization information of proteins from the COMPARTMENTS database (<xref ref-type="bibr" rid="B2">Binder et al., 2014</xref>) (2014 version), and retained only 11 types of subcellular localization data closely related to essential proteins, such as cytoplasm, cytoskeleton, Golgi apparatus, cytoplasm, vacuoles, mitochondria, endosomes, plasma, nucleus, peroxisomes and extracellular enzymes, etc.</p>
<p>Based on the above newly-downloaded datasets, firstly, we constructed an original <inline-formula id="inf1">
<mml:math id="m1">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mi>P</mml:mi>
<mml:mi>I</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> network. And then, through combining with the existing complex network topological features including degree centrality, closeness centrality, node betweenness centrality and edge betweenness centrality, some new important protein topological features are extracted from the <inline-formula id="inf2">
<mml:math id="m2">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mi>P</mml:mi>
<mml:mi>I</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> network, including the degree of contact between the protein node and the neighborhood nodes, the importance of the protein node relative to the total distance, and the importance of the protein node relative to the carrying capacity. Simultaneously, we further extract some biological features for proteins, including the importance of the protein node relative to the Pearson correlation coefficient, the importance of the protein node relative to the subcellular locations, and the importance of the protein node relative to the homologous information, from the multiple types of biological information contained in the above newly-downloaded datasets.</p>
</sec>
<sec id="s2-2">
<title>2.2 Topological feature extraction</title>
<p>Let the undirected graph <inline-formula id="inf3">
<mml:math id="m3">
<mml:mrow>
<mml:mi>G</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>V</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>E</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> represent the original <inline-formula id="inf4">
<mml:math id="m4">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mi>P</mml:mi>
<mml:mi>I</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> network formed by a dataset downloaded from any given base database, <inline-formula id="inf5">
<mml:math id="m5">
<mml:mrow>
<mml:mi>V</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="{" close="}" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mi>N</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> denote the set of different proteins in the downloaded dataset, then, for any two given proteins <inline-formula id="inf6">
<mml:math id="m6">
<mml:mrow>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf7">
<mml:math id="m7">
<mml:mrow>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> in <italic>V</italic>, we define that there is an edge <inline-formula id="inf8">
<mml:math id="m8">
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> between <inline-formula id="inf9">
<mml:math id="m9">
<mml:mrow>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf10">
<mml:math id="m10">
<mml:mrow>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, if and only if there is a known interaction between them. And for convenience, we define that <inline-formula id="inf11">
<mml:math id="m11">
<mml:mrow>
<mml:mi>E</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="{" close="}" separators="|">
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x7c;</mml:mo>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:mi>V</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> represents the set of edges in <italic>G</italic>. Hence, we can obtain the adjacency matrix <inline-formula id="inf12">
<mml:math id="m12">
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> corresponding to <inline-formula id="inf13">
<mml:math id="m13">
<mml:mrow>
<mml:mi>G</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> as follows: if there is <inline-formula id="inf14">
<mml:math id="m14">
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2208;</mml:mo>
<mml:mi>E</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, then there is <inline-formula id="inf15">
<mml:math id="m15">
<mml:mrow>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>, otherwise there is <inline-formula id="inf16">
<mml:math id="m16">
<mml:mrow>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
<p>For any given protein <inline-formula id="inf17">
<mml:math id="m17">
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mi>V</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> in <italic>G</italic>, let <inline-formula id="inf18">
<mml:math id="m18">
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> be the set of neighboring nodes of <italic>p</italic>, then we have:<disp-formula id="e1">
<mml:math id="m19">
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="{" close="}" separators="|">
<mml:mrow>
<mml:mi>q</mml:mi>
<mml:mo>&#x7c;</mml:mo>
<mml:mi>q</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mi>V</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>q</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2208;</mml:mo>
<mml:mi>E</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(1)</label>
</disp-formula>
</p>
<p>Based on <xref ref-type="disp-formula" rid="e1">formula (1)</xref> above, we define the degree of contact between <inline-formula id="inf19">
<mml:math id="m20">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> and its neighboring nodes as follows:<disp-formula id="e2">
<mml:math id="m21">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>F</mml:mi>
</mml:mrow>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>q</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mi>N</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:msub>
<mml:mi>T</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>s</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>q</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="|" close="|" separators="|">
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(2)</label>
</disp-formula>Here, <inline-formula id="inf20">
<mml:math id="m22">
<mml:mrow>
<mml:mfenced open="|" close="|" separators="|">
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
</inline-formula> represents the number of elements in <inline-formula id="inf21">
<mml:math id="m23">
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, and <inline-formula id="inf22">
<mml:math id="m24">
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>s</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>q</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> denotes the number of common neighbors of <italic>p</italic> and <italic>q</italic>, which can be calculated as follows:<disp-formula id="e3">
<mml:math id="m25">
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>s</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>q</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="{" close="" separators="|">
<mml:mrow>
<mml:mtable columnalign="center">
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mfenced open="|" close="|" separators="|">
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2229;</mml:mo>
<mml:mi>N</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>q</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mi>min</mml:mi>
<mml:mo>&#x2061;</mml:mo>
<mml:mrow>
<mml:mfenced open="{" close="}" separators="|">
<mml:mrow>
<mml:mrow>
<mml:mfenced open="|" close="|" separators="|">
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>,</mml:mo>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mrow>
<mml:mfenced open="|" close="|" separators="|">
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>q</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfrac>
<mml:mo>,</mml:mo>
<mml:mtext>&#x2002;</mml:mtext>
<mml:mi>p</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mi>N</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>q</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>,</mml:mo>
<mml:mi>q</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mi>N</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mtext>&#x2002;</mml:mtext>
<mml:mi>o</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>h</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>w</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>e</mml:mi>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(3)</label>
</disp-formula>Here, &#x7c;<italic>N</italic>(<italic>p</italic>)&#x2229;<italic>N</italic>(<italic>q</italic>)&#x7c; represents the number of elements in <italic>N</italic>(<italic>p</italic>)&#x2229;<italic>N</italic>(<italic>q</italic>).</p>
<p>It is reasonable to assume that the smaller the total distance between a protein and all other proteins, the more important the protein will be. Hence, let <italic>l</italic> (<italic>p</italic>, <italic>q</italic>) denote the length of the shortest path from protein <italic>p</italic> to protein <italic>q</italic> in <italic>G</italic>; if there is no path between <italic>p</italic> and <italic>q</italic> in <italic>G</italic>, then we define the length of the shortest path between <italic>p</italic> and <italic>q</italic> to be a constant number <italic>N</italic> (&#x3e;1). Therefore, we can calculate the importance of <italic>p</italic> related to the total distance as follows:<disp-formula id="e4">
<mml:math id="m26">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>F</mml:mi>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>q</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mi>V</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mi>l</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>q</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(4)</label>
</disp-formula>
</p>
<p>Moreover, it is also reasonable to assume that the more important a protein <italic>p</italic> is, the more shortest paths between other proteins will pass through <italic>p</italic>. This indicator reflects the carrying capacity of <italic>p</italic> between other nodes in <italic>G</italic>. It is obvious that the larger the value, the greater the impact of <italic>p</italic> in the network, which also means that <italic>p</italic> will be more important. Hence, we can calculate the importance of <italic>p</italic> related to the carrying capacity as follows:<disp-formula id="e5">
<mml:math id="m27">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>F</mml:mi>
</mml:mrow>
<mml:mn>3</mml:mn>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>&#x2260;</mml:mo>
<mml:mi>q</mml:mi>
<mml:mo>&#x2260;</mml:mo>
<mml:msup>
<mml:mi>q</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
<mml:mo>&#x2208;</mml:mo>
<mml:mi>V</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mi>k</mml:mi>
<mml:mrow>
<mml:mi>q</mml:mi>
<mml:msup>
<mml:mi>q</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:msub>
<mml:mi>k</mml:mi>
<mml:mrow>
<mml:mi>q</mml:mi>
<mml:msup>
<mml:mi>q</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
</mml:mrow>
</mml:msub>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(5)</label>
</disp-formula>Here, <italic>k</italic>
<sub>
<italic>qq&#x2032;</italic>
</sub> represents the number of shortest paths between <italic>q</italic> and <italic>q&#x2032;</italic> in <italic>G</italic>, and <italic>k</italic>
<sub>
<italic>qq&#x2032;</italic>
</sub>(<italic>p</italic>) denotes the number of shortest paths between <italic>q</italic> and <italic>q&#x2032;</italic> in <italic>G</italic> that pass through <italic>p</italic>.</p>
</sec>
<sec id="s2-3">
<title>2.3 Biological feature extraction</title>
<p>Let <italic>ge</italic> (<italic>p</italic>, <italic>t</italic>) represent the gene expression value of the protein <italic>p</italic> at the time point <italic>t</italic>, <inline-formula id="inf23">
<mml:math id="m28">
<mml:mrow>
<mml:mi>g</mml:mi>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> denote the average expression level of <italic>p</italic> at all <italic>n</italic> time points, and <inline-formula id="inf24">
<mml:math id="m29">
<mml:mrow>
<mml:mi>&#x3c3;</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> be the standard deviation of the gene expression level of <italic>p</italic> at all <italic>n</italic> time points, then we can calculate the Pearson correlation coefficient between <italic>p</italic> and <italic>q</italic> as follows:<disp-formula id="e6">
<mml:math id="m30">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mi>C</mml:mi>
<mml:mi>C</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>q</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfrac>
<mml:mrow>
<mml:munderover>
<mml:mstyle displaystyle="true">
<mml:mo>&#x2211;</mml:mo>
</mml:mstyle>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>n</mml:mi>
</mml:munderover>
<mml:mrow>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mi>g</mml:mi>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>g</mml:mi>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mtext>&#x2009;</mml:mtext>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3c3;</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mi>g</mml:mi>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>q</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>g</mml:mi>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>q</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mtext>&#x2009;</mml:mtext>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3c3;</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>q</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(6)</label>
</disp-formula>
</p>
<p>Based on above <xref ref-type="disp-formula" rid="e6">formula (6)</xref>, we can calculate the importance of <italic>p</italic> related to the Pearson correlation coefficient as follows:<disp-formula id="e7">
<mml:math id="m31">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>B</mml:mi>
<mml:mi>F</mml:mi>
</mml:mrow>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>q</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mi>N</mml:mi>
<mml:mi>g</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>C</mml:mi>
<mml:mi>C</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>q</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(7)</label>
</disp-formula>
</p>
<p>It is reasonable to assume that essential proteins tend to be connected rather than independent. Therefore, we can believe that proteins closely related to essential proteins are more likely to be essential proteins. Thus, we can obtain another importance indicator of <italic>p</italic> as follows:<disp-formula id="e8">
<mml:math id="m32">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>B</mml:mi>
<mml:mi>F</mml:mi>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>q</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mi>N</mml:mi>
<mml:mi>g</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:msub>
<mml:mi>B</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>b</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>q</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="|" close="|" separators="|">
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mi>g</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(8)</label>
</disp-formula>Where <inline-formula id="inf25">
<mml:math id="m33">
<mml:mrow>
<mml:mi>B</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>b</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>q</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> can be obtained as follows:<disp-formula id="e9">
<mml:math id="m34">
<mml:mrow>
<mml:mi>B</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>b</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>q</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mfenced open="|" close="|" separators="|">
<mml:mrow>
<mml:mi>S</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>b</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2229;</mml:mo>
<mml:mi>S</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>b</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>q</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mfenced open="|" close="|" separators="|">
<mml:mrow>
<mml:mi>S</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>b</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x222a;</mml:mo>
<mml:mi>S</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>b</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>q</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(9)</label>
</disp-formula>Here, <italic>Sub</italic>(<italic>p</italic>) represents the set of subcellular locations of the protein <italic>p</italic>, <italic>&#x7c;Sub</italic>(<italic>p</italic>)<italic>&#x2229;Sub</italic>(<italic>q</italic>)<italic>&#x7c;</italic> denotes the number of elements in <italic>Sub</italic>(<italic>p</italic>)<italic>&#x2229;Sub</italic>(<italic>q</italic>), and <italic>&#x7c;Sub</italic>(<italic>p</italic>)&#x222a;<italic>Sub</italic>(<italic>q</italic>)<italic>&#x7c;</italic> is the number of elements in <italic>Sub</italic>(<italic>p</italic>)&#x222a;<italic>Sub</italic>(<italic>q</italic>).</p>
<p>Moreover, based on the reasonable assumption that the evolution of essential proteins is more conservative than that of non-essential proteins, and considering that the homologous information of proteins can objectively reflect the degree of evolutionary conservatism of proteins, let <inline-formula id="inf26">
<mml:math id="m35">
<mml:mrow>
<mml:mi>O</mml:mi>
<mml:mi>s</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> denote the value of homologous score of <italic>p</italic>, then it is obvious that the higher the value of <inline-formula id="inf27">
<mml:math id="m36">
<mml:mrow>
<mml:mi>O</mml:mi>
<mml:mi>s</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, the more conservative the evolution of <italic>p</italic> will be, i.e., the more important the protein <italic>p</italic> will be. Thus, we can calculate the importance of <italic>p</italic> related to the homologous information as follows:<disp-formula id="e10">
<mml:math id="m37">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>B</mml:mi>
<mml:mi>F</mml:mi>
</mml:mrow>
<mml:mn>3</mml:mn>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>O</mml:mi>
<mml:mi>s</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:munder>
<mml:mi>max</mml:mi>
<mml:mrow>
<mml:mi>q</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mi>V</mml:mi>
</mml:mrow>
</mml:munder>
<mml:mo>&#x2061;</mml:mo>
<mml:mrow>
<mml:mfenced open="{" close="}" separators="|">
<mml:mrow>
<mml:mi>O</mml:mi>
<mml:mi>s</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>q</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(10)</label>
</disp-formula>
</p>
</sec>
<sec id="s2-4">
<title>2.4 Construction of the TNN</title>
<p>A Markov chain is a stochastic process, whose characteristic can be summarized as &#x201c;the future depends on the past only through the present&#x201d;, that is, the probability distribution of the next state can only be determined by the current state, and the events before it in the time series are independent of it. In a Markov chain, let <inline-formula id="inf28">
<mml:math id="m38">
<mml:mrow>
<mml:msup>
<mml:mi>T</mml:mi>
<mml:mi>n</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> denote the state space at time step <italic>n</italic>, and <inline-formula id="inf29">
<mml:math id="m39">
<mml:mrow>
<mml:mi>Q</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> represent the transition probability matrix, then there is:<disp-formula id="e11">
<mml:math id="m40">
<mml:mrow>
<mml:msup>
<mml:mi>T</mml:mi>
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>Q</mml:mi>
<mml:msup>
<mml:mi>T</mml:mi>
<mml:mi>n</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
<label>(11)</label>
</disp-formula>
</p>
<p>Owing to their strong predictive ability, Markov chains have been widely used in natural language processing, multivariate factor analysis, time series prediction and other fields. Inspired by the idea of Markov chains, in this manuscript, we designed a novel Transfer Neural Network called TNN, whose purpose is to learn inherent feature representations from input data just like a Markov chain. In TNN, we introduced three main parameters: the transition probability matrix <italic>W</italic>, the antecedent memory coefficient &#x3b1; with a value between 0 and 1, and a bias term <italic>b</italic>. Let <inline-formula id="inf30">
<mml:math id="m41">
<mml:mrow>
<mml:msup>
<mml:mi>X</mml:mi>
<mml:mi>i</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> denote the input data of the <italic>i</italic>th layer in TNN, then similar to the principle of Markov chains, we define its output <inline-formula id="inf31">
<mml:math id="m42">
<mml:mrow>
<mml:msup>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> as follows:<disp-formula id="e12">
<mml:math id="m43">
<mml:mrow>
<mml:msup>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>&#x3b1;</mml:mi>
<mml:mo>&#x2a;</mml:mo>
<mml:mi>W</mml:mi>
<mml:mo>&#x2a;</mml:mo>
<mml:msup>
<mml:mi>X</mml:mi>
<mml:mi>i</mml:mi>
</mml:msup>
<mml:mo>&#x2b;</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>&#x3b1;</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2a;</mml:mo>
<mml:msup>
<mml:mi>X</mml:mi>
<mml:mi>i</mml:mi>
</mml:msup>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>b</mml:mi>
</mml:mrow>
</mml:math>
<label>(12)</label>
</disp-formula>
</p>
<p>In the training process, TNN will adopt the gradient descent algorithm to optimize all parameters including <italic>W</italic>, &#x3b1; and <italic>b</italic> in above Eq. <xref ref-type="disp-formula" rid="e12">12</xref>, and can automatically find a set of optimal values for all these parameters. Thereafter, in the comparative experiments, through a series of complex calculations performed by itself and previous layers in TNNM based on these optimized parameters, TNN is able to assign larger weight values to more important features of proteins, and extract the most important features of proteins from the input data of TNNM, thus achieving satisfactory feature enhancement.</p>
</sec>
<sec id="s2-5">
<title>2.5 Construction of TNNM</title>
<p>Firstly, as illustrated in <xref ref-type="fig" rid="F1">Figure 1</xref>, let <inline-formula id="inf32">
<mml:math id="m44">
<mml:mrow>
<mml:msup>
<mml:mi>X</mml:mi>
<mml:mn>0</mml:mn>
</mml:msup>
<mml:mo>&#x3d;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:msup>
<mml:mi>X</mml:mi>
<mml:mn>0</mml:mn>
</mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>,</mml:mo>
<mml:mtext>&#x2009;</mml:mtext>
<mml:msup>
<mml:mi>X</mml:mi>
<mml:mn>0</mml:mn>
</mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mi>X</mml:mi>
<mml:mn>0</mml:mn>
</mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mi>N</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mi>T</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> denote the input data of the input layer in TNNM, then for any given protein <inline-formula id="inf33">
<mml:math id="m45">
<mml:mrow>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:mi>V</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, there is:<disp-formula id="e13">
<mml:math id="m46">
<mml:mrow>
<mml:msup>
<mml:mi>X</mml:mi>
<mml:mn>0</mml:mn>
</mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mo>&#x3c;</mml:mo>
<mml:mtable columnalign="center">
<mml:mtr>
<mml:mtd>
<mml:mtable columnalign="center">
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>F</mml:mi>
</mml:mrow>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mtd>
<mml:mtd>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>F</mml:mi>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mtd>
<mml:mtd>
<mml:mtable columnalign="center">
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>F</mml:mi>
</mml:mrow>
<mml:mn>3</mml:mn>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mtd>
<mml:mtd>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>B</mml:mi>
<mml:mi>F</mml:mi>
</mml:mrow>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mtd>
<mml:mtd>
<mml:mtable columnalign="center">
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>B</mml:mi>
<mml:mi>F</mml:mi>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mtd>
<mml:mtd>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>B</mml:mi>
<mml:mi>F</mml:mi>
</mml:mrow>
<mml:mn>3</mml:mn>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mtd>
</mml:mtr>
</mml:mtable>
<mml:mo>&#x3e;</mml:mo>
</mml:mrow>
</mml:math>
<label>(13)</label>
</disp-formula>
</p>
<p>Secondly, considering that <inline-formula id="inf34">
<mml:math id="m47">
<mml:mrow>
<mml:msup>
<mml:mi>X</mml:mi>
<mml:mn>0</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> is a <inline-formula id="inf35">
<mml:math id="m48">
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mn>6</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> dimensional matrix, during experiments, we set the input and output dimensions of the first Linear layer in TNNM as 6 and 8, respectively.</p>
<p>Thirdly, in the ReLU layer of TNNM, we adopt the following activation function:<disp-formula id="e14">
<mml:math id="m49">
<mml:mrow>
<mml:msubsup>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mi>i</mml:mi>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="{" close="" separators="|">
<mml:mrow>
<mml:mtable columnalign="center">
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:msubsup>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mo>:</mml:mo>
<mml:mi>i</mml:mi>
<mml:mi>f</mml:mi>
<mml:mtext>&#x2009;</mml:mtext>
<mml:msubsup>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x3e;</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mo>:</mml:mo>
<mml:mtext>&#x2003;</mml:mtext>
<mml:mi>o</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>h</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>w</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>e</mml:mi>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(14)</label>
</disp-formula>Here, <inline-formula id="inf36">
<mml:math id="m50">
<mml:mrow>
<mml:msubsup>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mi>i</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> denotes the element in the <italic>j</italic>th row and <italic>k</italic>th column of <inline-formula id="inf37">
<mml:math id="m51">
<mml:mrow>
<mml:msup>
<mml:mi>X</mml:mi>
<mml:mi>i</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>. And <inline-formula id="inf38">
<mml:math id="m52">
<mml:mrow>
<mml:msup>
<mml:mi>X</mml:mi>
<mml:mi>i</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf39">
<mml:math id="m53">
<mml:mrow>
<mml:msup>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> represent the output and input data of the ReLU layer, respectively.</p>
<p>Moreover, in order to alleviate overfitting and reduce the training time of TNNM, we introduced two Dropout layers before and after the TNN layer. When each round of samples is inputted into TNNM for training, a probability <italic>p</italic> will be set in the Dropout layer so that each neuron will participate in training with the probability 1-<italic>p</italic>, that is, each neuron has a probability <italic>p</italic> of being dropped. During experiments, we set <italic>p</italic> to 0.7 in this manuscript.</p>
<p>Next, in the TNN layer, it is obvious that its input data is an <inline-formula id="inf40">
<mml:math id="m54">
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mn>8</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> dimensional matrix, and for each protein, an 8-dimensional feature vector will be extracted by TNN as its output. Hence, in the second Linear layer of TNNM, we will set its input and output dimensions as 8 and 6, respectively.</p>
<p>Finally, in order to estimate the criticality of proteins, we will set the input and output dimensions of the last Linear layer in TNNM as 6 and 1, respectively, that is, TNNM will output 0 or 1 as its final predicted score.</p>
<p>Especially, in each Linear layer of TNNM, we will adopt the following Linear function:<disp-formula id="e15">
<mml:math id="m55">
<mml:mrow>
<mml:msup>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mo>&#x3d;</mml:mo>
<mml:msup>
<mml:mi>X</mml:mi>
<mml:mi>i</mml:mi>
</mml:msup>
<mml:msup>
<mml:mi>W</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>b</mml:mi>
</mml:mrow>
</mml:math>
<label>(15)</label>
</disp-formula>Here, <inline-formula id="inf41">
<mml:math id="m56">
<mml:mrow>
<mml:msup>
<mml:mi>W</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> is a matrix with <italic>m</italic> rows and <italic>n</italic> columns, where <italic>m</italic> and <italic>n</italic> denote the dimensions of input and output data of the Linear layer respectively. For instance, it is obvious that in these three Linear layers of TNNM, the dimensions of matrix <inline-formula id="inf42">
<mml:math id="m57">
<mml:mrow>
<mml:msup>
<mml:mi>W</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> will be 6 <inline-formula id="inf43">
<mml:math id="m58">
<mml:mrow>
<mml:mo>&#xd7;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> 8, 8 <inline-formula id="inf44">
<mml:math id="m59">
<mml:mrow>
<mml:mo>&#xd7;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> 6 and 6 <inline-formula id="inf45">
<mml:math id="m60">
<mml:mrow>
<mml:mo>&#xd7;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> 1 respectively. And additionally, <inline-formula id="inf46">
<mml:math id="m61">
<mml:mrow>
<mml:msup>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf47">
<mml:math id="m62">
<mml:mrow>
<mml:msup>
<mml:mi>X</mml:mi>
<mml:mi>i</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> represent the output and input data of the Linear layer, respectively.</p>
</sec>
<sec id="s2-6">
<title>2.6 Identification algorithm based on TNNM</title>
<p>Based on above description, we can present the identification algorithm based on TNNM as follows:</p>
<p>Step1: Based on the datasets of known PPIs downloaded from well-known public databases, constructing the original <inline-formula id="inf48">
<mml:math id="m63">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mi>P</mml:mi>
<mml:mi>I</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> network <inline-formula id="inf49">
<mml:math id="m64">
<mml:mrow>
<mml:mi>G</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> and the corresponding adjacency matrix <inline-formula id="inf50">
<mml:math id="m65">
<mml:mrow>
<mml:mi>A</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
<p>Step2: According to Eqs. <xref ref-type="disp-formula" rid="e2">2</xref>, <xref ref-type="disp-formula" rid="e4">4</xref> and <xref ref-type="disp-formula" rid="e5">5</xref>, extracting three kinds of important topological features for proteins from <inline-formula id="inf51">
<mml:math id="m66">
<mml:mrow>
<mml:mi>G</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> respectively.</p>
<p>Step3: According to Eqs. <xref ref-type="disp-formula" rid="e7">7</xref>, <xref ref-type="disp-formula" rid="e8">8</xref> and <xref ref-type="disp-formula" rid="e10">10</xref>, extracting three kinds of important biological features for proteins separately.</p>
<p>Step4: According to methods proposed in section 2.4 and section 2.5, constructing the TNN based identification model TNNM first, and then, obtaining the predicted criticality scores for proteins through taking the matrix <inline-formula id="inf52">
<mml:math id="m67">
<mml:mrow>
<mml:msup>
<mml:mi>X</mml:mi>
<mml:mn>0</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> computed by Eq. <xref ref-type="disp-formula" rid="e13">13</xref> as the input data of TNNM.</p>
</sec>
</sec>
<sec id="s3">
<title>3 Experimental results and analysis</title>
<p>During experiments, we will first divide the dataset of downloaded known PPI data into <italic>K</italic> subsets of proteins with the same size and proportion according to the proportion of essential proteins and non-essential proteins. And then, the <italic>K</italic>-fold cross validation will be adopted to evaluate the prediction performance of TNNM in this section.</p>
<sec id="s3-1">
<title>3.1 Value selection of the parameter <italic>K</italic>
</title>
<p>According to known results (<xref ref-type="bibr" rid="B14">Jung, 2017</xref>), the parameter <italic>K</italic> shall satisfy <italic>K</italic> &#x2248; log(<italic>N</italic>) and <italic>N</italic>/<italic>K</italic> &#x3e; 3<italic>d</italic>, where <italic>d</italic> is the number of extracted features. Hence, we can obtain the possible values of <italic>K</italic> as shown in the following <xref ref-type="table" rid="T1">Table 1</xref>.</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>Values of <italic>K</italic> in different database.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">PPI database</th>
<th align="left">N</th>
<th align="left">3&#xa0;days</th>
<th align="left">K</th>
<th align="left">N/K</th>
<th align="left">N/K &#x3e; 3&#xa0;days</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">DIP2010</td>
<td align="left">5093</td>
<td align="left">18</td>
<td align="left">9</td>
<td align="left">565.89</td>
<td align="left">True</td>
</tr>
<tr>
<td align="left">Gavin</td>
<td align="left">1855</td>
<td align="left">18</td>
<td align="left">8</td>
<td align="left">231.88</td>
<td align="left">True</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>From <xref ref-type="table" rid="T1">Table 1</xref> above, it is easy to see that the value of <italic>K</italic> shall be nine on the DIP2010 database and eight on the Gavin database.</p>
</fn>
</table-wrap-foot>
</table-wrap>
</sec>
<sec id="s3-2">
<title>3.2 Comparison with representative methods</title>
<p>In this section, TNNM will be compared with 11 advanced competitive methods based on the DIP2010 database. <xref ref-type="fig" rid="F2">Figure 2</xref> shows the comparison results of the numbers of real essential proteins identified by TNNM and these 11 recognition methods based on the DIP2010 database. During experiments, proteins will first be sorted in descending order according to the predicted scores calculated by each competing method, such as DC, IC, NC, SC, Pec, POEM, CoEWC, ION, CVIM, NPRI, RWHN and TNNM. And then, we will select the top 1%, 5%, 10%, 15%, 20% and 25% proteins as candidate essential proteins. Finally, by comparing with the downloaded dataset of known essential proteins, the number of real essential proteins in the candidate essential proteins identified by each method will be calculated, and used to compare and evaluate the recognition ability of essential proteins of different methods.</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>Comparison between TNNM and 11 competitive methods based on the DIP2010 database. <bold>(A)</bold> The number of key proteins in the top 1% candidate proteins. <bold>(B)</bold> The number of key proteins in the top 5% candidate proteins. <bold>(C)</bold> The number of key proteins in the top 10% candidate proteins. <bold>(D)</bold> The number of key proteins in the top 15% candidate proteins. <bold>(E)</bold> The number of key proteins in the top 20% candidate proteins. <bold>(F)</bold> The number of key proteins in the top 25% candidate proteins. In the above panels, the number in parentheses represents the number of proteins in each interval.</p>
</caption>
<graphic xlink:href="fgene-13-1087294-g002.tif"/>
</fig>
<p>From observing <xref ref-type="fig" rid="F2">Figure 2</xref>, it is easy to see that TNNM significantly outperforms all these competitive state-of-the-art prediction methods based on the experimental results on the DIP2010 database. In particular, among the top 1%, top 5%, and top 10% candidate key proteins, TNNM can achieve recognition accuracies of 96.07%, 99.21%, and 97.45% respectively, which are all higher than 96%. Besides, among the top 15% and 20% candidate key proteins, the recognition accuracy rates of TNNM are all higher than 94%. Even for the top 25% candidate proteins, TNNM can maintain the accuracy rate above 85%.</p>
</sec>
<sec id="s3-3">
<title>3.3 Evaluation based on the jackknife curve</title>
<p>In this section, the Jackknife method (<xref ref-type="bibr" rid="B11">Holman et al., 2009</xref>) will be used, based on the top 1000 candidate essential proteins predicted on the DIP2010 database by TNNM and 11 competitive methods, to compare their performance in identifying essential proteins. Comparison results are shown in <xref ref-type="fig" rid="F3">Figure 3</xref>.</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>Comparison results between TNNM and 11 competitive methods based on the jackknife method and the DIP2010 database. <bold>(A)</bold> Shows comparison results between BC, CC, DC, IC, NC, SC and TNNM, and <bold>(B)</bold> shows comparison results between Pec, CoEWC, POEM, ION, CVIM and TNNM. The <italic>X</italic>-axis represents the number of potential essential proteins predicted by each method, while the <italic>Y</italic>-axis represents the cumulative count of real essential proteins.</p>
</caption>
<graphic xlink:href="fgene-13-1087294-g003.tif"/>
</fig>
<p>From <xref ref-type="fig" rid="F3">Figure 3A</xref> and <xref ref-type="fig" rid="F3">Figure 3B</xref>, it can be seen that with the increase in the number of predicted proteins, the gap in terms of essential protein recognition performance between TNNM and these competitive methods will grow wider and wider, which means that the prediction performance of TNNM is much better than that of these 11 competitive methods.</p>
</sec>
<sec id="s3-4">
<title>3.4 Comparison between TNN and representative neural networks</title>
<p>In order to verify the contribution of TNN to TNNM, we will compare TNN with six commonly used neural networks in this section based on the DIP2010 database, and comparison results are illustrated in <xref ref-type="fig" rid="F4">Figure 4</xref>. During experiments, in TNNM, the TNN layer will be replaced by competitive neural networks such as Linear, CNN, RNN, GRU, LSTM and Transformer in turn. And then, the top 1%, 5%, 10%, 15%, 20% and 25% predicted proteins will be compared with the downloaded dataset of known essential proteins. Finally, the number of real essential proteins in the candidate essential proteins identified by each method will be calculated, and used to compare and evaluate the recognition ability of essential proteins of different methods.</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>Performance comparison of TNNM by adopting TNN and six representative neural networks based on the DIP2010 database. <bold>(A)</bold> The number of essential proteins in the top 1% candidate proteins. <bold>(B)</bold> The number of essential proteins in the top 5% candidate proteins. <bold>(C)</bold> The number of essential proteins in the top 10% candidate proteins. <bold>(D)</bold> The number of essential proteins in the top 15% candidate proteins. <bold>(E)</bold> The number of essential proteins in the top 20% candidate proteins. <bold>(F)</bold> The number of essential proteins in the top 25% candidate proteins. In above figures, the number in parentheses represents the number of proteins in each interval.</p>
</caption>
<graphic xlink:href="fgene-13-1087294-g004.tif"/>
</fig>
<p>From <xref ref-type="fig" rid="F4">Figure 4</xref>, it is easy to see that if the TNN in TNNM is replaced by Linear, CNN, RNN, GRU, LSTM or Transformer, the prediction performance of TNNM will turn to be poorer, which reflects that TNN plays a positive role in the prediction performance of TNNM.</p>
</sec>
<sec id="s3-5">
<title>3.5 Recognition performance based on the Gavin database</title>
<p>To prove the universal applicability of TNNM, in this section, we further compared TNNM with 11 competitive recognition methods based on the Gavin database, and illustrated comparison results in <xref ref-type="fig" rid="F5">Figure 5</xref>.</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>Performance comparison between TNNM and 11 competitive algorithms based on the Gavin database. <bold>(A)</bold> The number of essential proteins in the top 1% candidate proteins. <bold>(B)</bold> The number of essential proteins in the top 5% candidate proteins. <bold>(C)</bold> The number of essential proteins in the top 10% candidate proteins. <bold>(D)</bold> The number of essential proteins in the top 15% candidate proteins. <bold>(E)</bold> The number of essential proteins in the top 20% candidate proteins. <bold>(F)</bold> The number of essential proteins in the top 25% candidate proteins. In above figures, the number in parentheses represents the number of proteins in each interval.</p>
</caption>
<graphic xlink:href="fgene-13-1087294-g005.tif"/>
</fig>
<p>From <xref ref-type="fig" rid="F5">Figure 5</xref>, it is obvious that the recognition performance of TNNM is significantly superior to all these 11 competing methods. Especially, among the top 1%, top 5%, and top 10% candidate essential proteins, TNNM can achieve accuracies of 94.73%, 98.92%, and 96.77% respectively, which are all higher than 94%. Besides, among the top 15% and 20% candidate essential proteins, the recognition accuracies of TNNM are higher than 86% as well. Even in the top 25% candidate proteins, TNNM can also maintain its accuracy rate above 85%. Hence, we can say that TNNM has much better universal applicability than all these competitive methods.</p>
</sec>
</sec>
<sec sec-type="conclusion" id="s4">
<title>4 Conclusion</title>
<p>In this manuscript, a novel prediction model named TNNM was designed to identify essential proteins, and through intensive experiments, we demonstrated that TNNM outperformed various advanced algorithms in terms of both prediction accuracies and decline rate of accuracies. The major contributions of TNNM include: 1) we designed a new Transfer Neural Network (TNN), which can extract raw features from multiple biological information of proteins efficiently. 2) we introduced a TNN layer into the prediction model TNNM, which can not only improve the prediction accuracy of TNNM, but also enhance both the robustness and the non-linear expression ability of TNNM. Intensive experiments have demonstrated that TNNM can achieve satisfactory prediction accuracy in different databases, and simultaneously, TNN plays an irreplaceable positive role in TNNM as well.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s5">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/<xref ref-type="sec" rid="s10">Supplementary Material</xref>, further inquiries can be directed to the corresponding authors.</p>
</sec>
<sec id="s6">
<title>Author contributions</title>
<p>SC and CH produced the main ideas, and did the modeling, computation and analysis and also wrote the manuscript. LW and SZ provided supervision and effective scientific advice and related ideas, research design guidance, and added value to the article through editing and contributing completions. All authors contributed to the article and approved the submitted version.</p>
</sec>
<sec id="s7">
<title>Funding</title>
<p>This work was partly sponsored by the National Natural Science Foundation of China (No. 62272064, No. 61873221) and the Key Project of Changsha Science and Technology Plan (No. KQ2203001).</p>
</sec>
<ack>
<p>The authors thank the anonymous referees for suggestions that helped improve the paper substantially.</p>
</ack>
<sec sec-type="COI-statement" id="s8">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s9">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec id="s10">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fgene.2022.1087294/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fgene.2022.1087294/full&#x23;supplementary-material</ext-link>
</p>
<supplementary-material xlink:href="DataSheet2.PDF" id="SM1" mimetype="application/PDF" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="DataSheet4.PDF" id="SM2" mimetype="application/PDF" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="DataSheet3.PDF" id="SM3" mimetype="application/PDF" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="DataSheet1.PDF" id="SM4" mimetype="application/PDF" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="DataSheet5.PDF" id="SM5" mimetype="application/PDF" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Table1.XLSX" id="SM6" mimetype="application/XLSX" xmlns:xlink="http://www.w3.org/1999/xlink"/>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Acencio</surname>
<given-names>M. L.</given-names>
</name>
<name>
<surname>Lemke</surname>
<given-names>N.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>Towards the prediction of essential genes by integration of network topology, cellular localization and biological process information</article-title>. <source>BMC Bioinforma.</source> <volume>10</volume> (<issue>1</issue>), <fpage>290</fpage>&#x2013;<lpage>307</lpage>. <pub-id pub-id-type="doi">10.1186/1471-2105-10-290</pub-id>
</citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Binder</surname>
<given-names>J. X.</given-names>
</name>
<name>
<surname>Pletscher-Frankild</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Tsafou</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Stolte</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>O&#x27;Donoghue</surname>
<given-names>S. I.</given-names>
</name>
<name>
<surname>Schneider</surname>
<given-names>R.</given-names>
</name>
<etal/>
</person-group> (<year>2014</year>). <article-title>Compartments: Unification and visualization of protein subcellular localization evidence</article-title>. <source>Database</source> <volume>2014</volume>, <fpage>bau012</fpage>. <pub-id pub-id-type="doi">10.1093/database/bau012</pub-id>
</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>F. X.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Identifying protein complexes based on multiple topological structures in PPI networks</article-title>. <source>IEEE Trans. Nanobiosci.</source> <volume>12</volume> (<issue>3</issue>), <fpage>165</fpage>&#x2013;<lpage>172</lpage>. <pub-id pub-id-type="doi">10.1109/TNB.2013.2264097</pub-id>
</citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Meng</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Kuang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Pei</surname>
<given-names>T.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>A novel model for predicting essential proteins based on heterogeneous protein-domain network</article-title>. <source>IEEE Access</source> <volume>8</volume> (<issue>99</issue>), <fpage>8946</fpage>&#x2013;<lpage>8958</lpage>. <pub-id pub-id-type="doi">10.1109/access.2020.2964571</pub-id>
</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cherry</surname>
<given-names>J. M.</given-names>
</name>
<name>
<surname>Adler</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Ball</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Chervitz</surname>
<given-names>S. A.</given-names>
</name>
<name>
<surname>Dwight</surname>
<given-names>S. S.</given-names>
</name>
<name>
<surname>Hester</surname>
<given-names>E. T.</given-names>
</name>
<etal/>
</person-group> (<year>1998</year>). <article-title>SGD: Saccharomyces genome database</article-title>. <source>Nucleic Acids Res.</source> <volume>26</volume>, <fpage>73</fpage>&#x2013;<lpage>79</lpage>. <pub-id pub-id-type="doi">10.1093/nar/26.1.73</pub-id>
</citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dai</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Chang</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Peng</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Zhong</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Network embedding the protein-protein interaction network for human essential genes identification</article-title>. <source>Genes</source> <volume>11</volume> (<issue>2</issue>), <fpage>153</fpage>. <pub-id pub-id-type="doi">10.3390/genes11020153</pub-id>
</citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gabriel</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Thomas</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Kristoffer</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Kostler</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Messina</surname>
<given-names>D. N.</given-names>
</name>
<name>
<surname>Roopra</surname>
<given-names>S.</given-names>
</name>
<etal/>
</person-group> (<year>2010</year>). <article-title>InParanoid 7: New algorithms and tools for eukaryotic orthology analysis</article-title>. <source>Nucleic Acids Res.</source> <volume>38</volume>, <fpage>D196</fpage>&#x2013;<lpage>D203</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkp931</pub-id>
</citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gavin</surname>
<given-names>A. C.</given-names>
</name>
<name>
<surname>Aloy</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Grandi</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Krause</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Boesche</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Marzioch</surname>
<given-names>M.</given-names>
</name>
<etal/>
</person-group> (<year>2006</year>). <article-title>Proteome survey reveals modularity of the yeast cell machinery</article-title>. <source>Nature</source> <volume>440</volume> (<issue>7084</issue>), <fpage>631</fpage>&#x2013;<lpage>636</lpage>. <pub-id pub-id-type="doi">10.1038/nature04532</pub-id>
</citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hahn</surname>
<given-names>M. W.</given-names>
</name>
<name>
<surname>Kern</surname>
<given-names>A. D.</given-names>
</name>
</person-group> (<year>2005</year>). <article-title>Comparative genomics of centrality and essentiality in three eukaryotic protein-interaction networks</article-title>. <source>Mol. Biol. Evol.</source> <volume>22</volume> (<issue>4</issue>), <fpage>803</fpage>&#x2013;<lpage>806</lpage>. <pub-id pub-id-type="doi">10.1093/molbev/msi072</pub-id>
</citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Holman</surname>
<given-names>A. G.</given-names>
</name>
<name>
<surname>Davis</surname>
<given-names>P. J.</given-names>
</name>
<name>
<surname>Foster</surname>
<given-names>J. M.</given-names>
</name>
<name>
<surname>Carlow</surname>
<given-names>C. K. S.</given-names>
</name>
<name>
<surname>Kumar</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>Computational prediction of essential genes in an unculturable endosymbiotic bacterium, Wolbachia of <italic>Brugia malayi</italic>
</article-title>. <source>BMC Microbiol.</source> <volume>9</volume> (<issue>1</issue>), <fpage>243</fpage>. <pub-id pub-id-type="doi">10.1186/1471-2180-9-243</pub-id>
</citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jeong</surname>
<given-names>H. M.</given-names>
</name>
<name>
<surname>Mason</surname>
<given-names>S. P.</given-names>
</name>
<name>
<surname>Barab&#xe1;si</surname>
<given-names>A. L.</given-names>
</name>
<name>
<surname>Oltvai</surname>
<given-names>Z. N.</given-names>
</name>
</person-group> (<year>2001</year>). <article-title>Lethality and centrality in protein networks</article-title>. <source>Nature</source> <volume>411</volume> (<issue>6833</issue>), <fpage>41</fpage>&#x2013;<lpage>42</lpage>. <pub-id pub-id-type="doi">10.1038/35075138</pub-id>
</citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jiawei</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Yi</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Peter</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Identification of essential proteins based on a new combination of local interaction density and protein complexes</article-title>. <source>Plos One</source> <volume>10</volume> (<issue>6</issue>), <fpage>e0131418</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pone.0131418</pub-id>
</citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jung</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Multiple predicting K -fold cross-validation for model selection</article-title>. <source>J. Nonparametric Statistics</source> <volume>30</volume>, <fpage>197</fpage>&#x2013;<lpage>215</lpage>. <pub-id pub-id-type="doi">10.1080/10485252.2017.1404598</pub-id>
</citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Keretsu</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Sarmah</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Weighted edge based clustering to identify protein complexes in protein protein interaction networks incorporating gene expression profile</article-title>. <source>Comput. Biol. Chem.</source> <volume>65</volume>, <fpage>69</fpage>&#x2013;<lpage>79</lpage>. <pub-id pub-id-type="doi">10.1016/j.compbiolchem.2016.10.001</pub-id>
</citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lei</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Artificial fish swarm optimization based method to identify essential proteins</article-title>. <source>IEEE/ACM Trans. Comput. Biol. Bioinform.</source> <volume>17</volume>, <fpage>495</fpage>&#x2013;<lpage>505</lpage>. <pub-id pub-id-type="doi">10.1109/TCBB.2018.2865567</pub-id>
</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Lu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Niu</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>F. X.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>United complex centrality for identification of essential proteins from PPI networks</article-title>. <source>IEEE/ACM Trans. Comput. Biol. Bioinform.</source> <volume>14</volume> (<issue>2</issue>), <fpage>370</fpage>&#x2013;<lpage>380</lpage>. <pub-id pub-id-type="doi">10.1109/TCBB.2015.2394487</pub-id>
</citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Lu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>F. X.</given-names>
</name>
<name>
<surname>Pan</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>A topology potential-based method for identifying essential proteins from PPI networks</article-title>. <source>IEEE/ACM Trans. Comput. Biol. Bioinform.</source> <volume>12</volume> (<issue>2</issue>), <fpage>372</fpage>&#x2013;<lpage>383</lpage>. <pub-id pub-id-type="doi">10.1109/TCBB.2014.2361350</pub-id>
</citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>J. X.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Pan</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>A local average connectivity-based method for identifying essential proteins from the network level</article-title>. <source>Comput. Biol. Chem.</source> <volume>35</volume> (<issue>3</issue>), <fpage>143</fpage>&#x2013;<lpage>150</lpage>. <pub-id pub-id-type="doi">10.1016/j.compbiolchem.2011.04.002</pub-id>
</citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>J. X.</given-names>
</name>
<name>
<surname>Pan</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>A new essential protein discovery method based on the integration of protein-protein interaction and gene expression data</article-title>. <source>BMC Syst. Biol.</source> <volume>6</volume> (<issue>1</issue>), <fpage>15</fpage>. <pub-id pub-id-type="doi">10.1186/1752-0509-6-15</pub-id>
</citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>He</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Pei</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Tan</surname>
<given-names>Y.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>An iteration method for identifying yeast essential proteins from weighted PPI network based on topological and functional features of proteins</article-title>. <source>IEEE Access</source> <volume>8</volume>, <fpage>90792</fpage>&#x2013;<lpage>90804</lpage>. <pub-id pub-id-type="doi">10.1109/access.2020.2993860</pub-id>
</citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Luo</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>A new algorithm for essential proteins identification based on the integration of protein complex co-expression information and edge clustering coefficient</article-title>. <source>Int. J. Data Min. Bioinform</source> <volume>12</volume> (<issue>3</issue>), <fpage>257</fpage>&#x2013;<lpage>274</lpage>. <pub-id pub-id-type="doi">10.1504/ijdmb.2015.069654</pub-id>
</citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Maliackal</surname>
<given-names>P. J.</given-names>
</name>
<name>
<surname>Amy</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Donald</surname>
<given-names>E. I.</given-names>
</name>
<name>
<surname>Sui</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2005</year>). <article-title>High-Betweenness proteins in the yeast protein interaction network</article-title>. <source>J. Biomed. Biotechnol.</source> <volume>2005</volume> (<issue>2</issue>), <fpage>96</fpage>&#x2013;<lpage>103</lpage>. <pub-id pub-id-type="doi">10.1155/JBB.2005.96</pub-id>
</citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Maslov</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Sneppen</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2002</year>). <article-title>Specificity and stability in topology of protein networks</article-title>. <source>Science</source> <volume>296</volume>, <fpage>910</fpage>&#x2013;<lpage>913</lpage>. <pub-id pub-id-type="doi">10.1126/science.1065103</pub-id>
</citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mewes</surname>
<given-names>H. W.</given-names>
</name>
<name>
<surname>Frishman</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Mayer</surname>
<given-names>K. F. X.</given-names>
</name>
<name>
<surname>Munsterkotter</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Noubibou</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Pagel</surname>
<given-names>P.</given-names>
</name>
<etal/>
</person-group> (<year>2006</year>). <article-title>MIPS: Analysis and annotation of proteins from whole genomes in 2005</article-title>. <source>Nucleic Acids Res.</source> <volume>34</volume>, <fpage>D169</fpage>&#x2013;<lpage>D172</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkj148</pub-id>
</citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Peng</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>J. X.</given-names>
</name>
<name>
<surname>Cheng</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Lu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Pan</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>UDoNC: An algorithm for identifying essential proteins based on protein domains and protein-protein interaction networks</article-title>. <source>IEEE/ACM Trans Comput. Biol. Bioinforma.</source> <volume>12</volume> (<issue>2</issue>), <fpage>276</fpage>&#x2013;<lpage>288</lpage>. <pub-id pub-id-type="doi">10.1109/TCBB.2014.2338317</pub-id>
</citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Peng</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>J. X.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>F. X.</given-names>
</name>
<name>
<surname>Pan</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Iteration method for predicting essential proteins based on orthology and protein-protein interaction networks</article-title>. <source>BMC Syst. Biol.</source> <volume>6</volume> (<issue>1</issue>), <fpage>87</fpage>. <pub-id pub-id-type="doi">10.1186/1752-0509-6-87</pub-id>
</citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pr&#x17e;ulj</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Wigle</surname>
<given-names>D. A.</given-names>
</name>
<name>
<surname>Jurisica</surname>
<given-names>I.</given-names>
</name>
</person-group> (<year>2004</year>). <article-title>Functional topology in a network of protein interactions</article-title>. <source>Bioinformatics</source> <volume>20</volume> (<issue>3</issue>), <fpage>340</fpage>&#x2013;<lpage>348</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btg415</pub-id>
</citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Qi</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Luo</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Prediction of essential proteins based on local interaction density</article-title>. <source>IEEE/ACM Trans Comput. Biol. Bioinforma.</source> <volume>13</volume> (<issue>6</issue>), <fpage>1170</fpage>&#x2013;<lpage>1182</lpage>. <pub-id pub-id-type="doi">10.1109/TCBB.2015.2509989</pub-id>
</citation>
</ref>
<ref id="B32">
<citation citation-type="web">
<collab>StanfordMedicine</collab> (<year>2012</year>). <article-title>Saccharomyces genome deletion project</article-title>. <comment>Available at: <ext-link ext-link-type="uri" xlink:href="http://yeastdeletion.stanford.edu/">http://yeastdeletion.stanford.edu/</ext-link> </comment><comment>(Accessed June 20, 2012)</comment>.</citation>
</ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Stefan</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Stadler</surname>
<given-names>P. F.</given-names>
</name>
</person-group> (<year>2003</year>). <article-title>Centers of complex networks</article-title>. <source>J. Theor. Biol.</source> <volume>223</volume> (<issue>1</issue>), <fpage>45</fpage>&#x2013;<lpage>53</lpage>. <pub-id pub-id-type="doi">10.1016/s0022-5193(03)00071-7</pub-id>
</citation>
</ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Stephenson</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Zelen</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>1989</year>). <article-title>Rethinking centrality: Methods and examples</article-title>. <source>Soc. Netw.</source> <volume>11</volume>, <fpage>1</fpage>&#x2013;<lpage>37</lpage>. <pub-id pub-id-type="doi">10.1016/0378-8733(89)90016-6</pub-id>
</citation>
</ref>
<ref id="B35">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Zhong</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Pan</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Predicting essential proteins based on weighted degree centrality</article-title>. <source>IEEE/ACM Trans Comput. Biol. Bioinforma.</source> <volume>11</volume> (<issue>2</issue>), <fpage>407</fpage>&#x2013;<lpage>418</lpage>. <pub-id pub-id-type="doi">10.1109/TCBB.2013.2295318</pub-id>
</citation>
</ref>
<ref id="B36">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tu</surname>
<given-names>B. P.</given-names>
</name>
<name>
<surname>Kudlicki</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Rowicka</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>McKnight</surname>
<given-names>S. L.</given-names>
</name>
</person-group> (<year>2005</year>). <article-title>Logic of the yeast metabolic cycle: Temporal compartmentalization of cellular processes</article-title>. <source>Science</source> <volume>310</volume> (<issue>5751</issue>), <fpage>1152</fpage>&#x2013;<lpage>1158</lpage>. <pub-id pub-id-type="doi">10.1126/science.1120499</pub-id>
</citation>
</ref>
<ref id="B37">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>J. X.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Pan</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Identification of essential proteins based on edge clustering coefficient</article-title>. <source>IEEE/ACM Trans. Comput. Biol. Bioinform.</source> <volume>9</volume> (<issue>4</issue>), <fpage>1070</fpage>&#x2013;<lpage>1080</lpage>. <pub-id pub-id-type="doi">10.1109/TCBB.2011.147</pub-id>
</citation>
</ref>
<ref id="B38">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xenarios</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Salwinski</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Duan</surname>
<given-names>X. J.</given-names>
</name>
<name>
<surname>Higney</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Kim</surname>
<given-names>S. M.</given-names>
</name>
<name>
<surname>Eisenberg</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2002</year>). <article-title>DIP, the database of interacting proteins: A research tool for studying cellular networks of protein interactions</article-title>. <source>Nucleic Acids Res.</source> <volume>30</volume> (<issue>1</issue>), <fpage>303</fpage>&#x2013;<lpage>305</lpage>. <pub-id pub-id-type="doi">10.1093/nar/30.1.303</pub-id>
</citation>
</ref>
<ref id="B39">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Peng</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Dai</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Song</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>A novel method for identifying essential genes by fusing dynamic protein&#x2013;protein interactive networks</article-title>. <source>Genes</source> <volume>10</volume> (<issue>1</issue>), <fpage>31</fpage>. <pub-id pub-id-type="doi">10.3390/genes10010031</pub-id>
</citation>
</ref>
<ref id="B40">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>DEG 5.0, a database of essential genes in both prokaryotes and eukaryotes</article-title>. <source>Nucleic Acids Res.</source> <volume>37</volume>, <fpage>D455</fpage>&#x2013;<lpage>D458</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkn858</pub-id>
</citation>
</ref>
<ref id="B41">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Xiao</surname>
<given-names>W.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>A new method for the discovery of essential proteins</article-title>. <source>PLoS One</source> <volume>8</volume> (<issue>3</issue>), <fpage>e58763</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pone.0058763</pub-id>
</citation>
</ref>
<ref id="B42">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhao</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Han</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Luo</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Hu</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Z.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>A novel method to predict essential proteins based on diffusion distance networks</article-title>. <source>IEEE Access</source> <volume>8</volume>, <fpage>29385</fpage>&#x2013;<lpage>29394</lpage>. <pub-id pub-id-type="doi">10.1109/access.2020.2972922</pub-id>
</citation>
</ref>
<ref id="B43">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhao</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Hu</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Xiong</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Han</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Z.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>A novel computational approach for identifying essential proteins from multiplex biological networks</article-title>. <source>Front. Genet.</source> <volume>11</volume>, <fpage>343</fpage>. <pub-id pub-id-type="doi">10.3389/fgene.2020.00343</pub-id>
</citation>
</ref>
<ref id="B44">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhao</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>L.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>An iteration method for identifying yeast essential proteins from heterogeneous network</article-title>. <source>BMC Bioinforma.</source> <volume>20</volume>, <fpage>355</fpage>. <pub-id pub-id-type="doi">10.1186/s12859-019-2930-2</pub-id>
</citation>
</ref>
<ref id="B45">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhao</surname>
<given-names>B. H.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>J. X.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>F. X.</given-names>
</name>
<name>
<surname>Pan</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Prediction of essential proteins based on overlapping essential modules</article-title>. <source>IEEE Trans. Nanobioscience</source> <volume>13</volume> (<issue>4</issue>), <fpage>415</fpage>&#x2013;<lpage>424</lpage>. <pub-id pub-id-type="doi">10.1109/TNB.2014.2337912</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>