<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Genet.</journal-id>
<journal-title>Frontiers in Genetics</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Genet.</abbrev-journal-title>
<issn pub-type="epub">1664-8021</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fgene.2021.656140</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Genetics</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>TMP-SSurface2: A Novel Deep Learning-Based Surface Accessibility Predictor for Transmembrane Protein Sequence</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name><surname>Liu</surname> <given-names>Zhe</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/1110806/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Gong</surname> <given-names>Yingli</given-names></name>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/1168716/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Guo</surname> <given-names>Yuanzhao</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/1188969/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Zhang</surname> <given-names>Xiao</given-names></name>
<xref ref-type="aff" rid="aff5"><sup>5</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/1205066/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Lu</surname> <given-names>Chang</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/1253789/overview"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Zhang</surname> <given-names>Li</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x002A;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/1205690/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Wang</surname> <given-names>Han</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/841171/overview"/>
</contrib>
</contrib-group>
<aff id="aff1"><sup>1</sup><institution>School of Computer Science and Engineering, Changchun University of Technology</institution>, <addr-line>Changchun</addr-line>, <country>China</country></aff>
<aff id="aff2"><sup>2</sup><institution>School of Information Science and Technology, Institute of Computational Biology, Northeast Normal University</institution>, <addr-line>Changchun</addr-line>, <country>China</country></aff>
<aff id="aff3"><sup>3</sup><institution>Shanghai Mental Health Center, Shanghai Jiao Tong University School of Medicine, School of Biomedical Engineering, Shanghai Jiao Tong University</institution>, <addr-line>Shanghai</addr-line>, <country>China</country></aff>
<aff id="aff4"><sup>4</sup><institution>College of Intelligence and Computing, Tianjin University</institution>, <addr-line>Tianjin</addr-line>, <country>China</country></aff>
<aff id="aff5"><sup>5</sup><institution>College of Computing and Software Engineering, Kennesaw State University</institution>, <addr-line>Kennesaw, GA</addr-line>, <country>United States</country></aff>
<author-notes>
<fn fn-type="edited-by"><p>Edited by: Wang Guohua, Harbin Institute of Technology, China</p></fn>
<fn fn-type="edited-by"><p>Reviewed by: Hongjie Wu, Suzhou University of Science and Technology, China; Xiujuan Lei, Shaanxi Normal University, China</p></fn>
<corresp id="c001">&#x002A;Correspondence: Li Zhang, <email>lizhang@ccut.edu.cn</email></corresp>
<fn fn-type="other" id="fn004"><p>This article was submitted to Computational Genomics, a section of the journal Frontiers in Genetics</p></fn>
</author-notes>
<pub-date pub-type="epub">
<day>15</day>
<month>03</month>
<year>2021</year>
</pub-date>
<pub-date pub-type="collection">
<year>2021</year>
</pub-date>
<volume>12</volume>
<elocation-id>656140</elocation-id>
<history>
<date date-type="received">
<day>20</day>
<month>01</month>
<year>2021</year>
</date>
<date date-type="accepted">
<day>22</day>
<month>02</month>
<year>2021</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x00A9; 2021 Liu, Gong, Guo, Zhang, Lu, Zhang and Wang.</copyright-statement>
<copyright-year>2021</copyright-year>
<copyright-holder>Liu, Gong, Guo, Zhang, Lu, Zhang and Wang</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p></license>
</permissions>
<abstract>
<p>Transmembrane protein (TMP) is an important type of membrane protein that is involved in various biological membranes related biological processes. As major drug targets, TMPs&#x2019; surfaces are highly concerned to form the structural biases of their material-bindings for drugs or other biological molecules. However, the quantity of determinate TMP structures is still far less than the requirements, while artificial intelligence technologies provide a promising approach to accurately identify the TMP surfaces, merely depending on their sequences without any feature-engineering. For this purpose, we present an updated TMP surface residue predictor TMP-SSurface2 which achieved an even higher prediction accuracy compared to our previous version. The method uses an attention-enhanced Bidirectional Long Short Term Memory (BiLSTM) network, benefiting from its efficient learning capability, some useful latent information is abstracted from protein sequences, thus improving the Pearson correlation coefficients (CC) value performance of the old version from 0.58 to 0.66 on an independent test dataset. The results demonstrate that TMP-SSurface2 is efficient in predicting the surface of transmembrane proteins, representing new progress in transmembrane protein structure modeling based on primary sequences. TMP-SSurface2 is freely accessible at <ext-link ext-link-type="uri" xlink:href="https://github.com/NENUBioCompute/TMP-SSurface-2.0">https://github.com/NENUBioCompute/TMP-SSurface-2.0</ext-link>.</p>
</abstract>
<kwd-group>
<kwd>transmembrane protein</kwd>
<kwd>deep learning</kwd>
<kwd>relative accessible surface area</kwd>
<kwd>attention mechanism</kwd>
<kwd>long short term memory</kwd>
</kwd-group>
<contract-num rid="cn001">JJKH20191309KJ</contract-num>
<contract-num rid="cn003">2412019FZ048</contract-num>
<contract-sponsor id="cn001">Education Department of Jilin Province<named-content content-type="fundref-id">10.13039/501100010211</named-content></contract-sponsor><contract-sponsor id="cn002">Jilin Scientific and Technological Development Program<named-content content-type="fundref-id">10.13039/501100013061</named-content></contract-sponsor>
<contract-sponsor id="cn003">Fundamental Research Funds for the Central Universities<named-content content-type="fundref-id">10.13039/501100012226</named-content></contract-sponsor>
<counts>
<fig-count count="7"/>
<table-count count="8"/>
<equation-count count="12"/>
<ref-count count="48"/>
<page-count count="10"/>
<word-count count="0"/>
</counts>
</article-meta>
</front>
<body>
<sec id="S1">
<title>Introduction</title>
<p>Transmembrane Proteins (TMPs) are the gatekeepers to the cells and control the flow of molecules and information across the membrane (<xref ref-type="bibr" rid="B9">Goddard et al., 2015</xref>). The function of MPs is crucial for a wide range of physiological processes like signal transduction, electron transfer, and neurotransmitter transport (<xref ref-type="bibr" rid="B30">Roy, 2015</xref>). They span the entire biological membrane with segments exposed on both the outside and inside of aqueous spaces and have a profound effect on the pharmacokinetics of various drugs (<xref ref-type="bibr" rid="B26">Padmanabhan, 2014</xref>), cell mechanics regulation (<xref ref-type="bibr" rid="B35">Stillwell, 2016</xref>), molecule transport (<xref ref-type="bibr" rid="B25">Oguro and Imaoka, 2019</xref>; <xref ref-type="bibr" rid="B27">Puder et al., 2019</xref>) and so on. Also, the evidence is pointing toward TMPs associating with a wide range of diseases, including dyslipidemia, autism, epilepsy (<xref ref-type="bibr" rid="B28">Rafi et al., 2019</xref>; <xref ref-type="bibr" rid="B37">Tanabe et al., 2019</xref>; <xref ref-type="bibr" rid="B41">Weihong et al., 2019</xref>), and multiple cancers (<xref ref-type="bibr" rid="B24">Moon et al., 2019</xref>; <xref ref-type="bibr" rid="B44">Yan et al., 2019</xref>). Moreover, based on the current therapeutics market, it is evaluated that more than one-third of future drug targets would be TMPs (<xref ref-type="bibr" rid="B36">Studer et al., 2014</xref>) and the surface of TMPs is always identified as an interaction interface according to statistical reports (<xref ref-type="bibr" rid="B22">Lu et al., 2019b</xref>).</p>
<p>The quantitative approach for measuring the exposure of residues is to calculate the relatively accessible surface area (rASA) of the residues (<xref ref-type="bibr" rid="B38">Tarafder et al., 2018</xref>). rASA reflects the exposure of a single residue to the solvent, making it a directive reference of protein structures. Predicting rASA of TMPs is a rewarding task to biological problems like function annotation, structural modeling, and drug discovery (<xref ref-type="bibr" rid="B48">Zhang et al., 2019</xref>). In this case, accurate sequence-based computational rASA predictors need to be developed urgently to provide more support for structure prediction.</p>
<p>Many rASA predictors had been reported performing well on soluble proteins but the structural differences between the two protein types are significant, especially when interacting with the phospholipid bilayer. There are a few methods released to predict rASA of TMP residues based on their primary sequences. <xref ref-type="bibr" rid="B3">Beuming and Weinstein (2004)</xref> firstly proposed a knowledge-based method to predict the binary state (buried or exposed) of residues in terms of a preassigned cutoff in the transmembrane region of &#x03B1;-TMPs, it is the first rASA predictor of TMPs. After that, a series of methods using machine learning including SVC, SVR, and SVM emerged, which can be automatically divided into two categories according to their functionality: binary classifier and rASA real value predictor. All of these machine learning-based methods were designed for &#x03B1;-TMPs, some methods were just effective with the transmembrane region of the proteins restrictedly, such as TMX (<xref ref-type="bibr" rid="B19">Liwicki et al., 2007</xref>; <xref ref-type="bibr" rid="B40">Wang et al., 2011</xref>), TMexpoSVC (<xref ref-type="bibr" rid="B16">Lai et al., 2013</xref>), and TMexpoSVR (<xref ref-type="bibr" rid="B16">Lai et al., 2013</xref>), only MPRAP (<xref ref-type="bibr" rid="B12">Illerg&#x00E5;rd et al., 2010</xref>) and MemBrane-Rasa (<xref ref-type="bibr" rid="B43">Xiao and Shen, 2015</xref>; <xref ref-type="bibr" rid="B45">Yin et al., 2018</xref>) were able to predict rASA of the entire sequence. Our previous work (<xref ref-type="bibr" rid="B21">Lu et al., 2019a</xref>) combined Inception blocks with CapsNet, proving that deep learning takes many advantages for the prediction but there is still room for accuracy improvement.</p>
<p>The predictors mentioned above including our previous version all applied common methods like SVM and feed-forward neural networks. However, these non-sequential models do not naturally handle sequential data and have trouble capturing long-term dependencies of a certain sequence (<xref ref-type="bibr" rid="B34">S&#x00F8;nderby and Winther, 2014</xref>), thus being a bottleneck in rASA prediction tasks, calling for more suitable models. In recent years, various Long Short Term Memory (LSTM) models have already been employed to learn temporal information of protein secondary structure, confirming the amazing ability of LSTM in handling protein sequences through experimental verification (<xref ref-type="bibr" rid="B34">S&#x00F8;nderby and Winther, 2014</xref>; <xref ref-type="bibr" rid="B33">S&#x00F8;nderby et al., 2015</xref>; <xref ref-type="bibr" rid="B11">Heffernan et al., 2017</xref>). When it comes to sequence level issues, LSTM is definitely a better choice. Furthermore, previous tools did not have measures for reinforcing effective features, resulting in lower efficiency of model learning. Additionally, various input restrictions and long waiting times also made the predictors less friendly to users.</p>
<p>In this study, we proposed an attention-enhanced bidirectional LSTM network named TMP-SSurface2 to predict rASA of TMPs at the residue level, which was implemented on top of the CNN-based Z-coordinate predictor TM-ZC (<xref ref-type="bibr" rid="B20">Lu et al., 2020</xref>). TMP-SSurface2 was trained and tested against the non-redundant benchmark dataset we created with primary sequences as input, improving the Pearson correlation coefficients (CC) value performance of the old version from 0.584 to 0.659, and reduced the mean absolute error (MAE) from 0.144 to 0.140. Apart from state-of-the-art prediction accuracy, TMP-SSurface2 also achieved the highest output efficiency compared to existing methods with no length restriction of input. The source codes of TMP-SSurface2 and the corresponding materials can be freely accessed at <ext-link ext-link-type="uri" xlink:href="https://github.com/NENUBioCompute/TMP-SSurface-2.0">https://github.com/NENUBioCompute/TMP-SSurface-2.0</ext-link>.</p>
</sec>
<sec id="S2" sec-type="materials|methods">
<title>Materials and Methods</title>
<sec id="S2.SS1">
<title>Benchmark Dataset</title>
<p>A total of 4,007 TMPs were downloaded from PDBTM (version: 2019-01-04). We removed the proteins which contained unknown residues such as &#x201C;X&#x201D; or whose length was less than 30 residues since too short a sequence may not form a representative structure. To avoid the redundancy of data and reduce the influence of homology bias, CD-HIT (<xref ref-type="bibr" rid="B18">Li and Godzik, 2006</xref>) was utilized to eliminate the duplicate structures with a 30% sequence identity cut-off resulting in 704 protein chains (618 &#x03B1; protein chains and 86 &#x03B2; protein chains) left. These proteins were randomly divided into a training set of 604 proteins, a validation set of 50 proteins, and a test set of 50 proteins, respectively. In this work, five-fold cross-validation experiments were performed and the results were compared against other predictors.</p>
<p>The residue solvent accessibility surface area (ASA) is defined as the surface accessibility of a certain residue when exposed to water or lipid. Several tools are capable of calculating ASA, such as Naccess (<xref ref-type="bibr" rid="B17">Lee and Richards, 1971</xref>), PSAIA (<xref ref-type="bibr" rid="B23">Mihel et al., 2008</xref>), MSMS (<xref ref-type="bibr" rid="B31">Sanner et al., 1996</xref>), and Dictionary of Protein Secondary Structure (DSSP) (<xref ref-type="bibr" rid="B14">Kabsch and Sander, 1983</xref>).</p>
<p>The ASA of residues was calculated by DSSP, using a probe with a radius of 1.4 &#x00C5;. A residue&#x2019;s ASA is divided by the corresponding standard maximum accessible surface area (MaxASA), which is the ASA of extended tri-peptides (Gly-X-Gly) (<xref ref-type="bibr" rid="B39">Tien et al., 2013</xref>), to generate rASA values. rASA can be calculated by the following formula:</p>
<disp-formula id="S2.E1">
<label>(1)</label>
<mml:math id="M1">
<mml:mrow>
<mml:mi>rASA</mml:mi>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mi>ASA</mml:mi>
<mml:mi>MaxASA</mml:mi>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
</sec>
<sec id="S2.SS2">
<title>Features and Encoding</title>
<p>To make the prediction more accurate, it is vital to provide useful features to deep learning-based methods. In our experiments, we carefully select two encoding features to represent the protein fragment: one-hot code and PSSM.</p>
<p>Prediction of transmembrane protein residues&#x2019; rASA is a classical regression problem, which can be formulated as follows: for a given primary sequence of a TMP, a sliding window of <italic>k</italic> residues was used to predict the real value of central residue&#x2019;s rASA. For instance, if <italic>k</italic> is 19, then each protein is subsequently sliced into fragments of 19 amino acids.</p>
<p>For each residue in protein sequences, one-hot code is a 20-dimension vector (see <xref ref-type="fig" rid="F1">Figure 1</xref>), using a 19 dimensional &#x201C;0&#x201D; vector with a &#x201C;1&#x201D; corresponding to the amino acid at the index of a certain protein sequence. In this way, each protein fragment can be mapped into an exclusive and undisturbed coding within its relative position information (<xref ref-type="bibr" rid="B10">He et al., 2018</xref>). It is proved that a one-hot code is extremely easy to generate while effective for protein function prediction associated problems (<xref ref-type="bibr" rid="B6">Ding and Li, 2015</xref>).</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption><p>One-hot code of protein residues.</p></caption>
<graphic xlink:href="fgene-12-656140-g001.tif"/>
</fig>
<p>A position-specific scoring matrix (PSSM) reflects the evolutionary profile of the protein sequence based on a search against a certain database. Highly conserved regions during evolution are always functional regions according to the researches (<xref ref-type="bibr" rid="B13">Jeong et al., 2010</xref>; <xref ref-type="bibr" rid="B47">Zeng et al., 2019</xref>), so PSSM has been widely used in many bioinformatics problems and achieves commendable results. In our study, PSI-BLAST (<xref ref-type="bibr" rid="B1">Altschul et al., 1997</xref>) was utilized to generate PSSM searching against the uniref50 (version: 2019-01-16) database with 3 iterations and a 0.01 <italic>E</italic>-value cutoff. For a given protein sequence, the PSSM feature is a 20-dimension matrix with each column representing a profile and each row representing a residue.</p>
<p>As shown in <xref ref-type="fig" rid="F2">Figure 2</xref>, each amino acid in the protein sequence is represented as a vector of 41 numbers, including 20 from one-hot code (represented as binary numbers), 20 from PSSM, and 1 Noseq label (representing a gap) (<xref ref-type="bibr" rid="B7">Fang et al., 2018</xref>) in the last column to improve the prediction performance of the residues located on both ends of protein while using a sliding window. In order to facilitate the window sliding operation, the first and last parts of the sequence are, respectively, padded with 1 and 0s, whose length is half of the sliding window size. For each protein with L residues, we can get L matrices.</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption><p>Encoding features as the model input.</p></caption>
<graphic xlink:href="fgene-12-656140-g002.tif"/>
</fig>
</sec>
<sec id="S2.SS3">
<title>Model Design</title>
<p>In this section, a novel compound deep learning network is presented. <xref ref-type="fig" rid="F3">Figure 3A</xref> shows the proposed pipeline. The input features for TMP-SSurface2 are the one-hot code and the PSSM matrix. The CNN whose structure and parameters are all same as TM-ZC is used to generate the Z-coordinate of TMP residues. Z-coordinate, which is an important constituent in the field of MP structure prediction, is often implemented to stand for a residue&#x2019;s relative position concerning the membrane (<xref ref-type="bibr" rid="B45">Yin et al., 2018</xref>). After that, the final feature map containing a one-hot code, PSSM, and Z-coordinate will be put into a bidirectional LSTM (BiLSTM) network for training and testing.</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption><p><bold>(A)</bold> Pipeline of the deep learning model. <bold>(B)</bold> The attention-enhanced bidirectional LSTM network.</p></caption>
<graphic xlink:href="fgene-12-656140-g003.tif"/>
</fig>
<p>To further optimize the model, we also attached an attention mechanism (<xref ref-type="bibr" rid="B2">Baron-Cohen, 1995</xref>) layer to the top of BiLSTM, which is motivated by how we pay visual attention to different regions of an image or correlate words in one sentence, to help LSTM focus on a certain region that relatively deserves more attention. The detailed structure of the mentioned LSTM network is shown in <xref ref-type="fig" rid="F3">Figure 3B</xref>.</p>
<p>Formula (2) to formula (9) describe the forward recursions for a single LSTM layer, where &#x2299; denotes elementwise multiplication, <italic>x</italic><sub><italic>t</italic></sub> means input from the previous layer, and <italic>i</italic><sub><italic>t</italic></sub>, <italic>f</italic><sub><italic>t</italic></sub>, and <italic>o</italic><sub><italic>t</italic></sub> represent &#x201C;input gate,&#x201D; &#x201C;forget gate&#x201D; and &#x201C;output gate,&#x201D; respectively. <italic>h</italic><sub><italic>t</italic>&#x2212;<italic>rec</italic></sub> stands for the output forwarded to the next time slice, and <italic>h</italic><sub><italic>t</italic></sub> is passed upwards in a multilayer LSTM (<xref ref-type="bibr" rid="B34">S&#x00F8;nderby and Winther, 2014</xref>). Attention neural networks have recently demonstrated popularity in a wide range of tasks ranging from natural language processing to computer vision (<xref ref-type="bibr" rid="B4">Chorowski et al., 2014</xref>; <xref ref-type="bibr" rid="B29">Rockt&#x00E4;schel et al., 2015</xref>; <xref ref-type="bibr" rid="B32">Sharma et al., 2015</xref>). Inspired by these projects, we attached an attention mechanism to LSTM for feature capturing. As shown in formula (10), the combination of attention mechanism enables the model to re-assign the weight (<italic>W</italic><sub><italic>att</italic></sub>) of the feature vector (<italic>V</italic>), indicating that the next output vector (<italic>V</italic>&#x2032;) should focus more on which part of the input sequence, and then generate the next output according to the focus region.</p>
<disp-formula id="S2.E2">
<label>(2)</label>
<mml:math id="M2">
<mml:mrow>
<mml:msub>
<mml:mi>i</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mrow>
<mml:mi mathvariant="normal">&#x03C3;</mml:mi>
<mml:mo>&#x2062;</mml:mo>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x2062;</mml:mo>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mo>&#x2062;</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo>+</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>-</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2062;</mml:mo>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mo>&#x2062;</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="S2.E3">
<label>(3)</label>
<mml:math id="M3">
<mml:mrow>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mrow>
<mml:mi mathvariant="normal">&#x03C3;</mml:mi>
<mml:mo>&#x2062;</mml:mo>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x2062;</mml:mo>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mo>&#x2062;</mml:mo>
<mml:mi>f</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo>+</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>-</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2062;</mml:mo>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mo>&#x2062;</mml:mo>
<mml:mi>f</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mi>f</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="S2.E4">
<label>(4)</label>
<mml:math id="M4">
<mml:mrow>
<mml:msub>
<mml:mi>o</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mrow>
<mml:mi mathvariant="normal">&#x03C3;</mml:mi>
<mml:mo>&#x2062;</mml:mo>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x2062;</mml:mo>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mo>&#x2062;</mml:mo>
<mml:mi>o</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo>+</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>-</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2062;</mml:mo>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mo>&#x2062;</mml:mo>
<mml:mi>o</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mi>o</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="S2.E5">
<label>(5)</label>
<mml:math id="M5">
<mml:mrow>
<mml:msub>
<mml:mi>g</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mrow>
<mml:mi>tanh</mml:mi>
<mml:mo>&#x2061;</mml:mo>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x2062;</mml:mo>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mo>&#x2062;</mml:mo>
<mml:mi>g</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo>+</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>-</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2062;</mml:mo>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mo>&#x2062;</mml:mo>
<mml:mi>g</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mi>g</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="S2.E6">
<label>(6)</label>
<mml:math id="M6">
<mml:mrow>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x2299;</mml:mo>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>-</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo>+</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>i</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x2299;</mml:mo>
<mml:msub>
<mml:mi>g</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="S2.E7">
<label>(7)</label>
<mml:math id="M7">
<mml:mrow>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>o</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x2299;</mml:mo>
<mml:mrow>
<mml:mi>tanh</mml:mi>
<mml:mo>&#x2061;</mml:mo>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="S2.E8">
<label>(8)</label>
<mml:math id="M8">
<mml:mrow>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>-</mml:mo>
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mo>&#x2062;</mml:mo>
<mml:mi>e</mml:mi>
<mml:mo>&#x2062;</mml:mo>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:mrow>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:mrow>
<mml:mi>feedforwardnet</mml:mi>
<mml:mo>&#x2061;</mml:mo>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="S2.E9">
<label>(9)</label>
<mml:math id="M9">
<mml:mrow>
<mml:mrow>
<mml:mi mathvariant="normal">&#x03C3;</mml:mi>
<mml:mo>&#x2062;</mml:mo>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mi>z</mml:mi>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>+</mml:mo>
<mml:mrow>
<mml:mi>exp</mml:mi>
<mml:mo>&#x2061;</mml:mo>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mo>-</mml:mo>
<mml:mi>z</mml:mi>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="S2.E10">
<label>(10)</label>
<mml:math id="M10">
<mml:mrow>
<mml:msup>
<mml:mi>V</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
<mml:mo>=</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mi>a</mml:mi>
<mml:mo>&#x2062;</mml:mo>
<mml:mi>t</mml:mi>
<mml:mo>&#x2062;</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2062;</mml:mo>
<mml:mrow>
<mml:mo largeop="true" mathsize="160%" movablelimits="false" stretchy="false" symmetric="true">&#x2299;</mml:mo>
<mml:mi>V</mml:mi>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
<p>Our model was implemented, trained, and tested using Keras and TensorFlow. The main hyperparameters (sliding window size, training dropout rate, number of LSTM units, and number of LSTM layers) were explored. Early stopping and save-best strategies were applied: when the validation loss did not decrease for 10 epochs during training, the process stopped and the best model parameters were saved. We used the Adam optimizer to dynamically adapt the learning rate while the model was training. All the experiments were performed using an Nvidia 1080Ti GPU.</p>
</sec>
<sec id="S2.SS4">
<title>Performance Evaluation</title>
<p>To quantitatively evaluate the predictions of TMP-SSurface2, the Pearson correlation coefficient (CC) and the mean absolute error (MAE) were used in this study. CC measures the linear correlation between the real values and the predicted values. CC ranges from &#x2212;1 to 1, where &#x2212;1 indicates an absolute negative correlation, 1 an absolute positive correlation, and 0 no correlation at all. Formula (11) shows the definition of CC, where <italic>L</italic> represents the number of residues, <italic>x</italic><sub><italic>i</italic></sub> and <italic>y</italic><sub><italic>i</italic></sub> denote the observed and predicted rASA values, respectively, and <inline-formula><mml:math id="INEQ6"><mml:mover accent="true"><mml:mi>x</mml:mi><mml:mo>&#x00AF;</mml:mo></mml:mover></mml:math></inline-formula> and <inline-formula><mml:math id="INEQ7"><mml:mover accent="true"><mml:mi>y</mml:mi><mml:mo>&#x00AF;</mml:mo></mml:mover></mml:math></inline-formula> are the corresponding mean values.</p>
<disp-formula id="S2.E11">
<label>(11)</label>
<mml:math id="M11">
<mml:mrow>
<mml:mrow>
<mml:mi>C</mml:mi>
<mml:mo>&#x2062;</mml:mo>
<mml:mi>C</mml:mi>
</mml:mrow>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msubsup>
<mml:mo largeop="true" symmetric="true">&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>L</mml:mi>
</mml:msubsup>
<mml:mrow>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>-</mml:mo>
<mml:mover accent="true">
<mml:mi>x</mml:mi>
<mml:mo>&#x00AF;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>&#x2062;</mml:mo>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>-</mml:mo>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#x00AF;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mrow>
<mml:msqrt>
<mml:mrow>
<mml:mrow>
<mml:mo>[</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mo largeop="true" symmetric="true">&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>L</mml:mi>
</mml:msubsup>
<mml:msup>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>-</mml:mo>
<mml:mover accent="true">
<mml:mi>x</mml:mi>
<mml:mo>&#x00AF;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
<mml:mo>]</mml:mo>
</mml:mrow>
<mml:mo>&#x2062;</mml:mo>
<mml:mrow>
<mml:mo>[</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mo largeop="true" symmetric="true">&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>L</mml:mi>
</mml:msubsup>
<mml:msup>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>-</mml:mo>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#x00AF;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
<mml:mo>]</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:msqrt>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
<p>Mean absolute error measures the closeness of the predicted values to the real values. As shown in formula (12), MAE is defined as the average absolute difference between the predicted and observed rASA values of all residues.</p>
<disp-formula id="S2.E12">
<label>(12)</label>
<mml:math id="M12">
<mml:mrow>
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mo>&#x2062;</mml:mo>
<mml:mi>A</mml:mi>
<mml:mo>&#x2062;</mml:mo>
<mml:mi>E</mml:mi>
</mml:mrow>
<mml:mo>=</mml:mo>
<mml:mrow>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mi>L</mml:mi>
</mml:mfrac>
<mml:mo>&#x2062;</mml:mo>
<mml:mrow>
<mml:munderover>
<mml:mo largeop="true" movablelimits="false" symmetric="true">&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>L</mml:mi>
</mml:munderover>
<mml:mrow>
<mml:mo>|</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>-</mml:mo>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo>|</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
</sec>
</sec>
<sec id="S3">
<title>Results</title>
<sec id="S3.SS1">
<title>Feature Analysis</title>
<p>As is well known, it is the features, rather than the model structure, that determine the upper limit of deep learning performance. To investigate the contribution of the different features to the predictor TMP-SSurface2, we tested both the independent features used in the predictor and their various combinations on our validation dataset.</p>
<p><xref ref-type="table" rid="T1">Table 1</xref> illustrates that all three independent features (Z-coordinate, one-hot, and PSSM) contain useful information for predicting rASA by themselves, among which PSSM achieves the best overall results (CC = 0.631 and MAE = 0.144). This suggests that PSSM is an important feature in rASA prediction, mainly because it includes evolutionary knowledge. When these different features are combined, the CC values are almost linearly related to the MAE values, as was indicated by a former study (<xref ref-type="bibr" rid="B46">Yuan et al., 2006</xref>), so the maximum CC value always accompanies the minimum MAE. The experiments show that every single feature contributed to the prediction, and the best performance (CC = 0.659 and MAE = 0.140) was achieved when all of them were combined.</p>
<table-wrap position="float" id="T1">
<label>TABLE 1</label>
<caption><p>Prediction performance based on individual input features and their various combinations.</p></caption>
<table cellspacing="5" cellpadding="5" frame="hsides" rules="groups">
<thead>
<tr>
<td valign="top" align="left">Feature</td>
<td valign="top" align="center">CC</td>
<td valign="top" align="center">MAE</td>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Z-coordinate</td>
<td valign="top" align="center">0.310</td>
<td valign="top" align="center">0.191</td>
</tr>
<tr>
<td valign="top" align="left">one-hot</td>
<td valign="top" align="center">0.417</td>
<td valign="top" align="center">0.180</td>
</tr>
<tr>
<td valign="top" align="left">PSSM</td>
<td valign="top" align="center">0.631</td>
<td valign="top" align="center">0.144</td>
</tr>
<tr>
<td valign="top" align="left">one-hot+PSSM</td>
<td valign="top" align="center">0.641</td>
<td valign="top" align="center">0.142</td>
</tr>
<tr>
<td valign="top" align="left">one-hot+PSSM+Z-coordinate</td>
<td valign="top" align="center"><bold>0.659</bold></td>
<td valign="top" align="center"><bold>0.140</bold></td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<attrib><italic>&#x002A;Bold fonts represent the best experimental results.</italic></attrib>
</table-wrap-foot>
</table-wrap>
</sec>
<sec id="S3.SS2">
<title>Hyperparameter Tuning and Model Performance</title>
<p><xref ref-type="table" rid="T2">Tables 2</xref>&#x2013;<xref ref-type="table" rid="T5">5</xref> summarize the exploration of the attention-enhanced bidirectional LSTM network with various hyperparameters on the validation dataset. The objective of these experiments was to find a better configuration for our method. The tested hyperparameters were carefully selected, and only the major factors that would greatly influence the model were explored on the validation dataset.</p>
<table-wrap position="float" id="T2">
<label>TABLE 2</label>
<caption><p>Effect of sliding window length on CC performance.</p></caption>
<table cellspacing="5" cellpadding="5" frame="hsides" rules="groups">
<thead>
<tr>
<td valign="top" align="left">Window Length</td>
<td valign="top" align="center">CC</td>
<td valign="top" align="center">MAE</td>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">13</td>
<td valign="top" align="center">0.642</td>
<td valign="top" align="center">0.141</td>
</tr>
<tr>
<td valign="top" align="left">15</td>
<td valign="top" align="center">0.641</td>
<td valign="top" align="center">0.143</td>
</tr>
<tr>
<td valign="top" align="left">17</td>
<td valign="top" align="center">0.645</td>
<td valign="top" align="center">0.143</td>
</tr>
<tr>
<td valign="top" align="left">19</td>
<td valign="top" align="center"><bold>0.648</bold></td>
<td valign="top" align="center"><bold>0.140</bold></td>
</tr>
<tr>
<td valign="top" align="left">21</td>
<td valign="top" align="center">0.646</td>
<td valign="top" align="center">0.141</td>
</tr>
<tr>
<td valign="top" align="left">23</td>
<td valign="top" align="center">0.640</td>
<td valign="top" align="center">0.142</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<attrib><italic>&#x002A;Bold fonts represent the best experimental results.</italic></attrib>
</table-wrap-foot>
</table-wrap>
<table-wrap position="float" id="T3">
<label>TABLE 3</label>
<caption><p>Effect of dropout rate on CC performance.</p></caption>
<table cellspacing="5" cellpadding="5" frame="hsides" rules="groups">
<thead>
<tr>
<td valign="top" align="left">Dropout rate</td>
<td valign="top" align="center">Train CC</td>
<td valign="top" align="center">Test CC</td>
<td valign="top" align="center">Test MAE</td>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">No</td>
<td valign="top" align="center">0.851</td>
<td valign="top" align="center">0.632</td>
<td valign="top" align="center">0.143</td>
</tr>
<tr>
<td valign="top" align="left">0.2</td>
<td valign="top" align="center">0.806</td>
<td valign="top" align="center">0.640</td>
<td valign="top" align="center">0.143</td>
</tr>
<tr>
<td valign="top" align="left">0.3</td>
<td valign="top" align="center"><bold>0.782</bold></td>
<td valign="top" align="center"><bold>0.648</bold></td>
<td valign="top" align="center"><bold>0.140</bold></td>
</tr>
<tr>
<td valign="top" align="left">0.4</td>
<td valign="top" align="center">0.762</td>
<td valign="top" align="center">0.641</td>
<td valign="top" align="center">0.141</td>
</tr>
<tr>
<td valign="top" align="left">0.5</td>
<td valign="top" align="center">0.725</td>
<td valign="top" align="center">0.638</td>
<td valign="top" align="center">0.143</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<attrib><italic>&#x002A;Bold fonts represent the best experimental results.</italic></attrib>
</table-wrap-foot>
</table-wrap>
<table-wrap position="float" id="T4">
<label>TABLE 4</label>
<caption><p>Effect of LSTM units&#x2019; number on CC performance.</p></caption>
<table cellspacing="5" cellpadding="5" frame="hsides" rules="groups">
<thead>
<tr>
<td valign="top" align="left">Num of units</td>
<td valign="top" align="center">CC</td>
<td valign="top" align="center">MAE</td>
<td valign="top" align="center">Num of Parameters</td>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">500</td>
<td valign="top" align="center">0.639</td>
<td valign="top" align="center">0.142</td>
<td valign="top" align="center">2,191,381</td>
</tr>
<tr>
<td valign="top" align="left">600</td>
<td valign="top" align="center">0.641</td>
<td valign="top" align="center">0.142</td>
<td valign="top" align="center">3,109,591</td>
</tr>
<tr>
<td valign="top" align="left">700</td>
<td valign="top" align="center"><bold>0.648</bold></td>
<td valign="top" align="center"><bold>0.140</bold></td>
<td valign="top" align="center"><bold>4,187,781</bold></td>
</tr>
<tr>
<td valign="top" align="left">800</td>
<td valign="top" align="center">0.643</td>
<td valign="top" align="center">0.143</td>
<td valign="top" align="center">5,425,981</td>
</tr>
<tr>
<td valign="top" align="left">900</td>
<td valign="top" align="center">0.646</td>
<td valign="top" align="center">0.140</td>
<td valign="top" align="center">6,824,181</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<attrib><italic>&#x002A;Bold fonts represent the best experimental results.</italic></attrib>
</table-wrap-foot>
</table-wrap>
<table-wrap position="float" id="T5">
<label>TABLE 5</label>
<caption><p>Effect of the number of LSTM layers on CC performance.</p></caption>
<table cellspacing="5" cellpadding="5" frame="hsides" rules="groups">
<thead>
<tr>
<td valign="top" align="left">LSTM Layers</td>
<td valign="top" align="center">CC</td>
<td valign="top" align="center">MAE</td>
<td valign="top" align="center">Num of parameters</td>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">1</td>
<td valign="top" align="center">0.648</td>
<td valign="top" align="center">0.140</td>
<td valign="top" align="center">4,187,781</td>
</tr>
<tr>
<td valign="top" align="left">2</td>
<td valign="top" align="center"><bold>0.659</bold></td>
<td valign="top" align="center"><bold>0.140</bold></td>
<td valign="top" align="center"><bold>15,953,381</bold></td>
</tr>
<tr>
<td valign="top" align="left">3</td>
<td valign="top" align="center">0.642</td>
<td valign="top" align="center">0.141</td>
<td valign="top" align="center">27,718,981</td>
</tr>
<tr>
<td valign="top" align="left">4</td>
<td valign="top" align="center">0.646</td>
<td valign="top" align="center">0.141</td>
<td valign="top" align="center">39,484,581</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<attrib><italic>&#x002A;Bold fonts represent the best experimental results.</italic></attrib>
</table-wrap-foot>
</table-wrap>
<p>A sliding window approach is utilized to append useful neighborhood information to improve prediction accuracy. <xref ref-type="table" rid="T2">Table 2</xref> shows how the length of the sliding window affects the performance of our network. Since the contexts fed into the proposed deep learning model rely on the length of the sliding window, the prediction accuracy is directly influenced by its value. In general, as the window size becomes larger, training takes more time, but the prediction performance does not necessarily improve as the window length increases. Historically, when a sliding window was utilized in sequence-based protein structure prediction tasks, the peak performance often occurred when its length was between about 13 and 23 residues (<xref ref-type="bibr" rid="B7">Fang et al., 2018</xref>; <xref ref-type="bibr" rid="B21">Lu et al., 2019a</xref>). We searched window lengths from 13 to 23 in steps of two residues and found the best result at a length of 19, which was chosen as the final window length in this section.</p>
<p><xref ref-type="table" rid="T3">Table 3</xref> shows how the dropout rate affects the model performance when the window size is 19. Deep neural networks easily overfit a training dataset with few examples; dropout regularization helps reduce overfitting and improves the generalization of deep neural networks (<xref ref-type="bibr" rid="B5">Dahl et al., 2013</xref>). Dropout rates in the range of 0.2&#x2013;0.4 are all acceptable according to the training and testing prediction performance. Finally, we chose 0.3 as our dropout rate, and the concatenation network in our study is regularized using a 30% dropout.</p>
<p>In the LSTM network, the number of LSTM units is also an important parameter, which determines the output dimension of the different layers, just as in ordinary neural networks. When the number of LSTM units in one layer changes, the scale of the parameters and the prediction accuracy of the model are immediately affected. To find the best number of LSTM units, we also tried different values. The results are shown in <xref ref-type="table" rid="T4">Table 4</xref>; we chose 700 as the number of LSTM units in a simple layer.</p>
<p>As can be seen in <xref ref-type="table" rid="T5">Table 5</xref>, when the LSTM network has two bidirectional layers (i.e., four simple layers, two forward and two backward), the model performs best on the validation set. However, the prediction accuracy of the model does not necessarily grow as the number of LSTM layers increases. It is suspected that a large number of model parameters leads to overfitting of the LSTM on the training set, thus reducing its generalization ability.</p>
</sec>
<sec id="S3.SS3">
<title>Comparison With Previous Predictors</title>
<p>In this section, we list the existing methods that can be used to predict the rASA of TMPs in the full chain and compare TMP-SSurface2 with them. <xref ref-type="table" rid="T6">Table 6</xref> shows the performance improvement of the proposed TMP-SSurface2 after implementing the new model relative to the old version and the other tools. While testing MPRAP and MemBrane-Rasa on the independent dataset, we found that not every sequence fed into these predictors produced a corresponding output, since some third-party tools might cause a failure. Just like TMP-SSurface, the new version is reliable in producing prediction results because of its simple coding scheme. Furthermore, TMP-SSurface2 significantly outperformed the previous predictors and has the fastest prediction speed. The details of the comparison are shown in <xref ref-type="table" rid="T6">Table 6</xref>.</p>
<table-wrap position="float" id="T6">
<label>TABLE 6</label>
<caption><p>Comparison of TMP-SSurface2 with the previous predictors on the independent dataset.</p></caption>
<table cellspacing="5" cellpadding="5" frame="hsides" rules="groups">
<thead>
<tr>
<td valign="top" align="left">Predictor</td>
<td valign="top" align="center">CC</td>
<td valign="top" align="center">MAE</td>
<td valign="top" align="center">Failure</td>
<td valign="top" align="center">Time Cost (min)</td>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">MPRAP</td>
<td valign="top" align="center">0.397</td>
<td valign="top" align="center">0.176</td>
<td valign="top" align="center">9</td>
<td valign="top" align="center">6.5</td>
</tr>
<tr>
<td valign="top" align="left">MemBrane-Rasa</td>
<td valign="top" align="center">0.545</td>
<td valign="top" align="center">0.153</td>
<td valign="top" align="center">7</td>
<td valign="top" align="center">23.7</td>
</tr>
<tr>
<td valign="top" align="left">TMP-SSurface</td>
<td valign="top" align="center">0.584</td>
<td valign="top" align="center">0.144</td>
<td valign="top" align="center">0</td>
<td valign="top" align="center">4.7</td>
</tr>
<tr>
<td valign="top" align="left">TMP-SSurface2</td>
<td valign="top" align="center"><bold>0.659</bold></td>
<td valign="top" align="center"><bold>0.140</bold></td>
<td valign="top" align="center"><bold>0</bold></td>
<td valign="top" align="center"><bold>4.3</bold></td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<attrib><italic>&#x002A;Bold fonts represent the best experimental results.</italic></attrib>
</table-wrap-foot>
</table-wrap>
</sec>
<sec id="S3.SS4">
<title>TMP Type Test</title>
<p>Statistical results show that most of the existing methods focused only on &#x03B1;-helical TMPs while ignoring &#x03B2;-barrel TMPs, which makes them inconvenient for users who cannot distinguish the protein type. As described previously, the data set we used contains both &#x03B1;-helical and &#x03B2;-barrel TMPs, making our predictor more suitable for all types of TMPs. <xref ref-type="table" rid="T7">Table 7</xref> illustrates that when TMP-SSurface2 meets either of these two TMP types, the prediction performance on the independent testing dataset is both considerable and reliable.</p>
<table-wrap position="float" id="T7">
<label>TABLE 7</label>
<caption><p>Performance of TMP-SSurface2 on different types of TMPs.</p></caption>
<table cellspacing="5" cellpadding="5" frame="hsides" rules="groups">
<thead>
<tr>
<td valign="top" align="left">TMP Types</td>
<td valign="top" align="center">Protein number</td>
<td valign="top" align="center">CC</td>
<td valign="top" align="center">MAE</td>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">&#x03B1;-helical TMPs</td>
<td valign="top" align="center">45</td>
<td valign="top" align="center">0.674</td>
<td valign="top" align="center">0.138</td>
</tr>
<tr>
<td valign="top" align="left">&#x03B2;-barrel TMPs</td>
<td valign="top" align="center">5</td>
<td valign="top" align="center">0.562</td>
<td valign="top" align="center">0.151</td>
</tr>
<tr>
<td valign="top" align="left">all-TMPs</td>
<td valign="top" align="center">50</td>
<td valign="top" align="center">0.659</td>
<td valign="top" align="center">0.140</td>
</tr>
</tbody>
</table></table-wrap>
</sec>
<sec id="S3.SS5">
<title>Contribution of Attention Mechanism</title>
<p>The attention mechanism helps the model extract features more effectively, allowing the prediction accuracy to reach its peak faster while also improving the performance. To verify the positive effect of the attention mechanism, we monitored the mean absolute error loss curve of the validation dataset with and without the attention layer, using the preselected best hyperparameters during training. As shown in <xref ref-type="fig" rid="F4">Figure 4</xref>, when the network is attention-enhanced, the convergence speed and accuracy of the training set were significantly improved.</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption><p>Validation loss curve of the training process with and without attention mechanism.</p></caption>
<graphic xlink:href="fgene-12-656140-g004.tif"/>
</fig>
<p>Moreover, we also combined the attention mechanism with various network layers to verify whether, and by how much, the attention mechanism would improve the prediction performance. Firstly, we removed the attention layer and tested the trained model on the test set. Meanwhile, we attached the attention mechanism to the bidirectional LSTM layer and the Dropout layer, respectively, to conduct experiments; the results are shown in <xref ref-type="table" rid="T8">Table 8</xref>. It can be seen that the combination of the attention mechanism and the bidirectional LSTM layer reached the best performance, which is related to the fact that the LSTM layer had learned the most abundant features. In essence, the attention mechanism enhances the feature extraction process, so it achieves the best effect when combined with the network layer that is the most effective for feature extraction.</p>
<table-wrap position="float" id="T8">
<label>TABLE 8</label>
<caption><p>Contribution of attention mechanism.</p></caption>
<table cellspacing="5" cellpadding="5" frame="hsides" rules="groups">
<thead>
<tr>
<td valign="top" align="left">Model</td>
<td valign="top" align="center">CC</td>
<td valign="top" align="center">MAE</td>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">No attention</td>
<td valign="top" align="center">0.637</td>
<td valign="top" align="center">0.150</td>
</tr>
<tr>
<td valign="top" align="left">Attention with LSTM</td>
<td valign="top" align="center"><bold>0.659</bold></td>
<td valign="top" align="center"><bold>0.140</bold></td>
</tr>
<tr>
<td valign="top" align="left">Attention with Dropout</td>
<td valign="top" align="center">0.645</td>
<td valign="top" align="center">0.141</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<attrib><italic>&#x002A;Bold fonts represent the best experimental results.</italic></attrib>
</table-wrap-foot>
</table-wrap>
</sec>
<sec id="S3.SS6">
<title>Visualization of the Features Learnt by LSTM</title>
<p>Deep neural networks can learn high-level abstract features from original inputs. To verify whether the extracted features are generalizable, we utilized PCA (<xref ref-type="bibr" rid="B42">Wold, 1987</xref>) to visualize the input features and each LSTM unit&#x2019;s output in one bidirectional layer with test data. <xref ref-type="fig" rid="F5">Figure 5</xref> shows the PCA scatter diagrams of the test data before and after being fed into the LSTM, respectively. The input data had 42 features (i.e., 42 dimensions); PCA reduced its dimensionality and visualized it, but there was no clear cluster. The bidirectional LSTM layer we used contained 1,400 dimensions (twice the number of units in a simple LSTM layer), and a trend toward clustering had occurred, which demonstrates that the LSTM had effectively captured the useful and powerful features needed in this work.</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption><p>Visualization of the features learned by LSTM using PCA.</p></caption>
<graphic xlink:href="fgene-12-656140-g005.tif"/>
</fig>
<p>Generally, buried residues are under stronger evolutionary constraints than exposed ones, irrespective of the environment (<xref ref-type="bibr" rid="B15">Kauko et al., 2008</xref>). The diagram shows that the residues whose rASA was lower than 0.2 narrowed down to a small area through PCA, which means that these residues&#x2019; rASA values stayed closely aligned with the features derived from their sequence, which supports the previous statement.</p>
</sec>
<sec id="S3.SS7">
<title>Case Studies</title>
<p>To further demonstrate the effectiveness of TMP-SSurface2, we take 4n6h_A as an example for a case study. 4n6h_A is an <italic>Escherichia coli</italic> &#x03B1;-TMP (subgroup: G protein-coupled receptor) containing 408 residues that acts as the receptor of multiple ligands such as sodium ion, heme, and so on (<xref ref-type="bibr" rid="B8">Fenalti et al., 2014</xref>). <xref ref-type="fig" rid="F6">Figure 6</xref> shows the 3D visualization of the predicted result (surface version) and <xref ref-type="fig" rid="F7">Figure 7</xref> illustrates the comparison between the TMP-SSurface2-predicted rASA values and the real rASA values. As shown in the figures, the overall trend of rASA has been appropriately captured, but TMP-SSurface2 seems conservative in predicting some fully exposed or buried residues&#x2019; rASA. It is suspected that TMP-SSurface2 may confuse these residues with the ones located in water-soluble regions, resulting in lower prediction performance for them.</p>
<fig id="F6" position="float">
<label>FIGURE 6</label>
<caption><p>The 3D visualization of the predicted result (surface version).</p></caption>
<graphic xlink:href="fgene-12-656140-g006.tif"/>
</fig>
<fig id="F7" position="float">
<label>FIGURE 7</label>
<caption><p>The comparison between the TMP-SSurface2-predicted rASA values and real rASA values.</p></caption>
<graphic xlink:href="fgene-12-656140-g007.tif"/>
</fig>
</sec>
</sec>
<sec id="S4">
<title>Conclusion</title>
<p>In this study, we proposed an updated TMP-SSurface predictor, which aims to predict transmembrane protein residues&#x2019; rASA from primary sequences. Unlike classical feed-forward neural networks, we developed an attention-enhanced bidirectional LSTM network on top of the CNN-based Z-coordinate predictor to process sequential data and improved the CC value of the old version from 0.58 to 0.66 on the independent test dataset. The improvement brought by the LSTM directly indicates that the order of residues in a sequence does influence the protein structure and that the LSTM has a more powerful ability to process sequential data than CapsNet. The Z-coordinate feature was explored and applied in TMP-SSurface2 and proved to be useful, which means that the Z-coordinate has a lifting effect on rASA prediction, indicating that structural features can support each other. We also included various important experiments, such as feature visualization and a case study, to illustrate the effectiveness of the model. TMP-SSurface2 places no constraints on its input, since it can handle all types of TMPs of any length. The predicted rASA will contribute to TMP structure analysis, TMP-ligand binding prediction, TMP function identification, and so on.</p>
</sec>
<sec id="S5">
<title>Data Availability Statement</title>
<p>The original contributions presented in the study are included in the article/supplementary material; further inquiries can be directed to the corresponding author.</p>
</sec>
<sec id="S6">
<title>Author Contributions</title>
<p>ZL, YGo, and XZ conceived the idea of this research, collected the data, implemented the predictor, and wrote the manuscript. YGu and CL tuned the model and tested the predictor. LZ and HW supervised the research and reviewed the manuscript. All authors contributed to the article and approved the submitted version.</p>
</sec>
<sec sec-type="COI-statement" id="conf1">
<title>Conflict of Interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
</body>
<back>
<fn-group>
<fn fn-type="financial-disclosure">
<p><bold>Funding.</bold> This work was supported by the Science and Technology Research Project of the Education Department of Jilin Province (No. JJKH20191309KJ), Jilin Scientific and Technological Development Program (No. 20180414006GH), and Fundamental Research Funds for the Central Universities (Nos. 2412019FZ052 and 2412019FZ048).</p>
</fn>
</fn-group>
<ack>
<p>This article is recommended by the 5th CCF Bioinformatics Conference.</p>
</ack>
<ref-list>
<title>References</title>
<ref id="B1"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Altschul</surname> <given-names>S. F.</given-names></name> <name><surname>Madden</surname> <given-names>T. L.</given-names></name> <name><surname>Sch&#x00E4;ffer</surname> <given-names>A. A.</given-names></name> <name><surname>Zhang</surname> <given-names>J.</given-names></name> <name><surname>Zhang</surname> <given-names>Z.</given-names></name> <name><surname>Miller</surname> <given-names>W.</given-names></name><etal/></person-group> (<year>1997</year>). <article-title>Gapped BLAST and PSI-BLAST: a new generation of protein database search programs.</article-title> <source><italic>Nucleic acids research</italic></source> <volume>25</volume> <fpage>3389</fpage>&#x2013;<lpage>3402</lpage>. <pub-id pub-id-type="doi">10.1093/nar/25.17.3389</pub-id> <pub-id pub-id-type="pmid">9254694</pub-id></citation></ref>
<ref id="B2"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Baron-Cohen</surname> <given-names>S.</given-names></name></person-group> (<year>1995</year>). &#x201C;<article-title>The eye direction detector (EDD) and the shared attention mechanism (SAM): Two cases for evolutionary psychology</article-title>,&#x201D; in <source><italic>proceeding at the Portions of this paper were presented at the Society for Research in Child Development Conference, New Orleans, Mar 1993; the British Psychological Society, Welsh Branch,&#x201D; Faces&#x201D; Conference, U Wales Coll of Cardiff, Sep 1993; and the British Society for the Philosophy of Science&#x201D; Roots of Joint Reference&#x201D; Conference, U Bristol, Nov 1993</italic></source>, (<publisher-loc>Mahwah</publisher-loc>: <publisher-name>Lawrence Erlbaum Associates, Inc</publisher-name>).</citation></ref>
<ref id="B3"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Beuming</surname> <given-names>T.</given-names></name> <name><surname>Weinstein</surname> <given-names>H.</given-names></name></person-group> (<year>2004</year>). <article-title>A knowledge-based scale for the analysis and prediction of buried and exposed faces of transmembrane domain proteins.</article-title> <source><italic>Bioinformatics</italic></source> <volume>20</volume> <fpage>1822</fpage>&#x2013;<lpage>1835</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/bth143</pub-id> <pub-id pub-id-type="pmid">14988128</pub-id></citation></ref>
<ref id="B4"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chorowski</surname> <given-names>J.</given-names></name> <name><surname>Bahdanau</surname> <given-names>D.</given-names></name> <name><surname>Cho</surname> <given-names>K.</given-names></name> <name><surname>Bengio</surname> <given-names>Y.</given-names></name></person-group> (<year>2014</year>). <article-title>End-to-end continuous speech recognition using attention-based recurrent NN: first results.</article-title> <source><italic>arXiv</italic> [Preprint]</source> <comment>arXiv: 1412.1602</comment>.</citation></ref>
<ref id="B5"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Dahl</surname> <given-names>G. E.</given-names></name> <name><surname>Sainath</surname> <given-names>T. N.</given-names></name> <name><surname>Hinton</surname> <given-names>G. E.</given-names></name></person-group> (<year>2013</year>). &#x201C;<article-title>Improving deep neural networks for LVCSR using rectified linear units and dropout</article-title>,&#x201D; in <source><italic>Proceeding of the 2013 IEEE International Conference on Acoustics, Speech and Signal Processing</italic></source>, (<publisher-loc>Vancouver, BC</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>8609</fpage>&#x2013;<lpage>8613</lpage>.</citation></ref>
<ref id="B6"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ding</surname> <given-names>H.</given-names></name> <name><surname>Li</surname> <given-names>D.</given-names></name></person-group> (<year>2015</year>). <article-title>Identification of mitochondrial proteins of malaria parasite using analysis of variance.</article-title> <source><italic>Amino acids</italic></source> <volume>47</volume> <fpage>329</fpage>&#x2013;<lpage>333</lpage>. <pub-id pub-id-type="doi">10.1007/s00726-014-1862-4</pub-id> <pub-id pub-id-type="pmid">25385313</pub-id></citation></ref>
<ref id="B7"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Fang</surname> <given-names>C.</given-names></name> <name><surname>Shang</surname> <given-names>Y.</given-names></name> <name><surname>Xu</surname> <given-names>D.</given-names></name></person-group> (<year>2018</year>). <article-title>Improving protein gamma-turn prediction using inception capsule networks.</article-title> <source><italic>Sci. Rep.</italic></source> <volume>8</volume>:<issue>15741</issue>.</citation></ref>
<ref id="B8"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Fenalti</surname> <given-names>G.</given-names></name> <name><surname>Giguere</surname> <given-names>P. M.</given-names></name> <name><surname>Katritch</surname> <given-names>V.</given-names></name> <name><surname>Huang</surname> <given-names>X.-P.</given-names></name> <name><surname>Thompson</surname> <given-names>A. A.</given-names></name> <name><surname>Cherezov</surname> <given-names>V.</given-names></name><etal/></person-group> (<year>2014</year>). <article-title>Molecular control of &#x03B4;-opioid receptor signalling.</article-title> <source><italic>Nature</italic></source> <volume>506</volume> <fpage>191</fpage>&#x2013;<lpage>196</lpage>.</citation></ref>
<ref id="B9"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Goddard</surname> <given-names>A. D.</given-names></name> <name><surname>Dijkman</surname> <given-names>P. M.</given-names></name> <name><surname>Adamson</surname> <given-names>R. J.</given-names></name> <name><surname>dos Reis</surname> <given-names>R. I.</given-names></name> <name><surname>Watts</surname> <given-names>A.</given-names></name></person-group> (<year>2015</year>). <article-title>Reconstitution of membrane proteins: a GPCR as an example.</article-title> <source><italic>Methods Enzymol.</italic></source> <volume>556</volume> <fpage>405</fpage>&#x2013;<lpage>424</lpage>.</citation></ref>
<ref id="B10"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>He</surname> <given-names>F.</given-names></name> <name><surname>Wang</surname> <given-names>R.</given-names></name> <name><surname>Li</surname> <given-names>J.</given-names></name> <name><surname>Bao</surname> <given-names>L.</given-names></name> <name><surname>Xu</surname> <given-names>D.</given-names></name> <name><surname>Zhao</surname> <given-names>X.</given-names></name></person-group> (<year>2018</year>). <article-title>Large-scale prediction of protein ubiquitination sites using a multimodal deep architecture.</article-title> <source><italic>BMC Syst. Biol.</italic></source> <volume>12</volume>:<issue>109</issue>. <pub-id pub-id-type="doi">10.1186/s12918-018-0628-0</pub-id> <pub-id pub-id-type="pmid">30463553</pub-id></citation></ref>
<ref id="B11"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Heffernan</surname> <given-names>R.</given-names></name> <name><surname>Yang</surname> <given-names>Y.</given-names></name> <name><surname>Paliwal</surname> <given-names>K.</given-names></name> <name><surname>Zhou</surname> <given-names>Y.</given-names></name></person-group> (<year>2017</year>). <article-title>Capturing non-local interactions by long short-term memory bidirectional recurrent neural networks for improving prediction of protein secondary structure, backbone angles, contact numbers and solvent accessibility.</article-title> <source><italic>Bioinformatics</italic></source> <volume>33</volume> <fpage>2842</fpage>&#x2013;<lpage>2849</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btx218</pub-id> <pub-id pub-id-type="pmid">28430949</pub-id></citation></ref>
<ref id="B12"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Illerg&#x00E5;rd</surname> <given-names>K.</given-names></name> <name><surname>Callegari</surname> <given-names>S.</given-names></name> <name><surname>Elofsson</surname> <given-names>A.</given-names></name></person-group> (<year>2010</year>). <article-title>MPRAP: an accessibility predictor for a-helical transmembrane proteins that performs well inside and outside the membrane.</article-title> <source><italic>BMC Bioinformatics</italic></source> <volume>11</volume>:<issue>333</issue>. <pub-id pub-id-type="doi">10.1186/1471-2105-11-333</pub-id> <pub-id pub-id-type="pmid">20565847</pub-id></citation></ref>
<ref id="B13"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Jeong</surname> <given-names>J. C.</given-names></name> <name><surname>Lin</surname> <given-names>X.</given-names></name> <name><surname>Chen</surname> <given-names>X.-W.</given-names></name></person-group> (<year>2010</year>). <article-title>On position-specific scoring matrix for protein function prediction.</article-title> <source><italic>IEEE/ACM Trans. Comput. Biol. Bioinform.</italic></source> <volume>8</volume> <fpage>308</fpage>&#x2013;<lpage>315</lpage>. <pub-id pub-id-type="doi">10.1109/tcbb.2010.93</pub-id> <pub-id pub-id-type="pmid">20855926</pub-id></citation></ref>
<ref id="B14"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kabsch</surname> <given-names>W.</given-names></name> <name><surname>Sander</surname> <given-names>C.</given-names></name></person-group> (<year>1983</year>). <article-title>Dictionary of protein secondary structure: pattern recognition of hydrogen&#x2212;bonded and geometrical features.</article-title> <source><italic>Biopolymers</italic></source> <volume>22</volume> <fpage>2577</fpage>&#x2013;<lpage>2637</lpage>. <pub-id pub-id-type="doi">10.1002/bip.360221211</pub-id> <pub-id pub-id-type="pmid">6667333</pub-id></citation></ref>
<ref id="B15"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kauko</surname> <given-names>A.</given-names></name> <name><surname>Illerg&#x00E5;rd</surname> <given-names>K.</given-names></name> <name><surname>Elofsson</surname> <given-names>A.</given-names></name></person-group> (<year>2008</year>). <article-title>Coils in the membrane core are conserved and functionally important.</article-title> <source><italic>J. Mol. Biol.</italic></source> <volume>380</volume>, <fpage>170</fpage>&#x2013;<lpage>180</lpage>. <pub-id pub-id-type="doi">10.1016/j.jmb.2008.04.052</pub-id> <pub-id pub-id-type="pmid">18511074</pub-id></citation></ref>
<ref id="B16"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lai</surname> <given-names>J.-S.</given-names></name> <name><surname>Cheng</surname> <given-names>C.-W.</given-names></name> <name><surname>Lo</surname> <given-names>A.</given-names></name> <name><surname>Sung</surname> <given-names>T.-Y.</given-names></name> <name><surname>Hsu</surname> <given-names>W.-L.</given-names></name></person-group> (<year>2013</year>). <article-title>Lipid exposure prediction enhances the inference of rotational angles of transmembrane helices.</article-title> <source><italic>BMC Bioinformatics</italic></source> <volume>14</volume>:<issue>304</issue>. <pub-id pub-id-type="doi">10.1186/1471-2105-14-304</pub-id> <pub-id pub-id-type="pmid">24112406</pub-id></citation></ref>
<ref id="B17"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lee</surname> <given-names>B.</given-names></name> <name><surname>Richards</surname> <given-names>F. M.</given-names></name></person-group> (<year>1971</year>). <article-title>The interpretation of protein structures: estimation of static accessibility.</article-title> <source><italic>J. Mol. Biol.</italic></source> <volume>55</volume> <fpage>379</fpage>&#x2013;<lpage>400</lpage>. <pub-id pub-id-type="doi">10.1016/0022-2836(71)90324-x</pub-id> <pub-id pub-id-type="pmid">5551392</pub-id></citation></ref>
<ref id="B18"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>W.</given-names></name> <name><surname>Godzik</surname> <given-names>A.</given-names></name></person-group> (<year>2006</year>). <article-title>Cd-hit: a fast program for clustering and comparing large sets of protein or nucleotide sequences.</article-title> <source><italic>Bioinformatics</italic></source> <volume>22</volume> <fpage>1658</fpage>&#x2013;<lpage>1659</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btl158</pub-id> <pub-id pub-id-type="pmid">16731699</pub-id></citation></ref>
<ref id="B19"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Liwicki</surname> <given-names>M.</given-names></name> <name><surname>Graves</surname> <given-names>A.</given-names></name> <name><surname>Fern&#x00E1;ndez</surname> <given-names>S.</given-names></name> <name><surname>Bunke</surname> <given-names>H.</given-names></name> <name><surname>Schmidhuber</surname> <given-names>J.</given-names></name></person-group> (<year>2007</year>). &#x201C;<article-title>A novel approach to on-line handwriting recognition based on bidirectional long short-term memory networks</article-title>,&#x201D; in <source><italic>Proceedings of the 9th International Conference on Document Analysis and Recognition, ICDAR 2007, Parana.</italic></source><sup>&#x2217;</sup></citation></ref>
<ref id="B20"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lu</surname> <given-names>C.</given-names></name> <name><surname>Gong</surname> <given-names>Y.</given-names></name> <name><surname>Liu</surname> <given-names>Z.</given-names></name> <name><surname>Guo</surname> <given-names>Y.</given-names></name> <name><surname>Ma</surname> <given-names>Z.</given-names></name> <name><surname>Wang</surname> <given-names>H.</given-names></name></person-group> (<year>2020</year>). <article-title>TM-ZC: a deep learning-based predictor for the Z-coordinate of residues in &#x03B1;-helical transmembrane proteins.</article-title> <source><italic>IEEE Access</italic></source> <volume>8</volume> <fpage>40129</fpage>&#x2013;<lpage>40137</lpage>. <pub-id pub-id-type="doi">10.1109/ACCESS.2020.2976797</pub-id></citation></ref>
<ref id="B21"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lu</surname> <given-names>C.</given-names></name> <name><surname>Liu</surname> <given-names>Z.</given-names></name> <name><surname>Kan</surname> <given-names>B.</given-names></name> <name><surname>Gong</surname> <given-names>Y.</given-names></name> <name><surname>Ma</surname> <given-names>Z.</given-names></name> <name><surname>Wang</surname> <given-names>H.</given-names></name></person-group> (<year>2019a</year>). <article-title>TMP-SSurface: a deep learning-based predictor for surface accessibility of transmembrane protein residues.</article-title> <source><italic>Crystals</italic></source> <volume>9</volume>:<issue>640</issue>. <pub-id pub-id-type="doi">10.3390/cryst9120640</pub-id></citation></ref>
<ref id="B22"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lu</surname> <given-names>C.</given-names></name> <name><surname>Liu</surname> <given-names>Z.</given-names></name> <name><surname>Zhang</surname> <given-names>E.</given-names></name> <name><surname>He</surname> <given-names>F.</given-names></name> <name><surname>Ma</surname> <given-names>Z.</given-names></name> <name><surname>Wang</surname> <given-names>H.</given-names></name></person-group> (<year>2019b</year>). <article-title>MPLs-pred: predicting membrane protein-ligand binding sites using hybrid sequence-based features and ligand-specific models.</article-title> <source><italic>Int. J. Mol. Sci.</italic></source> <volume>20</volume>:<issue>3120</issue>. <pub-id pub-id-type="doi">10.3390/ijms20133120</pub-id> <pub-id pub-id-type="pmid">31247932</pub-id></citation></ref>
<ref id="B23"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Mihel</surname> <given-names>J.</given-names></name> <name><surname>&#x0160;iki&#x0107;</surname> <given-names>M.</given-names></name> <name><surname>Tomi&#x0107;</surname> <given-names>S.</given-names></name> <name><surname>Jeren</surname> <given-names>B.</given-names></name> <name><surname>Vlahovi&#x010D;ek</surname> <given-names>K.</given-names></name></person-group> (<year>2008</year>). <article-title>PSAIA&#x2013;protein structure and interaction analyzer.</article-title> <source><italic>BMC Struct. Biol.</italic></source> <volume>8</volume>:<issue>21</issue>. <pub-id pub-id-type="doi">10.1186/1472-6807-8-21</pub-id> <pub-id pub-id-type="pmid">18400099</pub-id></citation></ref>
<ref id="B24"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Moon</surname> <given-names>Y. H.</given-names></name> <name><surname>Lim</surname> <given-names>W.</given-names></name> <name><surname>Jeong</surname> <given-names>B. C.</given-names></name></person-group> (<year>2019</year>). <article-title>Transmembrane protein 64 modulates prostate tumor progression by regulating Wnt3a secretion.</article-title> <source><italic>Oncol. Lett.</italic></source> <volume>18</volume> <fpage>283</fpage>&#x2013;<lpage>290</lpage>.</citation></ref>
<ref id="B25"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Oguro</surname> <given-names>A.</given-names></name> <name><surname>Imaoka</surname> <given-names>S.</given-names></name></person-group> (<year>2019</year>). <article-title>Thioredoxin-related transmembrane protein 2 (TMX2) regulates the ran protein gradient and importin-&#x03B2;-dependent nuclear cargo transport.</article-title> <source><italic>Sci. Rep.</italic></source> <volume>9</volume>:<issue>15296</issue>.</citation></ref>
<ref id="B26"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Padmanabhan</surname> <given-names>S.</given-names></name></person-group> (<year>2014</year>). <source><italic>Handbook of Pharmacogenomics and Stratified Medicine.</italic></source> <publisher-loc>London</publisher-loc>: <publisher-name>Academic Press</publisher-name>.</citation></ref>
<ref id="B27"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Puder</surname> <given-names>S.</given-names></name> <name><surname>Fischer</surname> <given-names>T.</given-names></name> <name><surname>Mierke</surname> <given-names>C. T.</given-names></name></person-group> (<year>2019</year>). <article-title>The transmembrane protein fibrocystin/polyductin regulates cell mechanics and cell motility.</article-title> <source><italic>Phys. Biol.</italic></source> <volume>16</volume>:<issue>066006</issue>. <pub-id pub-id-type="doi">10.1088/1478-3975/ab39fa</pub-id> <pub-id pub-id-type="pmid">31398719</pub-id></citation></ref>
<ref id="B28"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rafi</surname> <given-names>S. K.</given-names></name> <name><surname>Fern&#x00E1;ndez-Ja&#x00E9;n</surname> <given-names>A.</given-names></name> <name><surname>&#x00C1;lvarez</surname> <given-names>S.</given-names></name> <name><surname>Nadeau</surname> <given-names>O. W.</given-names></name> <name><surname>Butler</surname> <given-names>M. G.</given-names></name></person-group> (<year>2019</year>). <article-title>High functioning autism with missense mutations in synaptotagmin-like protein 4 (sytl4) and transmembrane protein 187 (tmem187) genes: sytl4-protein modeling, protein-protein interaction, expression profiling and microrna studies.</article-title> <source><italic>Int. J. Mol. Sci.</italic></source> <volume>20</volume>:<issue>3358</issue>. <pub-id pub-id-type="doi">10.3390/ijms20133358</pub-id> <pub-id pub-id-type="pmid">31323913</pub-id></citation></ref>
<ref id="B29"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rockt&#x00E4;schel</surname> <given-names>T.</given-names></name> <name><surname>Grefenstette</surname> <given-names>E.</given-names></name> <name><surname>Hermann</surname> <given-names>K. M.</given-names></name> <name><surname>Ko&#x010D;isk&#x00FD;</surname> <given-names>T.</given-names></name> <name><surname>Blunsom</surname> <given-names>P.</given-names></name></person-group> (<year>2015</year>). <article-title>Reasoning about entailment with neural attention.</article-title> <source><italic>arXiv</italic> [preprint]</source> <comment>arXiv: 1509.06664</comment>.</citation></ref>
<ref id="B30"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Roy</surname> <given-names>A.</given-names></name></person-group> (<year>2015</year>). <article-title>Membrane preparation and solubilization.</article-title> <source><italic>Methods Enzymol.</italic></source> <volume>557</volume> <fpage>45</fpage>&#x2013;<lpage>56</lpage>. <pub-id pub-id-type="doi">10.1016/bs.mie.2014.11.044</pub-id> <pub-id pub-id-type="pmid">25950959</pub-id></citation></ref>
<ref id="B31"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sanner</surname> <given-names>M. F.</given-names></name> <name><surname>Olson</surname> <given-names>A. J.</given-names></name> <name><surname>Spehner</surname> <given-names>J. C.</given-names></name></person-group> (<year>1996</year>). <article-title>Reduced surface: an efficient way to compute molecular surfaces.</article-title> <source><italic>Biopolymers</italic></source> <volume>38</volume> <fpage>305</fpage>&#x2013;<lpage>320</lpage>. <pub-id pub-id-type="doi">10.1002/(sici)1097-0282(199603)38:3&#x003C;305::aid-bip4&#x003E;3.0.co;2-y</pub-id></citation></ref>
<ref id="B32"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sharma</surname> <given-names>S.</given-names></name> <name><surname>Kiros</surname> <given-names>R.</given-names></name> <name><surname>Salakhutdinov</surname> <given-names>R.</given-names></name></person-group> (<year>2015</year>). <article-title>Action recognition using visual attention.</article-title> <source><italic>arXiv</italic> [preprint]</source> <comment>arXiv:1511.04119</comment>.</citation></ref>
<ref id="B33"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>S&#x00F8;nderby</surname> <given-names>S. K.</given-names></name> <name><surname>S&#x00F8;nderby</surname> <given-names>C. K.</given-names></name> <name><surname>Nielsen</surname> <given-names>H.</given-names></name> <name><surname>Winther</surname> <given-names>O.</given-names></name></person-group> (<year>2015</year>). &#x201C;<article-title>Convolutional LSTM networks for subcellular localization of proteins</article-title>,&#x201D; in <source><italic>Proceedings of the International Conference on Algorithms for Computational Biology</italic></source>, (<publisher-name>Springer</publisher-name>), <fpage>68</fpage>&#x2013;<lpage>80</lpage>. <pub-id pub-id-type="doi">10.1007/978-3-319-21233-3_6</pub-id></citation></ref>
<ref id="B34"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>S&#x00F8;nderby</surname> <given-names>S. K.</given-names></name> <name><surname>Winther</surname> <given-names>O.</given-names></name></person-group> (<year>2014</year>). <article-title>Protein secondary structure prediction with long short term memory networks.</article-title> <source><italic>arXiv</italic> [preprint]</source> <comment>arXiv:1412.7828</comment>.</citation></ref>
<ref id="B35"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Stillwell</surname> <given-names>W.</given-names></name></person-group> (<year>2016</year>). <source><italic>An Introduction to Biological Membranes: Composition, Structure and Function.</italic></source> <publisher-name>Elsevier</publisher-name>. Available online at: <ext-link ext-link-type="uri" xlink:href="https://books.google.com/books?hl=en&#x0026;lr=&#x0026;id=Q_WpCwAAQBAJ&#x0026;oi=fnd&#x0026;pg=PP1&#x0026;dq=Stillwell,+W.+(2016).+An+Introduction+to+Biological+Membranes:&#x0026;ots=NCr6lWYhDS&#x0026;sig=VHh16aKevDFW2U96K7XEPlWG_u4#v=onepage&#x0026;q=Stillwell%2C%20W.%20(2016).%20An%20Introduction%20to%20Biological%20Membranes%3A&#x0026;f=false">https://books.google.com/books?hl=en&#x0026;lr=&#x0026;id=Q_WpCwAAQBAJ&#x0026;oi=fnd&#x0026;pg=PP1&#x0026;dq=Stillwell,+W.+(2016).+An+Introduction+to+Biological+Membranes:&#x0026;ots=NCr6lWYhDS&#x0026;sig=VHh16aKevDFW2U96K7XEPlWG_u4#v=onepage&#x0026;q=Stillwell%2C%20W.%20(2016).%20An%20Introduction%20to%20Biological%20Membranes%3A&#x0026;f=false</ext-link></citation></ref>
<ref id="B36"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Studer</surname> <given-names>G.</given-names></name> <name><surname>Biasini</surname> <given-names>M.</given-names></name> <name><surname>Schwede</surname> <given-names>T.</given-names></name></person-group> (<year>2014</year>). <article-title>Assessing the local structural quality of transmembrane protein models using statistical potentials (QMEANBrane).</article-title> <source><italic>Bioinformatics</italic></source> <volume>30</volume> <fpage>i505</fpage>&#x2013;<lpage>i511</lpage>.</citation></ref>
<ref id="B37"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Tanabe</surname> <given-names>Y.</given-names></name> <name><surname>Taira</surname> <given-names>T.</given-names></name> <name><surname>Shimotake</surname> <given-names>A.</given-names></name> <name><surname>Inoue</surname> <given-names>T.</given-names></name> <name><surname>Awaya</surname> <given-names>T.</given-names></name> <name><surname>Kato</surname> <given-names>T.</given-names></name><etal/></person-group> (<year>2019</year>). <article-title>An adult female with proline-rich transmembrane protein 2 related paroxysmal disorders manifesting paroxysmal kinesigenic choreoathetosis and epileptic seizures.</article-title> <source><italic>Rinsho shinkeigaku</italic></source> <volume>59</volume> <fpage>144</fpage>&#x2013;<lpage>148</lpage>. <pub-id pub-id-type="doi">10.5692/clinicalneurol.cn-001228</pub-id> <pub-id pub-id-type="pmid">30814447</pub-id></citation></ref>
<ref id="B38"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Tarafder</surname> <given-names>S.</given-names></name> <name><surname>Ahmed</surname> <given-names>M. T.</given-names></name> <name><surname>Iqbal</surname> <given-names>S.</given-names></name> <name><surname>Hoque</surname> <given-names>M. T.</given-names></name> <name><surname>Rahman</surname> <given-names>M. S.</given-names></name></person-group> (<year>2018</year>). <article-title>RBSURFpred: modeling protein accessible surface area in real and binary space using regularized and optimized regression.</article-title> <source><italic>J. Theor. Biol.</italic></source> <volume>441</volume> <fpage>44</fpage>&#x2013;<lpage>57</lpage>. <pub-id pub-id-type="doi">10.1016/j.jtbi.2017.12.029</pub-id> <pub-id pub-id-type="pmid">29305182</pub-id></citation></ref>
<ref id="B39"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Tien</surname> <given-names>M. Z.</given-names></name> <name><surname>Meyer</surname> <given-names>A. G.</given-names></name> <name><surname>Sydykova</surname> <given-names>D. K.</given-names></name> <name><surname>Spielman</surname> <given-names>S. J.</given-names></name> <name><surname>Wilke</surname> <given-names>C. O.</given-names></name></person-group> (<year>2013</year>). <article-title>Maximum allowed solvent accessibilites of residues in proteins.</article-title> <source><italic>PloS one</italic></source> <volume>8</volume>:<issue>e80635</issue>. <pub-id pub-id-type="doi">10.1371/journal.pone.0080635</pub-id> <pub-id pub-id-type="pmid">24278298</pub-id></citation></ref>
<ref id="B40"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>C.</given-names></name> <name><surname>Li</surname> <given-names>S.</given-names></name> <name><surname>Xi</surname> <given-names>L.</given-names></name> <name><surname>Liu</surname> <given-names>H.</given-names></name> <name><surname>Yao</surname> <given-names>X.</given-names></name></person-group> (<year>2011</year>). <article-title>Accurate prediction of the burial status of transmembrane residues of &#x03B1;-helix membrane protein by incorporating the structural and physicochemical features.</article-title> <source><italic>Amino acids</italic></source> <volume>40</volume> <fpage>991</fpage>&#x2013;<lpage>1002</lpage>. <pub-id pub-id-type="doi">10.1007/s00726-010-0727-8</pub-id> <pub-id pub-id-type="pmid">20740371</pub-id></citation></ref>
<ref id="B41"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Weihong</surname> <given-names>C.</given-names></name> <name><surname>Bin</surname> <given-names>C.</given-names></name> <name><surname>Jianfeng</surname> <given-names>Y.</given-names></name></person-group> (<year>2019</year>). <article-title>Transmembrane protein 126B protects against high fat diet (HFD)-induced renal injury by suppressing dyslipidemia via inhibition of ROS.</article-title> <source><italic>Biochem. Biophys. Res. Commun.</italic></source> <volume>509</volume> <fpage>40</fpage>&#x2013;<lpage>47</lpage>. <pub-id pub-id-type="doi">10.1016/j.bbrc.2018.12.003</pub-id> <pub-id pub-id-type="pmid">30580996</pub-id></citation></ref>
<ref id="B42"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wold</surname> <given-names>H.</given-names></name></person-group> (<year>1987</year>). <article-title>Response to D. A. Freedman.</article-title> <source><italic>J. Educ. Stat.</italic></source> <volume>12</volume>, <fpage>202</fpage>&#x2013;<lpage>205</lpage>. <pub-id pub-id-type="doi">10.3102/10769986012002202</pub-id></citation></ref>
<ref id="B43"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Xiao</surname> <given-names>F.</given-names></name> <name><surname>Shen</surname> <given-names>H.-B.</given-names></name></person-group> (<year>2015</year>). <article-title>Prediction enhancement of residue real-value relative accessible surface area in transmembrane helical proteins by solving the output preference problem of machine learning-based predictors.</article-title> <source><italic>J. Chem. Inf. Mod.</italic></source> <volume>55</volume> <fpage>2464</fpage>&#x2013;<lpage>2474</lpage>. <pub-id pub-id-type="doi">10.1021/acs.jcim.5b00246</pub-id> <pub-id pub-id-type="pmid">26455366</pub-id></citation></ref>
<ref id="B44"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yan</surname> <given-names>J.</given-names></name> <name><surname>Jiang</surname> <given-names>Y.</given-names></name> <name><surname>Lu</surname> <given-names>J.</given-names></name> <name><surname>Wu</surname> <given-names>J.</given-names></name> <name><surname>Zhang</surname> <given-names>M.</given-names></name></person-group> (<year>2019</year>). <article-title>Inhibiting of proliferation, migration, and invasion in lung cancer induced by silencing interferon-induced transmembrane protein 1 (IFITM1).</article-title> <source><italic>BioMed Res. Int.</italic></source> <volume>2019</volume>:<issue>9085435</issue>.</citation></ref>
<ref id="B45"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yin</surname> <given-names>X.</given-names></name> <name><surname>Yang</surname> <given-names>J.</given-names></name> <name><surname>Xiao</surname> <given-names>F.</given-names></name> <name><surname>Yang</surname> <given-names>Y.</given-names></name> <name><surname>Shen</surname> <given-names>H.-B.</given-names></name></person-group> (<year>2018</year>). <article-title>MemBrain: an easy-to-use online webserver for transmembrane protein structure prediction.</article-title> <source><italic>Nanomicro Lett.</italic></source> <volume>10</volume>:<issue>2</issue>.</citation></ref>
<ref id="B46"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yuan</surname> <given-names>Z.</given-names></name> <name><surname>Zhang</surname> <given-names>F.</given-names></name> <name><surname>Davis</surname> <given-names>M. J.</given-names></name> <name><surname>Bod&#x00E9;n</surname> <given-names>M.</given-names></name> <name><surname>Teasdale</surname> <given-names>R. D.</given-names></name></person-group> (<year>2006</year>). <article-title>Predicting the solvent accessibility of transmembrane residues from protein sequence.</article-title> <source><italic>J. Proteome Res.</italic></source> <volume>5</volume> <fpage>1063</fpage>&#x2013;<lpage>1070</lpage>. <pub-id pub-id-type="doi">10.1021/pr050397b</pub-id> <pub-id pub-id-type="pmid">16674095</pub-id></citation></ref>
<ref id="B47"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zeng</surname> <given-names>B.</given-names></name> <name><surname>Hoenigschmid</surname> <given-names>P.</given-names></name> <name><surname>Frishman</surname> <given-names>D.</given-names></name></person-group> (<year>2019</year>). <article-title>Residue co-evolution helps predict interaction sites in &#x03B1;-helical membrane proteins.</article-title> <source><italic>J. Struct. Biol.</italic></source> <volume>206</volume> <fpage>156</fpage>&#x2013;<lpage>169</lpage>. <pub-id pub-id-type="doi">10.1016/j.jsb.2019.02.009</pub-id> <pub-id pub-id-type="pmid">30836197</pub-id></citation></ref>
<ref id="B48"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>J.</given-names></name> <name><surname>Zhang</surname> <given-names>Y.</given-names></name> <name><surname>Ma</surname> <given-names>Z.</given-names></name></person-group> (<year>2019</year>). <article-title>In-silico prediction of human secretory proteins in plasma based on discrete firefly optimization and application to cancer biomarkers identification.</article-title> <source><italic>Front. Genet.</italic></source> <volume>10</volume>:<issue>542</issue>. <pub-id pub-id-type="doi">10.3389/fgene.2019.00542</pub-id> <pub-id pub-id-type="pmid">31244885</pub-id></citation></ref>
</ref-list>
</back>
</article>