<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.3" xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Mol. Biosci.</journal-id>
<journal-title>Frontiers in Molecular Biosciences</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Mol. Biosci.</abbrev-journal-title>
<issn pub-type="epub">2296-889X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">626837</article-id>
<article-id pub-id-type="doi">10.3389/fmolb.2021.626837</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Molecular Biosciences</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Characterizing Hydropathy of Amino Acid Side Chain in a Protein Environment by Investigating the Structural Changes of Water Molecules Network</article-title>
<alt-title alt-title-type="left-running-head">Di Rienzo et al.</alt-title>
<alt-title alt-title-type="right-running-head">Amino Acids Hydropathy Analyzing Proteins</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Di Rienzo</surname>
<given-names>Lorenzo</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="fn" rid="fn1">
<sup>&#x2020;</sup>
</xref>
<uri xlink:href="http://loop.frontiersin.org/people/1156956/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Miotto</surname>
<given-names>Mattia</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="fn" rid="fn1">
<sup>&#x2020;</sup>
</xref>
<uri xlink:href="http://loop.frontiersin.org/people/1088716/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>B&#xf2;</surname>
<given-names>Leonardo</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="http://loop.frontiersin.org/people/1136731/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Ruocco</surname>
<given-names>Giancarlo</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<uri xlink:href="http://loop.frontiersin.org/people/377836/overview"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Raimondo</surname>
<given-names>Domenico</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="http://loop.frontiersin.org/people/820492/overview"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Milanetti</surname>
<given-names>Edoardo</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="http://loop.frontiersin.org/people/921766/overview"/>
</contrib>
</contrib-group>
<aff id="aff1">
<label>
<sup>1</sup>
</label>Center for Life Nanoscience, Istituto Italiano di Tecnologia, <addr-line>Rome</addr-line>, <country>Italy</country>
</aff>
<aff id="aff2">
<label>
<sup>2</sup>
</label>Department of Physics, Sapienza University, <addr-line>Rome</addr-line>, <country>Italy</country>
</aff>
<aff id="aff3">
<label>
<sup>3</sup>
</label>Department of Molecular Medicine, Sapienza University, <addr-line>Rome</addr-line>, <country>Italy</country>
</aff>
<author-notes>
<fn fn-type="equal" id="fn1">
<label>
<sup>&#x2020;</sup>
</label>
<p>These authors have contributed equally to this work</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/949186">Alfredo Iacoangeli</ext-link>, King&#x2019;s College London, United Kingdom</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/472639">Alejandro Giorgetti</ext-link>, University of Verona, Italy</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/709910">Daniele Di Marino</ext-link>, Polytechnic University of Marche, Italy</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Edoardo Milanetti, <email>edoardo.milanetti@uniroma1.it</email>; Domenico Raimondo, <email>domenico.raimondo@uniroma1.it</email>
</corresp>
<fn fn-type="other">
<p>This article was submitted to Structural Biology, a section of the journal Frontiers in Molecular Biosciences</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>26</day>
<month>02</month>
<year>2021</year>
</pub-date>
<pub-date pub-type="collection">
<year>2021</year>
</pub-date>
<volume>8</volume>
<elocation-id>626837</elocation-id>
<history>
<date date-type="received">
<day>06</day>
<month>11</month>
<year>2020</year>
</date>
<date date-type="accepted">
<day>04</day>
<month>01</month>
<year>2021</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2021 Di Rienzo, Miotto, B&#xf2;, Ruocco, Raimondo and Milanetti.</copyright-statement>
<copyright-year>2021</copyright-year>
<copyright-holder>Di Rienzo, Miotto, B&#xf2;, Ruocco, Raimondo and Milanetti</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<self-uri content-type="pdf" xlink:href="626837.pdf"/>
<abstract>
<p>Assessing the hydropathy properties of molecules, like proteins and chemical compounds, has a crucial role in many fields of computational biology, such as drug design, biomolecular interaction, and folding prediction. Over the past decades, many descriptors were devised to evaluate the hydrophobicity of side chains. In this field, we have likewise recently developed a computational method, based on molecular dynamics data, for the investigation of the hydrophilicity and hydrophobicity features of the 20 natural amino acids, analyzing the changes occurring in the hydrogen bond network of water molecules surrounding each given compound. The local environment of each residue is complex and depends on the chemical nature of the side chain and the location in the protein. Here, we characterize the solvation properties of each amino acid side chain in the protein environment by considering its spatial reorganization in the protein local structure, so that the computational evaluation of differences in terms of hydropathy profiles in different structural and dynamical conditions can be brought to bear. A set of atomistic molecular dynamics simulations has been used to characterize the dynamic hydrogen bond network at the interface between protein and solvent, from which we map out the local hydrophobicity and hydrophilicity of amino acid residues.</p>
</abstract>
<kwd-group>
<kwd>hydropathy</kwd>
<kwd>molecular dynamics simulation</kwd>
<kwd>hydrophobicity</kwd>
<kwd>local structural environment</kwd>
<kwd>water molecules network</kwd>
</kwd-group>
</article-meta>
</front>
<body>
<sec id="s1">
<title>1 Introduction</title>
<p>Hydration water molecules play a crucial role in living organisms as most biological processes occur in an aqueous environment (<xref ref-type="bibr" rid="B42">Rothschild and Mancinelli, 2001</xref>), which actively influences the structure and function of biomolecules and their interactions (<xref ref-type="bibr" rid="B24">Levy and Onuchic, 2006</xref>; <xref ref-type="bibr" rid="B5">Ball, 2008</xref>). Compounds immersed in water display different behaviors depending on their chemical characteristics. In particular, the arrangement of the water molecules that hydrate compounds changes according to their properties (<xref ref-type="bibr" rid="B49">Vagenende and Trout, 2012</xref>; <xref ref-type="bibr" rid="B47">Tomobe et al., 2017</xref>). So we can extract information on the chemical nature and function of the solute by studying the attraction and repulsion of chemical compounds toward the water (<xref ref-type="bibr" rid="B12">Chothia, 1976</xref>). In general, both hydrophobic and hydrophilic effects are dominant driving forces for several biochemical processes, such as protein folding, nucleic acid stability, molecular recognition, and binding (<xref ref-type="bibr" rid="B45">Tanford, 1972</xref>; <xref ref-type="bibr" rid="B8">Brooks et al., 1998</xref>; <xref ref-type="bibr" rid="B1">Aftabuddin and Kundu, 2007</xref>; <xref ref-type="bibr" rid="B32">Moret and Zebende, 2007</xref>; <xref ref-type="bibr" rid="B29">Miotto et al., 2018</xref>).</p>
<p>In light of this, solvation water should be considered an integral part of biological macromolecules. In particular, water molecules in solutions are divided into 1) internal water molecules that occupy cavities in the biomolecule structure and can be identified in crystallography; 2) water molecules that interact with the molecular surface and 3) bulk water. Depending on the category, the organization of the water molecules is associated with different time scales. The relaxation times for internal waters range from tens of ns to ms since they require local rearrangement of the protein to occur. On the other hand, the motion of bulk water occurs on the picosecond time scale. In between, there is the motion of surface water molecules that are characterized by residence times on the order of tens of picoseconds (<xref ref-type="bibr" rid="B46">Tarek and Tobias, 2000</xref>; <xref ref-type="bibr" rid="B38">Qvist et al., 2009</xref>; <xref ref-type="bibr" rid="B30">Mondal et al., 2017</xref>).</p>
<p>In general, the investigation of the behavior of water in the hydration shells of organic compounds is a fundamental analysis to better understand most biological processes both from a theoretical and practical point of view (<xref ref-type="bibr" rid="B40">Raschke, 2006</xref>).</p>
<p>An effective measure of the interaction between water and amino acids, the hydropathy index (a number representing the hydrophobic or hydrophilic properties of its side chain), was first proposed in 1982 by Kyte <italic>et al.</italic> (<xref ref-type="bibr" rid="B21">Kyte and Doolittle, 1982</xref>). Indeed, in the computational biology field, attributing a single number, the hydropathy index, to each amino acid is very useful for studying the chemical-physical and structural properties of proteins. Over the past few decades, many hydrophobicity and hydrophilicity scales, based on both experimental and theoretical approaches, have been defined, and these schematizations have proven their usefulness in the characterization of protein regions and the development of computational methods (<xref ref-type="bibr" rid="B11">Chothia, 1974</xref>; <xref ref-type="bibr" rid="B18">Jones, 1975</xref>; <xref ref-type="bibr" rid="B21">Kyte and Doolittle, 1982</xref>; <xref ref-type="bibr" rid="B44">Sweet and Eisenberg, 1983</xref>; <xref ref-type="bibr" rid="B41">Rose et al., 1985</xref>; <xref ref-type="bibr" rid="B52">Wilce et al., 1995</xref>). For instance, one of the typical uses of the hydrophobicity and hydrophilicity values for the 20 amino acids is the prediction of transmembrane regions in protein structure modeling (<xref ref-type="bibr" rid="B13">Deber et al., 2001</xref>).</p>
<p>Recently we have developed a new theoretical-computational method analyzing the orientation of water molecules surrounding a small organic compound, as computed from molecular dynamics simulations (<xref ref-type="bibr" rid="B6">Bonella et al., 2014</xref>). The procedure is based on the calculation of the conditional probability density of finding a water molecule with a specific orientation, given its distance from the nearest atom of the solute (<xref ref-type="bibr" rid="B4">Babiaczyk et al., 2010</xref>; <xref ref-type="bibr" rid="B6">Bonella et al., 2014</xref>).</p>
<p>We thus applied this method to the 20 natural amino acids defining the <italic>WOPHS</italic> (Water Orientation Probability Hydropathy Scale) hydropathy scale, the first scale to be <italic>vectorial</italic> as it associates three indices for each amino acid (<xref ref-type="bibr" rid="B6">Bonella et al., 2014</xref>). In fact, we argued that assigning a single number is not enough to characterize the solvation properties of amino acids, in particular when both hydrophobic and hydrophilic regions are present in the same residue. In this respect, our characterization can be used to understand some of the known ambiguities in the ranking of amino acids in the current scales available in the literature. This method presents several advantages over previously developed computational and experimental approaches: it is sensitive to the specific environment of the amino acids and can be applied to unnatural and modified amino acids, as well as to other small organic molecules (<xref ref-type="bibr" rid="B6">Bonella et al., 2014</xref>; <xref ref-type="bibr" rid="B22">Leopizzi et al., 2017</xref>). In particular, analyzing the structural changes of the dynamic hydrogen bond network, we studied both the <italic>trans</italic>-membrane passive permeation properties for a set of neutral drugs (<xref ref-type="bibr" rid="B28">Milanetti et al., 2016</xref>) and the properties of non-steroidal anti-inflammatory drugs to predict the extraction recovery of NSAIDs from biological fluids set by solid-phase extraction (<xref ref-type="bibr" rid="B27">Milanetti et al., 2019</xref>). When amino acid solvation properties are studied, the main limitation of this method lay in considering a single amino acid in solution instead of inserting it in a functional protein chain. Moreover, the method was developed uniquely for the TIP4 water model, limiting its use in most molecular dynamics simulations (<xref ref-type="bibr" rid="B4">Babiaczyk et al., 2010</xref>).</p>
<p>Since the characteristics of the neighboring residues influence the hydropathy of the examined amino-acid, in this work we define the hydropathy properties of each amino acid taking into account the structural environment that surrounds it. In this way, we incorporate the effects of the own characteristics of each amino acid, as well as the chemical and structural properties induced by the surrounding environment.</p>
<p>Furthermore, the method has atomic resolution (<xref ref-type="bibr" rid="B22">Leopizzi et al., 2017</xref>), meaning that, given a protein, it is possible to characterize not only a single residue or a set of residues, but also to quantify the hydrophobic and hydrophilic properties of a set of atoms that contribute to the formation of a portion of the molecular surface. This perspective is particularly important for the improvement of predictive methods of protein-protein interactions (<xref ref-type="bibr" rid="B33">Nicolau et al., 2014</xref>). In addition, we have also extended the method to other models of water molecules, especially those typically used for molecular dynamics simulations of proteins, enabling the application of our approach also to the trajectories of simulations already performed.</p>
<p>In particular, we have selected a representative set of experimentally solved protein structures and for each of them, we performed an extensive molecular dynamics simulation. We thus studied the hydropathy profile of the amino acids when they are in different protein structural environments, underlining that, especially for some residues, the solvation properties can differ appreciably according to the characteristics of the different neighborhoods. The analysis of our results allows us to define different regions in a plane describing the hydrophobicity and hydrophilicity properties: each residue belonging to the proteins in our dataset is a point on this plane and its position is not only due to its own chemical properties but also to the nature of the residues closest in structure.</p>
<p>The goodness of the characterization proposed here was evaluated considering the average positions of the residues on the two planes, classifying them by amino acids. These results are in perfect agreement with the hydrophobicity measurement of a biological experimental scale, which is considered the state of the art in this field (<xref ref-type="bibr" rid="B17">Hessa et al., 2005</xref>). Furthermore, the dispersion of the residue set for each amino acid was analyzed to underline how the nature of the residues belonging to the structural neighborhood has an important effect on the single residue characterization.</p>
</sec>
<sec sec-type="results|discussion" id="s2">
<title>2 Results and Discussion</title>
<sec id="s2-1">
<title>2.1 Hydropathy Profile for Single Residue in a Specific Protein Environment</title>
<p>In this section we explain the idea we adopted for the calculation of the amino acid solvation properties, studying the distance and the orientation of water molecules with respect to a solute molecule. We investigated the hydropathy of residues in their natural environment, i.e. inserted in a functional and folded protein chain.</p>
<p>To do so, we selected 20 proteins of known structure from the dataset collected by Hensen <italic>et al.</italic> (<xref ref-type="bibr" rid="B15">Hensen et al., 2012</xref>) (see Methods for details), searching for very different proteins in terms of structural features to make the analysis as general as possible. In this perspective, we analyzed the SCOP class (<xref ref-type="bibr" rid="B2">Andreeva et al., 2014</xref>; <xref ref-type="bibr" rid="B3">Andreeva et al., 2020</xref>) of each of the selected proteins, demonstrating that our dataset covers several different folds and therefore ensuring the generality of our findings (See <xref ref-type="sec" rid="s8">Supplementary Table S1</xref>). For each of these proteins, a molecular dynamics simulation of 60 ns was performed, studying the behavior of the explicit solvent molecules around the solute (<xref ref-type="fig" rid="F1">Figure 1A</xref>), after the equilibration time (<xref ref-type="fig" rid="F1">Figure 1C</xref>). To verify that we sampled configurations only after the equilibration in all the simulations we performed, we report in the Supporting Information the Root Mean Square Deviation and the Solvent Accessible surface as a function of time for all the proteins (See <xref ref-type="sec" rid="s8">Supplementary Figures S1&#x2013;S2</xref>).</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>
<bold>(A)</bold> Snapshot taken from the molecular dynamics simulation of Concanavalin B (PDB id: 1CNV) performed with explicit solvent. The protein structure is represented in grey, while blue sticks (also zoomed on the right) highlight the position and orientation of an example residue, Lys 258, with respect to the surrounding water molecules <bold>(B)</bold> The disposition of each water molecule around a given residue is described representing each solvent molecule as a tetrahedron and evaluating the angles, &#x3b8;, formed by each vertex of the tetrahedron with the vector, <inline-formula id="inf1">
<mml:math id="minf1">
<mml:mrow>
<mml:mover accent="true">
<mml:mi>R</mml:mi>
<mml:mo stretchy="true">&#x2192;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:math>
</inline-formula> that joins the nearest heavy atom of the residue with the water oxygen atom. The oxygen atom is used as both center of the tetrahedron and origin for the angle definition. The water dipole, <inline-formula id="inf2">
<mml:math id="minf2">
<mml:mrow>
<mml:mover accent="true">
<mml:mi>d</mml:mi>
<mml:mo stretchy="true">&#x2192;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:math>
</inline-formula> is depicted as a red arrow <bold>(C)</bold> Root mean square deviation (RMSD) of the protein heavy atoms as a function of time, using the initial structure as a reference <bold>(D)</bold> Joint probability distribution, <inline-formula id="inf3">
<mml:math id="minf3">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>R</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, of finding a water molecule at distance <italic>R</italic> from the residue with one angle, &#x3b8; of the corresponding tetrahedron. Top, left and right side plots show the marginal probabilities, <inline-formula id="inf4">
<mml:math id="minf4">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mi>R</mml:mi>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf5">
<mml:math id="minf5">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b8;</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, and <inline-formula id="inf6">
<mml:math id="minf6">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b8;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, respectively <bold>(E)</bold> Conditional probability distribution, <inline-formula id="inf7">
<mml:math id="minf7">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
<mml:mo>&#x7c;</mml:mo>
<mml:mi>R</mml:mi>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>. Left and right side plots show the marginal probabilities, <inline-formula id="inf8">
<mml:math id="minf8">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b8;</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, and <inline-formula id="inf9">
<mml:math id="minf9">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b8;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, respectively.</p>
</caption>
<graphic xlink:href="fmolb-08-626837-g001.tif"/>
</fig>
<p>We note that the explored time span allows us to well grasp the organization of surface waters, while much longer simulations would be needed to consider also the effect of structural water molecules.</p>
<p>According to our method, each solvent molecule can be schematized as a tetrahedron, with the water oxygen in the center and the vertices constituted by the two hydrogen atoms and the two lone pair electrons (<xref ref-type="fig" rid="F1">Figure 1B</xref>), so that each water molecule can form up to four hydrogen bonds (HB). In particular, we associate any water molecule to the closest atom of the solute focusing only on the first hydration shells, i.e. water molecules closer than 6&#xa0;<italic>&#xc5;</italic> to any solute atom. Since each water molecule is assigned to one solute atom, for each water molecule the solvent behavior is represented by three quantities representing the position and the orientation with respect to the solute: the distance <italic>R</italic> between the oxygen atom and the closest heavy atom of the solute, the <italic>hydrogen bond angle</italic> &#x3b8; and the <italic>dipole angle</italic> &#x3d5;. Each hydrogen bond angle is defined as the angle formed between the vector <italic>R</italic> and each vertex of the tetrahedron using the oxygen atom as the origin. Similarly, the dipole angle is built using the vector <italic>R</italic> and the dipole moment <inline-formula id="inf10">
<mml:math id="minf10">
<mml:mrow>
<mml:mover accent="true">
<mml:mi>d</mml:mi>
<mml:mo stretchy="true">&#x2192;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:math>
</inline-formula> (see <xref ref-type="fig" rid="F1">Figure 1B</xref> for a sketch). In this work, we focus on <italic>R</italic> and &#x3b8;, since these quantities allow a complete characterization of the solute hydropathy. Indeed, a non-polar (hydrophobic) molecule in an aqueous solution interacts with the solvent only through van der Waals forces. Since the Coulombic interaction among <inline-formula id="inf11">
<mml:math id="minf11">
<mml:mrow>
<mml:msub>
<mml:mi>H</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mi>O</mml:mi>
<mml:mtext>s</mml:mtext>
</mml:mrow>
</mml:math>
</inline-formula> is strong, water molecules privilege their internal HBs contacts. Alternatively, the interplay between polar or charged molecules and solvent occurs mainly via Coulombic forces, attracting one of the hydrogens or one of the lone pair electrons toward the solute atom. Therefore, when a hydrophobic solute is examined, water molecules place one of the faces of the tetrahedron toward the solute in order to leave all possible HBs available; on the contrary, a water molecule close to a polar or a charged solute reorients itself to point one of its lone pairs or hydrogens toward it.</p>
<p>In a nutshell, given the set of atoms composing an amino acid, we carry out statistical analysis of the orientations of the water molecules that hydrate them. In <xref ref-type="fig" rid="F1">Figure 1D</xref> we show a colormap reporting the joint probability to observe a water molecule with a given <italic>R</italic> and &#x3b8; in the surroundings of the Lys 258 belonging to Concanavalin B (PDB id: 1CNV). As we can see also from the marginal distributions on the panel sides, well-defined peaks reflect the solvation properties of the residue in the protein environment.</p>
<p>On top of <xref ref-type="fig" rid="F1">Figure 1D</xref>, we report <inline-formula id="inf12">
<mml:math id="minf12">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mi>R</mml:mi>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, the probability density distribution of finding a water molecule at a distance <italic>R</italic> from the solute, where <italic>j</italic> is the subscript indicating that the probability density is extracted from the joint probability. The curve is characterized by two maxima (this happens for almost all the amino acids), and it is, therefore, possible to identify the first and the second shell of hydration, after which there is the bulk water. On the right and left part of <xref ref-type="fig" rid="F1">Figure 1D</xref> we show <inline-formula id="inf13">
<mml:math id="minf13">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b8;</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf14">
<mml:math id="minf14">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b8;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, the probability density distribution of finding a water molecule with a certain HB orientation in the first or second shell, respectively, that is, having an <italic>R</italic> in the shell-defining interval (see Methods).</p>
<p>It has been demonstrated that, in order to improve the resolution of the description of first and second solvation shells and to achieve a better characterization of the solute features, the adoption of the <italic>conditional probability</italic> represents a powerful tool (<xref ref-type="bibr" rid="B4">Babiaczyk et al., 2010</xref>). Indeed in this formalism, we report the probability of having a certain &#x3b8;, conditional on the solvent locating at a distance R from the solute atom (See Methods for further details). <xref ref-type="fig" rid="F1">Figure 1E</xref> shows the colormap of the conditional probabilities related to Lys 258 and the corresponding probability densities will be indicated with the subscript <italic>c</italic>.</p>
</sec>
<sec id="s2-2">
<title>2.2 Joint and Conditional Probability for Residue Characterization</title>
<p>For each solvent-exposed residue in our dataset, we built a hydropathy profile juxtaposing their <inline-formula id="inf15">
<mml:math id="minf15">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mi>R</mml:mi>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf16">
<mml:math id="minf16">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b8;</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf17">
<mml:math id="minf17">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b8;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>. In this way, each residue is statistically characterized by the positions and the orientations of the water molecules surrounding it during the simulation. We obtained a very interesting separation of the amino acid hydropathy by applying a Principal Component Analysis (PCA), where the system is rotated to go into the reference system which maximizes the variance of the data. In <xref ref-type="fig" rid="F2">Figure 2A</xref>, we show the two principal components (percentage of explained variance equal to 88%): each point in this plot represents a given residue explored in its protein environment at physiological pH, and the 20 natural amino acids are colored differently. In particular, charged residues are colored in shades of blue, the non-charged polar residues in red while the hydrophobic residues are depicted in shades of yellow. Interestingly, PCA analysis reveals that residues with similar features are clearly grouped together. In particular, the negatively charged residues, Glu and Asp, form an isolated group, underlining their peculiar behavior in solvent interaction, while in the main cluster of residues each region is characterized by a preference for a certain type of residues.</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>
<bold>(A)</bold> Projection along the first two principal components of the residues in the Protein dataset as obtained by a PCA analysis using <inline-formula id="inf18">
<mml:math id="minf18">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mi>R</mml:mi>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf19">
<mml:math id="minf19">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b8;</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, and <inline-formula id="inf20">
<mml:math id="minf20">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b8;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> as descriptors for each residue. Each dot in the plane represents a residue, with different colors corresponding to different amino acids. <bold>(B)</bold> Same as <bold>(A)</bold> but using only the <inline-formula id="inf21">
<mml:math id="minf21">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mi>R</mml:mi>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mtext>s</mml:mtext>
</mml:mrow>
</mml:math>
</inline-formula> as descriptors for each residue. <bold>(C)</bold> Same as <bold>(A)</bold> but using only the <inline-formula id="inf22">
<mml:math id="minf22">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b8;</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mtext>s</mml:mtext>
</mml:mrow>
</mml:math>
</inline-formula> as descriptors for each residue. <bold>(D)</bold> Same as <bold>(A)</bold> but using only the <inline-formula id="inf23">
<mml:math id="minf23">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b8;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mtext>s</mml:mtext>
</mml:mrow>
</mml:math>
</inline-formula> as descriptors for each residue.</p>
</caption>
<graphic xlink:href="fmolb-08-626837-g002.tif"/>
</fig>
<p>We also performed a PCA analysis considering separately <inline-formula id="inf24">
<mml:math id="minf24">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mi>R</mml:mi>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf25">
<mml:math id="minf25">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b8;</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf26">
<mml:math id="minf26">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b8;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>. Results are reported in <xref ref-type="fig" rid="F2">Figures 2B&#x2013;D</xref>, respectively. We can notice that the two PCA analyses gave very similar results. In our opinion, this could mean that, when the joint probability is used to build the profile, the dominating signal is related to the water molecules&#x2019; positions, while the information about their orientation is largely overwhelmed.</p>
<p>To obtain a finer representation of all the water molecule &#x201c;signals&#x201d;, we decided to use the conditional probability to amplify the angular aspect of the hydropathy profile.</p>
<p>To this aim, we performed the same PCA analysis using the <inline-formula id="inf27">
<mml:math id="minf27">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b8;</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf28">
<mml:math id="minf28">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b8;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> as obtained from the conditional probabilities together with <italic>P</italic>(<italic>R</italic>). The result is reported in <xref ref-type="fig" rid="F3">Figure 3A</xref>. We can identify four macro-regions: the negatively charged (blue dots) amino acid region (<inline-formula id="inf29">
<mml:math id="minf29">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mi>C</mml:mi>
<mml:mn>1</mml:mn>
<mml:mo>&#x2243;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf30">
<mml:math id="minf30">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mi>C</mml:mi>
<mml:mn>2</mml:mn>
<mml:mo>&#x2243;</mml:mo>
<mml:mn>0.8</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>), the positively charged (cyan dots) region (<inline-formula id="inf31">
<mml:math id="minf31">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mi>C</mml:mi>
<mml:mn>1</mml:mn>
<mml:mo>&#x2243;</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf32">
<mml:math id="minf32">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mi>C</mml:mi>
<mml:mn>2</mml:mn>
<mml:mo>&#x2243;</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>0.5</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>), the hydrophobic (red dots) amino acid portion (<inline-formula id="inf33">
<mml:math id="minf33">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mi>C</mml:mi>
<mml:mn>1</mml:mn>
<mml:mo>&#x2243;</mml:mo>
<mml:mn>0.8</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf34">
<mml:math id="minf34">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mi>C</mml:mi>
<mml:mn>2</mml:mn>
<mml:mo>&#x2243;</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>0.2</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>) and the polar non-charged (yellow dots) residue zone (<inline-formula id="inf35">
<mml:math id="minf35">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mi>C</mml:mi>
<mml:mn>1</mml:mn>
<mml:mo>&#x2243;</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>0.2</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf36">
<mml:math id="minf36">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mi>C</mml:mi>
<mml:mn>2</mml:mn>
<mml:mo>&#x2243;</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>).</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>
<bold>(A)</bold> Projection along the first two principal components of the residues in the Protein dataset as obtained by a PCA analysis using <inline-formula id="inf37">
<mml:math id="minf37">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mi>R</mml:mi>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf38">
<mml:math id="minf38">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b8;</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, and <inline-formula id="inf39">
<mml:math id="minf39">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b8;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> as descriptors for each residue. Each dot in the plane represents a residue, with different colors corresponding to different amino acids. <bold>(B)</bold> Clustering of the residues forming the Protein dataset using the <inline-formula id="inf40">
<mml:math id="minf40">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b8;</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> <bold>(top)</bold> or <inline-formula id="inf41">
<mml:math id="minf41">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b8;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> <bold>(bottom)</bold> as descriptors for each residue. <bold>(C)</bold> Average silhouette score as a function of the number of clusters considered in <bold>(B)</bold>.</p>
</caption>
<graphic xlink:href="fmolb-08-626837-g003.tif"/>
</fig>
<p>Next, we performed hierarchical clustering of the residues based separately on the two angular density distributions (see <xref ref-type="fig" rid="F3">Figure 3B</xref>). The high values achieved by the silhouette analysis (see <xref ref-type="fig" rid="F3">Figure 3C</xref>) indicate that different subdivisions of residues are possible. For different types of groupings of residues, we note that both <inline-formula id="inf42">
<mml:math id="minf42">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b8;</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf43">
<mml:math id="minf43">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b8;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> are able to separate amino acids into several clusters composed of amino acids with different biochemical features.</p>
<p>It is worth noting that <inline-formula id="inf44">
<mml:math id="minf44">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b8;</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> clearly isolates a group of hydrophobic (red) residues from the charged residues (both positively and negatively charged), but this separation is even clearer when using the <inline-formula id="inf45">
<mml:math id="minf45">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b8;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> parameter.</p>
</sec>
<sec id="s2-3">
<title>2.3 Hydrophobic and Hydrophilic Properties of Amino Acid Side Chains in the Native Structure</title>
<p>The PCA plane we obtained using conditional probabilities (<xref ref-type="fig" rid="F3">Figure 3A</xref>) is a schematic and meaningful description of the solvation properties of the amino acids when they are studied in the native environment. In fact, it is a clever representation of the behavior of the solvent molecules that hydrate protein residues. In <xref ref-type="fig" rid="F4">Figure 4</xref> we depicted in the PCA plane the points regarding each of the 20 natural amino acids of our dataset using different colors. This way of measuring hydropathy characteristics, reporting them as regions with different chemico-physical features explored by the amino acid (&#x201c;explored regions&#x201d;) rather than as single values assumed by the molecule itself, allowed us to better illustrate the results we obtained. In fact, we demonstrate in this way that some amino acids explore peculiar regions in this plane while other amino acids, such as Arg, Tyr, Trp, and Thr, clearly populate overlapping regions of the plane. In our opinion, this may reflect the plasticity of some residues, which can emphasize differently the hydrophobic or hydrophilic aspects of their atomic structure in different local protein environments due to different biological contexts. We summarize this concept of &#x201c;hydropathy explored regions&#x201d; in <xref ref-type="fig" rid="F4">Figure 4</xref>, where we defined four portions of the PCA plane according to the kind of residues that explore these areas. We identified the explored hydrophobic area (&#x201c;Hb&#x201d; area, depicted in red in <xref ref-type="fig" rid="F4">Figure 4</xref>) in which Ile, Leu, Phe, Val, Pro, and Met residues are very well focused and in good qualitative agreement with previous hydrophobic scales. Then we mapped a clear negative charge explored area (&#x201c;Neg&#x201d; area, depicted in cyan) where Asp and Glu cluster. 
A third portion of the PCA plane was defined as the positive charge explored area (&#x201c;Pos&#x201d; area, depicted in blue in <xref ref-type="fig" rid="F4">Figure 4</xref>), where almost all Lysines of our dataset converge and the Arginine side chain is present for half of the observed configurations; in our opinion, Lysine explores the Hb area in a few cases probably because of its long aliphatic chain, which in some cases outweighs the hydrophilic character.</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>Representations in the plane identified by the first and second principal components of all the residues comprising the 20 proteins of the Protein dataset (grey dots). The PCA analysis has been carried out using for each residue the observed <inline-formula id="inf46">
<mml:math id="minf46">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mi>R</mml:mi>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf47">
<mml:math id="minf47">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b8;</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, and <inline-formula id="inf48">
<mml:math id="minf48">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b8;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> computed as described in the Methods. In each panel, dots corresponding to the same kind of amino acid are highlighted with different colors.</p>
</caption>
<graphic xlink:href="fmolb-08-626837-g004.tif"/>
</fig>
<p>The presence of Arginine even in the Hb area is biologically very relevant because our result connects biological and biophysical principles of Arginine behavior in native proteins: this trend may be impossible to explain by using just a single hydropathy value. In fact, in our opinion, Arginine hydropathy can vary drastically within a protein environment and so we could define it as a Janus-headed side chain. This observation agrees with experimental data related to this amino acid. In fact, previous experiments by C. Preston Moon and Karen G. Fleming (<xref ref-type="bibr" rid="B31">Moon and Fleming, 2011</xref>) clearly demonstrated that a membrane protein can accommodate an Arginine side-chain placed near the apolar middle of a lipid bilayer with much less cost in energy than has been previously predicted (<xref ref-type="bibr" rid="B14">Dorairaj and Allen, 2007</xref>; <xref ref-type="bibr" rid="B25">MacCallum et al., 2007</xref>). In fact, the guanidino group of Arginine could interact with non-polar aromatic and aliphatic side chains above and below the guanidinium plane while hydrogen bonding with polar side chains is restricted to in-plane positions. Related to this point, we would like to recall that the first solved structure of a voltage-gated potassium channel (<xref ref-type="bibr" rid="B43">Schow et al., 2011</xref>) gave rise to many discussions about the energetics of the interactions between Arginines and lipids, as the structure suggested a gating mechanism in which charged Arginines were exposed to the hydrophobic bilayer interior.</p>
<p>We further observed, on the left side of the PCA plane and located between the Neg and Pos areas, a region we defined as the polar explored region (&#x201c;Pol&#x201d; area, depicted in yellow in <xref ref-type="fig" rid="F4">Figure 4</xref>), where amino acids that are polar and uncharged at physiological pH are positioned: the location of the area qualitatively agrees with the residue group features of these amino acids, which are more hydrophilic than those of the Hb area because they contain functional groups that form hydrogen bonds with water. This class of amino acids includes Ser, Thr, Cys, Asn, and Gln. The presence of this polar area agrees with the study by Peters and Elofsson about the assessment of the most accurate hydrophobicity scale (<xref ref-type="bibr" rid="B37">Peters and Elofsson, 2014</xref>). They demonstrated that better hydrophobic scales rank the polar amino acids Gln and (in particular) Asn as less hydrophobic. It is interesting to underline that even this polar area overlaps with the Hb area, in agreement with the concept of the ability of amino acids to explore several hydrophilicity-hydrophobicity regions.</p>
<p>To better illustrate this concept, we would like to report the case of the Threonine hydropathy analysis (<xref ref-type="fig" rid="F5">Figure 5A</xref>) in two different contexts. We selected two Threonine residues, Thr 599 and Thr 302, both belonging to the same protein (PDB:1xwl) and characterized by different positions on the PCA plane. The reason for this different behavior in terms of solvent interaction has to be sought in the neighboring residues. In particular, the Thr within the polar region is surrounded by three charged residues (RDKK, reported in blue in the Figure) that inevitably influence its hydrophilic behavior; on the other hand, the Thr within the non-polar zone is enclosed in a set of non-polar residues (FLFFL, in red in the Figure), thus forming an overall hydrophobic region.</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>
<bold>(A)</bold> Projection along the first two principal components of the residues in the Protein dataset as obtained by a PCA analysis using <inline-formula id="inf49">
<mml:math id="minf49">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mi>R</mml:mi>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf50">
<mml:math id="minf50">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b8;</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, and <inline-formula id="inf51">
<mml:math id="minf51">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b8;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> as descriptors for each residue. Each dot in the plane represents a residue. Blue dots highlight the positions in the plane of all Threonine (Thr) residues. Green dots correspond to two cases where the surroundings of the considered Threonine are composed of hydrophilic (blue) or hydrophobic (red) amino acids. The average values of the hydrophobicity index <bold>(B)</bold> and of the hydrophilicity index <bold>(C)</bold> of the WOPHS scale have been calculated for each element of the grid with which the plane is partitioned.</p>
</caption>
<graphic xlink:href="fmolb-08-626837-g005.tif"/>
</fig>
<p>Another interesting example is represented by Tyrosine and Tryptophan. They are straddling the polar and hydrophobic areas and this behavior confirms that our approach is correct. In fact, Tryptophan and Tyrosine can be involved in interactions with ligands that contain aromatic groups via stacking interactions. However, Tryptophan has nitrogen in its side chain and Tyrosine has oxygen, allowing hydrogen bonding interactions to be made with other residues or even solvent molecules, as commonly seen in polar amino acids like Serine, which has oxygen in its side chain. We should also keep in mind, however, that although Tryptophan has an indole group, the lone pair of its nitrogen is involved in the aromatic system. Thus, it forms only weak H-bonds, which may not be enough to categorize it as &#x201c;polar&#x201d;. All these observations are in agreement with the fact that Tyrosine and Tryptophan side chains are the typical cases for which numerical values obtained for characterization of the hydrophobicity are controversial, being identified as hydrophobic in some studies (<xref ref-type="bibr" rid="B23">Levitt, 1976</xref>; <xref ref-type="bibr" rid="B44">Sweet and Eisenberg, 1983</xref>) but hydrophilic in others (<xref ref-type="bibr" rid="B35">Ooi et al., 1987</xref>; <xref ref-type="bibr" rid="B34">Oobatake and Ooi, 1988</xref>), and our concept of &#x201c;explored region&#x201d; should be the right approach.</p>
<p>At the end of this qualitative analysis, we decided to support our speculations by introducing also quantitative data relative to the side chain hydropathy characterization in the native protein context. Although it was not our aim, as proof of the significance of our hydrophobicity/hydrophilicity representation, we developed a mean hydrophobicity measure for each residue <inline-formula id="inf52">
<mml:math id="minf52">
<mml:mrow>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>H</mml:mi>
<mml:mi>r</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> (see Methods for details). We achieved a very good agreement with the biological hydrophobicity scale (or the Hessa scale), which is based on <italic>in vitro</italic> experiments where the recognition of artificial helices by the Sec translocon was measured (<xref ref-type="bibr" rid="B17">Hessa et al., 2005</xref>). However, it can be noted that, in this case, the local microenvironment is not known. For example, residues in the helical segment might be interacting with other parts of the protein rather than interacting with lipids or water. The insertion by the translocon might also be a non-equilibrium process. In particular, in order to highlight the mean properties obtainable from this plot, we calculated the centroids of the points regarding each of the 20 natural residues. Using as reference the position of Isoleucine, indicated as the most hydrophobic residue here (<xref ref-type="bibr" rid="B17">Hessa et al., 2005</xref>), we calculated the radial and the angular distance of each centroid from the Isoleucine centroid (see Methods for details). In this framework, the greater the distance from Isoleucine, the higher the hydrophilicity of the residue. Notably, we found a strong linear correlation (R &#x3d; 0.84) between the <inline-formula id="inf53">
<mml:math id="minf53">
<mml:mrow>
<mml:mtext>&#x394;</mml:mtext>
<mml:mi>G</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> of amino acids side chains in the translocon scale and their values of mean hydrophobicity, <inline-formula id="inf54">
<mml:math id="minf54">
<mml:mrow>
<mml:msub>
<mml:mi>H</mml:mi>
<mml:mi>r</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, of our native-protein scale (<xref ref-type="fig" rid="F4">Figure 4</xref> and <xref ref-type="table" rid="T1">Table 1</xref>), meaning that our solvation analysis closely reproduces one of the best performing hydrophobicity scales (<xref ref-type="bibr" rid="B37">Peters and Elofsson, 2014</xref>).</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>Results of the analysis of the essential plane shown in <xref ref-type="fig" rid="F3">Figure 3A</xref>. For each amino acid, we report the number of cases in which it is found solvent-exposed in the simulation and the percentage with respect to all the solvent-exposed residues (Occurrence); the hydrophobicity values we obtained with our geometrical characterization; and the gyration radius, a measure of the dispersion of the points regarding each residue.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Res</th>
<th align="center">Occurrence</th>
<th align="center">
<inline-formula id="inf55">
<mml:math id="minf55">
<mml:mrow>
<mml:msub>
<mml:mi>H</mml:mi>
<mml:mi>r</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
<th align="center">Gyration radius</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">ALA</td>
<td align="center">148 (5.4%)</td>
<td align="center">0.47</td>
<td align="center">0.57</td>
</tr>
<tr>
<td align="left">ARG</td>
<td align="center">168 (6.1%)</td>
<td align="center">1.60</td>
<td align="center">0.64</td>
</tr>
<tr>
<td align="left">ASN</td>
<td align="center">237 (8.6%)</td>
<td align="center">2.03</td>
<td align="center">0.68</td>
</tr>
<tr>
<td align="left">ASP</td>
<td align="center">300 (10.9%)</td>
<td align="center">3.20</td>
<td align="center">0.59</td>
</tr>
<tr>
<td align="left">CYS</td>
<td align="center">7 (0.3%)</td>
<td align="center">0.59</td>
<td align="center">0.20</td>
</tr>
<tr>
<td align="left">GLN</td>
<td align="center">205 (7.5%)</td>
<td align="center">1.26</td>
<td align="center">0.67</td>
</tr>
<tr>
<td align="left">GLU</td>
<td align="center">266 (9.7%)</td>
<td align="center">3.32</td>
<td align="center">0.55</td>
</tr>
<tr>
<td align="left">GLY</td>
<td align="center">166 (6.1%)</td>
<td align="center">0.80</td>
<td align="center">0.52</td>
</tr>
<tr>
<td align="left">HIS</td>
<td align="center">60 (2.2%)</td>
<td align="center">2.29</td>
<td align="center">0.76</td>
</tr>
<tr>
<td align="left">ILE</td>
<td align="center">38 (1.4%)</td>
<td align="center">0.00</td>
<td align="center">0.33</td>
</tr>
<tr>
<td align="left">LEU</td>
<td align="center">59 (2.2%)</td>
<td align="center">0.79</td>
<td align="center">0.41</td>
</tr>
<tr>
<td align="left">LYS</td>
<td align="center">268 (10.4%)</td>
<td align="center">2.76</td>
<td align="center">0.61</td>
</tr>
<tr>
<td align="left">MET</td>
<td align="center">12 (0.4%)</td>
<td align="center">0.38</td>
<td align="center">0.76</td>
</tr>
<tr>
<td align="left">PHE</td>
<td align="center">34 (1.2%)</td>
<td align="center">0.24</td>
<td align="center">0.52</td>
</tr>
<tr>
<td align="left">PRO</td>
<td align="center">97 (3.5%)</td>
<td align="center">0.55</td>
<td align="center">0.27</td>
</tr>
<tr>
<td align="left">SER</td>
<td align="center">254 (9.3%)</td>
<td align="center">1.46</td>
<td align="center">0.67</td>
</tr>
<tr>
<td align="left">THR</td>
<td align="center">197 (7.2%)</td>
<td align="center">0.66</td>
<td align="center">0.53</td>
</tr>
<tr>
<td align="left">TRP</td>
<td align="center">38 (1.4%)</td>
<td align="center">0.65</td>
<td align="center">0.37</td>
</tr>
<tr>
<td align="left">TYR</td>
<td align="center">129 (4.7%)</td>
<td align="center">0.83</td>
<td align="center">0.77</td>
</tr>
<tr>
<td align="left">VAL</td>
<td align="center">39 (1.4%)</td>
<td align="center">0.46</td>
<td align="center">0.35</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Indeed, it is interesting to note that even if the mean properties of the 20 residues can be successfully described using this representation, looking at the plots in <xref ref-type="fig" rid="F4">Figure 4</xref> it emerges clearly that points belonging to the same amino acid category can spread a lot on this plane, meaning that even the same amino acid can be characterized by very different hydropathy when it is inserted in different environments. Quantitatively, as a measure of the dispersion of the points regarding the various residues, we calculated the amino acid gyration radius (see Methods). We report the results in <xref ref-type="table" rid="T1">Table 1</xref>.</p>
<p>It turns out that residues with a well-known hydrophobic tendency, such as Proline, Isoleucine, and Valine, experience a low variability since they repel water very strongly. On the other hand, residues with a less defined solvent preference, such as Asparagine, Tyrosine, and Methionine, are characterized by higher gyration radius values, meaning that they can modify their features under the influence of their surroundings.</p>
<p>In light of all these considerations, using the hydrophobicity and hydrophilicity scales presented here (<xref ref-type="bibr" rid="B6">Bonella et al., 2014</xref>), we built two maps of these characteristics on the conditional probabilities PCA plane reported in <xref ref-type="fig" rid="F3">Figure 3A</xref>. In particular, by placing a square grid on it we can collect all the points inside each square pixel: since each of these points represents a residue with its hydrophobicity and hydrophilicity values, we can average these values, obtaining a colormap with the hydrophobicity and hydrophilicity observed in that region of the plane. After a smoothing procedure, we obtain the maps depicted in <xref ref-type="fig" rid="F5">Figures 5B</xref>,C. From this perspective, the evaluation of the hydropathy properties of a given amino acid, located in a specific protein sequence and structure, depends on the position it assumes on this plane, and this position surely depends not only on its own chemico-physical features but also on the characteristics of its structural neighborhood.</p>
<p>An additional analysis showing the correlation between the secondary structure of a residue and its hydration properties is reported in the Supporting Information (see <xref ref-type="sec" rid="s8">Supplementary Figures S3&#x2013;S5</xref>). Using DSSP (<xref ref-type="bibr" rid="B48">Touw et al., 2015</xref>; <xref ref-type="bibr" rid="B20">Kabsch and Sander, 1983</xref>), we labeled each residue with its secondary structure and we evaluated how the different secondary structures are located in the plane reported in <xref ref-type="fig" rid="F4">Figure 4</xref>. It is worth noting that some non-polar residues, such as ALA and LEU, are usually characterized by a low value of the Hydrophobicity index, but when they are found in loops they can exhibit even high values of the index, probably because of the usual high solvent exposure of this secondary structure.</p>
</sec>
</sec>
<sec id="s3">
<title>3 Conclusions</title>
<p>Investigating the properties of the hydrogen bond network at the interface between hydration water molecules and solute plays a crucial role in the characterization of the physico-chemical properties of the latter. Here, we presented a completely <italic>in-silico</italic> method capable of analyzing the positions and the orientations of water molecules around any residue of protein structures. This allows us to emphasize the contribution to the solvation properties caused by the local structural environment, underlining that not only the nature of a single amino acid determines its hydropathy features, but also the types of residues close to it.</p>
<p>In particular, we analyzed the motion of the water molecules belonging to the first two hydration shells for a set of proteins, defining a new description of both the hydrophilicity and hydrophobicity properties. Studying the probability of the water molecules&#x2019; orientation conditional on the distance from the solute, we built an essential plane of hydrophilicity and hydrophobicity, through a dimensionality reduction of the probability density distribution. On average, the location of each amino acid on this plane is in perfect agreement with its biochemical properties; in fact, an index defined considering the average position of each amino acid has an excellent correlation with one of the state-of-the-art hydrophobicity scales.</p>
<p>This notwithstanding, the dispersion of each amino acid (considering all the occurrences of a given residue in the proteins of our dataset) is a good marker of its variability in terms of solvation features. Indeed, this dispersion clearly separates amino acids with marked properties, such as strong hydrophobicity or hydrophilicity, from amino acids with less pronounced or intermediate hydropathy properties, meaning that the local structural environment in these cases plays a predominant role in modifying their interaction with the solvent.</p>
</sec>
<sec id="s4">
<title>4 Materials and Methods</title>
<sec id="s4-1">
<title>4.1 Protein Dataset and Residue Selection</title>
<p>We considered the dataset proposed by Hensen <italic>et al.</italic> (<xref ref-type="bibr" rid="B15">Hensen et al., 2012</xref>), where a collection of 112 representative proteins for each family was reported. From this initial set, we selected the 20 proteins having 1) longer sequences and 2) no missing or incomplete residues. Considering all proteins together, we ended up with a total of 6,745 residues. For each protein, a molecular dynamics simulation with explicit solvent was performed. Since we were interested in characterizing solvation-related features, we considered only residues found in interaction with more than 50,000 water molecules during the whole analyzed simulation. An interaction between a residue and a water molecule is established if the distance between the oxygen atom of the water and any of the residue heavy atoms is smaller than 6&#xa0;<italic>&#xc5;</italic>. We ended up with 2,775 residues.</p>
</sec>
<sec id="s4-2">
<title>4.2 Molecular Dynamics Simulation</title>
<p>The following protocol was used for each of the 20 simulations. We used Gromacs 2020 (<xref ref-type="bibr" rid="B50">Spoel et al., 2005</xref>) and built the system topology using the CHARMM-27 force field (<xref ref-type="bibr" rid="B7">Brooks et al., 2009</xref>). The protein was placed in a dodecahedral simulation box, with periodic boundary conditions, filled with TIP3P water molecules (<xref ref-type="bibr" rid="B19">Jorgensen et al., 1983</xref>). We checked that each atom of the protein was at least at a distance of <inline-formula id="inf56">
<mml:math id="minf56">
<mml:mrow>
<mml:mn>1.1</mml:mn>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mtext>nm</mml:mtext>
</mml:mrow>
</mml:math>
</inline-formula> from the box borders. The system was then minimized with the steepest descent algorithm. Next, a relaxation of water molecules and thermalization of the system was run in NVT and NPT environments each for <inline-formula id="inf57">
<mml:math id="minf57">
<mml:mrow>
<mml:mn>0.1</mml:mn>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mtext>ns</mml:mtext>
</mml:mrow>
</mml:math>
</inline-formula> at <inline-formula id="inf58">
<mml:math id="minf58">
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mtext>fs</mml:mtext>
</mml:mrow>
</mml:math>
</inline-formula> time-step. The temperature was kept constant at <inline-formula id="inf59">
<mml:math id="minf59">
<mml:mrow>
<mml:mn>300</mml:mn>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mtext>K</mml:mtext>
</mml:mrow>
</mml:math>
</inline-formula> with v-rescale algorithm (<xref ref-type="bibr" rid="B9">Bussi et al., 2007</xref>); the final pressure was fixed at <inline-formula id="inf60">
<mml:math id="minf60">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mtext>bar</mml:mtext>
</mml:mrow>
</mml:math>
</inline-formula> with the Parrinello-Rahman algorithm (<xref ref-type="bibr" rid="B36">Parrinello and Rahman, 1980</xref>). The LINCS algorithm (<xref ref-type="bibr" rid="B16">Hess et al., 1997</xref>) was used to constrain h-bonds. A cut-off of <inline-formula id="inf61">
<mml:math id="minf61">
<mml:mrow>
<mml:mn>12</mml:mn>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi>&#xc5;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> was imposed for the evaluation of short-range non-bonded interactions, and the Particle Mesh Ewald method (<xref ref-type="bibr" rid="B10">Cheatham et al., 1995</xref>) was used for the long-range electrostatic interactions. Finally, we performed <inline-formula id="inf62">
<mml:math id="minf62">
<mml:mrow>
<mml:mn>60</mml:mn>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mtext>ns</mml:mtext>
</mml:mrow>
</mml:math>
</inline-formula> of molecular dynamics with a time step of <inline-formula id="inf63">
<mml:math id="minf63">
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mtext>fs</mml:mtext>
</mml:mrow>
</mml:math>
</inline-formula>, saving configurations every <inline-formula id="inf64">
<mml:math id="minf64">
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mtext>ps</mml:mtext>
</mml:mrow>
</mml:math>
</inline-formula>. We considered the last <inline-formula id="inf65">
<mml:math id="minf65">
<mml:mrow>
<mml:mn>20</mml:mn>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mtext>ns</mml:mtext>
</mml:mrow>
</mml:math>
</inline-formula> (10,000 frames) for the analyses.</p>
</sec>
<sec id="s4-3">
<title>4.3 Evaluation of Solvent-Residue Geometrical Descriptors</title>
<p>Molecular dynamics simulation data were used to characterize the geometrical disposition of the water molecules around protein residues. In particular, for each protein of the protein dataset, we sampled 10,000 configurations (one every <inline-formula id="inf66">
<mml:math id="minf66">
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mtext>ps</mml:mtext>
</mml:mrow>
</mml:math>
</inline-formula>) from the corresponding molecular dynamics simulation. For every water molecule in each frame, we evaluate the minimum distance, <italic>R</italic>, between the water oxygen and the heavy atoms of each protein residue.</p>
<p>Solvent molecules whose oxygen atom had a distance bigger than <inline-formula id="inf67">
<mml:math id="minf67">
<mml:mrow>
<mml:mn>6</mml:mn>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi>&#xc5;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> were discarded from all subsequent analyses. All remaining water molecules were then assigned to their nearest residue, again on the basis of the distance <italic>R</italic>.</p>
<p>Then, for each water molecule, we build the tetrahedron having the oxygen atom as the center and the two hydrogen atoms occupying two of the four vertexes. In this way, we ensure that the tetrahedron is always well defined. We indicate with <inline-formula id="inf68">
<mml:math id="minf68">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mi>r</mml:mi>
<mml:mo stretchy="true">&#x2192;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>H</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf69">
<mml:math id="minf69">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mi>r</mml:mi>
<mml:mo stretchy="true">&#x2192;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>H</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> the vectors originating in the tetrahedron center and pointing to the hydrogen atoms, while we refer to the vectors linking the center with the other two vertices as <inline-formula id="inf70">
<mml:math id="minf70">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mi>r</mml:mi>
<mml:mo stretchy="true">&#x2192;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mtext>lp</mml:mtext>
</mml:mrow>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf71">
<mml:math id="minf71">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mi>r</mml:mi>
<mml:mo stretchy="true">&#x2192;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mtext>lp</mml:mtext>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> (where lp stands for <italic>lone pairs</italic>). Finally, we define also the vector joining the nearest heavy atom of the residue with the oxygen atom of the water molecule, <inline-formula id="inf72">
<mml:math id="minf72">
<mml:mrow>
<mml:mover accent="true">
<mml:mi>R</mml:mi>
<mml:mo stretchy="true">&#x2192;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:math>
</inline-formula>, and the dipole moment vector, <inline-formula id="inf73">
<mml:math id="minf73">
<mml:mrow>
<mml:mover accent="true">
<mml:mi>d</mml:mi>
<mml:mo stretchy="true">&#x2192;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:math>
</inline-formula> (see <xref ref-type="fig" rid="F1">Figure 1</xref> for a sketch).</p>
<p>Once we know the set of six vectors <inline-formula id="inf74">
<mml:math id="minf74">
<mml:mrow>
<mml:mrow>
<mml:mo>[</mml:mo>
<mml:mrow>
<mml:mrow>
<mml:mover accent="true">
<mml:mi>R</mml:mi>
<mml:mo stretchy="true">&#x2192;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mo>,</mml:mo>
<mml:mrow>
<mml:mover accent="true">
<mml:mi>d</mml:mi>
<mml:mo stretchy="true">&#x2192;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mi>r</mml:mi>
<mml:mo stretchy="true">&#x2192;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>H</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf75">
<mml:math id="minf75">
<mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mi>r</mml:mi>
<mml:mo stretchy="true">&#x2192;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>H</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mi>r</mml:mi>
<mml:mo stretchy="true">&#x2192;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mtext>lp</mml:mtext>
</mml:mrow>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mi>r</mml:mi>
<mml:mo stretchy="true">&#x2192;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mtext>lp</mml:mtext>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo>]</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, we can compute the five angles that efficiently summarize the disposition of the water molecule with respect to the protein residue. In particular,<disp-formula id="e1">
<mml:math id="me1">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b8;</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>H</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>arccos</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mrow>
<mml:mover accent="true">
<mml:mi>R</mml:mi>
<mml:mo stretchy="true">&#x2192;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mo>&#x22c5;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mi>r</mml:mi>
<mml:mo stretchy="true">&#x2192;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>H</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mo>&#x7c;</mml:mo>
<mml:mi>R</mml:mi>
<mml:mo>&#x7c;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x7c;</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>r</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>H</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo>&#x7c;</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(1)</label>
</disp-formula>and<disp-formula id="e2">
<mml:math id="me2">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b8;</mml:mi>
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>arccos</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mrow>
<mml:mover accent="true">
<mml:mi>R</mml:mi>
<mml:mo stretchy="true">&#x2192;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mo>&#x22c5;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mi>r</mml:mi>
<mml:mo stretchy="true">&#x2192;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mo>&#x7c;</mml:mo>
<mml:mi>R</mml:mi>
<mml:mo>&#x7c;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x7c;</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>r</mml:mi>
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo>&#x7c;</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(2)</label>
</disp-formula>with <inline-formula id="inf76">
<mml:math id="minf76">
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1,2</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> identify the orientation of the tetrahedron vertexes with respect to the direction identified by <inline-formula id="inf77">
<mml:math id="minf77">
<mml:mrow>
<mml:mover accent="true">
<mml:mi>R</mml:mi>
<mml:mo stretchy="true">&#x2192;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:math>
</inline-formula>, while<disp-formula id="e3">
<mml:math id="me3">
<mml:mrow>
<mml:mi>&#x3d5;</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>arccos</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mrow>
<mml:mover accent="true">
<mml:mi>R</mml:mi>
<mml:mo stretchy="true">&#x2192;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mo>&#x22c5;</mml:mo>
<mml:mrow>
<mml:mover accent="true">
<mml:mi>d</mml:mi>
<mml:mo stretchy="true">&#x2192;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mo>&#x7c;</mml:mo>
<mml:mi>R</mml:mi>
<mml:mo>&#x7c;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x7c;</mml:mo>
<mml:mi>d</mml:mi>
<mml:mo>&#x7c;</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(3)</label>
</disp-formula>measures the orientation of the water dipole.</p>
</sec>
<sec id="s4-4">
<title>4.4 Joint and Conditional Probability</title>
<p>For each of the 2,775 residues, we computed the hydrogen joint probability, <inline-formula id="inf78">
<mml:math id="minf78">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>R</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, which gives the probability of finding a water molecule with a given <inline-formula id="inf79">
<mml:math id="minf79">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b8;</mml:mi>
<mml:mrow>
<mml:mi>O</mml:mi>
<mml:mi>H</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>l</mml:mi>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> angle at distance <italic>R</italic> from the nearest heavy atom of the residue, and the dipole joint probability, <inline-formula id="inf80">
<mml:math id="minf80">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>R</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>&#x3d5;</mml:mi>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, of finding a water molecule with a given &#x3d5; angle at distance <italic>R</italic>. In both cases, the probabilities are computed discretizing the distance range 0&#x2013;6&#xa0;<italic>&#xc5;</italic> with steps of <inline-formula id="inf81">
<mml:math id="minf81">
<mml:mrow>
<mml:mn>0.05</mml:mn>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi>&#xc5;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, and the angular interval <inline-formula id="inf82">
<mml:math id="minf82">
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mn>180</mml:mn>
</mml:mrow>
<mml:mo>&#x2218;</mml:mo>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> with a step of <inline-formula id="inf83">
<mml:math id="minf83">
<mml:mrow>
<mml:msup>
<mml:mn>1</mml:mn>
<mml:mo>&#x2218;</mml:mo>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>. See <xref ref-type="fig" rid="F1">Figure 1</xref> for an example.</p>
<p>From the joint probabilities, we obtained the distance marginal probability, <inline-formula id="inf84">
<mml:math id="minf84">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mi>R</mml:mi>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> as<disp-formula id="e4">
<mml:math id="me4">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mi>R</mml:mi>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mstyle displaystyle="true">
<mml:mrow>
<mml:mo>&#x222b;</mml:mo>
<mml:mrow>
<mml:mi>d</mml:mi>
<mml:mi>&#x3b8;</mml:mi>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>R</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:mstyle>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(4)</label>
</disp-formula>while we calculated the conditional probabilities as<disp-formula id="e5">
<mml:math id="me5">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
<mml:mo>&#x7c;</mml:mo>
<mml:mi>R</mml:mi>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>R</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mi>R</mml:mi>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mfrac>
<mml:mo>.</mml:mo>
</mml:mrow>
</mml:math>
<label>(5)</label>
</disp-formula>
</p>
<p>Considering each residue as a reference, <inline-formula id="inf85">
<mml:math id="minf85">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mi>R</mml:mi>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> encodes the overall probability of finding a water molecule at distance <italic>R</italic> from the reference. As one can see from <xref ref-type="fig" rid="F1">Figure 1</xref>, the typical shape of probability is that of a damped sinusoidal function, showing a series of maxima (and minima) with decreasing amplitude. This behavior originates from the molecular interactions between the residue and the water molecules and those between water molecules. Water molecules tend to form a shell around the residue with a higher density of molecules in correspondence of the <inline-formula id="inf86">
<mml:math id="minf86">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mi>R</mml:mi>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> maxima and a lower density in its minima.</p>
<p>Using the <inline-formula id="inf87">
<mml:math id="minf87">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mi>R</mml:mi>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> profile, we identified the shells as follows:<list list-type="bullet">
<list-item>
<p>the first shell starts at <inline-formula id="inf88">
<mml:math id="minf88">
<mml:mrow>
<mml:msub>
<mml:mi>R</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, the first non-null value of <inline-formula id="inf89">
<mml:math id="minf89">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mi>R</mml:mi>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>;</p>
</list-item>
<list-item>
<p>the border between the first shell and the second <inline-formula id="inf90">
<mml:math id="minf90">
<mml:mrow>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>R</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> coincides with the minimum following the maximum in the first shell;</p>
</list-item>
<list-item>
<p>the end of the second shell, <inline-formula id="inf91">
<mml:math id="minf91">
<mml:mrow>
<mml:msub>
<mml:mi>R</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, coincides with the minimum following the maximum in the second shell.</p>
</list-item>
</list>
</p>
<p>When the <inline-formula id="inf92">
<mml:math id="minf92">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mi>R</mml:mi>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> profile does not allow us to identify the minima, we add them according to their average values calculated on the respective residue.</p>
<p>Once the shells were identified, we calculated <inline-formula id="inf93">
<mml:math id="minf93">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b8;</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf94">
<mml:math id="minf94">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b8;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> as<disp-formula id="e6">
<mml:math id="me6">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b8;</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mstyle displaystyle="true">
<mml:mrow>
<mml:msubsup>
<mml:mo>&#x222b;</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>R</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>R</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msubsup>
<mml:mrow>
<mml:mi>d</mml:mi>
<mml:mi>R</mml:mi>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
<mml:mo>&#x7c;</mml:mo>
<mml:mi>R</mml:mi>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:mstyle>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(6)</label>
</disp-formula>where <inline-formula id="inf95">
<mml:math id="minf95">
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1,2</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> and &#x3b8; can be either the hydrogen angle or the dipole one. The <inline-formula id="inf96">
<mml:math id="minf96">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mi>&#x3b8;</mml:mi>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> were calculated on both the joint <inline-formula id="inf97">
<mml:math id="minf97">
<mml:mrow>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b8;</mml:mi>
<mml:mrow>
<mml:mn>1,2</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> and conditional <inline-formula id="inf98">
<mml:math id="minf98">
<mml:mrow>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b8;</mml:mi>
<mml:mrow>
<mml:mn>1,2</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> probability. Since some <inline-formula id="inf99">
<mml:math id="minf99">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mi>R</mml:mi>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> exhibited an anomalous profile, they were discarded from subsequent analyses, reducing the dataset to 2,740 residues. Ultimately, we obtained three descriptors for the conditional and joint probability histograms of the OH-lp and dipole angles: <inline-formula id="inf100">
<mml:math id="minf100">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mi>R</mml:mi>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf101">
<mml:math id="minf101">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b8;</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf102">
<mml:math id="minf102">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b8;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>. Analyses were performed using R standard libraries (<xref ref-type="bibr" rid="B39">R Core Team, 2020</xref>).</p>
</sec>
<sec id="s4-5">
<title>4.5 Principal Component Analysis and Clustering</title>
<p>Principal component analysis (PCA) was performed over 1) the vector obtained by concatenating the discretized (75 points) probability distribution <inline-formula id="inf103">
<mml:math id="minf103">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mi>R</mml:mi>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> for each of the 2,740 residues; 2) on the vector obtained by concatenating the discretized (180 points) probability distribution <inline-formula id="inf104">
<mml:math id="minf104">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b8;</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> for each of the 2,740 residues; 3) on the vector obtained by concatenating <inline-formula id="inf105">
<mml:math id="minf105">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b8;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> of all 2,740 residues and 4) using the vector obtained by concatenating together all the previous probabilities. We used the &#x201c;prcomp&#x201d; function of R software (<xref ref-type="bibr" rid="B39">R Core Team, 2020</xref>). The same procedure has been repeated also using <inline-formula id="inf106">
<mml:math id="minf106">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mi>R</mml:mi>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> and the two conditional probability marginals, <inline-formula id="inf107">
<mml:math id="minf107">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b8;</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, and <inline-formula id="inf108">
<mml:math id="minf108">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b8;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
<p>A clustering analysis was performed on the points on the first two components plane relating to <inline-formula id="inf109">
<mml:math id="minf109">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b8;</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf110">
<mml:math id="minf110">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b8;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> through a hierarchical clustering, using the Euclidean distance and the Ward method as linkage function (<xref ref-type="bibr" rid="B51">Ward, 1963</xref>) via the &#x201c;hclust&#x201d; function of the &#x201c;Stats&#x201d; package of R (<xref ref-type="bibr" rid="B39">R Core Team, 2020</xref>). Finally, we computed the Silhouette for the hierarchical cluster via the R package &#x201c;cluster&#x201d; (<xref ref-type="bibr" rid="B26">Maechler et al., 2019</xref>).</p>
<p>Finally, to measure the dispersion of the points regarding the various residues in the PCA plane, we calculated the amino acids gyration radius as<disp-formula id="e7">
<mml:math id="me7">
<mml:mrow>
<mml:msub>
<mml:mi>R</mml:mi>
<mml:mi>g</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msqrt>
<mml:mrow>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mi>N</mml:mi>
</mml:mfrac>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>N</mml:mi>
</mml:munderover>
<mml:mrow>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mi>r</mml:mi>
<mml:mi>i</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mstyle>
</mml:mrow>
</mml:msqrt>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(7)</label>
</disp-formula>where <inline-formula id="inf111">
<mml:math id="minf111">
<mml:mrow>
<mml:msub>
<mml:mi>r</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> are the distances between each of the N points and the centroid.</p>
</sec>
<sec id="s4-6">
<title>4.6 Hydrophobicity Measure in Principal Component Plane</title>
<p>Starting from the plane shown in <xref ref-type="fig" rid="F3">Figure 3A</xref>, we defined a measure of hydrophobicity. We take as reference the point <italic>C</italic>, the centroid of all the Ile points with coordinates PC1 &#x3d; 0.75 and PC2 &#x3d; &#x2212;0.39. For a generic point in the plane, <italic>i</italic>, we calculated the distance <inline-formula id="inf112">
<mml:math id="minf112">
<mml:mrow>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> from <italic>C</italic>. Defining the angle variable &#x3b1; as the angle measured from the <italic>x</italic>-axis in the anticlockwise direction, we then fixed a reference angle, <inline-formula id="inf113">
<mml:math id="minf113">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b1;</mml:mi>
<mml:mi>ref</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>2.8</mml:mn>
<mml:mtext>&#x2009;rad</mml:mtext>
</mml:mrow>
</mml:math>
</inline-formula>. Now it is possible to define, for a generic point <italic>i</italic> on the plane with distance <inline-formula id="inf114">
<mml:math id="minf114">
<mml:mrow>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and angle <inline-formula id="inf115">
<mml:math id="minf115">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b1;</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, the Hydrophobicity index as follows:<disp-formula id="e8">
<mml:math id="me8">
<mml:mrow>
<mml:msub>
<mml:mi>H</mml:mi>
<mml:mi>r</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>k</mml:mi>
<mml:mrow>
<mml:mo>&#x7c;</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b1;</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>&#x3b1;</mml:mi>
<mml:mi>ref</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo>&#x7c;</mml:mo>
</mml:mrow>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(8)</label>
</disp-formula>where <inline-formula id="inf116">
<mml:math id="minf116">
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
</sec>
</sec>
</body>
<back>
<sec id="s5">
<title>Data Availability Statement</title>
<p>The original contributions presented in the study are included in the article/<xref ref-type="sec" rid="s8">Supplementary Material</xref>; further inquiries can be directed to the corresponding authors.</p>
</sec>
<sec id="s6">
<title>Author Contributions</title>
<p>EM conceived the research. LDR and MM designed and performed the computational analysis. LB performed the molecular dynamics simulations and statistical analysis. EM, DR, and GR supervised the research and performed the analysis. All authors analyzed the results and wrote and revised the paper.</p>
</sec>
<sec id="s7" sec-type="COI-statement">
<title>Conflict of Interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="s8">
<title>Supplementary Material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fmolb.2021.626837/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fmolb.2021.626837/full&#x23;supplementary-material</ext-link>.</p>
<supplementary-material xlink:href="datasheet1.pdf" id="SM1" mimetype="application/pdf" xmlns:xlink="http://www.w3.org/1999/xlink"/>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Aftabuddin</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Kundu</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2007</year>). <article-title>Hydrophobic, hydrophilic, and charged amino acid networks within protein</article-title>. <source>Biophys. J.</source> <volume>93</volume>, <fpage>225</fpage>&#x2013;<lpage>231</lpage>. <pub-id pub-id-type="doi">10.1529/biophysj.106.098004</pub-id> </citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Andreeva</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Howorth</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Chothia</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Kulesha</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Murzin</surname>
<given-names>A. G.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>SCOP2 prototype: a new approach to protein structure mining</article-title>. <source>Nucleic Acids Res.</source> <volume>42</volume>, <fpage>D310</fpage>&#x2013;<lpage>D314</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkt1242</pub-id> </citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Andreeva</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Kulesha</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Gough</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Murzin</surname>
<given-names>A. G.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>The SCOP database in 2020: expanded classification of representative family and superfamily domains of known protein structures</article-title>. <source>Nucleic Acids Res.</source> <volume>48</volume>, <fpage>D376</fpage>&#x2013;<lpage>D382</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkz1064</pub-id> </citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Babiaczyk</surname>
<given-names>W. I.</given-names>
</name>
<name>
<surname>Bonella</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Guidoni</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Ciccotti</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>Hydration structure of the quaternary ammonium cations</article-title>. <source>J. Phys. Chem. B</source> <volume>114</volume>, <fpage>15018</fpage>&#x2013;<lpage>15028</lpage>. <pub-id pub-id-type="doi">10.1021/jp106282w</pub-id> </citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ball</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>Water as an active constituent in cell biology</article-title>. <source>Chem. Rev.</source> <volume>108</volume>, <fpage>74</fpage>&#x2013;<lpage>108</lpage>. <pub-id pub-id-type="doi">10.1021/cr068037a</pub-id> </citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bonella</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Raimondo</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Milanetti</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Tramontano</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Ciccotti</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Mapping the hydropathy of amino acids based on their local solvation structure</article-title>. <source>J. Phys. Chem. B</source> <volume>118</volume>, <fpage>6604</fpage>&#x2013;<lpage>6613</lpage>. <pub-id pub-id-type="doi">10.1021/jp500980x</pub-id> </citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Brooks</surname>
<given-names>B. R.</given-names>
</name>
<name>
<surname>Brooks</surname>
<given-names>C. L.</given-names>
</name>
<name>
<surname>Mackerell</surname>
<given-names>A. D.</given-names>
</name>
<name>
<surname>Nilsson</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Petrella</surname>
<given-names>R. J.</given-names>
</name>
<name>
<surname>Roux</surname>
<given-names>B.</given-names>
</name>
<etal/>
</person-group> (<year>2009</year>). <article-title>CHARMM: the biomolecular simulation program</article-title>. <source>J. Comput. Chem.</source> <volume>30</volume>, <fpage>1545</fpage>&#x2013;<lpage>1614</lpage>. <pub-id pub-id-type="doi">10.1002/jcc.21287</pub-id> </citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Brooks</surname>
<given-names>C. L.</given-names>
</name>
<name>
<surname>Gruebele</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Onuchic</surname>
<given-names>J. N.</given-names>
</name>
<name>
<surname>Wolynes</surname>
<given-names>P. G.</given-names>
</name>
</person-group> (<year>1998</year>). <article-title>Chemical physics of protein folding</article-title>. <source>Proc. Natl. Acad. Sci. United States</source> <volume>95</volume>, <fpage>11037</fpage>&#x2013;<lpage>11038</lpage>. <pub-id pub-id-type="doi">10.1073/pnas.95.19.11037</pub-id> </citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bussi</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Donadio</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Parrinello</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2007</year>). <article-title>Canonical sampling through velocity rescaling</article-title>. <source>J. Chem. Phys.</source> <volume>126</volume>, <fpage>014101</fpage>. <pub-id pub-id-type="doi">10.1063/1.2408420</pub-id> </citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cheatham</surname>
<given-names>T. E.</given-names>
<suffix>III</suffix>
</name>
<name>
<surname>Miller</surname>
<given-names>J. L.</given-names>
</name>
<name>
<surname>Fox</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Darden</surname>
<given-names>T. A.</given-names>
</name>
<name>
<surname>Kollman</surname>
<given-names>P. A.</given-names>
</name>
</person-group> (<year>1995</year>). <article-title>Molecular dynamics simulations on solvated biomolecular systems: the particle mesh Ewald method leads to stable trajectories of DNA, RNA, and proteins</article-title>. <source>J. Am. Chem. Soc.</source> <volume>117</volume>, <fpage>4193</fpage>&#x2013;<lpage>4194</lpage>. <pub-id pub-id-type="doi">10.1021/ja00119a045</pub-id> </citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chothia</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>1974</year>). <article-title>Hydrophobic bonding and accessible surface area in proteins</article-title>. <source>Nature</source> <volume>248</volume>, <fpage>338</fpage>&#x2013;<lpage>339</lpage>. <pub-id pub-id-type="doi">10.1038/248338a0</pub-id> </citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chothia</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>1976</year>). <article-title>The nature of the accessible and buried surfaces in proteins</article-title>. <source>J. Mol. Biol.</source> <volume>105</volume>, <fpage>1</fpage>&#x2013;<lpage>12</lpage>. <pub-id pub-id-type="doi">10.1016/0022-2836(76)90191-1</pub-id> </citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Deber</surname>
<given-names>C. M.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>L. P.</given-names>
</name>
<name>
<surname>Prior</surname>
<given-names>A. S.</given-names>
</name>
<name>
<surname>Agrawal</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Muskat</surname>
<given-names>B. L.</given-names>
</name>
<etal/>
</person-group> (<year>2001</year>). <article-title>TM Finder: a prediction program for transmembrane protein segments using a combination of hydrophobicity and nonpolar phase helicity scales</article-title>. <source>Protein Sci.</source> <volume>10</volume>, <fpage>212</fpage>&#x2013;<lpage>219</lpage>. <pub-id pub-id-type="doi">10.1110/ps.30301</pub-id> </citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dorairaj</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Allen</surname>
<given-names>T. W.</given-names>
</name>
</person-group> (<year>2007</year>). <article-title>On the thermodynamic stability of a charged arginine side chain in a transmembrane helix</article-title>. <source>Proc. Natl. Acad. Sci. United States</source> <volume>104</volume>, <fpage>4943</fpage>&#x2013;<lpage>4948</lpage>. <pub-id pub-id-type="doi">10.1073/pnas.0610470104</pub-id> </citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hensen</surname>
<given-names>U.</given-names>
</name>
<name>
<surname>Meyer</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Haas</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Rex</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Vriend</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Grubm&#xfc;ller</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Exploring protein dynamics space: the dynasome as the missing link between protein structure and function</article-title>. <source>PLoS ONE</source> <volume>7</volume>, <fpage>e33931</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pone.0033931</pub-id> </citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hess</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Bekker</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Berendsen</surname>
<given-names>H. J. C.</given-names>
</name>
<name>
<surname>Fraaije</surname>
<given-names>J. G. E. M.</given-names>
</name>
</person-group> (<year>1997</year>). <article-title>LINCS: a linear constraint solver for molecular simulations</article-title>. <source>J. Comput. Chem.</source> <volume>18</volume>, <fpage>1463</fpage>&#x2013;<lpage>1472</lpage>. <pub-id pub-id-type="doi">10.1002/(sici)1096-987x(199709)18:12&#x3c;1463::aid-jcc4&#x3e;3.0.co;2-h</pub-id> </citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hessa</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Kim</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Bihlmaier</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Lundin</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Boekel</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Andersson</surname>
<given-names>H.</given-names>
</name>
<etal/>
</person-group> (<year>2005</year>). <article-title>Recognition of transmembrane helices by the endoplasmic reticulum translocon</article-title>. <source>Nature</source> <volume>433</volume>, <fpage>377</fpage>&#x2013;<lpage>381</lpage>. <pub-id pub-id-type="doi">10.1038/nature03216</pub-id> </citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jones</surname>
<given-names>D. D.</given-names>
</name>
</person-group> (<year>1975</year>). <article-title>Amino acid properties and side-chain orientation in proteins: a cross correlation approach</article-title>. <source>J. Theor. Biol.</source> <volume>50</volume>, <fpage>167</fpage>&#x2013;<lpage>183</lpage>. <pub-id pub-id-type="doi">10.1016/0022-5193(75)90031-4</pub-id> </citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jorgensen</surname>
<given-names>W. L.</given-names>
</name>
<name>
<surname>Chandrasekhar</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Madura</surname>
<given-names>J. D.</given-names>
</name>
<name>
<surname>Impey</surname>
<given-names>R. W.</given-names>
</name>
<name>
<surname>Klein</surname>
<given-names>M. L.</given-names>
</name>
</person-group> (<year>1983</year>). <article-title>Comparison of simple potential functions for simulating liquid water</article-title>. <source>J. Chem. Phys.</source> <volume>79</volume>, <fpage>926</fpage>&#x2013;<lpage>935</lpage>. <pub-id pub-id-type="doi">10.1063/1.445869</pub-id> </citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kabsch</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Sander</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>1983</year>). <article-title>Dictionary of protein secondary structure: pattern recognition of hydrogen-bonded and geometrical features</article-title>. <source>Biopolymers</source> <volume>22</volume>, <fpage>2577</fpage>&#x2013;<lpage>2637</lpage>. <pub-id pub-id-type="doi">10.1002/bip.360221211</pub-id> </citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kyte</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Doolittle</surname>
<given-names>R. F.</given-names>
</name>
</person-group> (<year>1982</year>). <article-title>A simple method for displaying the hydropathic character of a protein</article-title>. <source>J. Mol. Biol.</source> <volume>157</volume>, <fpage>105</fpage>&#x2013;<lpage>132</lpage>. <pub-id pub-id-type="doi">10.1016/0022-2836(82)90515-0</pub-id> </citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Leopizzi</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Cocchiola</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Milanetti</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Raimondo</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Politi</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Giordano</surname>
<given-names>C.</given-names>
</name>
<etal/>
</person-group> (<year>2017</year>). <article-title>IKK&#x3b1; inhibition by a glucosamine derivative enhances Maspin expression in osteosarcoma cell line</article-title>. <source>Chem. Biol. Interact.</source> <volume>262</volume>, <fpage>19</fpage>&#x2013;<lpage>28</lpage>. <pub-id pub-id-type="doi">10.1016/j.cbi.2016.12.005</pub-id> </citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Levitt</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>1976</year>). <article-title>A simplified representation of protein conformations for rapid simulation of protein folding</article-title>. <source>J. Mol. Biol.</source> <volume>104</volume>, <fpage>59</fpage>&#x2013;<lpage>107</lpage>. <pub-id pub-id-type="doi">10.1016/0022-2836(76)90004-8</pub-id> </citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Levy</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Onuchic</surname>
<given-names>J. N.</given-names>
</name>
</person-group> (<year>2006</year>). <article-title>Water mediation in protein folding and molecular recognition</article-title>. <source>Annu. Rev. Biophys. Biomol. Struct.</source> <volume>35</volume>, <fpage>389</fpage>&#x2013;<lpage>415</lpage>. <pub-id pub-id-type="doi">10.1146/annurev.biophys.35.040405.102134</pub-id> </citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>MacCallum</surname>
<given-names>J. L.</given-names>
</name>
<name>
<surname>Bennett</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Tieleman</surname>
<given-names>D. P.</given-names>
</name>
</person-group> (<year>2007</year>). <article-title>Partitioning of amino acid side chains into lipid bilayers: results from computer simulations and comparison to experiment</article-title>. <source>J. Gen. Physiol.</source> <volume>129</volume>, <fpage>371</fpage>&#x2013;<lpage>377</lpage>. <pub-id pub-id-type="doi">10.1085/jgp.200709745</pub-id> </citation>
</ref>
<ref id="B26">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Maechler</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Rousseeuw</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Struyf</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Hubert</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Hornik</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2019</year>). <source>
<italic>cluster: Cluster Analysis Basics and Extensions</italic>. R package version 2.1.0&#x2014;for new features, see the &#x201c;Changelog&#x201d; file (in the package source)</source>. <ext-link ext-link-type="uri" xlink:href="https://CRAN.R-project.org/package=cluster">https://CRAN.R-project.org/package&#x3d;cluster</ext-link>
</citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Milanetti</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Carlucci</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Olimpieri</surname>
<given-names>P. P.</given-names>
</name>
<name>
<surname>Palumbo</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Carlucci</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Ferrone</surname>
<given-names>V.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Correlation analysis based on the hydropathy properties of non-steroidal anti-inflammatory drugs in solid-phase extraction (SPE) and reversed-phase high performance liquid chromatography (HPLC) with photodiode array detection and their applications to biological samples</article-title>. <source>J. Chromatogr. A</source> <volume>1605</volume>, <fpage>360351</fpage>. <pub-id pub-id-type="doi">10.1016/j.chroma.2019.07.005</pub-id> </citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Milanetti</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Raimondo</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Tramontano</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Prediction of the permeability of neutral drugs inferred from their solvation properties</article-title>. <source>Bioinformatics</source> <volume>32</volume>, <fpage>1163</fpage>&#x2013;<lpage>1169</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btv725</pub-id> </citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Miotto</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Olimpieri</surname>
<given-names>P. P.</given-names>
</name>
<name>
<surname>Di Rienzo</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Ambrosetti</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Corsi</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Lepore</surname>
<given-names>R.</given-names>
</name>
<etal/>
</person-group> (<year>2018</year>). <article-title>Insights on protein thermal stability: a graph representation of molecular interactions</article-title>. <source>Bioinformatics</source> <volume>35</volume>, <fpage>2569</fpage>&#x2013;<lpage>2577</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/bty1011</pub-id> </citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mondal</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Mukherjee</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Bagchi</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Origin of diverse time scales in the protein hydration layer solvation dynamics: a simulation study</article-title>. <source>J. Chem. Phys.</source> <volume>147</volume>, <fpage>154901</fpage>. <pub-id pub-id-type="doi">10.1063/1.4995420</pub-id> </citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Moon</surname>
<given-names>C. P.</given-names>
</name>
<name>
<surname>Fleming</surname>
<given-names>K. G.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>Side-chain hydrophobicity scale derived from transmembrane protein folding into lipid bilayers</article-title>. <source>Proc. Natl. Acad. Sci. United States</source> <volume>108</volume>, <fpage>10174</fpage>&#x2013;<lpage>10177</lpage>. <pub-id pub-id-type="doi">10.1073/pnas.1103979108</pub-id> </citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Moret</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Zebende</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2007</year>). <article-title>Amino acid hydrophobicity and accessible surface area</article-title>. <source>Phys. Rev. E Stat. Nonlin. Soft Matter Phys.</source> <volume>75</volume>, <fpage>011920</fpage>. <pub-id pub-id-type="doi">10.1103/PhysRevE.75.011920</pub-id> </citation>
</ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Nicolau</surname>
<given-names>D. V.</given-names>
</name>
<name>
<surname>Paszek</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Fulga</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Nicolau</surname>
<given-names>D. V.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Mapping hydrophobicity on the protein molecular surface at atom-level resolution</article-title>. <source>PLoS ONE</source> <volume>9</volume>, <fpage>e114042</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pone.0114042</pub-id> </citation>
</ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Oobatake</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Ooi</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>1988</year>). <article-title>Characteristic thermodynamic properties of hydrated water for 20 amino acid residues in globular proteins</article-title>. <source>J. Biochem.</source> <volume>104</volume>, <fpage>433</fpage>&#x2013;<lpage>439</lpage>. <pub-id pub-id-type="doi">10.1093/oxfordjournals.jbchem.a122485</pub-id> </citation>
</ref>
<ref id="B35">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ooi</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Oobatake</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>N&#xe9;methy</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Scheraga</surname>
<given-names>H. A.</given-names>
</name>
</person-group> (<year>1987</year>). <article-title>Accessible surface areas as a measure of the thermodynamic parameters of hydration of peptides</article-title>. <source>Proc. Natl. Acad. Sci. United States</source> <volume>84</volume>, <fpage>3086</fpage>&#x2013;<lpage>3090</lpage>. <pub-id pub-id-type="doi">10.1073/pnas.84.10.3086</pub-id> </citation>
</ref>
<ref id="B36">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Parrinello</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Rahman</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>1980</year>). <article-title>Crystal structure and pair potentials: a molecular-dynamics study</article-title>. <source>Phys. Rev. Lett.</source> <volume>45</volume>, <fpage>1196</fpage>&#x2013;<lpage>1199</lpage>. <pub-id pub-id-type="doi">10.1103/physrevlett.45.1196</pub-id> </citation>
</ref>
<ref id="B37">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Peters</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Elofsson</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Why is the biological hydrophobicity scale more accurate than earlier experimental hydrophobicity scales?</article-title> <source>Proteins</source> <volume>82</volume>, <fpage>2190</fpage>&#x2013;<lpage>2198</lpage>. <pub-id pub-id-type="doi">10.1002/prot.24582</pub-id> </citation>
</ref>
<ref id="B38">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Qvist</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Persson</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Mattea</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Halle</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>Time scales of water dynamics at biological interfaces: peptides, proteins and cells</article-title>. <source>Faraday Discuss.</source> <volume>141</volume>, <fpage>131</fpage>&#x2013;<lpage>207</lpage>. <pub-id pub-id-type="doi">10.1039/b806194g</pub-id> </citation>
</ref>
<ref id="B39">
<citation citation-type="book">
<collab>R Core Team</collab> (<year>2020</year>). <source>R: a language and environment for statistical computing</source>. <publisher-loc>Vienna, Austria</publisher-loc>: <publisher-name>R Foundation for Statistical Computing</publisher-name>.</citation>
</ref>
<ref id="B40">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Raschke</surname>
<given-names>T. M.</given-names>
</name>
</person-group> (<year>2006</year>). <article-title>Water structure and interactions with protein surfaces</article-title>. <source>Curr. Opin. Struct. Biol.</source> <volume>16</volume>, <fpage>152</fpage>&#x2013;<lpage>159</lpage>. <pub-id pub-id-type="doi">10.1016/j.sbi.2006.03.002</pub-id> </citation>
</ref>
<ref id="B41">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rose</surname>
<given-names>G. D.</given-names>
</name>
<name>
<surname>Geselowitz</surname>
<given-names>A. R.</given-names>
</name>
<name>
<surname>Lesser</surname>
<given-names>G. J.</given-names>
</name>
<name>
<surname>Lee</surname>
<given-names>R. H.</given-names>
</name>
<name>
<surname>Zehfus</surname>
<given-names>M. H.</given-names>
</name>
</person-group> (<year>1985</year>). <article-title>Hydrophobicity of amino acid residues in globular proteins</article-title>. <source>Science</source> <volume>229</volume>, <fpage>834</fpage>&#x2013;<lpage>838</lpage>. <pub-id pub-id-type="doi">10.1126/science.4023714</pub-id> </citation>
</ref>
<ref id="B42">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rothschild</surname>
<given-names>L. J.</given-names>
</name>
<name>
<surname>Mancinelli</surname>
<given-names>R. L.</given-names>
</name>
</person-group> (<year>2001</year>). <article-title>Life in extreme environments</article-title>. <source>Nature</source> <volume>409</volume>, <fpage>1092</fpage>&#x2013;<lpage>1101</lpage>. <pub-id pub-id-type="doi">10.1038/35059215</pub-id> </citation>
</ref>
<ref id="B43">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Schow</surname>
<given-names>E. V.</given-names>
</name>
<name>
<surname>Freites</surname>
<given-names>J. A.</given-names>
</name>
<name>
<surname>Cheng</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Bernsel</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Von Heijne</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>White</surname>
<given-names>S. H.</given-names>
</name>
<etal/>
</person-group> (<year>2011</year>). <article-title>Arginine in membranes: the connection between molecular dynamics simulations and translocon-mediated insertion experiments</article-title>. <source>J. Membr. Biol.</source> <volume>239</volume>, <fpage>35</fpage>&#x2013;<lpage>48</lpage>. <pub-id pub-id-type="doi">10.1007/s00232-010-9330-x</pub-id> </citation>
</ref>
<ref id="B44">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sweet</surname>
<given-names>R. M.</given-names>
</name>
<name>
<surname>Eisenberg</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>1983</year>). <article-title>Correlation of sequence hydrophobicities measures similarity in three-dimensional protein structure</article-title>. <source>J. Mol. Biol.</source> <volume>171</volume>, <fpage>479</fpage>&#x2013;<lpage>488</lpage>. <pub-id pub-id-type="doi">10.1016/0022-2836(83)90041-4</pub-id> </citation>
</ref>
<ref id="B45">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tanford</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>1972</year>). <article-title>Hydrophobic free energy, micelle formation and the association of proteins with amphiphiles</article-title>. <source>J. Mol. Biol.</source> <volume>67</volume>, <fpage>59</fpage>&#x2013;<lpage>74</lpage>. <pub-id pub-id-type="doi">10.1016/0022-2836(72)90386-5</pub-id> </citation>
</ref>
<ref id="B46">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tarek</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Tobias</surname>
<given-names>D. J.</given-names>
</name>
</person-group> (<year>2000</year>). <article-title>The dynamics of protein hydration water: a quantitative comparison of molecular dynamics simulations and neutron-scattering experiments</article-title>. <source>Biophys. J.</source> <volume>79</volume>, <fpage>3244</fpage>&#x2013;<lpage>3257</lpage>. <pub-id pub-id-type="doi">10.1016/S0006-3495(00)76557-X</pub-id> </citation>
</ref>
<ref id="B47">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tomobe</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Yamamoto</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Koji&#x107;</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Sato</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Yasui</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Yasuoka</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Origin of the blueshift of water molecules at interfaces of hydrophilic cyclic compounds</article-title>. <source>Sci. Adv.</source> <volume>3</volume>, <fpage>e1701400</fpage>. <pub-id pub-id-type="doi">10.1126/sciadv.1701400</pub-id> </citation>
</ref>
<ref id="B48">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Touw</surname>
<given-names>W. G.</given-names>
</name>
<name>
<surname>Baakman</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Black</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Te Beek</surname>
<given-names>T. A.</given-names>
</name>
<name>
<surname>Krieger</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Joosten</surname>
<given-names>R. P.</given-names>
</name>
<etal/>
</person-group> (<year>2015</year>). <article-title>A series of PDB-related databanks for everyday needs</article-title>. <source>Nucleic Acids Res.</source> <volume>43</volume>, <fpage>D364</fpage>&#x2013;<lpage>D368</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gku1028</pub-id> </citation>
</ref>
<ref id="B49">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Vagenende</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Trout</surname>
<given-names>B. L.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Quantitative characterization of local protein solvation to predict solvent effects on protein structure</article-title>. <source>Biophys. J.</source> <volume>103</volume>, <fpage>1354</fpage>&#x2013;<lpage>1362</lpage>. <pub-id pub-id-type="doi">10.1016/j.bpj.2012.08.011</pub-id> </citation>
</ref>
<ref id="B50">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Van Der Spoel</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Lindahl</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Hess</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Groenhof</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Mark</surname>
<given-names>A. E.</given-names>
</name>
<name>
<surname>Berendsen</surname>
<given-names>H. J.</given-names>
</name>
</person-group> (<year>2005</year>). <article-title>GROMACS: fast, flexible, and free</article-title>. <source>J. Comput. Chem.</source> <volume>26</volume>, <fpage>1701</fpage>&#x2013;<lpage>1718</lpage>. <pub-id pub-id-type="doi">10.1002/jcc.20291</pub-id> </citation>
</ref>
<ref id="B51">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ward</surname>
<given-names>J. H.</given-names>
</name>
</person-group> (<year>1963</year>). <article-title>Hierarchical grouping to optimize an objective function</article-title>. <source>J. Am. Stat. Assoc.</source> <volume>58</volume>, <fpage>236</fpage>&#x2013;<lpage>244</lpage>. <pub-id pub-id-type="doi">10.1080/01621459.1963.10500845</pub-id> </citation>
</ref>
<ref id="B52">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wilce</surname>
<given-names>M. C. J.</given-names>
</name>
<name>
<surname>Aguilar</surname>
<given-names>M.-I.</given-names>
</name>
<name>
<surname>Hearn</surname>
<given-names>M. T. W.</given-names>
</name>
</person-group> (<year>1995</year>). <article-title>Physicochemical basis of amino acid hydrophobicity scales: evaluation of four new scales of amino acid hydrophobicity coefficients derived from RP-HPLC of peptides</article-title>. <source>Anal. Chem.</source> <volume>67</volume>, <fpage>1210</fpage>&#x2013;<lpage>1219</lpage>. <pub-id pub-id-type="doi">10.1021/ac00103a012</pub-id> </citation>
</ref>
</ref-list>
</back>
</article>
