<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.3" xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Mol. Biosci.</journal-id>
<journal-title>Frontiers in Molecular Biosciences</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Mol. Biosci.</abbrev-journal-title>
<issn pub-type="epub">2296-889X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">734169</article-id>
<article-id pub-id-type="doi">10.3389/fmolb.2021.734169</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Molecular Biosciences</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>An Unbound Proline-Rich Signaling Peptide Frequently Samples <italic>Cis</italic> Conformations in Gaussian Accelerated Molecular Dynamics Simulations</article-title>
<alt-title alt-title-type="left-running-head">Alcantara et&#x20;al.</alt-title>
<alt-title alt-title-type="right-running-head">Cis Conformations of a Proline-Rich Peptide</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Alcantara</surname>
<given-names>Juan</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1483929/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Stix</surname>
<given-names>Robyn</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1391985/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Huang</surname>
<given-names>Katherine</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Connor</surname>
<given-names>Acadia</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>East</surname>
<given-names>Ray</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Jaramillo-Martinez</surname>
<given-names>Valeria</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1506154/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Stollar</surname>
<given-names>Elliott J.</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1513301/overview"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Ball</surname>
<given-names>K. Aurelia</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1383272/overview"/>
</contrib>
</contrib-group>
<aff id="aff1">
<label>
<sup>1</sup>
</label>Department of Chemistry, Skidmore College, <addr-line>Saratoga Springs</addr-line>, <addr-line>NY</addr-line>, <country>United&#x20;States</country>
</aff>
<aff id="aff2">
<label>
<sup>2</sup>
</label>Department of Neuroscience and Pharmacology, Texas Tech University Health Science Center, <addr-line>Lubbock</addr-line>, <addr-line>TX</addr-line>, <country>United&#x20;States</country>
</aff>
<aff id="aff3">
<label>
<sup>3</sup>
</label>School of Life Sciences, University of Liverpool, <addr-line>Liverpool</addr-line>, <country>United&#x20;Kingdom</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1053162/overview">Damiano Piovesan</ext-link>, University of Padua, Italy</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1253341/overview">Steven T. Whitten</ext-link>, Texas State University, United&#x20;States</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/502370/overview">Giovanni Minervini</ext-link>, University of Padua, Italy</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: K. Aurelia Ball, <email>kball@skidmore.edu</email>
</corresp>
<fn fn-type="other">
<p>This article was submitted to Molecular Recognition, a section of the journal Frontiers in Molecular Biosciences</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>15</day>
<month>11</month>
<year>2021</year>
</pub-date>
<pub-date pub-type="collection">
<year>2021</year>
</pub-date>
<volume>8</volume>
<elocation-id>734169</elocation-id>
<history>
<date date-type="received">
<day>30</day>
<month>06</month>
<year>2021</year>
</date>
<date date-type="accepted">
<day>27</day>
<month>10</month>
<year>2021</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2021 Alcantara, Stix, Huang, Connor, East, Jaramillo-Martinez, Stollar and Ball.</copyright-statement>
<copyright-year>2021</copyright-year>
<copyright-holder>Alcantara, Stix, Huang, Connor, East, Jaramillo-Martinez, Stollar and Ball</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these&#x20;terms.</p>
</license>
</permissions>
<abstract>
<p>Disordered proline-rich motifs are common across the proteomes of many species and are often involved in protein-protein interactions. Proline is a unique amino acid due to the covalent bond between the backbone nitrogen and the proline side chain. The resulting five-membered ring allows proline to sample the <italic>cis</italic> state about its peptide bond, which other residues cannot do as readily. Because proline-rich disordered sequences exist as ensembles that likely include structures with the proline peptide bond in <italic>cis</italic>, a robust methodology to accurately account for these conformations in the overall ensemble is crucial. Observing the <italic>cis</italic> conformations of proline in a disordered sequence is challenging both experimentally and computationally. Nitrogen-hydrogen NMR spectroscopy cannot directly observe proline residues, which lack an amide bond, and computational methods struggle to overcome the large kinetic barrier between the <italic>cis</italic> and <italic>trans</italic> states, since isomerization usually occurs on the order of seconds. In the current work, Gaussian accelerated molecular dynamics was used to overcome this free energy barrier and simulate proline isomerization in a tetrapeptide (KPTP) and in the 12-residue proline-rich SH3 binding peptide, ArkA. We found that Gaussian accelerated molecular dynamics, when combined with a lowered peptide bond dihedral angle potential energy barrier (15&#xa0;kcal/mol), allowed sufficient sampling of the proline <italic>cis</italic> and <italic>trans</italic> states on a microsecond timescale. All ArkA prolines spend a significant fraction of time in <italic>cis</italic>, leading to a more compact ensemble with less polyproline II helix structure than an ArkA ensemble with all peptide bonds in <italic>trans</italic>. 
The ensemble containing <italic>cis</italic> prolines also matches more closely to <italic>in&#x20;vitro</italic> circular dichroism data than the all-<italic>trans</italic> ensemble. The ability of the ArkA prolines to isomerize likely affects the peptide&#x2019;s ability to bind its partner SH3 domain, and should be studied further. This is the first molecular dynamics simulation study of proline isomerization in a biologically relevant proline-rich sequence that we know of, and a similar protocol could be applied to study multi-proline isomerization in other proline-containing proteins to improve conformational diversity and agreement with <italic>in&#x20;vitro</italic>&#x20;data.</p>
</abstract>
<kwd-group>
<kwd>proline</kwd>
<kwd>isomerization</kwd>
<kwd>molecular dynamics</kwd>
<kwd>intrinsically disordered proteins</kwd>
<kwd>circular dichroism</kwd>
</kwd-group>
</article-meta>
</front>
<body>
<sec id="s1">
<title>Introduction</title>
<p>Proline-rich disordered sequences are one of the common binding motifs for protein-protein interaction domains found in biology. Proline-rich regions are found widely in prokaryotes and eukaryotes and are present in twenty-five percent of human proteins (<xref ref-type="bibr" rid="B36">Williamson, 1994</xref>; <xref ref-type="bibr" rid="B13">Kaneko et&#x20;al., 2008</xref>). Despite their prevalence, much is unknown about the structural properties of disordered proline-rich sequences since they are challenging to study both experimentally and computationally. Although a proline-rich region will canonically adopt a polyproline II (PPII) helix when bound to its interaction partner, such as an SH3 domain, these sequences are also flexible and their structures can vary based on the identity of the other amino acids present (<xref ref-type="bibr" rid="B36">Williamson, 1994</xref>; <xref ref-type="bibr" rid="B13">Kaneko et&#x20;al., 2008</xref>).</p>
<p>One unique attribute of proline is its ability to isomerize around the peptide bond and sample a <italic>cis</italic> conformation. In the typical <italic>trans</italic> conformation of the peptide bond, the <italic>&#x3c9;</italic> dihedral angle is 180&#xb0;, while in the <italic>cis</italic> state, the <italic>&#x3c9;</italic> angle is 0&#xb0;. The <italic>cis</italic> conformation is energetically disfavored because in this conformation the <italic>&#x3b1;</italic>-carbon atom is positioned on the same side of the backbone as the <italic>&#x3b1;</italic>-carbon for the preceding residue, causing a steric interaction. However, in the case of proline, the unique ring structure means that the <italic>&#x3b4;</italic>-carbon atom of the proline side chain is bonded to the nitrogen in the backbone, causing an unfavorable steric interaction between the <italic>&#x3b4;</italic>-carbon and the <italic>&#x3b1;</italic>-carbon of the preceding residue in the <italic>trans</italic> conformation. Thus, the difference in energies between the <italic>cis</italic> and <italic>trans</italic> is reduced for proline when compared to other amino acids, making it thermodynamically possible to find proline residues in the <italic>cis</italic> conformation, while all other residues are very rarely found in <italic>cis</italic>. In the Protein Data Bank (PDB), 5&#x2013;7% of prolines are found in the <italic>cis</italic> state (<xref ref-type="bibr" rid="B17">MacArthur and Thornton, 1991</xref>). A proline residue in a disordered sequence might be expected to sample the <italic>cis</italic> state even more frequently, perhaps in 10% of its equilibrium ensemble, although the identity of the residue preceding the proline will also affect the <italic>&#x3c9;</italic> dihedral angle (<xref ref-type="bibr" rid="B36">Williamson, 1994</xref>; <xref ref-type="bibr" rid="B5">Doose et&#x20;al., 2007</xref>). 
For proline-rich sequences, the typical secondary structure is a left-handed PPII helix, which requires that all peptide bonds in the helix are in the <italic>trans</italic> state. When sequential prolines are in <italic>cis</italic>, a right-handed polyproline I (PPI) helix can form instead (<xref ref-type="bibr" rid="B35">Wedemeyer et&#x20;al., 2002</xref>; <xref ref-type="bibr" rid="B25">Moradi et&#x20;al., 2009</xref>). If just a single peptide bond in a disordered sequence is in the <italic>cis</italic> conformation, this results in a kink at that point in the peptide&#x20;chain.</p>
<p>Although the <italic>cis</italic> and <italic>trans</italic> states for proline are thermodynamically similar, there is a high free energy barrier of 10&#x2013;20&#xa0;kcal/mol that must be overcome to switch between the two states (<xref ref-type="bibr" rid="B35">Wedemeyer et&#x20;al., 2002</xref>; <xref ref-type="bibr" rid="B25">Moradi et&#x20;al., 2009</xref>). This is primarily due to the partial double bond character of the peptide bond which must be broken to allow rotation around the <italic>&#x3c9;</italic> dihedral angle. Peptidylprolyl isomerases often catalyze this transition <italic>in vivo</italic> when it is required for folding or protein function (<xref ref-type="bibr" rid="B35">Wedemeyer et&#x20;al., 2002</xref>). The uncatalyzed isomerization transition between <italic>cis</italic> and <italic>trans</italic> therefore exhibits slow kinetics, taking place on a time scale of seconds to minutes (<xref ref-type="bibr" rid="B35">Wedemeyer et&#x20;al., 2002</xref>; <xref ref-type="bibr" rid="B25">Moradi et&#x20;al., 2009</xref>). For proteins with a stable folded structure, the timescale of the isomerization of a proline residue in the <italic>cis</italic> state can determine the folding rate of the protein (<xref ref-type="bibr" rid="B35">Wedemeyer et&#x20;al., 2002</xref>). In some cases, proline isomerization is also important for protein function. For example, the mechanosensing ability of filamin, transcription regulation of the histone H3, and the binding affinity of NCBD to ACTR are all affected by isomerization of a key proline residue (<xref ref-type="bibr" rid="B28">Nelson et&#x20;al., 2006</xref>; <xref ref-type="bibr" rid="B30">Rognoni et&#x20;al., 2014</xref>; <xref ref-type="bibr" rid="B39">Zosel et&#x20;al., 2018</xref>). In the case of proline-rich intrinsically disordered proteins (IDPs), the role of proline isomerization in function has been less well studied. 
Although many proline-rich disordered sequences have been shown to adopt a conformation with all prolines in <italic>trans</italic> when bound to their interaction partner, these peptides may also interact with their partners when one or multiple proline residues are in <italic>cis</italic>, and how these alternate isomeric states affect binding is largely unknown.</p>
<p>A number of factors contribute to the difficulty of studying the isomerization state of proline in proline-rich disordered proteins. IDP structure is often studied using NMR spectroscopy; however common 2-D protein-NMR experiments using <sup>1</sup>H and <sup>15</sup>N are not able to report on proline residues directly because proline lacks an N-H bond that is present in all other amino acids. NOESY experiments and 1-D hydrogen NMR experiments have been performed on polyproline peptides, but it is difficult to specifically identify each residue in such experiments (<xref ref-type="bibr" rid="B14">Kelly et&#x20;al., 2001</xref>; <xref ref-type="bibr" rid="B2">Best et&#x20;al., 2007</xref>). Circular dichroism (CD) reports in an average way on the secondary structure of a protein and the CD ellipticity measurement at 228&#xa0;nm has been found to be related to the amount of PPII helix present in a peptide (<xref ref-type="bibr" rid="B14">Kelly et&#x20;al., 2001</xref>). However, CD cannot directly report on the <italic>cis</italic> conformations for specific proline residues in a protein&#x2019;s structural ensemble. FRET and PET can also reveal some information about proline-rich peptide conformations, but only indirectly. Studies using NMR, CD, PET, and FRET have found that polyproline peptide chains sample a significant amount of <italic>cis</italic> conformations in their structural ensembles, which disrupts the canonical PPII structure of these peptides (<xref ref-type="bibr" rid="B14">Kelly et&#x20;al., 2001</xref>; <xref ref-type="bibr" rid="B2">Best et&#x20;al., 2007</xref>; <xref ref-type="bibr" rid="B5">Doose et&#x20;al., 2007</xref>), but little is known experimentally about the <italic>cis</italic> conformational states of more biologically relevant proline-rich disordered sequences.</p>
<p>Computational methods have also encountered challenges in studying the <italic>cis</italic>-<italic>trans</italic> isomerization of the peptide bond in proline-rich proteins. Because of the high energetic barrier between the <italic>cis</italic> and <italic>trans</italic> states, traditional molecular dynamics (MD) simulations cannot capture the isomerization transition, which takes place on a timescale much longer than the longest MD simulations of proteins. Therefore, all traditional MD simulations of IDPs containing proline have kept the isomerization state of the peptide bond constant throughout the simulation, either producing an ensemble where all the prolines are in <italic>trans</italic> or independently simulating a separate ensemble in <italic>cis</italic> and afterward combining the two ensembles with some weight based on the expected time in <italic>cis</italic> (<xref ref-type="bibr" rid="B37">Yedvabny et&#x20;al., 2014</xref>; <xref ref-type="bibr" rid="B38">Zhan and Ytreberg, 2015</xref>; <xref ref-type="bibr" rid="B39">Zosel et&#x20;al., 2018</xref>). This method could be fairly effective for a protein containing only one proline residue, although it assumes the probability of this proline being in <italic>cis</italic> is known <italic>a priori</italic>; however, this method is insufficient for many disordered protein interaction sequences that contain multiple proline residues.</p>
<p>In order to account for both <italic>cis</italic> and <italic>trans</italic> isomers of proline in an MD simulation, an advanced sampling method is required to overcome the high energetic barrier between the two states. Researchers have used accelerated MD to overcome the isomerization barrier for very short peptides containing just one proline (<xref ref-type="bibr" rid="B9">Hamelberg et&#x20;al., 2005</xref>; <xref ref-type="bibr" rid="B6">Doshi and Hamelberg, 2009</xref>). These studies were important for showing that boosting the potential of low energy states could be an effective way to accelerate the transition between <italic>cis</italic> and <italic>trans</italic> isomers, but did not apply the method to a longer biologically relevant sequence containing multiple proline residues. Other studies have focused on polyproline, a homogenous sequence containing only proline residues, which is not usually the binding motif for protein-protein interaction domains. Using either Monte Carlo sampling or adaptively biased MD and implicit solvent, these studies found that a significant amount of the resulting ensemble contains prolines in <italic>cis</italic> despite the canonical assumption that polyproline sequences adopt a PPII structure with all prolines in <italic>trans</italic> (<xref ref-type="bibr" rid="B33">Vila et&#x20;al., 2004</xref>; <xref ref-type="bibr" rid="B25">Moradi et&#x20;al., 2009</xref>; <xref ref-type="bibr" rid="B29">Radhakrishnan et&#x20;al., 2012</xref>). One Monte Carlo simulation study of a disordered protein ensemble in implicit solvent included <italic>cis</italic> conformations for all proline residues (<xref ref-type="bibr" rid="B20">Martin et&#x20;al., 2016</xref>). 
Recently, another study has used MD to simulate isomerization in a protein containing multiple proline residues, but for an antibody containing two prolines in a loop region, not a proline-rich disordered sequence (<xref ref-type="bibr" rid="B21">Masiero et&#x20;al., 2020</xref>). These results suggest that important proline-rich signaling peptides also likely sample ensembles containing <italic>cis</italic> conformers, and accounting for these structures in MD simulations could be important for understanding proline-rich IDP function.</p>
<p>We focus on the proline-rich peptide ArkA, a disordered region of the yeast actin patch kinase Ark1p. This proline-rich IDP helps regulate the actin cytoskeleton assembly by binding to an SH3 domain of Actin Binding Protein 1 (Abp1p). The key binding region of Ark1p is 12 residues in length (K<sub>(3)</sub>P<sub>(2)</sub>T<sub>(1)</sub>P<sub>(0)</sub>P<sub>(-1)</sub>P<sub>(-2)</sub>K<sub>(-3)</sub>P<sub>(-4)</sub>S<sub>(-5)</sub>H<sub>(-6)</sub>L<sub>(-7)</sub>K<sub>(-8)</sub>) and contains five prolines. In an NMR structure of ArkA bound to the SH3 domain, all five prolines are in the <italic>trans</italic> conformation and the N-terminal proline-rich region of the peptide adopts a PPII helix (<xref ref-type="bibr" rid="B32">Stollar et&#x20;al., 2009</xref>). However, when this sequence is not bound to the SH3 domain, the proline residues may indeed sample <italic>cis</italic> conformations in equilibrium with the all-<italic>trans</italic> state. Recently we have published a paper on the binding of ArkA to the Abp1p SH3 domain, but in those simulations, the large energy barrier for isomerization of the peptide bond prevented us from sampling any conformations containing <italic>cis</italic> prolines (<xref ref-type="bibr" rid="B7">Gerlach et&#x20;al., 2020</xref>). Here, we attempt to capture the complete conformational ensemble of unbound ArkA by using Gaussian accelerated MD (GaMD) to overcome this barrier and simulate numerous transitions between the <italic>cis</italic> and <italic>trans</italic> states for each proline (<xref ref-type="bibr" rid="B22">Miao et&#x20;al., 2015</xref>). We find that in order to achieve adequate sampling of the transition we must first lower the potential barrier between <italic>cis</italic> and <italic>trans</italic> for the peptide bond before running GaMD simulations. 
The resulting ArkA ensemble contains a significant population of structures with one or more proline residues in <italic>cis</italic>, which breaks up the PPII helix structure. This GaMD generated ensemble is also more consistent with <italic>in&#x20;vitro</italic> CD data than an ensemble containing only the <italic>trans</italic> isomer. In future studies of proline-rich disordered sequences like ArkA, it is important to include <italic>cis</italic> proline conformational states in the unbound ensemble and consider what role these conformations have on binding to partner proteins and biological function.</p>
</sec>
<sec sec-type="materials|methods" id="s2">
<title>Materials and Methods</title>
<sec id="s2-1">
<title>Structures and Simulations Overview</title>
<p>GaMD simulations (<xref ref-type="bibr" rid="B22">Miao et&#x20;al., 2015</xref>) were conducted on three different peptide systems: a KPTP tetrapeptide with the Amber ff14SB force field (default barrier), KPTP with a modified Amber ff14SB force field that has a lowered potential barrier between the <italic>cis</italic> and <italic>trans</italic> conformers of the peptide bond in the dihedral angle potential energy term (lowered barrier), and a 12-residue segment of ArkA with the lowered barrier (<xref ref-type="fig" rid="sch1">Scheme 1</xref>). The residue numbering for ArkA is based on the standard system (<xref ref-type="bibr" rid="B15">Lim et&#x20;al., 1994</xref>). The ArkA peptide simulations were run with a fully extended peptide starting structure, or with a starting structure from the NMR structure of ArkA bound to the Abp1p SH3 domain (<xref ref-type="bibr" rid="B32">Stollar et&#x20;al., 2009</xref>) (PDB: 2RPN). Both starting structures were capped with an acetyl group at the N-terminus and amino group at the C-terminus to neutralize terminal charges. The KPTP simulations were conducted before the ArkA simulations to determine which potential energy function would be used for the peptide bond dihedral in the ArkA simulations to increase the likelihood of capturing proline isomerization. Overall simulation lengths and relevant properties are listed in <xref ref-type="table" rid="T1">Table&#x20;1</xref>.</p>
<fig id="sch1">
<label>SCHEME 1</label>
<caption>
<p>ArkA peptide sequence and numbering.</p>
</caption>
<graphic xlink:href="fmolb-08-734169-g008.tif"/>
</fig>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>Summary of simulations run. Barrier height denotes the parameter <italic>V</italic>
<sub>
<italic>2</italic>
</sub> used in the peptide bond dihedral function that determines the potential energy barrier between the <italic>cis</italic> and <italic>trans</italic> states.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Peptide system</th>
<th align="center">Starting structure</th>
<th align="left">Number of independent simulations</th>
<th align="left">Simulation time (&#x3bc;s)</th>
<th align="center">Barrier height <italic>V</italic>
<sub>2</sub> (kcal/mol)</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">KPTP</td>
<td align="left">Extended</td>
<td align="char" char=".">4</td>
<td align="char" char=".">0.5</td>
<td align="left">20 (default)</td>
</tr>
<tr>
<td align="left">KPTP</td>
<td align="left">Extended</td>
<td align="char" char=".">4</td>
<td align="char" char=".">0.5</td>
<td align="left">15 (lowered)</td>
</tr>
<tr>
<td align="left">ArkA</td>
<td align="left">Extended</td>
<td align="char" char=".">4</td>
<td align="char" char=".">1</td>
<td align="left">15 (lowered)</td>
</tr>
<tr>
<td align="left">ArkA</td>
<td align="left">NMR</td>
<td align="char" char=".">4</td>
<td align="char" char=".">1</td>
<td align="left">15 (lowered)</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s2-2">
<title>Gaussian Accelerated MD Simulations</title>
<p>Simulations were conducted on a Linux cluster using Amber 18 pmemd CUDA for GPU functionality (<xref ref-type="bibr" rid="B4">Case et&#x20;al., 2018</xref>). GaMD was used for enhanced sampling and to overcome the barrier between the <italic>cis</italic> and <italic>trans</italic> states of the <italic>&#x3c9;</italic> angle (<xref ref-type="bibr" rid="B22">Miao et&#x20;al., 2015</xref>). The Amber <italic>LEaP</italic> module was used to create topology and initial coordinate files for all simulated starting structures. The simulations were run with the Amber ff14SB force field (<xref ref-type="bibr" rid="B18">Maier et&#x20;al., 2015</xref>). For the lowered barrier simulations, a modification was made to the <italic>V</italic>
<sub>
<italic>2</italic>
</sub> constant in the peptide bond dihedral potential function. This adjustment was made similarly to the way that Doshi and Hamelberg increased the <italic>V</italic>
<sub>
<italic>2</italic>
</sub> constant to increase the barrier between the <italic>cis</italic> and <italic>trans</italic> states of proline (<xref ref-type="bibr" rid="B6">Doshi and Hamelberg, 2009</xref>). The <italic>V</italic>
<sub>
<italic>2</italic>
</sub> constant in these force fields is solely responsible for determining the energy barrier height for peptide bond (<italic>&#x3c9;</italic> angle) isomerization and does not affect other dihedral angles or any other conformational sampling coordinate. <italic>V</italic>
<sub>
<italic>2</italic>
</sub> was decreased from the default of 20&#xa0;kcal/mol to 15&#xa0;kcal/mol. Simulations were solvated with the TIP3P-FB water model (<xref ref-type="bibr" rid="B34">Wang et&#x20;al., 2014</xref>), such that the edge of the box was at least 15&#xa0;&#xc5; from any atom in the peptide. Chloride ions were added to neutralize the system (1 for KPTP and 3 for ArkA).</p>
<p>Systems were minimized in two rounds with the first one placing a harmonic potential restraint on the peptide of 10&#xa0;kcal/mol/&#xc5;<sup>2</sup>. Systems were then heated from 100 to 300&#xa0;K using a 2 fs integration time step for 20,000 steps (40&#xa0;ps). Equilibration was also performed in two rounds: the first with a 10&#xa0;kcal/mol/&#xc5;<sup>2</sup> restraint and pressure and density convergence to 1.013&#xa0;bar with the Berendsen barostat for 50&#xa0;ps; the second was run without the restraint for 500 ps and the barostat was switched to a Monte Carlo barostat to maintain isobaric conditions. A final equilibration was performed for 52 ns to calculate the GaMD potentials that dictate how the potential energy landscape will be boosted for enhanced conformational sampling during the production stage. Boosts were calculated and applied with respect to the total potential of the system (igamd &#x3d; 3) with an energy threshold boost biasing value equal to the upper bound of the potential energy (iE &#x3d; 2). A 9&#xa0;kcal/mol upper limit was set for the total potential acceleration standard deviation for the purposes of reweighting the produced ensembles (<italic>&#x3c3;</italic>
<sub>0P</sub>). Productions were conducted under isobaric conditions with a 2 fs time step and a 0.1 ps frame output in order to ensure the ensembles could be appropriately reweighted once completed. Independent simulations were started with new random velocities. Bonds to hydrogen were constrained using the SHAKE algorithm during all simulations. The particle-mesh Ewald procedure was used to handle long-range electrostatic interactions with a non-bonded cutoff of 9&#xa0;&#xc5; for the direct space&#x20;sum.</p>
<p>The NMR structure GaMD equilibration step experienced a random jump in potential energy (<xref ref-type="sec" rid="s10">Supplementary Figure S1</xref>), slightly increasing the applied GaMD energy boost relative to the extended structure simulations (a known problem with the Amber 18 implementation of GaMD). Because of this, the extended structure production steps were rerun with the boost potentials calculated in the NMR GaMD equilibration to maintain consistency between the two sets of simulations and improve sampling of the peptide bond isomerization. The random boost was not substantial enough to create a physically irrelevant potential energy landscape, so any abnormally high energy structure sampled was adjusted for during the reweighting process.</p>
</sec>
<sec id="s2-3">
<title>Trajectory Analysis</title>
<p>The <italic>cpptraj</italic> module in AmberTools 18 was used for all trajectory analysis (<xref ref-type="bibr" rid="B4">Case et&#x20;al., 2018</xref>). In-house <italic>Python</italic> scripts were used for all data processing and plots. Visual Molecular Dynamics was used to create structural figures (<xref ref-type="bibr" rid="B10">Humphrey et&#x20;al., 1996</xref>).</p>
<p>Simulations were reweighted to create a potential of mean force (PMF) for the proline <italic>&#x3c9;</italic> dihedral angles using both the cumulant expansion on the second order and Maclaurin series expansion to the 10th order provided by the PyReweighting tool kit (<xref ref-type="bibr" rid="B23">Miao et&#x20;al., 2014</xref>). The cumulant expansion on the second order more accurately reproduced the free energy barriers to isomerization, but the Maclaurin expansion method allowed us to reweight independent of the reaction coordinate. Therefore, all other equilibrium data presented from the GaMD simulations have been reweighted using the Maclaurin expansion series to the 10th order. The PyReweighting tool kit was also used to calculate the anharmonicity along the proline <italic>&#x3c9;</italic> dihedral for the GaMD simulations. An average <italic>&#x3c9;</italic> angle anharmonicity value &#x3c; 10<sup>&#x2212;3</sup>&#xa0;kcal/mol was used as the cutoff to determine whether adequate sampling had been reached (<xref ref-type="bibr" rid="B22">Miao et&#x20;al., 2015</xref>).</p>
<p>Histograms for the <italic>&#x3c9;</italic> angles in the KPTP peptide simulations were created to visualize the distributions of angles sampled during the simulations to define ranges for <italic>cis</italic> and <italic>trans</italic> values for the subsequent ArkA analysis. Two <italic>&#x3c9;</italic> angle states close to the canonical 0&#xb0; and 180&#xb0; <italic>cis</italic> and <italic>trans</italic> values were observed in the KPTP system (<xref ref-type="sec" rid="s10">Supplementary Figure S2</xref>). Based on the distribution of the <italic>&#x3c9;</italic> angles in KPTP, the ranges <italic>&#x3c9;</italic> &#x3d; -90&#xb0; to &#x2b;50&#xb0; and <italic>&#x3c9;</italic> &#x3d; &#x2b;100&#xb0; to &#x2b;240&#xb0; were used to define the <italic>cis</italic> and <italic>trans</italic> states, respectively. Percentages of time each proline spent either in <italic>cis</italic> or <italic>trans</italic> as well as the flipping frequencies between them were calculated for KPTP and ArkA simulations.</p>
<p>Proline correlation analysis was performed to assess whether the isomerization of multiple prolines to either <italic>cis</italic> or <italic>trans</italic> depends on one another. All possible combinations of prolines were analyzed for correlation in the KPTP and ArkA simulations. Expected percentages of time each group of prolines sampled <italic>cis</italic> together were calculated by assuming such events were independent of each other and thus equal to the product of their individual <italic>cis</italic> sampling times. These expected values were then compared to the actual percentages of time the proline combinations sampled <italic>cis</italic> simultaneously within the simulations.</p>
<p>Bend, turn, and 3<sub>10</sub> helix secondary structures for ArkA were calculated using the DSSP algorithm in <italic>cpptraj</italic>. PPI and PPII helix structures were calculated based on canonical values of <italic>&#x3d5;</italic>, <italic>&#x3c8;</italic>, and <italic>&#x3c9;</italic> angle ranges as shown in <xref ref-type="table" rid="T2">Table&#x20;2</xref> (<xref ref-type="bibr" rid="B19">Mansiaux et&#x20;al., 2011</xref>; <xref ref-type="bibr" rid="B29">Radhakrishnan et&#x20;al., 2012</xref>). Running averages of bend, turn, 3<sub>10</sub> helix, and end-to-end distance were used to test structure convergence between extended and NMR starting structure systems of ArkA. Every 10th frame (every 1&#xa0;ps) of the ArkA trajectories was used for testing convergence. Proline ring pucker states were calculated using the <italic>&#x3c7;</italic><sub>2</sub> dihedral angle (C<italic>&#x3b1;</italic>&#x2014;C<italic>&#x3b2;</italic>&#x2014;C<italic>&#x3b3;</italic>&#x2014;C<italic>&#x3b4;</italic>) and defined by canonical ranges shown in <xref ref-type="table" rid="T3">Table&#x20;3</xref> (<xref ref-type="bibr" rid="B29">Radhakrishnan et&#x20;al., 2012</xref>).</p>
<table-wrap id="T2" position="float">
<label>TABLE 2</label>
<caption>
<p>Canonical backbone dihedral angles that define PPI and PPII. Note that the <italic>&#x3c9;</italic> angle range for PPII could also be written as &#x2b;100&#xb0; to &#x2b;240&#xb0;.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Dihedral angle</th>
<th align="center">PPI (&#xb0;)</th>
<th align="center">PPII (&#xb0;)</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">
<italic>&#x3d5;</italic>
</td>
<td align="center">&#x2212;104.6 to &#x2212;46.6</td>
<td align="center">&#x2212;104.6 to &#x2212;46.6</td>
</tr>
<tr>
<td align="left">
<italic>&#x3c8;</italic>
</td>
<td align="center">&#x2b;131 to &#x2b;189</td>
<td align="center">&#x2b;107.9 to &#x2b;165.9</td>
</tr>
<tr>
<td align="left">
<italic>&#x3c9;</italic>
</td>
<td align="center">&#x2212;90 to &#x2b;50</td>
<td align="center">&#x2b;100 to &#x2b;180 or &#x2212;180 to &#x2212;120</td>
</tr>
</tbody>
</table>
</table-wrap>
<table-wrap id="T3" position="float">
<label>TABLE 3</label>
<caption>
<p>Proline side chain dihedral angle <italic>&#x3c7;</italic>
<sub>2</sub> defines the canonical proline ring pucker states.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Pucker type</th>
<th align="center">
<italic>&#x3c7;</italic>
<sub>2</sub> (&#xb0;)</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">Up pucker</td>
<td align="center">&#x3e; &#x2b;10</td>
</tr>
<tr>
<td align="left">Down pucker</td>
<td align="center">&#x3c; &#x2212;10</td>
</tr>
<tr>
<td align="left">Planar</td>
<td align="center">&#x2212;10 to &#x2b;10</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s2-4">
<title>Replica Exchange Data</title>
<p>Replica Exchange MD (REMD) ArkA simulations previously run as a starting point for binding simulations were used to construct a comparison structural ensemble that contains only <italic>trans</italic> proline conformations (<xref ref-type="bibr" rid="B7">Gerlach et&#x20;al., 2020</xref>). REMD simulation lengths and structures are listed in <xref ref-type="table" rid="T4">Table&#x20;4</xref>. Structures were saved every 25&#xa0;ps and the first 50&#xa0;ns of each simulation was not used for analysis. Data from NMR and extended structure starting systems were combined and analyzed together.</p>
<table-wrap id="T4" position="float">
<label>TABLE 4</label>
<caption>
<p>12-residue ArkA REMD simulation data adapted from <xref ref-type="bibr" rid="B7">Gerlach et&#x20;al.,&#x20;2020</xref>.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Peptide system</th>
<th align="center">Starting structure</th>
<th align="center">Number of independent simulations</th>
<th align="center">Total simulation time amongst independent simulations (&#x3bc;s)</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">ArkA</td>
<td align="left">NMR</td>
<td align="char" char=".">3</td>
<td align="center">&#x223c;0.5</td>
</tr>
<tr>
<td align="left">ArkA</td>
<td align="left">Extended</td>
<td align="char" char=".">3</td>
<td align="center">&#x223c;0.5</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s2-5">
<title>Computational Calculation of Circular Dichroism Spectra</title>
<p>CD can be used to differentiate between different secondary structure compositions of a protein. The SESCA CD program (v0.95) was used to calculate theoretical CD spectra for the ArkA simulation ensembles (<xref ref-type="bibr" rid="B26">Nagy et&#x20;al., 2019</xref>). The DS5-4SC1 basis-set was chosen to calculate the theoretical spectra for ArkA, primarily because of its Turn 1 secondary structure definition containing a PPII component, which allows the calculated spectrum to differentiate between ensembles with different amounts of PPII content.</p>
<p>An ensemble containing every 100th frame (10&#xa0;ps) of trajectory data from the ArkA GaMD simulations was used for the SESCA CD spectra calculations. The REMD simulations were also analyzed with SESCA. Percentages of basis-set secondary structure compositions were also calculated. A scaling factor for the <italic>in&#x20;vitro</italic> ArkA CD spectrum was calculated from the REMD ensemble to correct for measurement cell concentration errors that may have influenced the measured CD spectrum. However, to compare all theoretical spectra to the experimental one, and to avoid modifying the experimentally measured values, the inverse of this scaling factor (0.510) was applied to all theoretically calculated CD spectra.</p>
<p>PPII content percentages were also extracted from the CD data using an approximation that calculates the composition of PPII in a peptide,<disp-formula id="e1">
<mml:math id="m1">
<mml:mrow>
<mml:mtext>%&#xa0;PPII</mml:mtext>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b8;</mml:mi>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>6100</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>13700</mml:mn>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
<mml:mo>&#x002C;</mml:mo>
</mml:math>
<label>[1]</label>
</disp-formula>based on the CD ellipticity (10<sup>3</sup> deg cm<sup>2</sup>/dmol) at the 228&#xa0;nm wavelength (<inline-formula id="inf1">
<mml:math id="m2">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b8;</mml:mi>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>) (<xref ref-type="bibr" rid="B14">Kelly et&#x20;al., 2001</xref>). Average PPII percentages from the <italic>in&#x20;vitro</italic> and calculated CD spectra were compared to average PPII percentages calculated directly from the simulation trajectory&#x20;data.</p>
</sec>
<sec id="s2-6">
<title>Circular Dichroism Spectroscopy of ArkA Peptide</title>
<p>The 12-residue ArkA peptide (KPTPPPKPSHLK) was synthesized by Peptide 2.0 with acetylated N-terminus and amidated C-terminus and purified to &#x3e;95% purity. The peptide was resuspended and dialyzed against 5&#xa0;mM Sodium Phosphate buffer, pH 7.0. CD spectra were collected using a Chirascan&#x2122;-plus CD spectrometer with a 250&#xa0;&#xb5;l 70&#xa0;&#xb5;M sample at a pathlength of 1&#xa0;mm. Measurements were obtained at 2&#xa0;s/point for wavelengths between 180 and 260&#xa0;nm with a step size of 1&#xa0;nm. All spectra were blank corrected with buffer only (5&#xa0;mM Sodium Phosphate buffer, pH 7.0). Data were collected in millidegrees and were converted to degrees and mean residue ellipticity, <inline-formula id="inf2">
<mml:math id="m3">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mrow>
<mml:mo>[</mml:mo>
<mml:mi>&#x3b8;</mml:mi>
<mml:mo>]</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>R</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, according to<disp-formula id="e2">
<mml:math id="m4">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mrow>
<mml:mo>[</mml:mo>
<mml:mi>&#x3b8;</mml:mi>
<mml:mo>]</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>R</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>100</mml:mn>
<mml:mo>&#x2217;</mml:mo>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>y</mml:mi>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
<mml:mo>&#xa0;</mml:mo>
<mml:mi>d</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>g</mml:mi>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>&#xa0;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>R</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>&#xd7;</mml:mo>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>h</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>g</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>h</mml:mi>
<mml:mo>&#xa0;</mml:mo>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
<mml:mo>&#xa0;</mml:mo>
<mml:mi>c</mml:mi>
<mml:mi>m</mml:mi>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
<mml:mo>&#x002C;</mml:mo>
</mml:math>
<label>[2]</label>
</disp-formula>where the mean residue molar concentration, <inline-formula id="inf3">
<mml:math id="m5">
<mml:mrow>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>R</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, is calculated based on the molar concentration, <italic>c</italic>,<disp-formula id="e3">
<mml:math id="m6">
<mml:mrow>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>R</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mi>n</mml:mi>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>&#xd7;</mml:mo>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mi>c</mml:mi>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo>&#x002E;</mml:mo>
</mml:math>
<label>[3]</label>
</disp-formula>
</p>
<p>The number of residues, <italic>n</italic>, equals 12 in the case of&#x20;ArkA.</p>
</sec>
</sec>
<sec sec-type="results|discussion" id="s3">
<title>Results and Discussion</title>
<p>In order to understand the role that peptide bond isomerization has in a proline-rich IDP&#x2019;s structural ensemble, a method that can overcome the large energy barrier between the <italic>cis</italic> and <italic>trans</italic> states to adequately sample isomerization must be implemented. We chose to use the GaMD method because it does not require defining a reaction coordinate, can be implemented in explicit solvent, and allows for accurate reweighting of the resulting conformations to recover the unbiased ensemble (<xref ref-type="bibr" rid="B22">Miao et&#x20;al., 2015</xref>). To determine whether GaMD is capable of sampling proline isomerization in the SH3 binding peptide ArkA, the short tetramer peptide KPTP (consisting of the first four residues of ArkA) was simulated as a test before simulating the longer ArkA (12 residues).</p>
<sec id="s3-1">
<title>Proline Isomerization of Tetramer KPTP Using Gaussian Accelerated MD</title>
<p>Initially, GaMD simulations were run on KPTP using the Amber ff14SB force field (<xref ref-type="bibr" rid="B18">Maier et&#x20;al., 2015</xref>), which contains a peptide bond torsional potential term with a 20&#xa0;kcal/mol barrier between the <italic>cis</italic> and <italic>trans</italic> minima. This potential barrier has been shown to be too low to reproduce the experimentally observed free energy barrier between the <italic>cis</italic> and <italic>trans</italic> states (<xref ref-type="bibr" rid="B6">Doshi and Hamelberg, 2009</xref>). However, it is still a large barrier to overcome in MD simulations, and even using the GaMD method we only saw isomerization occur on a time scale of around once every 8&#xa0;ns (<xref ref-type="fig" rid="F1">Figure&#x20;1A</xref>). Therefore, to increase sampling and because we were much more interested in the conformations in the <italic>cis</italic> and <italic>trans</italic> energy minima than in the energies during the transition, we implemented a lowered potential barrier for peptide bond isomerization of 15&#xa0;kcal/mol (see GaMD simulations in methods). GaMD simulations of KPTP with the lowered barrier resulted in an increase of proline isomerization frequency of more than an order of magnitude (<xref ref-type="fig" rid="F1">Figure&#x20;1B</xref>). Both the default barrier and lowered barrier simulations showed that the proline peptide bond <italic>&#x3c9;</italic> dihedral angle can clearly occupy two distinct states that are similar to the canonical <italic>cis</italic> and <italic>trans</italic> values (<xref ref-type="sec" rid="s10">Supplementary Figure&#x20;S2</xref>).</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>Time course of the <italic>&#x3c9;</italic> angle for the first proline in KPTP from 100 ns of a single GaMD simulation using the default <bold>(A)</bold>, and lowered barrier force fields <bold>(B)</bold>. The <italic>&#x3c9;</italic> angle is around 180&#xb0; when in the <italic>trans</italic> state and around 0&#xb0; when in the <italic>cis</italic> state. The isomerization frequency represents the average over all independent simulations.</p>
</caption>
<graphic xlink:href="fmolb-08-734169-g001.tif"/>
</fig>
<p>Cumulant expansion on the second order was used to reweight the GaMD ensembles and construct a PMF along the <italic>&#x3c9;</italic> angle reaction coordinate (<xref ref-type="fig" rid="F2">Figure&#x20;2A</xref>). Very high free energy values at the barriers between the minima for the default barrier simulations indicate that sampling of the transition is insufficient to accurately assess the barrier height. However, with the increased isomerization in the lowered barrier simulations, the barrier height falls closer to where we expect, between 10 and 15&#xa0;kcal/mol, and the <italic>cis</italic> and <italic>trans</italic> minima for both sets of simulations remained relatively equivalent in energy. Average anharmonicity values (Anharm<sub>Def</sub> &#x3d; &#x2212;0.0013, Anharm<sub>Low</sub> &#x3d; &#x2212;0.0014) for both systems were below the cutoff of 10<sup>&#x2212;3</sup>, indicating that accurate reweighting of the boosted ensemble is possible using cumulant expansion on the second order (<xref ref-type="sec" rid="s10">Supplementary Figure S3</xref>). We also used Maclaurin series expansion to the 10th order for reweighting, which results in a smoother PMF plot, but tends to underestimate free energy barrier heights and can also result in slight shifts in the location of minima (<xref ref-type="fig" rid="F2">Figure&#x20;2B</xref>). While the free energy barriers in this PMF are underestimated, the depth and location of the <italic>cis</italic> and <italic>trans</italic> minima are similar to the PMF found using cumulant expansion on the second order. Maclaurin series expansion reweighting also has the advantage that the resulting weights are reaction coordinate independent and can be applied to all further analyses of the ensemble. Therefore, we used the Maclaurin series expansion reweighting for all subsequent analyses of both the KPTP and ArkA conformational ensembles. 
Although the isomerization rates were different for the default and lowered barrier simulations, the percent of the ensemble in <italic>cis</italic> for each proline (after reweighting) was similar in both sets of simulations (first proline: &#x223c;14% for both, second proline: &#x223c;10% for both) (<xref ref-type="sec" rid="s10">Supplementary Table&#x20;S1</xref>).</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>PMF free energy landscapes over the <italic>&#x3c9;</italic> dihedral angle for GaMD default and lowered barrier KPTP simulations for the first proline reweighted using <bold>(A)</bold> cumulant expansion on the second order and <bold>(B)</bold> Maclaurin series expansion.</p>
</caption>
<graphic xlink:href="fmolb-08-734169-g002.tif"/>
</fig>
</sec>
<sec id="s3-2">
<title>Proline Isomerization of the Proline-Rich Intrinsically Disordered Protein ArkA</title>
<p>To simulate proline isomerization for the 12-residue ArkA peptide, we chose to use the lowered potential energy barrier to represent the peptide bond dihedral angle since it allowed more frequent isomerization with GaMD and therefore improved sampling of the isomer states. Simulations were conducted using two different starting structures, an extended structure and an NMR structure taken from the AbpSH3 bound state (PDB: 2RPN) (<xref ref-type="bibr" rid="B32">Stollar et&#x20;al., 2009</xref>), to assess sampling by comparing the resulting ensembles. Both starting structures contained all peptide bonds in the <italic>trans</italic> state. Recently published work using the same extended and NMR starting structures, but using the REMD enhanced sampling method, did not capture any <italic>cis</italic> sampling (<xref ref-type="bibr" rid="B7">Gerlach et&#x20;al., 2020</xref>), which is consistent with other studies that show REMD is not an effective method for observing proline isomerization (<xref ref-type="bibr" rid="B37">Yedvabny et&#x20;al., 2014</xref>; <xref ref-type="bibr" rid="B27">Neale et&#x20;al., 2016</xref>; <xref ref-type="bibr" rid="B39">Zosel et&#x20;al., 2018</xref>).</p>
<p>In the GaMD simulations, all five ArkA prolines isomerized with a high frequency, sampling both the <italic>cis</italic> and <italic>trans</italic> states (<xref ref-type="sec" rid="s10">Supplementary Table S2</xref>). Turn and 3<sub>10</sub> helix secondary structure, as well as end-to-end distance, showed that the two sets of simulations started to converge over time, though additional simulation time would be needed for complete sampling (<xref ref-type="sec" rid="s10">Supplementary Figure S4</xref>). Because the extended and NMR starting structure simulations were not completely converged, the extended structure simulations have slightly higher <italic>cis</italic> content for each proline (<xref ref-type="fig" rid="F3">Figure&#x20;3</xref>). However, the <italic>cis</italic> percentage differences were small enough that we combined the data from all ArkA simulations into one ensemble for further analysis. The PMF and anharmonicity values for both GaMD extended and NMR simulations show sampling was adequate for accurate reweighting of the ensemble (<xref ref-type="sec" rid="s10">Supplementary Figure S5</xref>). These results show that using GaMD with a lowered peptide bond dihedral angle potential energy barrier allows for adequate sampling of the proline <italic>cis</italic> state for all prolines in ArkA on the nanosecond to microsecond time scale and that this method would likely work for other biologically relevant proline-rich peptides. In contrast to a previous Monte Carlo simulation of the 81-residue disordered transcription factor Ash1 in implicit solvent (<xref ref-type="bibr" rid="B20">Martin et&#x20;al., 2016</xref>), several of the ArkA prolines are in <italic>cis</italic> in more than 10% of the ensemble. The higher <italic>trans</italic> occupancy for the Ash1 prolines might be due to the larger size of the disordered region, differences in adjacent residues to the prolines, or differences in the simulation method.</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>Average ArkA proline <italic>cis</italic> state percentages in the GaMD simulations, for the NMR <bold>(left bar, blue)</bold> and extended <bold>(right bar, red)</bold> starting structures. Error bars represent the standard deviation over all independent simulations for the given starting structure.</p>
</caption>
<graphic xlink:href="fmolb-08-734169-g003.tif"/>
</fig>
<p>In order to visualize the ArkA conformational ensemble, we plotted the probability density based on the number of prolines in <italic>cis</italic> and the peptide end-to-end distance (<xref ref-type="fig" rid="F4">Figure&#x20;4</xref>). Using these reaction coordinates, the most populated ArkA conformational state has an end-to-end distance of &#x223c;24&#xa0;&#xc5; with no <italic>cis</italic> prolines (<xref ref-type="fig" rid="F4">Figure&#x20;4</xref>). However, the ensemble also contains many conformations with multiple prolines in the <italic>cis</italic> conformation simultaneously. In fact, with five proline residues that can sample both <italic>cis</italic> and <italic>trans</italic> isomers, ArkA only spends 36% (&#xb1;7%) of the GaMD ensemble with all peptide bonds in <italic>trans</italic>. There is also an inverse relationship between the number of prolines in <italic>cis</italic> and how extended the peptide structure is, since the <italic>cis</italic> conformation introduces a kink into the peptide chain, as has been previously observed for polyproline peptides (<xref ref-type="bibr" rid="B25">Moradi et&#x20;al., 2009</xref>; <xref ref-type="bibr" rid="B29">Radhakrishnan et&#x20;al., 2012</xref>). These kinks and slight end-to-end shortening can be observed in the example peptide snapshots (<xref ref-type="fig" rid="F4">Figure&#x20;4</xref>).</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>Joint distribution showing the probability of conformations based on end-to-end distance and number of proline residues in <italic>cis</italic> for the combined ensemble from the GaMD extended and NMR structure simulations of ArkA. Example peptide snapshots are oriented such that the N-terminus is pointed leftward. The backbone is depicted in cyan, <italic>trans</italic> prolines in blue and <italic>cis</italic> prolines in red. Approximate end-to-end distances for the representative snapshots are included below&#x20;each.</p>
</caption>
<graphic xlink:href="fmolb-08-734169-g004.tif"/>
</fig>
<p>The peptide bond dihedral angle of proline affects the overall structure of the ArkA peptide. In the ensemble generated using GaMD, ArkA samples more bend and PPI helix structure than in the previously published REMD ensemble (<xref ref-type="bibr" rid="B7">Gerlach et&#x20;al., 2020</xref>), while spending less time in PPII and 3<sub>10</sub> helix (<xref ref-type="fig" rid="F5">Figure&#x20;5</xref>). PPI helix was not observed in the REMD systems because it requires a <italic>cis &#x3c9;</italic> angle (<xref ref-type="fig" rid="F5">Figure&#x20;5D</xref>), but is a stable structure for polyproline sequences when all the prolines are in <italic>cis</italic> and has been observed in simulations of polyproline (<xref ref-type="bibr" rid="B25">Moradi et&#x20;al., 2009</xref>; <xref ref-type="bibr" rid="B35">Wedemeyer et&#x20;al., 2002</xref>). The increase in the bend character of ArkA in the GaMD simulations compared to the REMD ensemble is also likely due to the kinks in the peptide structure caused by the presence of <italic>cis</italic> prolines. Looking at the dihedral angles of the proline side chain, we also see that the down-pucker state of proline is more prevalent than the up-pucker state in the GaMD ensemble (<xref ref-type="sec" rid="s10">Supplementary Table S3</xref>), consistent with previous studies that show <italic>cis</italic> isomers favor this pucker state while <italic>trans</italic> isomers have no preference (<xref ref-type="bibr" rid="B29">Radhakrishnan et&#x20;al., 2012</xref>). 
The overall PPII helix content of 29&#x20;&#xb1; 4% is lower than would be expected based on other studies that have quantified PPII propensity based on sequence (<xref ref-type="bibr" rid="B11">Jha et&#x20;al., 2005</xref>; <xref ref-type="bibr" rid="B1">Elam et&#x20;al., 2013</xref>); however, these studies typically do not account for the fact that proline residues in disordered regions will not always be in PPII (<italic>trans</italic>) conformation since they will sample some <italic>cis</italic> conformation which cannot be PPII. While our results indicate that PPII content may be lower than previously thought, further experimental studies are needed to determine precise levels of PPII helix content.</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>ArkA secondary structure composition for <bold>(A)</bold> bend, <bold>(B)</bold> 3<sub>10</sub> helix, <bold>(C)</bold> PPII helix, and <bold>(D)</bold> PPI helix. The fraction of the ensemble in the structure is plotted by residue for both the GaMD ensemble (blue solid line), which contains <italic>cis</italic> proline conformations, and the REMD ensemble (red dashed line), which contains only <italic>trans</italic> prolines. The shaded errors represent the standard deviation over all independent simulations.</p>
</caption>
<graphic xlink:href="fmolb-08-734169-g005.tif"/>
</fig>
<p>Because ArkA contains multiple prolines that can isomerize, we also wanted to test whether there is any correlation between their isomeric states, such as two adjacent prolines being more likely to sample <italic>cis</italic> at the same time. To do this, we calculated the expected joint probabilities of each pair of prolines being in <italic>cis</italic> simultaneously assuming that every proline&#x2019;s isomeric state were independent of the other residues. These calculated probabilities were then compared to the observed simultaneous <italic>cis</italic> occurrences (<xref ref-type="fig" rid="F6">Figure&#x20;6</xref>), but no significant difference was found. We also looked for correlations among more than two prolines, but still no significant correlations were detected (<xref ref-type="sec" rid="s10">Supplementary Figure S7</xref>). This indicates that there is no strong cooperative effect based on proline ring stacking or other interactions that make it more favorable for consecutive prolines to adopt the same structure, and instead, the isomerization is purely stochastic. A similar result has been observed for polyproline sequences in Monte Carlo simulations (<xref ref-type="bibr" rid="B33">Vila et&#x20;al., 2004</xref>; <xref ref-type="bibr" rid="B29">Radhakrishnan et&#x20;al., 2012</xref>). It is possible that other proline-rich peptides might exhibit more correlations between the proline residues depending on the adjacent sequence. It has been shown in several studies that the identity of surrounding amino acids will affect the relative stability of the proline <italic>cis</italic> and <italic>trans</italic> isomers (<xref ref-type="bibr" rid="B17">MacArthur and Thornton, 1991</xref>; <xref ref-type="bibr" rid="B9">Hamelberg et&#x20;al., 2005</xref>; <xref ref-type="bibr" rid="B3">Brown and Zondlo, 2012</xref>). Additionally, more distal sequence effects could be present in the context of the full-length protein. 
Regardless, the combinatorial effect of uncorrelated proline isomerization suggests that biologically relevant proline-rich disordered sequences will, in general, contain a significant number of structures with <italic>cis</italic> isomers based on the combined independent probabilities of <italic>cis</italic> for each proline residue.</p>
<fig id="F6" position="float">
<label>FIGURE 6</label>
<caption>
<p>Pair-wise proline <italic>cis</italic> state correlations in the KPTP (solid bars) and in the ArkA GaMD (open bars) systems. Error bars represent the standard deviation between independent simulations. Expected percentages (on the left in blue) are calculated based on the individual <italic>cis</italic> percentages for each proline assuming all proline pairs sample <italic>cis</italic> independently of each&#x20;other.</p>
</caption>
<graphic xlink:href="fmolb-08-734169-g006.tif"/>
</fig>
</sec>
<sec id="s3-3">
<title>Effect of <italic>Cis</italic> Conformations on ArkA Circular Dichroism Spectrum</title>
<p>CD was used to compare the ArkA secondary structures sampled in our simulated ensembles to those observed <italic>in&#x20;vitro</italic>. Although CD can be used to determine the amount of &#x3b1;-helix and &#x3b2;-sheet in a protein, it is difficult to calculate the exact amount of specific secondary structure content in an IDP ensemble based on CD data because there are multiple possible structural ensembles that would be consistent with the same CD spectrum, especially when more secondary structure types besides &#x3b1;-helix and &#x3b2;-sheet are considered (<xref ref-type="bibr" rid="B12">Johnson, 1999</xref>; <xref ref-type="bibr" rid="B31">Sreerama et&#x20;al., 2000</xref>; <xref ref-type="bibr" rid="B8">Greenfield, 2007</xref>; <xref ref-type="bibr" rid="B16">Louis-Jeune et&#x20;al., 2012</xref>; <xref ref-type="bibr" rid="B24">Micsonai et&#x20;al., 2015</xref>). Therefore, we chose to calculate theoretical CD spectra based on our simulated ensembles using the SESCA program (<xref ref-type="bibr" rid="B26">Nagy et&#x20;al., 2019</xref>). These theoretical spectra can then be compared to <italic>in&#x20;vitro</italic> data. The SESCA DS5-4SC1 basis set was chosen for the calculation because it includes a secondary structure category, Turn 1, that includes PPII helix turns, and therefore can be used to differentiate between structures with and without <italic>cis</italic> prolines.</p>
<p>We calculated theoretical CD spectra using both our ArkA GaMD ensemble and the all-<italic>trans</italic> REMD ensemble and compared both to <italic>in&#x20;vitro</italic> data on the peptide (<xref ref-type="fig" rid="F7">Figure&#x20;7</xref>). The calculated spectrum from the GaMD ensemble shows a closer resemblance to the <italic>in&#x20;vitro</italic> data in both the wavelength and ellipticity intensities compared to the REMD ensemble spectrum, suggesting that <italic>cis</italic> proline isomers are present in the <italic>in&#x20;vitro</italic> ArkA conformational ensemble. To understand why the spectrum calculated from our GaMD ensemble is closer than the REMD ensemble to the <italic>in&#x20;vitro</italic> CD spectrum, we examined the secondary structure percentages that were used to generate the calculated spectra. For both the GaMD and REMD ensembles, &#x201c;Turn 1&#x201d; is the most common secondary structure (<xref ref-type="table" rid="T5">Table&#x20;5</xref>). The average Turn 1 character for the REMD ensemble is much larger than the GaMD ensemble since it contains more PPII helix with all of the prolines in <italic>trans</italic>. Instead, the GaMD ensemble contains more &#x201c;Turn 2&#x201d; and &#x201c;Other&#x201d; structure, which may include PPI helix and other conformations with one or more prolines in&#x20;<italic>cis</italic>.</p>
<fig id="F7" position="float">
<label>FIGURE 7</label>
<caption>
<p>ArkA <italic>in&#x20;vitro</italic> CD spectrum (black solid line with data points as circles) compared to the calculated theoretical CD spectrum based on the ArkA GaMD (solid blue line) and REMD (dashed red line) ensembles. The theoretical spectra were calculated using SESCA (<xref ref-type="bibr" rid="B26">Nagy et&#x20;al., 2019</xref>) and scaled by a factor of 0.51. The shaded region represents the standard deviation over all independent simulations for each system (the REMD shaded region is the same width as the line).</p>
</caption>
<graphic xlink:href="fmolb-08-734169-g007.tif"/>
</fig>
<table-wrap id="T5" position="float">
<label>TABLE 5</label>
<caption>
<p>Average CD secondary structure compositions for GaMD and REMD systems as defined by the DS5-4SC1 basis set.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">DS5-4SC1 basis set structures</th>
<th align="center">GaMD simulation percentage</th>
<th align="center">REMD simulation percentage</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">Turn 1</td>
<td align="char" char="plusmn">45&#x20;&#xb1; 7</td>
<td align="char" char="plusmn">82.7&#x20;&#xb1; 0.3</td>
</tr>
<tr>
<td align="left">Turn 2</td>
<td align="char" char="plusmn">26.5&#x20;&#xb1; 0.8</td>
<td align="char" char="plusmn">12.4&#x20;&#xb1; 0.3</td>
</tr>
<tr>
<td align="left">Other</td>
<td align="char" char="plusmn">25&#x20;&#xb1; 6</td>
<td align="char" char="plusmn">4.7&#x20;&#xb1; 0.2</td>
</tr>
<tr>
<td align="left">Helix 1</td>
<td align="char" char="plusmn">1.3&#x20;&#xb1; 0.5</td>
<td align="char" char="plusmn">0.10&#x20;&#xb1; 0.08</td>
</tr>
<tr>
<td align="left">Beta 1</td>
<td align="char" char="plusmn">1.1&#x20;&#xb1; 0.3</td>
<td align="char" char="plusmn">0&#x20;&#xb1; 0</td>
</tr>
<tr>
<td align="left">Helix 2</td>
<td align="char" char="plusmn">0.60&#x20;&#xb1; 0.04</td>
<td align="char" char="plusmn">0.12&#x20;&#xb1; 0.07</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Ellipticity intensity minima for the GaMD and REMD ensembles are shifted to a lower wavelength (196&#xa0;nm) compared to the <italic>in&#x20;vitro</italic> ensemble (202&#xa0;nm). This can be explained by examining the basis set spectra used to construct the theoretical ensembles. According to the basis spectra for DS5-4SC1 in Supplementary Figure S12 of <xref ref-type="bibr" rid="B26">Nagy et&#x20;al. (2019)</xref>, the Turn 1 spectrum has a minimum at 195&#xa0;nm and maximum at 218&#xa0;nm, while the Turn 2 spectrum has a minimum at 202&#xa0;nm and no maximum at 218&#xa0;nm. The calculated GaMD ensemble spectrum has a smaller minimum at 196&#xa0;nm and smaller maximum at 218&#xa0;nm than the REMD ensemble spectrum because it has less Turn 1 content; however, it may still contain more Turn 1 content than the <italic>in&#x20;vitro</italic> spectrum. Since Turn 1 includes PPII conformations, this indicates that our calculated spectra for the GaMD ensemble may still contain more PPII conformations and fewer <italic>cis</italic> proline conformations than the true <italic>in&#x20;vitro</italic> ArkA ensemble. Additionally, the SESCA basis sets were all developed for proteins with peptide bonds in <italic>trans</italic>, and a basis set that explicitly takes into account <italic>cis</italic> prolines and PPI structure might be able to more clearly identify differences between the <italic>in&#x20;vitro</italic> and MD ensembles.</p>
<p>We also used another method, developed by Kelly and coworkers (<xref ref-type="bibr" rid="B14">Kelly et&#x20;al., 2001</xref>), to directly calculate the amount of PPII structure in the ensemble from the CD data. This empirical method, based on data from model proteins with varying degrees of PPII character, uses the CD ellipticity value at a wavelength of 228&#xa0;nm to calculate the PPII percentage of the sample (<xref ref-type="disp-formula" rid="e1">Eq. 1</xref>) (<xref ref-type="bibr" rid="B14">Kelly et&#x20;al., 2001</xref>). This equation was used to obtain the PPII percentage from the <italic>in&#x20;vitro</italic> CD spectrum and the calculated CD spectra from our GaMD and REMD ensembles. For the MD ensembles, this number was compared to the actual PPII percentage calculated directly from the simulated structures. Based on the ellipticity at 228&#xa0;nm, the PPII percentage from the GaMD ensemble is relatively consistent with the PPII composition calculated from both the GaMD and the <italic>in&#x20;vitro</italic> CD spectra, while the REMD ensemble PPII composition is considerably higher (<xref ref-type="table" rid="T6">Table&#x20;6</xref>). Although Kelly and coworkers caution that their empirically derived equation may not be accurate for all peptides, ArkA resembles the type of sequences (short and proline-rich) that Kelly et&#x20;al. used in their derivation, and the location of the maximum was consistent at 228&#xa0;nm in the ArkA CD spectrum. We therefore apply this analysis to our ArkA data, but with the knowledge that it does not constitute a precise quantitative result. Overall, our CD analysis suggests that an ArkA ensemble that contains a significant population of <italic>cis</italic> proline conformations (as calculated by GaMD) more closely resembles the true <italic>in&#x20;vitro</italic> ArkA ensemble.</p>
<table-wrap id="T6" position="float">
<label>TABLE 6</label>
<caption>
<p>Percent averages for PPII in the GaMD and REMD systems obtained from averaging per-residue PPII ratios over the whole peptide (<xref ref-type="fig" rid="F5">Figure&#x20;5</xref>), and calculated from CD data using <xref ref-type="disp-formula" rid="e1">Eq. 1</xref>. The average ellipticity values at 228&#xa0;nm used to calculate PPII% are also included.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Simulation systems</th>
<th align="center">PPII % from structure data</th>
<th align="center">PPII % from CD ellipticity at 228&#xa0;nm</th>
<th align="center">Ellipticity at 228&#xa0;nm (10<sup>3</sup> deg cm<sup>2</sup>dmol<sup>&#x2212;1</sup>)</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">GaMD</td>
<td align="char" char="plusmn">29&#x20;&#xb1; 4</td>
<td align="center">38.7&#x20;&#xb1; 0.6</td>
<td align="center">&#x223c; &#x2212;0.796</td>
</tr>
<tr>
<td align="left">REMD</td>
<td align="char" char="plusmn">55.9&#x20;&#xb1; 0.3</td>
<td align="center">42.1&#x20;&#xb1; 0.1</td>
<td align="center">&#x223c; &#x2212;0.334</td>
</tr>
<tr>
<td align="left">
<italic>In vitro</italic> Ensemble</td>
<td align="center">&#x2212;</td>
<td align="center">35.5</td>
<td align="center">&#x2212;1.30</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
</sec>
<sec sec-type="conclusion" id="s4">
<title>Conclusion</title>
<p>Using the SH3 domain binding IDP ArkA as a model peptide, we have performed the first, to our knowledge, MD simulations of a biologically relevant proline-rich sequence that exhibits isomerization of multiple proline residues. We are able to overcome the large free energy barrier between the <italic>cis</italic> and <italic>trans</italic> states of proline by using the GaMD advanced sampling technique as well as a perturbed peptide bond dihedral angle potential with a lower energy barrier. Using this method, we observe many isomerization events on the microsecond time scale of our simulations allowing us to thoroughly sample all possible combinations of proline residues in <italic>cis</italic> and <italic>trans</italic>. For the ArkA system, the computational time to perform a 1-&#x3bc;s GaMD simulation on a GPU was a few days, allowing for good sampling of the ArkA ensemble on a practical time scale. This method could likely be implemented for other disordered proline-rich sequences in the future to achieve a more realistic picture of their conformational ensembles that includes the <italic>cis</italic> conformation of the peptide bond dihedral.</p>
<p>After using GaMD to sample the ArkA conformational ensemble, we find that this ensemble contains a large number of structures with one or more of the five ArkA proline residues in <italic>cis</italic>, and only about a third of the ensemble consists of all-<italic>trans</italic> conformations. The inclusion of these <italic>cis</italic> proline conformations also provides a better match between our MD ensemble and <italic>in&#x20;vitro</italic> CD data collected on ArkA. The <italic>cis</italic> peptide bond angle introduces a kink into the ArkA structure, breaking up the PPII helix in the N-terminal region of ArkA and resulting in a more compact conformation. In the final domain-peptide complex, ArkA adopts a PPII helix with an extended conformation, and therefore the <italic>cis</italic> proline conformations in the unbound ArkA ensemble would be unlikely to bind to the SH3 partner with high affinity. Consequently, the kinetics of binding could be affected by the fraction of the unbound peptide ensemble that contains prolines in <italic>cis</italic>. In a previous study, we characterized the binding pathway of ArkA to the Abp1p SH3 domain with all ArkA prolines trapped in the <italic>trans</italic> conformation and identified an intermediate encounter complex ensemble that forms quickly and is stabilized by nonspecific electrostatic and hydrophobic interactions (<xref ref-type="bibr" rid="B7">Gerlach et&#x20;al., 2020</xref>). While a <italic>cis</italic> conformation of ArkA would not be able to form the canonical fully bound state, it is possible that a <italic>cis</italic>-containing peptide might still interact with the SH3 domain nonspecifically in an encounter complex. It is unknown whether an encounter complex formed with a <italic>cis</italic> conformation of ArkA might be less stable or shorter lived than the <italic>trans</italic> encounter complex, and this is something we hope to investigate in future work. 
Because many IDP binding sequences, including most SH3 binding regions, contain multiple proline residues, the interactions involving <italic>cis</italic> conformations need to be considered in order to fully understand the complete binding pathway for these proteins.</p>
</sec>
</body>
<back>
<sec id="s5">
<title>Data Availability Statement</title>
<p>The datasets presented in this study can be found in online repositories. The names of the repository/repositories and accession number(s) can be found below: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.5281/zenodo.5044581">https://doi.org/10.5281/zenodo.5044581</ext-link>.</p>
</sec>
<sec id="s6">
<title>Author Contributions</title>
<p>KB and RS designed the simulations. JA, KH, AC, and RE ran and analyzed the simulations. KB and JA designed the analysis. JA produced the figures. VJ-M performed and analyzed the CD experiments. ES designed the CD experiments. KB and JA wrote the manuscript. All authors revised and approved the final version of the manuscript.</p>
</sec>
<sec id="s7">
<title>Funding</title>
<p>This work was supported by National Science Foundation award MCB-1852677 to KB. Resources were provided in part by the MERCURY consortium (<ext-link ext-link-type="uri" xlink:href="http://mercuryconsortium.org/">http://mercuryconsortium.org/</ext-link>) under National Science Foundation grant CHE-2018427.</p>
</sec>
<sec sec-type="COI-statement" id="s8">
<title>Conflict of Interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s9">
<title>Publisher&#x2019;s Note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors, and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ack>
<p>The authors thank Michael Donnelly for computational support. KB thanks the MERCURY Consortium for mentoring support. The authors thank Alex Holehouse and Paul Nerenberg for helpful discussions.</p>
</ack>
<sec id="s10">
<title>Supplementary Material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fmolb.2021.734169/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fmolb.2021.734169/full&#x23;supplementary-material</ext-link>
</p>
<supplementary-material xlink:href="DataSheet1.PDF" id="SM1" mimetype="application/PDF" xmlns:xlink="http://www.w3.org/1999/xlink"/>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Austin Elam</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Schrank</surname>
<given-names>T. P.</given-names>
</name>
<name>
<surname>Campagnolo</surname>
<given-names>A. J.</given-names>
</name>
<name>
<surname>Hilser</surname>
<given-names>V. J.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Evolutionary Conservation of the Polyproline II Conformation Surrounding Intrinsically Disordered Phosphorylation Sites</article-title>. <source>Protein Sci.</source> <volume>22</volume>, <fpage>405</fpage>&#x2013;<lpage>417</lpage>. <pub-id pub-id-type="doi">10.1002/pro.2217</pub-id> </citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Best</surname>
<given-names>R. B.</given-names>
</name>
<name>
<surname>Merchant</surname>
<given-names>K. A.</given-names>
</name>
<name>
<surname>Gopich</surname>
<given-names>I. V.</given-names>
</name>
<name>
<surname>Schuler</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Bax</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Eaton</surname>
<given-names>W. A.</given-names>
</name>
</person-group> (<year>2007</year>). <article-title>Effect of Flexibility and Cis Residues in Single-Molecule FRET Studies of Polyproline</article-title>. <source>Proc. Natl. Acad. Sci.</source> <volume>104</volume>, <fpage>18964</fpage>&#x2013;<lpage>18969</lpage>. <pub-id pub-id-type="doi">10.1073/pnas.0709567104</pub-id> </citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Brown</surname>
<given-names>A. M.</given-names>
</name>
<name>
<surname>Zondlo</surname>
<given-names>N. J.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>A Propensity Scale for Type II Polyproline Helices (PPII): Aromatic Amino Acids in Proline-Rich Sequences Strongly Disfavor PPII Due to Proline-Aromatic Interactions</article-title>. <source>Biochemistry</source> <volume>51</volume>, <fpage>5041</fpage>&#x2013;<lpage>5051</lpage>. <pub-id pub-id-type="doi">10.1021/bi3002924</pub-id> </citation>
</ref>
<ref id="B4">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Case</surname>
<given-names>D. A.</given-names>
</name>
<name>
<surname>Ben-Shalom</surname>
<given-names>I. Y.</given-names>
</name>
<name>
<surname>Brozell</surname>
<given-names>S. R.</given-names>
</name>
<name>
<surname>Cerutti</surname>
<given-names>D. S.</given-names>
</name>
<name>
<surname>Cheatham</surname>
<given-names>T. E.</given-names>
</name>
<name>
<surname>Cruzeiro</surname>
<given-names>V. W. D.</given-names>
</name>
<etal/>
</person-group> (<year>2018</year>). <source>AMBER 18</source>. </citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Doose</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Neuweiler</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Barsch</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Sauer</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2007</year>). <article-title>Probing Polyproline Structure and Dynamics by Photoinduced Electron Transfer Provides Evidence for Deviations from a Regular Polyproline Type II Helix</article-title>. <source>Proc. Natl. Acad. Sci.</source> <volume>104</volume>, <fpage>17400</fpage>&#x2013;<lpage>17405</lpage>. <pub-id pub-id-type="doi">10.1073/pnas.0705605104</pub-id> </citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Doshi</surname>
<given-names>U.</given-names>
</name>
<name>
<surname>Hamelberg</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>Reoptimization of the AMBER Force Field Parameters for Peptide Bond (Omega) Torsions Using Accelerated Molecular Dynamics</article-title>. <source>J.&#x20;Phys. Chem. B.</source> <volume>113</volume>, <fpage>16590</fpage>&#x2013;<lpage>16595</lpage>. <pub-id pub-id-type="doi">10.1021/jp907388m</pub-id> </citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gerlach</surname>
<given-names>G. J.</given-names>
</name>
<name>
<surname>Carrock</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Stix</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Stollar</surname>
<given-names>E. J.</given-names>
</name>
<name>
<surname>Ball</surname>
<given-names>K. A.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>A Disordered Encounter Complex Is Central to the Yeast Abp1p SH3 Domain Binding Pathway</article-title>. <source>PLoS Comput. Biol.</source> <volume>16</volume>, <fpage>e1007815</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pcbi.1007815</pub-id> </citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Greenfield</surname>
<given-names>N. J.</given-names>
</name>
</person-group> (<year>2007</year>). <article-title>Analysis of the Kinetics of Folding of Proteins and Peptides Using Circular Dichroism</article-title>. <source>Nat. Protoc.</source> <volume>1</volume>, <fpage>2891</fpage>&#x2013;<lpage>2899</lpage>. <pub-id pub-id-type="doi">10.1038/nprot.2006.244</pub-id> </citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hamelberg</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Shen</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>McCammon</surname>
<given-names>J.&#x20;A.</given-names>
</name>
</person-group> (<year>2005</year>). <article-title>Phosphorylation Effects on Cis/trans Isomerization and the Backbone Conformation of Serine&#x2013;Proline Motifs: Accelerated Molecular Dynamics Analysis</article-title>. <source>J.&#x20;Am. Chem. Soc.</source> <volume>127</volume>, <fpage>1969</fpage>&#x2013;<lpage>1974</lpage>. <pub-id pub-id-type="doi">10.1021/ja0446707</pub-id> </citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Humphrey</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Dalke</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Schulten</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>1996</year>). <article-title>VMD: Visual Molecular Dynamics</article-title>. <source>J.&#x20;Mol. Graph.</source> <volume>14</volume> (<issue>1</issue>), <fpage>33</fpage>&#x2013;<lpage>38</lpage>. <pub-id pub-id-type="doi">10.1016/0263-7855(96)00018-5</pub-id> </citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jha</surname>
<given-names>A. K.</given-names>
</name>
<name>
<surname>Colubri</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Zaman</surname>
<given-names>M. H.</given-names>
</name>
<name>
<surname>Koide</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Sosnick</surname>
<given-names>T. R.</given-names>
</name>
<name>
<surname>Freed</surname>
<given-names>K. F.</given-names>
</name>
</person-group> (<year>2005</year>). <article-title>Helix, Sheet, and Polyproline II Frequencies and Strong Nearest Neighbor Effects in a Restricted Coil Library</article-title>. <source>Biochemistry</source> <volume>44</volume>, <fpage>9691</fpage>&#x2013;<lpage>9702</lpage>. <pub-id pub-id-type="doi">10.1021/bi0474822</pub-id> </citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Johnson</surname>
<given-names>W. C.</given-names>
</name>
</person-group> (<year>1999</year>). <article-title>Analyzing Protein Circular Dichroism Spectra for Accurate Secondary Structures</article-title>. <source>Proteins</source> <volume>35</volume>, <fpage>307</fpage>&#x2013;<lpage>312</lpage>. <pub-id pub-id-type="doi">10.1002/(sici)1097-0134(19990515)35:3&#x3c;307:aid-prot4&#x3e;3.0.co;2-3</pub-id> </citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kaneko</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>S. S.-C.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>The SH3 Domain&#x2014;a Family of Versatile Peptide- and Protein-Recognition Module</article-title>. <source>Front. Biosci.</source> <volume>13</volume>, <fpage>4938</fpage>&#x2013;<lpage>4952</lpage>. <pub-id pub-id-type="doi">10.2741/3053</pub-id> </citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kelly</surname>
<given-names>M. A.</given-names>
</name>
<name>
<surname>Chellgren</surname>
<given-names>B. W.</given-names>
</name>
<name>
<surname>Rucker</surname>
<given-names>A. L.</given-names>
</name>
<name>
<surname>Troutman</surname>
<given-names>J.&#x20;M.</given-names>
</name>
<name>
<surname>Fried</surname>
<given-names>M. G.</given-names>
</name>
<name>
<surname>Miller</surname>
<given-names>A.-F.</given-names>
</name>
<etal/>
</person-group> (<year>2001</year>). <article-title>Host&#x2013;Guest Study of Left-Handed Polyproline II Helix Formation</article-title>. <source>Biochemistry</source> <volume>40</volume>, <fpage>14376</fpage>&#x2013;<lpage>14383</lpage>. <pub-id pub-id-type="doi">10.1021/bi011043a</pub-id> </citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lim</surname>
<given-names>W. A.</given-names>
</name>
<name>
<surname>Richards</surname>
<given-names>F. M.</given-names>
</name>
<name>
<surname>Fox</surname>
<given-names>R. O.</given-names>
</name>
</person-group> (<year>1994</year>). <article-title>Structural Determinants of Peptide-Binding Orientation and of Sequence Specificity in SH3 Domains</article-title>. <source>Nature</source> <volume>372</volume>, <fpage>375</fpage>&#x2013;<lpage>379</lpage>. <pub-id pub-id-type="doi">10.1038/372375a0</pub-id> </citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Louis-Jeune</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Andrade-Navarro</surname>
<given-names>M. A.</given-names>
</name>
<name>
<surname>Perez-Iratxeta</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Prediction of Protein Secondary Structure from Circular Dichroism Using Theoretically Derived Spectra</article-title>. <source>Proteins</source> <volume>80</volume>, <fpage>374</fpage>&#x2013;<lpage>381</lpage>. <pub-id pub-id-type="doi">10.1002/prot.23188</pub-id> </citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>MacArthur</surname>
<given-names>M. W.</given-names>
</name>
<name>
<surname>Thornton</surname>
<given-names>J.&#x20;M.</given-names>
</name>
</person-group> (<year>1991</year>). <article-title>Influence of Proline Residues on Protein Conformation</article-title>. <source>J.&#x20;Mol. Biol.</source> <volume>218</volume>, <fpage>397</fpage>&#x2013;<lpage>412</lpage>. <pub-id pub-id-type="doi">10.1016/0022-2836(91)90721-H</pub-id> </citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Maier</surname>
<given-names>J.&#x20;A.</given-names>
</name>
<name>
<surname>Martinez</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Kasavajhala</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Wickstrom</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Hauser</surname>
<given-names>K. E.</given-names>
</name>
<name>
<surname>Simmerling</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>ff14SB: Improving the Accuracy of Protein Side Chain and Backbone Parameters from ff99SB</article-title>. <source>J.&#x20;Chem. Theor. Comput.</source> <volume>11</volume>, <fpage>3696</fpage>&#x2013;<lpage>3713</lpage>. <pub-id pub-id-type="doi">10.1021/acs.jctc.5b00255</pub-id> </citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mansiaux</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Joseph</surname>
<given-names>A. P.</given-names>
</name>
<name>
<surname>Gelly</surname>
<given-names>J.-C.</given-names>
</name>
<name>
<surname>de Brevern</surname>
<given-names>A. G.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>Assignment of PolyProline II Conformation and Analysis of Sequence - Structure Relationship</article-title>. <source>PLoS ONE</source> <volume>6</volume>, <fpage>e18401</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pone.0018401</pub-id> </citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Martin</surname>
<given-names>E. W.</given-names>
</name>
<name>
<surname>Holehouse</surname>
<given-names>A. S.</given-names>
</name>
<name>
<surname>Grace</surname>
<given-names>C. R.</given-names>
</name>
<name>
<surname>Hughes</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Pappu</surname>
<given-names>R. V.</given-names>
</name>
<name>
<surname>Mittag</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Sequence Determinants of the Conformational Properties of an Intrinsically Disordered Protein Prior to and upon Multisite Phosphorylation</article-title>. <source>J.&#x20;Am. Chem. Soc.</source> <volume>138</volume>, <fpage>15323</fpage>&#x2013;<lpage>15335</lpage>. <pub-id pub-id-type="doi">10.1021/jacs.6b10272</pub-id> </citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Masiero</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Nelly</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Marianne</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Christophe</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Florian</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Ronan</surname>
<given-names>C.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>The Impact of Proline Isomerization on Antigen Binding and the Analytical Profile of a Trispecific Anti-HIV Antibody</article-title>. <source>mAbs</source> <volume>12</volume>, <fpage>1698128</fpage>. <pub-id pub-id-type="doi">10.1080/19420862.2019.1698128</pub-id> </citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Miao</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Feher</surname>
<given-names>V. A.</given-names>
</name>
<name>
<surname>McCammon</surname>
<given-names>J.&#x20;A.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Gaussian Accelerated Molecular Dynamics: Unconstrained Enhanced Sampling and Free Energy Calculation</article-title>. <source>J.&#x20;Chem. Theor. Comput.</source> <volume>11</volume>, <fpage>3584</fpage>&#x2013;<lpage>3595</lpage>. <pub-id pub-id-type="doi">10.1021/acs.jctc.5b00436</pub-id> </citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Miao</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Sinko</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Pierce</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Bucher</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Walker</surname>
<given-names>R. C.</given-names>
</name>
<name>
<surname>McCammon</surname>
<given-names>J.&#x20;A.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Improved Reweighting of Accelerated Molecular Dynamics Simulations for Free Energy Calculation</article-title>. <source>J.&#x20;Chem. Theor. Comput.</source> <volume>10</volume>, <fpage>2677</fpage>&#x2013;<lpage>2689</lpage>. <pub-id pub-id-type="doi">10.1021/ct500090q</pub-id> </citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Micsonai</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Wien</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Kernya</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Lee</surname>
<given-names>Y.-H.</given-names>
</name>
<name>
<surname>Goto</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>R&#xe9;fr&#xe9;giers</surname>
<given-names>M.</given-names>
</name>
<etal/>
</person-group> (<year>2015</year>). <article-title>Accurate Secondary Structure Prediction and Fold Recognition for Circular Dichroism Spectroscopy</article-title>. <source>Proc. Natl. Acad. Sci. USA</source> <volume>112</volume>, <fpage>E3095</fpage>&#x2013;<lpage>E3103</lpage>. <pub-id pub-id-type="doi">10.1073/pnas.1500851112</pub-id> </citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Moradi</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Babin</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Roland</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Darden</surname>
<given-names>T. A.</given-names>
</name>
<name>
<surname>Sagui</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>Conformations and Free Energy Landscapes of Polyproline Peptides</article-title>. <source>Proc. Natl. Acad. Sci.</source> <volume>106</volume>, <fpage>20746</fpage>&#x2013;<lpage>20751</lpage>. <pub-id pub-id-type="doi">10.1073/pnas.0906500106</pub-id> </citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Nagy</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Igaev</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Jones</surname>
<given-names>N. C.</given-names>
</name>
<name>
<surname>Hoffmann</surname>
<given-names>S. V.</given-names>
</name>
<name>
<surname>Grubm&#xfc;ller</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>SESCA: Predicting Circular Dichroism Spectra from Protein Molecular Structures</article-title>. <source>J.&#x20;Chem. Theor. Comput.</source> <volume>15</volume>, <fpage>5087</fpage>&#x2013;<lpage>5102</lpage>. <pub-id pub-id-type="doi">10.1021/acs.jctc.9b00203</pub-id> </citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Neale</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Pom&#xe8;s</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Garc&#xed;a</surname>
<given-names>A. E.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Peptide Bond Isomerization in High-Temperature Simulations</article-title>. <source>J.&#x20;Chem. Theor. Comput.</source> <volume>12</volume>, <fpage>1989</fpage>&#x2013;<lpage>1999</lpage>. <pub-id pub-id-type="doi">10.1021/acs.jctc.5b01022</pub-id> </citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Nelson</surname>
<given-names>C. J.</given-names>
</name>
<name>
<surname>Santos-Rosa</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Kouzarides</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2006</year>). <article-title>Proline Isomerization of Histone H3 Regulates Lysine Methylation and Gene Expression</article-title>. <source>Cell</source> <volume>126</volume>, <fpage>905</fpage>&#x2013;<lpage>916</lpage>. <pub-id pub-id-type="doi">10.1016/j.cell.2006.07.026</pub-id> </citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Radhakrishnan</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Vitalis</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Mao</surname>
<given-names>A. H.</given-names>
</name>
<name>
<surname>Steffen</surname>
<given-names>A. T.</given-names>
</name>
<name>
<surname>Pappu</surname>
<given-names>R. V.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Improved Atomistic Monte Carlo Simulations Demonstrate that Poly-L-Proline Adopts Heterogeneous Ensembles of Conformations of Semi-rigid Segments Interrupted by Kinks</article-title>. <source>J.&#x20;Phys. Chem. B</source> <volume>116</volume>, <fpage>6862</fpage>&#x2013;<lpage>6871</lpage>. <pub-id pub-id-type="doi">10.1021/jp212637r</pub-id> </citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rognoni</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>M&#xf6;st</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>&#x17d;old&#xe1;k</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Rief</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Force-dependent Isomerization Kinetics of a Highly Conserved Proline Switch Modulates the Mechanosensing Region of Filamin</article-title>. <source>Proc. Natl. Acad. Sci.</source> <volume>111</volume>, <fpage>5568</fpage>&#x2013;<lpage>5573</lpage>. <pub-id pub-id-type="doi">10.1073/pnas.1319448111</pub-id> </citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sreerama</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Venyaminov</surname>
<given-names>S. Y.</given-names>
</name>
<name>
<surname>Woody</surname>
<given-names>R. W.</given-names>
</name>
</person-group> (<year>2000</year>). <article-title>Estimation of Protein Secondary Structure from Circular Dichroism Spectra: Inclusion of Denatured Proteins with Native Proteins in the Analysis</article-title>. <source>Anal. Biochem.</source> <volume>287</volume>, <fpage>243</fpage>&#x2013;<lpage>251</lpage>. <pub-id pub-id-type="doi">10.1006/abio.2000.4879</pub-id> </citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Stollar</surname>
<given-names>E. J.</given-names>
</name>
<name>
<surname>Garcia</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Chong</surname>
<given-names>P. A.</given-names>
</name>
<name>
<surname>Rath</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Forman-Kay</surname>
<given-names>J.&#x20;D.</given-names>
</name>
<etal/>
</person-group> (<year>2009</year>). <article-title>Structural, Functional, and Bioinformatic Studies Demonstrate the Crucial Role of an Extended Peptide Binding Site for the SH3 Domain of Yeast Abp1p</article-title>. <source>J.&#x20;Biol. Chem.</source> <volume>284</volume>, <fpage>26918</fpage>&#x2013;<lpage>26927</lpage>. <pub-id pub-id-type="doi">10.1074/jbc.M109.028431</pub-id> </citation>
</ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Vila</surname>
<given-names>J.&#x20;A.</given-names>
</name>
<name>
<surname>Baldoni</surname>
<given-names>H. A.</given-names>
</name>
<name>
<surname>Ripoll</surname>
<given-names>D. R.</given-names>
</name>
<name>
<surname>Ghosh</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Scheraga</surname>
<given-names>H. A.</given-names>
</name>
</person-group> (<year>2004</year>). <article-title>Polyproline II Helix Conformation in a Proline-Rich Environment: A Theoretical Study</article-title>. <source>Biophysical J.</source> <volume>86</volume>, <fpage>731</fpage>&#x2013;<lpage>742</lpage>. <pub-id pub-id-type="doi">10.1016/S0006-3495(04)74151-X</pub-id> </citation>
</ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>L.-P.</given-names>
</name>
<name>
<surname>Martinez</surname>
<given-names>T. J.</given-names>
</name>
<name>
<surname>Pande</surname>
<given-names>V. S.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Building Force Fields: An Automatic, Systematic, and Reproducible Approach</article-title>. <source>J.&#x20;Phys. Chem. Lett.</source> <volume>5</volume>, <fpage>1885</fpage>&#x2013;<lpage>1891</lpage>. <pub-id pub-id-type="doi">10.1021/jz500737m</pub-id> </citation>
</ref>
<ref id="B35">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wedemeyer</surname>
<given-names>W. J.</given-names>
</name>
<name>
<surname>Welker</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Scheraga</surname>
<given-names>H. A.</given-names>
</name>
</person-group> (<year>2002</year>). <article-title>Proline Cis&#x2013;Trans Isomerization and Protein Folding</article-title>. <source>Biochemistry</source> <volume>41</volume>, <fpage>14637</fpage>&#x2013;<lpage>14644</lpage>. <pub-id pub-id-type="doi">10.1021/bi020574b</pub-id> </citation>
</ref>
<ref id="B36">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Williamson</surname>
<given-names>M. P.</given-names>
</name>
</person-group> (<year>1994</year>). <article-title>The Structure and Function of Proline-Rich Regions in Proteins</article-title>. <source>Biochem. J.</source> <volume>297</volume> (<issue>Pt 2</issue>), <fpage>249</fpage>&#x2013;<lpage>260</lpage>. <pub-id pub-id-type="doi">10.1042/bj2970249</pub-id> </citation>
</ref>
<ref id="B37">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yedvabny</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Nerenberg</surname>
<given-names>P. S.</given-names>
</name>
<name>
<surname>So</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Head-Gordon</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Disordered Structural Ensembles of Vasopressin and Oxytocin and Their Mutants</article-title>. <source>J.&#x20;Phys. Chem. B</source> <volume>119</volume>, <fpage>896</fpage>&#x2013;<lpage>905</lpage>. <pub-id pub-id-type="doi">10.1021/jp505902m</pub-id> </citation>
</ref>
<ref id="B38">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhan</surname>
<given-names>Y. A.</given-names>
</name>
<name>
<surname>Ytreberg</surname>
<given-names>F. M.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>The Cis Conformation of Proline Leads to Weaker Binding of a p53 Peptide to MDM2 Compared to Trans</article-title>. <source>Arch. Biochem. Biophys.</source> <volume>575</volume>, <fpage>22</fpage>&#x2013;<lpage>29</lpage>. <pub-id pub-id-type="doi">10.1016/j.abb.2015.03.021</pub-id> </citation>
</ref>
<ref id="B39">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zosel</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Mercadante</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Nettels</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Schuler</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>A Proline Switch Explains Kinetic Heterogeneity in a Coupled Folding and Binding Reaction</article-title>. <source>Nat. Commun.</source> <volume>9</volume>. <pub-id pub-id-type="doi">10.1038/s41467-018-05725-0</pub-id> </citation>
</ref>
</ref-list>
</back>
</article>