<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.3" xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Mol. Biosci.</journal-id>
<journal-title>Frontiers in Molecular Biosciences</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Mol. Biosci.</abbrev-journal-title>
<issn pub-type="epub">2296-889X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1473609</article-id>
<article-id pub-id-type="doi">10.3389/fmolb.2024.1473609</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Molecular Biosciences</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Semantic segmentation-based detection algorithm for challenging cryo-electron microscopy RNP samples</article-title>
<alt-title alt-title-type="left-running-head">Vargas et al.</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fmolb.2024.1473609">10.3389/fmolb.2024.1473609</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Vargas</surname>
<given-names>J.</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2805751/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Modrego</surname>
<given-names>A.</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Canabal</surname>
<given-names>H.</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Martin-Benito</surname>
<given-names>J.</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>Departamento de &#xd3;ptica</institution>, <institution>Universidad Complutense de Madrid</institution>, <addr-line>Madrid</addr-line>, <country>Spain</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Department of Macromolecular Structure</institution>, <institution>National Centre for Biotechnology</institution>, <addr-line>Madrid</addr-line>, <country>Spain</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/565194/overview">Edward T. Eng</ext-link>, New York Structural Biology Center, United States</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2712735/overview">Yang Suo</ext-link>, Duke University, United States</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2282310/overview">Jose Miguel De La Rosa Trevin</ext-link>, St. Jude Children&#x2019;s Research Hospital, United States</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: J. Vargas, <email>jvargas@ucm.es</email>; J. Martin-Benito, <email>jmartinb@cnb.csic.es</email>
</corresp>
</author-notes>
<pub-date pub-type="epub">
<day>01</day>
<month>10</month>
<year>2024</year>
</pub-date>
<pub-date pub-type="collection">
<year>2024</year>
</pub-date>
<volume>11</volume>
<elocation-id>1473609</elocation-id>
<history>
<date date-type="received">
<day>31</day>
<month>07</month>
<year>2024</year>
</date>
<date date-type="accepted">
<day>17</day>
<month>09</month>
<year>2024</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2024 Vargas, Modrego, Canabal and Martin-Benito.</copyright-statement>
<copyright-year>2024</copyright-year>
<copyright-holder>Vargas, Modrego, Canabal and Martin-Benito</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>In this study, we present a novel and robust methodology for the automatic detection of influenza A virus ribonucleoproteins (RNPs) in single-particle cryo-electron microscopy (cryo-EM) images. Utilizing a U-net architecture&#x2014;a type of convolutional neural network renowned for its efficiency in biomedical image segmentation&#x2014;our approach is based on a pretraining phase with a dataset annotated through visual inspection. This dataset facilitates the precise identification of filamentous RNPs, including the localization of the filaments and their terminal coordinates. A key feature of our method is the application of semantic segmentation techniques, enabling the automated categorization of micrograph pixels into distinct classifications of particle and background. This deep learning strategy allows the robust detection of these intricate particles, a crucial step in achieving high-resolution reconstructions in cryo-EM studies. To encourage collaborative advancements in the field, we have made our routines, the pretrained U-net model, and the training dataset publicly accessible. The reproducibility and accessibility of these resources aim to facilitate further research and validation in the realm of cryo-EM image analysis.</p>
</abstract>
<kwd-group>
<kwd>cryo-electron microscopy</kwd>
<kwd>semantic segmentation</kwd>
<kwd>particle picking</kwd>
<kwd>influenza A virus</kwd>
<kwd>image processing</kwd>
</kwd-group>
<contract-num rid="cn001">TED2021-132748B-I00</contract-num>
<contract-sponsor id="cn001">Ministerio de Ciencia e Innovaci&#xf3;n<named-content content-type="fundref-id">10.13039/501100004837</named-content>
</contract-sponsor>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Structural Biology</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec id="s1">
<title>Highlights</title>
<p>
<list list-type="simple">
<list-item>
<p>&#x2022; Robust methodology for the automatic detection of challenging influenza A virus ribonucleoproteins.</p>
</list-item>
<list-item>
<p>&#x2022; Outperforms other state-of-the-art cryo-EM particle pickers with practically zero false positives in RNP localization.</p>
</list-item>
<list-item>
<p>&#x2022; Provides results with near-human accuracy in challenging particle selection tasks.</p>
</list-item>
<list-item>
<p>&#x2022; Once trained, it does not require prior 2D averages or particle data and eliminates a considerable manual picking workload.</p>
</list-item>
</list>
</p>
</sec>
<sec id="s2">
<title>1 Introduction</title>
<p>Cryogenic electron microscopy (cryo-EM) single particle analysis is a powerful technique for obtaining high-resolution three-dimensional (3D) reconstructions of macromolecular complexes in a near-native state (<xref ref-type="bibr" rid="B20">Merk et al., 2016</xref>; <xref ref-type="bibr" rid="B41">Zivanov et al., 2018</xref>; <xref ref-type="bibr" rid="B9">Danev et al., 2019</xref>). The structural insights obtained from cryo-EM provide a direct way to unravel the mechanisms of the biological reactions driven by these complexes. In the last decade, cryo-EM has undergone a revolution that has pushed it to reach atomic resolution in the determination of structures (<xref ref-type="bibr" rid="B22">Nakane et al., 2020</xref>; <xref ref-type="bibr" rid="B38">Yip et al., 2020</xref>). This milestone is based on two fundamental pillars: improvements in hardware, mainly in direct electron detectors, and the rapid development of image processing software (<xref ref-type="bibr" rid="B17">Kuhlbrandt, 2014</xref>). Now, deep learning algorithms are being integrated into cryo-EM image processing protocols to enhance the capabilities of this technique in structural biology, improving results and simplifying tasks for non-expert users.</p>
<p>High-resolution cryo-EM reconstructions depend on selecting numerous high-quality particles from the micrographs for subsequent image processing. While manual particle picking in micrographs is accurate, it is unfeasible for today&#x2019;s large datasets due to its time-consuming nature. Consequently, various automatic and semiautomatic methods have been developed. These can be categorized into two types: template-based methods, which rely on reference images for particle selection, and template-free methods that operate without prior information about the particles. Template-free particle picking methods, such as those using Gaussian-generated templates of user-defined size approximating particle dimensions, are noteworthy. Some examples include Relion&#x2019;s methods (<xref ref-type="bibr" rid="B27">Scheres, 2012</xref>), CryoSPARC template picker (<xref ref-type="bibr" rid="B25">Punjani et al., 2017</xref>), EMAN2 boxer auto (<xref ref-type="bibr" rid="B32">Tang et al., 2007</xref>) or DoG Picker (<xref ref-type="bibr" rid="B35">Voss et al., 2009</xref>). These methods facilitate particle selection with minimal prior knowledge and effort. However, they often lack precision in accurately locating particles and may select large amounts of false positives, leading to a preference for template-based methods in high-resolution cryo-EM projects. Template-based methods typically involve manually picking hundreds of particles to obtain 2D reference classes (<xref ref-type="bibr" rid="B32">Tang et al., 2007</xref>; <xref ref-type="bibr" rid="B28">Scheres, 2015</xref>; <xref ref-type="bibr" rid="B21">Moriya et al., 2017</xref>; <xref ref-type="bibr" rid="B25">Punjani et al., 2017</xref>; <xref ref-type="bibr" rid="B12">Grant et al., 2018</xref>), which are used as patterns for particle selection. 
Nowadays there is a growing shift towards machine learning/deep learning methods for particle picking, exemplified by tools like XMIPP (<xref ref-type="bibr" rid="B1">Abrishami et al., 2013</xref>), SPHIRE-crYOLO (<xref ref-type="bibr" rid="B37">Wagner et al., 2019</xref>), EMAN2 (<xref ref-type="bibr" rid="B4">Bell et al., 2018</xref>), Topaz (<xref ref-type="bibr" rid="B6">Bepler et al., 2019</xref>), APPLE picker (<xref ref-type="bibr" rid="B14">Heimowitz et al., 2018</xref>), WARP (<xref ref-type="bibr" rid="B33">Tegunov and Cramer, 2019</xref>) or CASSPER (<xref ref-type="bibr" rid="B11">George et al., 2021</xref>), among others. These newer methods start with an intensive training phase usually using diverse datasets. This foundational step is designed to train classifiers to recognize cryo-EM particles&#x2019; intrinsic features, aiming to enhance accuracy and versatility across different datasets.</p>
<p>The automatic or semiautomatic methods mentioned above have been widely used for boxing both globular macromolecules and mostly straight filament particles. For globular structures, the process involves locating and boxing particle projections to extract them as square subimages, with each containing a full centered macromolecule. Filamentous particles, despite their complex structure, are similarly processed, although the extracted subimages represent only portions of these line-like filaments. However, automatic detection of these particles poses additional challenges compared to globular macromolecules. This is due to their tendency to overlap and intersect, in some cases be curved, and have terminal ends that, from a pattern recognition perspective, differ significantly from the core areas of the filament. It is noteworthy that the study of this type of complexes is crucial as many biologically and medically important proteins are filamentous, making the development of effective automated detection techniques a key focus in structural biology. Prominent examples encompass cytoskeletal proteins such as microtubules and actin, pivotal for various cellular functionalities, including muscle contraction and intracellular cargo transport (<xref ref-type="bibr" rid="B24">Pospich and Raunser, 2018</xref>). Moreover, significant instances involve amyloid and tau fibrils, implicated in neurodegenerative pathologies, which have recently garnered heightened attention in structural investigations (<xref ref-type="bibr" rid="B10">Fitzpatrick et al., 2017</xref>; <xref ref-type="bibr" rid="B23">Pospich and Raunser, 2017</xref>; <xref ref-type="bibr" rid="B29">Scheres et al., 2023</xref>). Given the intrinsic difficulty in crystallizing filaments, cryo-EM emerges as the foremost methodology for elucidating their structural attributes.</p>
<p>In previous research efforts, distinct methodologies have been proposed with a primary focus on the identification of linear, filamentous particles (<xref ref-type="bibr" rid="B13">He and Scheres, 2017</xref>; <xref ref-type="bibr" rid="B15">Huber et al., 2018</xref>; <xref ref-type="bibr" rid="B36">Wagner et al., 2020</xref>; <xref ref-type="bibr" rid="B34">Thurber et al., 2021</xref>). These approaches leverage the typical inherent characteristics of fibrils, namely their approximate linearity and specific width ranges. To achieve this, various rectangular filters are employed to detect and/or trace filaments, or 2D templates are generated based on previously extracted particles. These methodologies have demonstrated efficacy in the identification and reconstruction of filamentous particles, including but not limited to type 4 filaments (T4F) (<xref ref-type="bibr" rid="B2">Anger et al., 2023</xref>), single protofilaments of infectious mouse RML prions (<xref ref-type="bibr" rid="B19">Manka et al., 2022</xref>), and structures of tau filaments (<xref ref-type="bibr" rid="B31">Shi et al., 2021</xref>). Nevertheless, it is important to note that not all filamentous particles exhibit the characteristic linear conformation. An exemplary case is found in the ribonucleoproteins (RNPs) of the influenza A virus, serving as the epitome of filamentous macromolecular complexes characterized by exceptional flexibility. These RNPs, due to their flexibility and structural diversity, challenge automatic filament pickers and high-resolution reconstruction efforts, with current resolution limitation at &#x223c;7 &#xc5; (<xref ref-type="bibr" rid="B7">Coloma et al., 2020</xref>). 
Note that in the 3D reconstructions of these complexes performed to date by our group, the selection of hundreds of thousands of images used was done manually (<xref ref-type="bibr" rid="B3">Arranz et al., 2012</xref>; <xref ref-type="bibr" rid="B7">Coloma et al., 2020</xref>) as particle picking programs do not seem to work correctly for this sample. The structural analysis of RNPs and the RNA polymerase in influenza A virus is crucial for understanding the virus infection and proliferation mechanisms. The RNPs of influenza A are complex structures that involve a double helical conformation, playing a key role in mRNA synthesis and genome replication (<xref ref-type="bibr" rid="B3">Arranz et al., 2012</xref>). The flexibility and structural heterogeneity of these RNPs, particularly in the context of transcription and replication processes, make them challenging to study but crucial for understanding how the virus replicates and propagates. This understanding can lead to the development of targeted therapies or interventions to manage or prevent influenza epidemics. Importantly, according to the Centers for Disease Control and Prevention in the United States, it is estimated that between 4,900 and 52,000 people died annually due to influenza, including influenza A, between 2010 and 2022 in the United States, with between 100,000 and 710,000 hospitalizations. Thus, understanding the structure and dynamics of RNPs and their RNA polymerase is crucial for comprehending how the influenza virus replicates and transcribes its genetic material, which is a key aspect of its infection mechanism. Nonetheless, the complex details of these processes and of the influenza virus infection mechanisms, including all their molecular intricacies, are still not fully understood (<xref ref-type="bibr" rid="B7">Coloma et al., 2020</xref>).</p>
<p>In our study, we utilize Semantic Segmentation, a method based on deep learning, to automate the detection of complex Ribonucleoproteins (RNPs) in cryo-electron microscopy images. This includes identifying the locations of RNP filaments and their terminal ends. Importantly, the RNA polymerase, which is crucial for understanding the virus replication, is situated at one end of the RNPs. Therefore, accurately determining its position is vital to determine its structure and thus fully understand the mechanism of virus proliferation. Consequently, our research focused on detecting RNP filaments and their ends. We use a supervised learning approach with a U-net architecture, trained on a small set of manually labeled micrographs. In this process, we label micrograph pixels as either &#x201c;RNP&#x201d; or &#x201c;Background&#x201d; for RNP filament detection, and &#x201c;RNP-E&#x201d; or &#x201c;Non RNP-E&#x201d; for RNP ends. Post-training, we have two deep learning models: one for segmenting entire Ribonucleoproteins (Full-RNP model) and another for identifying the ends of the RNPs (RNP-E model). These models enable us to determine the coordinates of both RNPs and their ends. Our results show that this method effectively identifies complex filamentous samples, including RNP filaments and their ends, outperforming other commonly used particle pickers and providing results with near-human accuracy.</p>
</sec>
<sec sec-type="methods" id="s3">
<title>2 Methods</title>
<p>In this work, we propose two methods to automatically obtain the coordinates of challenging RNP filaments and RNP ends. This section details the raw data used in training and evaluation, along with information on implementation, training specifics, and our processing pipeline.</p>
<sec id="s3-1">
<title>2.1 Biological samples preparation and raw data collection</title>
<p>Our neural networks have been trained and evaluated using as input cryo-EM micrographs of RNPs of the influenza A virus. In the following, we provide details about how this data was produced.</p>
<sec id="s3-1-1">
<title>2.1.1 Virus production and RNP purification</title>
<p>The RNPs of the influenza A virus used in this work were produced at the CNB-CSIC from native virions. Virions were purified from cultures of Madin-Darby canine kidney (MDCK) epithelial cells. Cells were infected with influenza A virus (A/WSN/1933(H1N1)) with a multiplicity of infection of 10<sup>&#x2212;3</sup>&#x2013;10<sup>&#x2212;5</sup> pfu/cell and incubated for 40 h at 37&#xb0;C. The supernatant was collected when the cytopathic effect reached 50%. The viruses were isolated using sucrose gradients and centrifugation and lysed to extract the RNPs as previously described (<xref ref-type="bibr" rid="B8">Coloma et al. 2009</xref>; <xref ref-type="bibr" rid="B7">Coloma et al. 2020</xref>).</p>
</sec>
<sec id="s3-1-2">
<title>2.1.2 Sample preparation for electron microscopy</title>
<p>After isolation, RNPs were applied to glow-discharged carbon electron microscopy grids and vitrified by plunge-freezing using liquid ethane. Vitrification is a stain-free, ultrahigh-speed freezing procedure at &#x2212;180&#xb0;C that preserves the native structure of the sample and allows the stabilization of unstable complexes or short-lived conformers (<xref ref-type="bibr" rid="B3">Arranz et al. 2012</xref>; <xref ref-type="bibr" rid="B7">Coloma et al. 2020</xref>).</p>
</sec>
<sec id="s3-1-3">
<title>2.1.3 Electron microscopy</title>
<p>The cryo-EM grids were imaged at the ESRF &#x2013; the European Synchrotron Radiation Facility &#x2013; in Grenoble, France, using a Titan Krios cryo-EM microscope equipped with a K3 direct detector, recording 29,493 movies of size 5,760 &#xd7; 4,092 px with a sampling rate of 0.84 &#xc5;/px. Each movie comprised 42 frames, with a defocus ranging from 0.7 to 3 microns. These movies were aligned using MotionCor2 software (<xref ref-type="bibr" rid="B40">Zheng et al. 2017</xref>) to correct the drift produced by the electron beam on the sample, and the contrast transfer function (CTF) was calculated using GCTF software (<xref ref-type="bibr" rid="B39">Zhang 2016</xref>).</p>
</sec>
<sec id="s3-1-4">
<title>2.1.4 Data preparation for the neural network</title>
<p>The aligned micrographs were contrast inverted and downsampled by a factor of 9 to produce images of size 640 &#xd7; 448 px with a sampling rate of 7.56 &#xc5;/px, and Fourier band-pass filtered from 3 to 30 pixels to improve the signal-to-noise ratio using ImageJ software (<xref ref-type="bibr" rid="B30">Schneider et al. 2012</xref>), although other software packages such as Relion, Xmipp or EMAN could be used as well. These images have enough contrast to visualize the RNP filaments and their ends easily. In the case of the complete RNP filament detection, we manually labeled 150 of these filtered micrographs, while for the RNP ends detection, we labeled 500 micrographs.</p>
</sec>
</sec>
<sec id="s3-2">
<title>2.2 Neural network architecture and training</title>
<p>For segmenting RNP filaments and RNP filament ends, we implemented a 2D U-net-like convolutional neural network, adapted from <xref ref-type="bibr" rid="B26">Ronneberger et al. (2015)</xref>. Our neural network architecture comprises three downsampling and three upsampling blocks, each connected with skip connections for feature preservation. Every block includes two convolutional layers activated by ReLU functions. The convolutional layers in these blocks use 128, 256, and 512 filters, respectively, each with a kernel size of 7 &#xd7; 7 to enhance noise robustness. Downsampling in our network is achieved through strided convolutions, while upsampling utilizes transposed convolutions. The final output layer classifies each pixel into two categories, employing a generalized Dice loss function to counteract class imbalance issues. Our network processes images of size 640 &#xd7; 448 pixels. An essential aspect of our method is the normalization of the input images, where we calculate and apply the 98th and 2nd percentiles for contrast adjustment, clipping values outside the 0&#x2013;1 range. We divided the labeled dataset into training and validation sets with an 80:20 split. To enhance model robustness, we included random translation transformations (within a [&#x2212;10, 10] pixel range) in the training phase. The model was trained using the Adam optimizer, with a batch size of 30 images over 50 epochs.</p>
</sec>
<sec id="s3-3">
<title>2.3 Semantic segmentation processing pipeline</title>
<p>After training the Full-RNP and RNP-E networks, they are applied to segment RNP filaments and their ends across all micrographs. These segmented images are then analyzed to pinpoint their coordinates. Our pipeline for this analysis is as follows:<list list-type="simple">
<list-item>
<p>1. Preprocessing: Each input micrograph is first Fourier band-pass filtered, downsampled, contrast inverted and normalized as previously described.</p>
</list-item>
<list-item>
<p>2. Model Application: The processed image is fed into either the Full-RNP or RNP-E model to produce a binary segmented image.</p>
</list-item>
<list-item>
<p>3. Post-processing: A closing operation is applied to the binary image to eliminate small gaps. Then, the distinct RNP regions are identified and assigned unique integer labels, based on the connectivity of pixels to their neighbors.</p>
</list-item>
<list-item>
<p>4. Region Filtering: Regions that are too small or too large are automatically excluded.</p>
</list-item>
</list>
</p>
<p>For processing RNP ends, the centroid coordinates of each labeled region are determined and adjusted by the previously applied downsampling factor. For the processing of RNP filaments, on the other hand, step 4 is followed by these additional steps:<list list-type="simple">
<list-item>
<p>5. Skeletonization: For the RNP filaments, the identified regions are skeletonized or thinned using the homotopic thinning algorithm (<xref ref-type="bibr" rid="B18">Lee et al., 1994</xref>), transforming the filament&#x2019;s thickness into a 1D curve that represents its skeletal structure.</p>
</list-item>
<list-item>
<p>6. Coordinate Calculation: The coordinates of each of these labelled skeletal structures are calculated and adjusted by the downsampling factor previously applied to accurately locate the RNP filament regions.</p>
</list-item>
</list>
</p>
<p>This pipeline ensures precise and efficient localization of both RNP filaments and their ends in the micrographs.</p>
</sec>
</sec>
<sec sec-type="results" id="s4">
<title>3 Results</title>
<p>In the following, we use the proposed approaches to localize RNP filaments and filament ends in our dataset. We show that our proposed approaches can provide near-human accuracy results and that typical automatic particle pickers do not provide good results in this challenging dataset.</p>
<sec id="s4-1">
<title>3.1 The proposed methods can provide near-human accuracy</title>
<p>In our study, we trained the Full-RNP model utilizing a dataset of 150 micrographs, each meticulously annotated by a human expert. The evaluation of the model&#x2019;s semantic segmentation predictions, when benchmarked against the ground truth data, provided the outcomes presented in <xref ref-type="table" rid="T1">Table 1</xref>. To further scrutinize the reliability of manual annotations, a subset of 50 micrographs from the training set underwent dual rounds of manual labeling by the same person, facilitating a comparative analysis of human annotation consistency using identical evaluation metrics. These results, aimed at appraising the precision of human annotations, are shown in <xref ref-type="table" rid="T1">Table 1</xref> (a) at row &#x201c;H-H&#x201d; and in <xref ref-type="table" rid="T1">Table 1</xref> (c) and (e).</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>Evaluation of the Full-RNP model&#x2019;s semantic segmentation predictions using conventional metrics for assessing semantic segmentation.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">(a)</th>
<th align="center">GAccuracy</th>
<th align="center">MAccuracy</th>
<th align="center">MeanIoU</th>
<th align="center">WeightedIoU</th>
<th align="center">BFSScore</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">Validation</td>
<td align="center">0.88</td>
<td align="center">0.84</td>
<td align="center">0.70</td>
<td align="center">0.81</td>
<td align="center">0.69</td>
</tr>
<tr>
<td align="center">Training</td>
<td align="center">0.88</td>
<td align="center">0.84</td>
<td align="center">0.72</td>
<td align="center">0.80</td>
<td align="center">0.71</td>
</tr>
<tr>
<td align="center">H-H</td>
<td align="center">0.86</td>
<td align="center">0.85</td>
<td align="center">0.68</td>
<td align="center">0.78</td>
<td align="center">0.67</td>
</tr>
</tbody>
</table>
<table>
<thead valign="top">
<tr>
<th align="center">(b)<break/>(Validation)</th>
<th align="center">Background</th>
<th align="center">RNP</th>
<th align="center">(c)<break/>(H-H)</th>
<th align="center">Background</th>
<th align="center">RNP</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">Background</td>
<td align="center">0.91</td>
<td align="center">0.089</td>
<td align="center">Background</td>
<td align="center">0.87</td>
<td align="center">0.13</td>
</tr>
<tr>
<td align="center">RNP</td>
<td align="center">0.24</td>
<td align="center">0.76</td>
<td align="center">RNP</td>
<td align="center">0.18</td>
<td align="center">0.82</td>
</tr>
</tbody>
</table>
<table>
<thead valign="top">
<tr>
<th align="center">(d)<break/>(Validation)</th>
<th align="center">IoU</th>
<th align="center">BFSScore</th>
<th align="center">(e)<break/>(H-H)</th>
<th align="center">IoU</th>
<th align="center">BFSScore</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">Background</td>
<td align="center">0.86</td>
<td align="center">0.76</td>
<td align="center">Background</td>
<td align="center">0.84</td>
<td align="center">0.74</td>
</tr>
<tr>
<td align="center">RNP</td>
<td align="center">0.58</td>
<td align="center">0.66</td>
<td align="center">RNP</td>
<td align="center">0.51</td>
<td align="center">0.60</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>(a) Global accuracy (GAccuracy), mean accuracy (MAccuracy), mean intersection over union (MeanIoU), weighted IoU, and the boundary F1 score (BFSScore) metrics calculated for images in the validation and training sets and a subset of 50 micrographs from the training set that underwent dual rounds of manual RNP labeling by the same person (row H-H). (b) Normalized confusion matrix calculated from the validation set. (c) Normalized confusion matrix calculated from the 50 micrographs that underwent dual rounds of manual RNP labeling by the same person. (d) Average per class IoU and BFSScore scores along all images in the validation set. (e) Average per class IoU and BFSScore scores along all images in the image set that underwent dual rounds of manual RNP labeling by the same person.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<p>
<xref ref-type="table" rid="T1">Table 1</xref> (a) employs a suite of conventional metrics for assessing semantic segmentation, encompassing global accuracy, mean accuracy, mean intersection over union (IoU), weighted IoU, and the boundary F1 (BF) score. Global accuracy (GAccuracy) quantifies the overall proportion of pixels correctly classified across all categories. This metric provides a rapid and computationally efficient assessment of the fraction of pixels correctly classified. Mean accuracy (MAccuracy) calculates the average rate of accurately identified pixels for each category across the dataset. The IoU metric, or Jaccard similarity coefficient, gauges the overlap between the predicted and actual pixels for each class, with MeanIoU averaging this score across all categories. WeightedIoU adjusts the IoU score for each class based on its pixel prevalence, mitigating the influence of minor class discrepancies on the collective metric. The BF score assesses the alignment of predicted class boundaries with their actual counterparts, with MeanBFScore averaging this alignment for each class across all images.</p>
<p>Additionally, <xref ref-type="table" rid="T1">Table 1</xref> sections (b) and (c) show normalized confusion matrices for the background and RNP categories, contrasting the network&#x2019;s segmentation predictions with the ground truth obtained from the validation set (<xref ref-type="table" rid="T1">Table 1</xref> (b)) and juxtaposing the two sets of manual annotations by the same expert (<xref ref-type="table" rid="T1">Table 1</xref> (c)). The consistency between the model&#x2019;s performance on both validation and training sets underscores its robust generalization capability. Finally, <xref ref-type="table" rid="T1">Table 1</xref> (d) and (e) show the intersection over union (IoU) for each class and the average of the BFScore for each class across all images in the validation set and for the subset of 50 micrographs labelled twice by the same person. As can be seen from these results, the congruence of the model&#x2019;s metrics with those derived from human annotations underscores the model&#x2019;s potential to achieve near human-level accuracy in RNP semantic segmentation tasks.</p>
<p>In <xref ref-type="fig" rid="F1">Figure 1A</xref>, we show examples of preprocessed micrographs (Fourier <italic>band pass filtered,</italic> downsampled, contrast inverted and normalized), obtained ground-truth labels (labelled) and the predictions made by the RNP-FULL network (predicted). As can be seen from this figure, there is a good visual agreement between the ground truth and the predictions made by the RNP-FULL model. In <xref ref-type="fig" rid="F1">Figure 1B</xref>, we show three examples of the processing pipeline followed by our proposed approach. First, the preprocessed micrograph is automatically labelled by the trained RNP-FULL model, and the segmented images are cleaned, labelled, and thinned. Then, the coordinates of each of these labelled skeletal structures are extracted.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>Visual examples showing the performance of the RNP-FULL network with micrographs of the validation set. <bold>(A)</bold> Examples of preprocessed micrographs (micrograph), obtained ground-truth labels by manual labelling (labelled) and predictions made by the RNP-FULL network (predicted). <bold>(B)</bold> In the first, second and third columns, we show respectively preprocessed micrographs, segmented, and filtered images provided by the RNP-FULL network, where the coordinates obtained from the thinning process are shown in red, and the location of the picked particles to be extracted.</p>
</caption>
<graphic xlink:href="fmolb-11-1473609-g001.tif"/>
</fig>
<p>For the training of the RNP-E model, we used 500 manually labelled preprocessed micrographs. In <xref ref-type="table" rid="T2">Table 2</xref>, we use the same metrics used in <xref ref-type="table" rid="T1">Table 1</xref> for assessing semantic segmentation done by the network. In <xref ref-type="table" rid="T2">Table 2</xref> (a) row H-H and <xref ref-type="table" rid="T2">Table 2</xref> (c) and (e), we show again as reference the results obtained by assessing the accuracy of human annotations for the manual labelling of full RNPs. According to these results, we can confirm again the good performance of the RNP-E model and the good similarity between the model metrics and the ones obtained by the same person when labelling the full RNPs, showing again the model&#x2019;s potential to achieve near human-level accuracy in RNP semantic segmentation tasks. In <xref ref-type="fig" rid="F2">Figure 2</xref>, we show examples of preprocessed micrographs (micrographs) and corresponding micrographs with superimposed labelled RNP ends segmented manually (labelled) and predicted by the RNP-E network (predicted). As can be seen from this figure, there is a good agreement between the ground-truth and the automatically segmented RNP ends. Finally, in <xref ref-type="fig" rid="F3">Figure 3</xref> we show the workflow followed by the proposed method to localize RNP ends. The preprocessed micrographs are segmented automatically by the RNP-E network. Then, these images are labelled with unique integer labels based on the connectivity of pixels to their neighbors, and regions that are too small or too big are filtered out. For the remaining regions, their centroids are computed, localizing or picking the ends of the RNPs.</p>
<table-wrap id="T2" position="float">
<label>TABLE 2</label>
<caption>
<p>Evaluation of the model&#x2019;s RNP-E semantic segmentation predictions using conventional metrics for assessing semantic segmentation.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">(a)</th>
<th align="center">GAccuracy</th>
<th align="center">MAccuracy</th>
<th align="center">MeanIoU</th>
<th align="center">WeightedIoU</th>
<th align="center">BFSScore</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">Validation</td>
<td align="center">0.93</td>
<td align="center">0.82</td>
<td align="center">0.69</td>
<td align="center">0.90</td>
<td align="center">0.73</td>
</tr>
<tr>
<td align="center">Training</td>
<td align="center">0.93</td>
<td align="center">0.83</td>
<td align="center">0.69</td>
<td align="center">0.89</td>
<td align="center">0.74</td>
</tr>
<tr>
<td align="center">H-H</td>
<td align="center">0.86</td>
<td align="center">0.85</td>
<td align="center">0.68</td>
<td align="center">0.78</td>
<td align="center">0.67</td>
</tr>
</tbody>
</table>
<table>
<thead valign="top">
<tr>
<th align="center">(b)<break/>(Validation)</th>
<th align="center">Background</th>
<th align="center">RNP</th>
<th align="center">(c)<break/>(H-H)</th>
<th align="center">Background</th>
<th align="center">RNP</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">Background</td>
<td align="center">0.95</td>
<td align="center">0.05</td>
<td align="center">Background</td>
<td align="center">0.87</td>
<td align="center">0.13</td>
</tr>
<tr>
<td align="center">RNP</td>
<td align="center">0.30</td>
<td align="center">0.70</td>
<td align="center">RNP</td>
<td align="center">0.18</td>
<td align="center">0.82</td>
</tr>
</tbody>
</table>
<table>
<thead valign="top">
<tr>
<th align="center">(d)<break/>(Validation)</th>
<th align="center">IoU</th>
<th align="center">BFSScore</th>
<th align="center">(e)<break/>(H-H)</th>
<th align="center">IoU</th>
<th align="center">BFSScore</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">Background</td>
<td align="center">0.93</td>
<td align="center">0.79</td>
<td align="center">Background</td>
<td align="center">0.84</td>
<td align="center">0.74</td>
</tr>
<tr>
<td align="center">RNP</td>
<td align="center">0.44</td>
<td align="center">0.68</td>
<td align="center">RNP</td>
<td align="center">0.51</td>
<td align="center">0.60</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>(a) Global accuracy (GAccuracy), mean accuracy (MAccuracy), mean intersection over union (MeanIoU), weighted IoU, and the boundary F1 score (BFSScore) metrics calculated for images in the validation and training sets and a subset of 50 micrographs from the training set that underwent dual rounds of manual RNP labeling by the same person (row H-H). (b) Normalized confusion matrix calculated from the validation set. (c) Normalized confusion matrix calculated from the 50 micrographs that underwent dual rounds of manual RNP labeling by the same person. (d) Average per class IoU and BFSScore scores along all images in the validation set. (e) Average per class IoU and BFSScore scores along all images in the image set that underwent dual rounds of manual RNP labeling by the same person.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>Visual examples showing the performance of the RNP-E network with micrographs of the validation set. Examples of preprocessed micrographs (micrographs), obtained ground-truth labels by manual labelling (labelled) superimposed in red over the corresponding micrograph and predictions made by the RNP-E network (predicted) superimposed in red over the corresponding micrograph.</p>
</caption>
<graphic xlink:href="fmolb-11-1473609-g002.tif"/>
</fig>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>Workflow followed by the proposed method to localize RNP ends. Preprocessed micrographs are segmented automatically by the RNP-E network. These images are labelled with unique integer labels and filtered removing too small and too big regions. For the remaining regions their centroids are computed localizing or picking the ends of the RNPs.</p>
</caption>
<graphic xlink:href="fmolb-11-1473609-g003.tif"/>
</fig>
</sec>
<sec id="s4-2">
<title>3.2 Enhanced performance compared to alternative particle pickers</title>
<p>To compare with our deep learning-based picker, we tested the performance of other particle selectors on the same RNP data set. We chose four of the most used pickers: the template matching picker Gautomatch (<ext-link ext-link-type="uri" xlink:href="https://sbgrid.org/software/titles/gautomatch">https://sbgrid.org/software/titles/gautomatch</ext-link>) and the picker included in the Relion software suite (<xref ref-type="bibr" rid="B16">Kimanius et al., 2021</xref>) (<ext-link ext-link-type="uri" xlink:href="https://github.com/3dem/relion">https://github.com/3dem/relion</ext-link>), Topaz (<xref ref-type="bibr" rid="B5">Bepler et al., 2020</xref>) (<ext-link ext-link-type="uri" xlink:href="https://github.com/tbepler/topaz">https://github.com/tbepler/topaz</ext-link>) and CrYOLO (<xref ref-type="bibr" rid="B37">Wagner et al., 2019</xref>) (<ext-link ext-link-type="uri" xlink:href="https://pypi.org/project/cryolo/">https://pypi.org/project/cryolo/</ext-link>).</p>
<p>The template matching algorithm implemented in Gautomatch software requires as main input one or more 2D averages of the particles to be selected, an estimated size of the box that will contain the entire particle, and an estimate of the average minimum distance between two particles in the image. Additionally, there is a tunable threshold value, ranging from 0 to 1, which indicates the level of cross-correlation between the templates and a feature in the micrograph to be considered a positive match. <xref ref-type="fig" rid="F4">Figure 4</xref> shows the results of Gautomatch picking using 2D averages of the central part of the molecule as templates. The two averages used (inset in panel 4a) were obtained by manually picking and aligning approximately 2000 particles from a random selection of 100 micrographs from the total set of 29,493 images. The panels display the results obtained at different thresholds. Higher values indicate a more restrictive search, where the selected particles are more similar to the 2D averages used as templates. When the threshold is low (0.15), the number of regions selected as positives is very large, including the actual particles and a substantial number of false positives, distributed in the background, in the contaminants, and along the carbon edges of the support. As the threshold value increases (0.2), the RNPs are marked correctly, and the number of false positives selected in the background decreases drastically, although those corresponding to contaminants and carbon edges (red arrows) persist. If the threshold value is increased further in an attempt to reduce the latter false positives (0.3), it is observed that unexpectedly the number of correctly selected particles decreases (blue arrows), while the false positives found in areas of higher contrast remain (red arrows). 
In summary, there is an optimal threshold (0.2) at which most of the particles are correctly selected; however, some contaminations and the edge of the carbon support are also marked as false positives.</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>Particle picking of the central region of RNPs using Gautomatch software. <bold>(A)</bold> Typical micrograph showing RNP particles, some ice crystal contamination (black arrows), and the edge of the carbon support layer (black line). The inset shows the 2D averages used as templates for particle selection. <bold>(B)</bold> Particles selected using a threshold of 0.15. At this threshold, there is a large number of false positives distributed throughout the image. <bold>(C)</bold> Particles selected using a threshold of 0.2. Most of the RNP molecules have been correctly picked (green arrows), but ice contaminations and the carbon support edge have also been marked as particles (red arrows). <bold>(D)</bold> Increasing the threshold to 0.3 causes some RNPs to be left undetected by the software (blue arrows), while ice and carbon edge contaminations are still detected as positives.</p>
</caption>
<graphic xlink:href="fmolb-11-1473609-g004.tif"/>
</fig>
<p>The ability of Gautomatch to pick the ends of the particles was also tested and the results are shown in <xref ref-type="fig" rid="F5">Figure 5</xref>. Similar to the previous case, the ends of RNPs from 100 micrographs were manually selected and aligned to produce 2D averages, which were used as templates for Gautomatch (inset in <xref ref-type="fig" rid="F5">Figure 5A</xref>). The field covered by the templates in this case was deliberately chosen to be larger than in the previous case to ensure that the image clearly showed the end of the particle, preventing misidentification as an intermediate part of the helix. In this context, multiple tests were performed with different template sizes, and the ones shown here produced the best results. In this case, and very similar to the previous test, using a low threshold (0.2) caused the program to select a large number of matches, including real particles along their entire length (not just the ends) and many false positives. This result is almost indistinguishable from when the 2D averages of the central part of the molecule were used as a template. Increasing the threshold value to 0.4 caused most of the false positives in the background to disappear, and the number of correct positives increased proportionally to the total number of labeled particles (green arrows), although some real ends were no longer selected (blue arrows). However, the number of false positives in the center of the particles and in high-contrast regions (carbon edges and contaminants) remained high (red arrows). Finally, increasing the threshold to 0.6 resulted in the loss of correct positives, with the program selecting only false positives in the high-contrast regions.</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>Particle picking of the ends of RNPs using Gautomatch. <bold>(A)</bold> Typical micrograph showing RNP particles. The inset shows the 2D averages used as templates for particle selection. <bold>(B)</bold> Particles selected using a threshold of 0.2. At this threshold, there is a large number of false positives distributed throughout the image, and the picking is very similar to that obtained when the centers of the molecules were used as templates (<xref ref-type="fig" rid="F4">Figure 4B</xref>). <bold>(C)</bold> Particles selected using a threshold of 0.4. Some of the RNP ends have been correctly picked (green arrows), but others have not been detected (blue arrows). Ice contaminations and the carbon support edge have also been marked as positive ends (red arrows). <bold>(D)</bold> Increasing the threshold to 0.6 results in RNPs being undetected, while ice and carbon edge contaminations are still detected as positives.</p>
</caption>
<graphic xlink:href="fmolb-11-1473609-g005.tif"/>
</fig>
<p>We also compared the picking algorithm implemented in Relion (<xref ref-type="bibr" rid="B16">Kimanius et al., 2021</xref>) with our particle selector. Similar to the previous case, the software requires as main input data the 2D averages to be used as templates, the minimum distance between particles, and two parameters called &#x201c;minimum mean noise&#x201d; and &#x201c;maximum standard deviation noise&#x201d; designed to prevent the picker from selecting regions of high contrast. The values of these parameters should be determined empirically. Moreover, in Relion there is an adjustable threshold that indicates the level of similarity between the template and the selected feature in the micrograph. <xref ref-type="fig" rid="F6">Figure 6</xref> shows the results obtained using the same 2D averages from the Gautomatch tests (insets in <xref ref-type="fig" rid="F4">Figures 4A</xref>, <xref ref-type="fig" rid="F5">5A</xref>) as templates. After empirically optimizing the &#x201c;minimum mean noise&#x201d; and &#x201c;maximum standard deviation noise&#x201d; parameters to minimize as much as possible the picking of incorrect high-contrast regions, several tests were performed at different thresholds. In the case of the central regions selection (<xref ref-type="fig" rid="F6">Figure 6A</xref>), the results are shown at two thresholds around the optimal value. Using these thresholds, the software correctly identified most of the particles (green arrows), and the edges of the supporting carbon were not marked as positive matches. However, contaminations corresponding to ice crystals were mistakenly selected as particles (red arrows). As with Gautomatch, an increase in the particle selection threshold value resulted in fewer real particles being selected, but the false positives produced by ice crystals remained. 
Finally, in the particle end-picking test performed using the 2D averages of the insets of <xref ref-type="fig" rid="F5">Figure 5A</xref> as templates, the results were very similar to those obtained using the centers as templates (<xref ref-type="fig" rid="F6">Figure 6B</xref>). Additionally, the selection of regions containing contaminants as false positives (red arrows) also occurred, and increasing the threshold value did not resolve the issue.</p>
<fig id="F6" position="float">
<label>FIGURE 6</label>
<caption>
<p>Particle picking using Relion. <bold>(A)</bold> Picking the central region of the RNPs using the 2D averages shown in <xref ref-type="fig" rid="F4">Figure 4A</xref> as templates. After manual optimization of all picking parameters, the most suitable threshold was found to be approximately 0.05. At this threshold, most of the RNPs are correctly selected (green arrows). However, the software also marks areas corresponding to contaminations as positive (red arrows). Increasing the threshold to higher values (Th 0.30) to eliminate these false positives results in the loss of RNPs that were previously correctly marked (blue arrows), while contaminations are still marked as positives (red arrows). <bold>(B)</bold> Picking the ends of the RNPs using the 2D averages shown in <xref ref-type="fig" rid="F5">Figure 5A</xref> as templates. In this case, the detection of the ends was less efficient than for the central regions. Although the software correctly selected a few cases (green arrows), most of the marked positives were actually central regions of the RNPs (orange arrows), leaving many ends unmarked (blue arrows). Contaminations were also marked as positives (red arrows). As in the previous case, increasing the threshold (Th 1.1) caused correctly labeled particles to be lost (blue arrows) while contaminations continued to be detected as positives (red arrows).</p>
</caption>
<graphic xlink:href="fmolb-11-1473609-g006.tif"/>
</fig>
<p>In summary, the template-matching-based particle selectors analyzed here produce very similar results whether the central or terminal regions are used as templates, indicating that they can barely discern between these two regions in the images. Although particle selection results are slightly better when using the 2D averages of the central region, the programs tend to select high-contrast regions as positives to some extent, which cannot be resolved by varying the particle selection threshold. However, the results obtained with our software satisfactorily solve these problems and give results similar to those produced by a human expert.</p>
<p>We also compared the performance of our particle picker against other neural network-based software, specifically Topaz and CrYOLO. The results of these comparisons are presented in <xref ref-type="fig" rid="F7">Figure 7</xref>. To maintain consistency in the evaluation, we used the same set of micrographs and coordinates for training as we did with our own program.</p>
<fig id="F7" position="float">
<label>FIGURE 7</label>
<caption>
<p>Particle picking using Topaz and CrYOLO software. <bold>(A)</bold> Left: CryoEM image of RNP particles with ice contamination indicated by black arrows and the edge of the carbon support layer marked by a black line. Right: Particles selected by Topaz after training using the central regions of the RNPs with a threshold value of 1. The particles are accurately detected without false positives. However, the coordinate selection occurs near the boundary between the RNP and the background, resulting in a &#x201c;zigzag&#x201d; pattern of selected regions (marked with <sup>&#x2a;&#x2a;</sup>). <bold>(B)</bold> Particles selected by Topaz after training on the end of the RNPs at two different thresholds. Left: At threshold 1, most of the ends are selected (green arrows). However, there are a small number of false positives due to selection of ice contamination and regions where the RNPs have a sharp bend (red arrows). Right: Increasing the threshold to 2 causes some correct positives that were previously marked to be lost (blue arrows), while some false positives remain (red arrows). <bold>(C)</bold> Particles selected by CrYOLO after training using the central regions of the RNPs. Most of the particles are selected (e.g., green arrow); nevertheless, a number of false positives associated with the carbon support are also marked (red arrow). Increasing the threshold does not solve this problem, as correctly picked particles are lost while some false positives remain. <bold>(D)</bold> Particles selected by CrYOLO after training using the terminal regions of the RNPs. The results obtained are very similar to those shown in <bold>(C)</bold> since the entire particle is selected, rather than just the ends. The number of false positives is higher than in <bold>(C)</bold> (e.g., red arrows).</p>
</caption>
<graphic xlink:href="fmolb-11-1473609-g007.tif"/>
</fig>
<p>Topaz employs a convolutional neural network based on positive unlabeled learning (<xref ref-type="bibr" rid="B5">Bepler et al., 2020</xref>), with multiple adjustable parameters in its learning protocol, including the particle size in its longest dimension. However, for the dataset we used, which includes filamentary and highly flexible structures that often bend, determining an appropriate value for this parameter proved challenging. After extensive trial and error, we found that the resnet8 model architecture yielded the best results. <xref ref-type="fig" rid="F7">Figure 7A</xref> shows the particle selection from the central regions of the RNPs at the threshold that produced the optimal outcome. While the particle detection was accurate, with no false positives, the selected coordinates were positioned near the boundary between the particle and the background, rather than at the center of the filaments. This led to the appearance of a &#x201c;zigzag&#x201d; pattern in the coordinates. <xref ref-type="fig" rid="F7">Figure 7B</xref> illustrates the particle selection by Topaz when the network was trained using the ends of the RNPs at two different thresholds. The results were similarly accurate, although there was a slightly higher tendency to select false positives, particularly in areas with ice contamination or sharp bends in the RNPs.</p>
<p>CrYOLO employs a convolutional neural network based on supervised learning, requiring labeled data for training (<xref ref-type="bibr" rid="B37">Wagner et al., 2019</xref>). It offers multiple adjustable parameters, such as particle diameter, box size, and detection threshold, to adapt to different datasets. <xref ref-type="fig" rid="F7">Figures 7C, D</xref> display the results of particle picking using CrYOLO, trained on the central regions and the ends of the RNPs, respectively, at the threshold that yielded the best results. Surprisingly, the results are quite similar in both cases, showing little difference between training on centers versus ends. Moreover, when trained on the ends, there was a greater tendency to select false positives.</p>
<p>In summary, neural network-based particle pickers, such as Topaz and CrYOLO, outperform traditional template-matching methods in terms of detection accuracy. However, fine-tuning their parameters is essential and often labor-intensive. Without careful optimization, this can lead to the selection of false positives.</p>
</sec>
<sec id="s4-3">
<title>3.3 Semantic segmentation picking provides good quality 2D averages</title>
<p>The particle-picking system presented in this study offers not only accurate detection of regions of interest through semantic segmentation, but also introduces a novel method for generating coordinates used for particle extraction. In the case of localizing RNP filaments and unlike other software, which typically calculates the centroid of the detected region to determine coordinate placement, our approach utilizes a skeletonization process. This process identifies the geometric center of the filament, enabling the program to determine the position of the helical axis of the particle, regardless of its curvature. As a result, our software generates a line of coordinates that facilitates the extraction of a continuous series of images along the entire axis of the particle.</p>
<p>This coordinate generation method for the case of localizing RNP filaments has two significant advantages. First, the extracted images are centered on or near the particle axis, minimizing the shifts required for image alignment. This leads to a reduction in the computational resources needed for image processing. Second, this approach increases the total number of images to be processed in an efficient manner, extending the concept of equispaced and uniform extraction&#x2014;commonly applied to straight helical particles&#x2014;to more flexible, curved filaments. <xref ref-type="fig" rid="F8">Figure 8</xref> shows 2D averages computed with CryoSPARC software (<xref ref-type="bibr" rid="B25">Punjani et al., 2017</xref>) of RNP filaments (RNP helical central regions) obtained from particles picked using the software presented in this work, in which secondary structure is recognizable.</p>
<fig id="F8" position="float">
<label>FIGURE 8</label>
<caption>
<p>Two-dimensional averages of particles picked using the software presented in this work. Each average was obtained from 800 to 1,000 particles; secondary structure is visible in the nucleoprotein monomer. The scale bar represents 100 &#xc5;.</p>
</caption>
<graphic xlink:href="fmolb-11-1473609-g008.tif"/>
</fig>
</sec>
</sec>
<sec sec-type="discussion" id="s5">
<title>4 Discussion</title>
<p>As demonstrated by our results, our proposed method performs well and surpasses traditional template-matching pickers. In the localization of complete RNPs, our method has the distinct advantage of having practically zero false positives, whereas template-matching pickers are prone to mistakenly selecting ice contaminations and carbon edges. Surprisingly, increasing the cross-correlation threshold in template matching-based pickers, which theoretically should make the selected particles more closely resemble the templates, tends to result in the selection of incorrect higher-contrast features, picking up false positives representing contaminations and the edges of holes in the carbon. It is important to note that selecting false positives can greatly complicate all subsequent classification and image processing tasks aimed at determining the underlying structure. Compared to other neural network-based pickers, our approach also demonstrates good performance. Neural network-based particle pickers, such as Topaz and CrYOLO, surpass traditional template-matching methods in detection accuracy for localizing RNP filaments and ends. However, these methods require parameter fine-tuning, which can be labor-intensive. Without careful optimization, there is a higher risk of selecting false positives. Our approach seeks to address these challenges, potentially offering improvements in accuracy and efficiency. It is important to highlight that, although the CASSPER method is similarly based on semantic segmentation, it is not well-suited for selecting filamentous particles and their ends. CASSPER is specifically designed for picking globular proteins, as it focuses on estimating the centroids of automatically segmented protein regions.</p>
<p>Furthermore, our method substantially outperforms others in selecting the RNP ends. As previously discussed, the ends of RNPs contain the unique structure of the polymerase, and their study is of great importance, making it crucial to distinguish the ends from other regions of the RNP filaments (RNP helical central regions). In attempts to pick the RNP ends using traditional template-matching pickers, we used 2D averages of images of the RNP ends previously obtained through extensive manual picking followed by 2D particle classification and averaging. Although the templates clearly depicted RNP ends, the results from traditional picker methods were practically the same as those obtained using the central region of the helix as template. Moreover, increasing the threshold to select particles that most resembled the used template led to the same outcome as before, where mainly higher contrast regions were selected that did not correspond to filament ends, thus representing false positives. However, our method correctly selects the filament ends without selecting other filament regions or other false positives (contaminations, carbon edges, etc.), with nearly the same precision as manual picking by a human expert as shown in the result tables. These results suggest that traditional pickers are unable to distinguish between RNP ends and central filament regions, regardless of the template used. This likely occurs because the matching process relies more on the primary structure present in the image (the filament) rather than on the surrounding context, which truly differentiates between central and end regions. In contrast, our method performs exceptionally well in this respect. It is also important to note that our method is capable of selecting features or particles that are sparsely populated in the image (RNP ends), despite their strong resemblance to the majority feature (RNP center filament), with minimal error. 
This capability is important because it suggests that our picking system could be used to search for minority projections of molecular complexes, which is particularly valuable in structural studies facing the common problem in cryoEM of preferential views in sample preparations.</p>
<p>Our approach has other important advantages. This method does not require as input any prior 2D averages, nor knowledge of any particle data (neither estimated diameter nor minimum distance between particles, etc.). The need for prior knowledge of these parameters complicates the use of other pickers and makes them much more prone to errors if any of those estimates are not precise. Our approach only requires manual segmentation of a limited number of micrographs, overriding the need for parameter knowledge/estimation. Moreover, the most reliable way to obtain 2D averages for template-matching pickers typically consists of manually picking particles on the input micrographs, extracting them, and aligning them with existing software. This workflow corresponds to a considerable amount of work. Additionally, alignment software may perform suboptimally when provided with few particles coming from manual picking, leading to poor templates and worse results. All the limitations highlighted above can be overcome when analyzing particles that can be considered &#x201c;easy to pick&#x201d;, such as ribosomes, with work and prior experience. However, in challenging cases, these issues can become practically insurmountable, potentially leading to project failure due to poor picking quality. Therefore, although the proposed method has been specifically designed for the localization of the centers and ends of RNPs, we believe that this method holds potential beyond its initial scope. It could be highly beneficial in addressing other complex cases where traditional methods may fall short. Such cases include the picking of very flexible filamentous samples, the selective picking of specific regions within macromolecules, or, as mentioned previously, the picking of minority views that are challenging to identify with conventional techniques. 
This broader applicability suggests that our method could serve as a valuable tool in a variety of challenging scenarios in the field of image processing and analysis. We believe that in such difficult projects, our approach can significantly facilitate the particle selection task, thereby increasing the probability of success.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s6">
<title>Data availability statement</title>
<p>The source code is freely available under the terms of an open-source software license and can be downloaded from <ext-link ext-link-type="uri" xlink:href="https://github.com/1aviervargas/Semantic_Segmentation_Picker">https://github.com/1aviervargas/Semantic_Segmentation_Picker</ext-link>. The images used in the training and evaluation of the RNP-E network are available from <ext-link ext-link-type="uri" xlink:href="https://zenodo.org/records/12922653">https://zenodo.org/records/12922653</ext-link>.</p>
</sec>
<sec id="s7">
<title>Author contributions</title>
<p>JV: Conceptualization, Data curation, Formal Analysis, Funding acquisition, Investigation, Methodology, Project administration, Resources, Software, Supervision, Validation, Visualization, Writing&#x2013;original draft, Writing&#x2013;review and editing. AM: Data curation, Investigation, Methodology, Validation, Visualization, Writing&#x2013;original draft, Writing&#x2013;review and editing. HC: Investigation, Writing&#x2013;original draft, Writing&#x2013;review and editing. JM-B: Conceptualization, Data curation, Formal Analysis, Funding acquisition, Investigation, Methodology, Project administration, Resources, Software, Supervision, Validation, Visualization, Writing&#x2013;original draft, Writing&#x2013;review and editing.</p>
</sec>
<sec sec-type="funding-information" id="s8">
<title>Funding</title>
<p>The author(s) declare that financial support was received for the research, authorship, and/or publication of this article. The authors acknowledge financial support from the Spanish Ministerio de Ciencia e Innovaci&#xf3;n: grant PID2022-137548OB-I00, funded by MCIN/AEI/10.13039/501100011033/ and by ERDF A way of making Europe (to JV); grant PID2020-117752RB-I00, financed by MCIU/AEI/10.13039/501100011033 and FEDER, UE (to JM-B); and grant TED2021-132748B-I00, financed by the European Union &#x201c;Next-Generation EU&#x201d;/PRTR (to JM-B and JV). CNB-CSIC acknowledges support from the Severo Ochoa Program for Centers of Excellence in R&#x26;D (CEX2023-001386-S).</p>
</sec>
<sec sec-type="COI-statement" id="s9">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s10">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Abrishami</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Zaldivar-Peraza</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>de la Rosa-Trevin</surname>
<given-names>J. M.</given-names>
</name>
<name>
<surname>Vargas</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Oton</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Marabini</surname>
<given-names>R.</given-names>
</name>
<etal/>
</person-group> (<year>2013</year>). <article-title>A pattern matching approach to the automatic selection of particles from low-contrast electron micrographs</article-title>. <source>Bioinformatics</source> <volume>29</volume> (<issue>19</issue>), <fpage>2460</fpage>&#x2013;<lpage>2468</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btt429</pub-id>
</citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Anger</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Pieulle</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Shahin</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Valette</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Le Guenno</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Kosta</surname>
<given-names>A.</given-names>
</name>
<etal/>
</person-group> (<year>2023</year>). <article-title>Structure of a heteropolymeric type 4 pilus from a monoderm bacterium</article-title>. <source>Nat. Commun.</source> <volume>14</volume> (<issue>1</issue>), <fpage>7143</fpage>. <pub-id pub-id-type="doi">10.1038/s41467-023-42872-5</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Arranz</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Coloma</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Chichon</surname>
<given-names>F. J.</given-names>
</name>
<name>
<surname>Conesa</surname>
<given-names>J. J.</given-names>
</name>
<name>
<surname>Carrascosa</surname>
<given-names>J. L.</given-names>
</name>
<name>
<surname>Valpuesta</surname>
<given-names>J. M.</given-names>
</name>
<etal/>
</person-group> (<year>2012</year>). <article-title>The structure of native influenza virion ribonucleoproteins</article-title>. <source>Science</source> <volume>338</volume> (<issue>6114</issue>), <fpage>1634</fpage>&#x2013;<lpage>1637</lpage>. <pub-id pub-id-type="doi">10.1126/science.1228172</pub-id>
</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bell</surname>
<given-names>J. M.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Durmaz</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Fluty</surname>
<given-names>A. C.</given-names>
</name>
<name>
<surname>Ludtke</surname>
<given-names>S. J.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>New software tools in EMAN2 inspired by EMDatabank map challenge</article-title>. <source>J. Struct. Biol.</source> <volume>204</volume> (<issue>2</issue>), <fpage>283</fpage>&#x2013;<lpage>290</lpage>. <pub-id pub-id-type="doi">10.1016/j.jsb.2018.09.002</pub-id>
</citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bepler</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Kelley</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Noble</surname>
<given-names>A. J.</given-names>
</name>
<name>
<surname>Berger</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Topaz-Denoise: general deep denoising models for cryoEM and cryoET</article-title>. <source>Nat. Commun.</source> <volume>11</volume> (<issue>1</issue>), <fpage>5208</fpage>. <pub-id pub-id-type="doi">10.1038/s41467-020-18952-1</pub-id>
</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bepler</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Morin</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Rapp</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Brasch</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Shapiro</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Noble</surname>
<given-names>A. J.</given-names>
</name>
<etal/>
</person-group> (<year>2019</year>). <article-title>Positive-unlabeled convolutional neural networks for particle picking in cryo-electron micrographs</article-title>. <source>Nat. Methods</source> <volume>16</volume> (<issue>11</issue>), <fpage>1153</fpage>&#x2013;<lpage>1160</lpage>. <pub-id pub-id-type="doi">10.1038/s41592-019-0575-8</pub-id>
</citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Coloma</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Arranz</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>de la Rosa-Trevin</surname>
<given-names>J. M.</given-names>
</name>
<name>
<surname>Sorzano</surname>
<given-names>C. O. S.</given-names>
</name>
<name>
<surname>Munier</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Carlero</surname>
<given-names>D.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Structural insights into influenza A virus ribonucleoproteins reveal a processive helical track as transcription mechanism</article-title>. <source>Nat. Microbiol.</source> <volume>5</volume> (<issue>5</issue>), <fpage>727</fpage>&#x2013;<lpage>734</lpage>. <pub-id pub-id-type="doi">10.1038/s41564-020-0675-3</pub-id>
</citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Coloma</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Valpuesta</surname>
<given-names>J. M.</given-names>
</name>
<name>
<surname>Arranz</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Carrascosa</surname>
<given-names>J. L.</given-names>
</name>
<name>
<surname>Ortin</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Martin-Benito</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>The structure of a biologically active influenza virus ribonucleoprotein complex</article-title>. <source>PLoS Pathog.</source> <volume>5</volume> (<issue>6</issue>), <fpage>e1000491</fpage>. <pub-id pub-id-type="doi">10.1371/journal.ppat.1000491</pub-id>
</citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Danev</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Yanagisawa</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Kikkawa</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Cryo-electron microscopy methodology: current aspects and future directions</article-title>. <source>Trends Biochem. Sci.</source> <volume>44</volume> (<issue>10</issue>), <fpage>837</fpage>&#x2013;<lpage>848</lpage>. <pub-id pub-id-type="doi">10.1016/j.tibs.2019.04.008</pub-id>
</citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fitzpatrick</surname>
<given-names>A. W. P.</given-names>
</name>
<name>
<surname>Falcon</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>He</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Murzin</surname>
<given-names>A. G.</given-names>
</name>
<name>
<surname>Murshudov</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Garringer</surname>
<given-names>H. J.</given-names>
</name>
<etal/>
</person-group> (<year>2017</year>). <article-title>Cryo-EM structures of tau filaments from Alzheimer&#x2019;s disease</article-title>. <source>Nature</source> <volume>547</volume> (<issue>7662</issue>), <fpage>185</fpage>&#x2013;<lpage>190</lpage>. <pub-id pub-id-type="doi">10.1038/nature23002</pub-id>
</citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>George</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Assaiya</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Roy</surname>
<given-names>R. J.</given-names>
</name>
<name>
<surname>Kembhavi</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Chauhan</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Paul</surname>
<given-names>G.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>CASSPER is a semantic segmentation-based particle picking algorithm for single-particle cryo-electron microscopy</article-title>. <source>Commun. Biol.</source> <volume>4</volume> (<issue>1</issue>), <fpage>200</fpage>. <pub-id pub-id-type="doi">10.1038/s42003-021-01721-1</pub-id>
</citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Grant</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Rohou</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Grigorieff</surname>
<given-names>N.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>cisTEM, user-friendly software for single-particle image processing</article-title>. <source>Elife</source> <volume>7</volume>, <fpage>e35383</fpage>. <pub-id pub-id-type="doi">10.7554/eLife.35383</pub-id>
</citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>He</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Scheres</surname>
<given-names>S. H. W.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Helical reconstruction in RELION</article-title>. <source>J. Struct. Biol.</source> <volume>198</volume> (<issue>3</issue>), <fpage>163</fpage>&#x2013;<lpage>176</lpage>. <pub-id pub-id-type="doi">10.1016/j.jsb.2017.02.003</pub-id>
</citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Heimowitz</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>And&#xe9;n</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Singer</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>APPLE picker: automatic particle picking, a low-effort cryo-EM framework</article-title>. <source>J. Struct. Biol.</source> <volume>204</volume> (<issue>2</issue>), <fpage>215</fpage>&#x2013;<lpage>227</lpage>. <pub-id pub-id-type="doi">10.1016/j.jsb.2018.08.012</pub-id>
</citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Huber</surname>
<given-names>S. T.</given-names>
</name>
<name>
<surname>Kuhm</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Sachse</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Automated tracing of helical assemblies from electron cryo-micrographs</article-title>. <source>J. Struct. Biol.</source> <volume>202</volume> (<issue>1</issue>), <fpage>1</fpage>&#x2013;<lpage>12</lpage>. <pub-id pub-id-type="doi">10.1016/j.jsb.2017.11.013</pub-id>
</citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kimanius</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Dong</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Sharov</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Nakane</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Scheres</surname>
<given-names>S. H. W.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>New tools for automated cryo-EM single-particle analysis in RELION-4.0</article-title>. <source>Biochem. J.</source> <volume>478</volume> (<issue>24</issue>), <fpage>4169</fpage>&#x2013;<lpage>4185</lpage>. <pub-id pub-id-type="doi">10.1042/BCJ20210708</pub-id>
</citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kuhlbrandt</surname>
<given-names>W.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Biochemistry. The resolution revolution</article-title>. <source>Science</source> <volume>343</volume> (<issue>6178</issue>), <fpage>1443</fpage>&#x2013;<lpage>1444</lpage>. <pub-id pub-id-type="doi">10.1126/science.1251652</pub-id>
</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lee</surname>
<given-names>T. C.</given-names>
</name>
<name>
<surname>Kashyap</surname>
<given-names>R. L.</given-names>
</name>
<name>
<surname>Chu</surname>
<given-names>C. N.</given-names>
</name>
</person-group> (<year>1994</year>). <article-title>Building skeleton models via 3-D medial surface/axis thinning algorithms</article-title>. <source>CVGIP Graph. Models Image Process.</source> <volume>56</volume> (<issue>6</issue>), <fpage>462</fpage>&#x2013;<lpage>478</lpage>. <pub-id pub-id-type="doi">10.1006/cgip.1994.1042</pub-id>
</citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Manka</surname>
<given-names>S. W.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Wenborn</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Betts</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Joiner</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Saibil</surname>
<given-names>H. R.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>2.7 &#xc5; cryo-EM structure of <italic>ex vivo</italic> RML prion fibrils</article-title>. <source>Nat. Commun.</source> <volume>13</volume> (<issue>1</issue>), <fpage>4004</fpage>. <pub-id pub-id-type="doi">10.1038/s41467-022-30457-7</pub-id>
</citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Merk</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Bartesaghi</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Banerjee</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Falconieri</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Rao</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Davis</surname>
<given-names>M. I.</given-names>
</name>
<etal/>
</person-group> (<year>2016</year>). <article-title>Breaking cryo-EM resolution barriers to facilitate drug discovery</article-title>. <source>Cell</source> <volume>165</volume> (<issue>7</issue>), <fpage>1698</fpage>&#x2013;<lpage>1707</lpage>. <pub-id pub-id-type="doi">10.1016/j.cell.2016.05.040</pub-id>
</citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Moriya</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Saur</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Stabrin</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Merino</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Voicu</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>Z.</given-names>
</name>
<etal/>
</person-group> (<year>2017</year>). <article-title>High-resolution single particle analysis from electron cryo-microscopy images using SPHIRE</article-title>. <source>J. Vis. Exp.</source> <volume>123</volume>, <fpage>55448</fpage>. <pub-id pub-id-type="doi">10.3791/55448</pub-id>
</citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Nakane</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Kotecha</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Sente</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>McMullan</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Masiulis</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Brown</surname>
<given-names>P.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Single-particle cryo-EM at atomic resolution</article-title>. <source>Nature</source> <volume>587</volume> (<issue>7832</issue>), <fpage>152</fpage>&#x2013;<lpage>156</lpage>. <pub-id pub-id-type="doi">10.1038/s41586-020-2829-0</pub-id>
</citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pospich</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Raunser</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>The molecular basis of Alzheimer&#x27;s plaques</article-title>. <source>Science</source> <volume>358</volume> (<issue>6359</issue>), <fpage>45</fpage>&#x2013;<lpage>46</lpage>. <pub-id pub-id-type="doi">10.1126/science.aap8002</pub-id>
</citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pospich</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Raunser</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Single particle cryo-EM &#x2014; an optimal tool to study cytoskeletal proteins</article-title>. <source>Curr. Opin. Struct. Biol.</source> <volume>52</volume>, <fpage>16</fpage>&#x2013;<lpage>24</lpage>. <pub-id pub-id-type="doi">10.1016/j.sbi.2018.07.006</pub-id>
</citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Punjani</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Rubinstein</surname>
<given-names>J. L.</given-names>
</name>
<name>
<surname>Fleet</surname>
<given-names>D. J.</given-names>
</name>
<name>
<surname>Brubaker</surname>
<given-names>M. A.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>cryoSPARC: algorithms for rapid unsupervised cryo-EM structure determination</article-title>. <source>Nat. Methods</source> <volume>14</volume> (<issue>3</issue>), <fpage>290</fpage>&#x2013;<lpage>296</lpage>. <pub-id pub-id-type="doi">10.1038/nmeth.4169</pub-id>
</citation>
</ref>
<ref id="B26">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Ronneberger</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Fischer</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Brox</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2015</year>). <source>U-net: convolutional networks for biomedical image segmentation</source>. <publisher-loc>Cham</publisher-loc>: <publisher-name>Springer International Publishing</publisher-name>.</citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Scheres</surname>
<given-names>S. H.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>RELION: implementation of a Bayesian approach to cryo-EM structure determination</article-title>. <source>J. Struct. Biol.</source> <volume>180</volume> (<issue>3</issue>), <fpage>519</fpage>&#x2013;<lpage>530</lpage>. <pub-id pub-id-type="doi">10.1016/j.jsb.2012.09.006</pub-id>
</citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Scheres</surname>
<given-names>S. H.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Semi-automated selection of cryo-EM particles in RELION-1.3</article-title>. <source>J. Struct. Biol.</source> <volume>189</volume> (<issue>2</issue>), <fpage>114</fpage>&#x2013;<lpage>122</lpage>. <pub-id pub-id-type="doi">10.1016/j.jsb.2014.11.010</pub-id>
</citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Scheres</surname>
<given-names>S. H. W.</given-names>
</name>
<name>
<surname>Ryskeldi-Falcon</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Goedert</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Molecular pathology of neurodegenerative diseases by cryo-EM of amyloids</article-title>. <source>Nature</source> <volume>621</volume> (<issue>7980</issue>), <fpage>701</fpage>&#x2013;<lpage>710</lpage>. <pub-id pub-id-type="doi">10.1038/s41586-023-06437-2</pub-id>
</citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Schneider</surname>
<given-names>C. A.</given-names>
</name>
<name>
<surname>Rasband</surname>
<given-names>W. S.</given-names>
</name>
<name>
<surname>Eliceiri</surname>
<given-names>K. W.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>NIH Image to ImageJ: 25 years of image analysis</article-title>. <source>Nat. Methods</source> <volume>9</volume> (<issue>7</issue>), <fpage>671</fpage>&#x2013;<lpage>675</lpage>. <pub-id pub-id-type="doi">10.1038/nmeth.2089</pub-id>
</citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shi</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Murzin</surname>
<given-names>A. G.</given-names>
</name>
<name>
<surname>Falcon</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Kotecha</surname>
<given-names>A.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Structure-based classification of tauopathies</article-title>. <source>Nature</source> <volume>598</volume> (<issue>7880</issue>), <fpage>359</fpage>&#x2013;<lpage>363</lpage>. <pub-id pub-id-type="doi">10.1038/s41586-021-03911-7</pub-id>
</citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tang</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Peng</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Baldwin</surname>
<given-names>P. R.</given-names>
</name>
<name>
<surname>Mann</surname>
<given-names>D. S.</given-names>
</name>
<name>
<surname>Jiang</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Rees</surname>
<given-names>I.</given-names>
</name>
<etal/>
</person-group> (<year>2007</year>). <article-title>EMAN2: an extensible image processing suite for electron microscopy</article-title>. <source>J. Struct. Biol.</source> <volume>157</volume> (<issue>1</issue>), <fpage>38</fpage>&#x2013;<lpage>46</lpage>. <pub-id pub-id-type="doi">10.1016/j.jsb.2006.05.009</pub-id>
</citation>
</ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tegunov</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Cramer</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Real-time cryo-electron microscopy data preprocessing with Warp</article-title>. <source>Nat. Methods</source> <volume>16</volume> (<issue>11</issue>), <fpage>1146</fpage>&#x2013;<lpage>1152</lpage>. <pub-id pub-id-type="doi">10.1038/s41592-019-0580-y</pub-id>
</citation>
</ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Thurber</surname>
<given-names>K. R.</given-names>
</name>
<name>
<surname>Yin</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Tycko</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Automated picking of amyloid fibrils from cryo-EM images for helical reconstruction with RELION</article-title>. <source>J. Struct. Biol.</source> <volume>213</volume> (<issue>2</issue>), <fpage>107736</fpage>. <pub-id pub-id-type="doi">10.1016/j.jsb.2021.107736</pub-id>
</citation>
</ref>
<ref id="B35">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Voss</surname>
<given-names>N. R.</given-names>
</name>
<name>
<surname>Yoshioka</surname>
<given-names>C. K.</given-names>
</name>
<name>
<surname>Radermacher</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Potter</surname>
<given-names>C. S.</given-names>
</name>
<name>
<surname>Carragher</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>DoG Picker and TiltPicker: software tools to facilitate particle selection in single particle electron microscopy</article-title>. <source>J. Struct. Biol.</source> <volume>166</volume> (<issue>2</issue>), <fpage>205</fpage>&#x2013;<lpage>213</lpage>. <pub-id pub-id-type="doi">10.1016/j.jsb.2009.01.004</pub-id>
</citation>
</ref>
<ref id="B36">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wagner</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Lusnig</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Pospich</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Stabrin</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Sch&#xf6;nfeld</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Raunser</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Two particle-picking procedures for filamentous proteins: SPHIRE-crYOLO filament mode and SPHIRE-STRIPER</article-title>. <source>Acta Crystallogr. Sect. D. Struct. Biol.</source> <volume>76</volume> (<issue>7</issue>), <fpage>613</fpage>&#x2013;<lpage>620</lpage>. <pub-id pub-id-type="doi">10.1107/S2059798320007342</pub-id>
</citation>
</ref>
<ref id="B37">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wagner</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Merino</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Stabrin</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Moriya</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Antoni</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Apelbaum</surname>
<given-names>A.</given-names>
</name>
<etal/>
</person-group> (<year>2019</year>). <article-title>SPHIRE-crYOLO is a fast and accurate fully automated particle picker for cryo-EM</article-title>. <source>Commun. Biol.</source> <volume>2</volume> (<issue>1</issue>), <fpage>218</fpage>. <pub-id pub-id-type="doi">10.1038/s42003-019-0437-z</pub-id>
</citation>
</ref>
<ref id="B38">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yip</surname>
<given-names>K. M.</given-names>
</name>
<name>
<surname>Fischer</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Paknia</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Chari</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Stark</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Atomic-resolution protein structure determination by cryo-EM</article-title>. <source>Nature</source> <volume>587</volume> (<issue>7832</issue>), <fpage>157</fpage>&#x2013;<lpage>161</lpage>. <pub-id pub-id-type="doi">10.1038/s41586-020-2833-4</pub-id>
</citation>
</ref>
<ref id="B39">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Gctf: real-time CTF determination and correction</article-title>. <source>J. Struct. Biol.</source> <volume>193</volume> (<issue>1</issue>), <fpage>1</fpage>&#x2013;<lpage>12</lpage>. <pub-id pub-id-type="doi">10.1016/j.jsb.2015.11.003</pub-id>
</citation>
</ref>
<ref id="B40">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zheng</surname>
<given-names>S. Q.</given-names>
</name>
<name>
<surname>Palovcak</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Armache</surname>
<given-names>J. P.</given-names>
</name>
<name>
<surname>Verba</surname>
<given-names>K. A.</given-names>
</name>
<name>
<surname>Cheng</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Agard</surname>
<given-names>D. A.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>MotionCor2: anisotropic correction of beam-induced motion for improved cryo-electron microscopy</article-title>. <source>Nat. Methods</source> <volume>14</volume> (<issue>4</issue>), <fpage>331</fpage>&#x2013;<lpage>332</lpage>. <pub-id pub-id-type="doi">10.1038/nmeth.4193</pub-id>
</citation>
</ref>
<ref id="B41">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zivanov</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Nakane</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Forsberg</surname>
<given-names>B. O.</given-names>
</name>
<name>
<surname>Kimanius</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Hagen</surname>
<given-names>W. J.</given-names>
</name>
<name>
<surname>Lindahl</surname>
<given-names>E.</given-names>
</name>
<etal/>
</person-group> (<year>2018</year>). <article-title>New tools for automated high-resolution cryo-EM structure determination in RELION-3</article-title>. <source>eLife</source> <volume>7</volume>, <fpage>e42166</fpage>. <pub-id pub-id-type="doi">10.7554/eLife.42166</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>