<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article article-type="review-article" dtd-version="1.3" xml:lang="en" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Mol. Biosci.</journal-id>
<journal-title-group>
<journal-title>Frontiers in Molecular Biosciences</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Mol. Biosci.</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">2296-889X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1613399</article-id>
<article-id pub-id-type="doi">10.3389/fmolb.2025.1613399</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Review</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Connecting the dots: deep learning-based automated model building methods in cryo-EM</article-title>
<alt-title alt-title-type="left-running-head">Bansia and des Georges</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fmolb.2025.1613399">10.3389/fmolb.2025.1613399</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Bansia</surname>
<given-names>Harsh</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/3038549"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x26; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x26; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>des Georges</surname>
<given-names>Amedee</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/252429"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x26; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x26; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
</contrib>
</contrib-group>
<aff id="aff1">
<label>1</label>
<institution>Department of Molecular Pathobiology, NYU College of Dentistry</institution>, <city>New York</city>, <state>NY</state>, <country country="US">United States</country>
</aff>
<aff id="aff2">
<label>2</label>
<institution>NYU Pain Research Center</institution>, <city>New York</city>, <state>NY</state>, <country country="US">United States</country>
</aff>
<author-notes>
<corresp id="c001">
<label>&#x2a;</label>Correspondence: Harsh Bansia, <email xlink:href="mailto:hb2880@nyu.edu">hb2880@nyu.edu</email>
</corresp>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-02-11">
<day>11</day>
<month>02</month>
<year>2026</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2025</year>
</pub-date>
<volume>12</volume>
<elocation-id>1613399</elocation-id>
<history>
<date date-type="received">
<day>17</day>
<month>04</month>
<year>2025</year>
</date>
<date date-type="rev-recd">
<day>27</day>
<month>10</month>
<year>2025</year>
</date>
<date date-type="accepted">
<day>28</day>
<month>10</month>
<year>2025</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2026 Bansia and des Georges.</copyright-statement>
<copyright-year>2026</copyright-year>
<copyright-holder>Bansia and des Georges</copyright-holder>
<license>
<ali:license_ref start_date="2026-02-11">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<p>The resolution revolution in single particle cryo-electron microscopy (cryo-EM) has dramatically expanded our structural knowledge of large biomolecular complexes. While high-resolution cryo-EM density maps enable atomic model building, lower-resolution maps can still reveal secondary structures, folds, and domains. When combined with integrative modeling approaches, such data can provide meaningful insights into biomolecular structure and function. Constructing accurate models, however, remains challenging: at low resolutions it is difficult to interpret density map features reliably, and at high resolutions traditional model-building workflows can become a time-consuming bottleneck. Deep learning, which is transforming problem-solving across scientific domains, offers powerful new tools to automate and accelerate this process. In this review, we discuss deep learning-based methods developed to automate model building in cryo-EM density maps, assessing their impact on streamlining structure determination. Recognizing that biomacromolecular structures exhibit hierarchical organization, we classify these methods according to their ability to model primary, secondary, tertiary, and quaternary structures of biomolecules. Deep learning tools for building atomic models in cryo-EM density maps are further grouped as <italic>de novo</italic>, where the model is predicted directly from features learned from the cryo-EM density, or hybrid, where it is derived by integrating structural templates with these features. We outline current limitations, including the challenge of obtaining sufficiently large and diverse datasets for training networks to model different types of biomolecules in the cryo-EM density maps, and the open challenge of constructing training sets that capture the conformational heterogeneity often observed in the cryo-EM maps. 
We conclude by highlighting emerging directions for this rapidly advancing field, which promise to make automated, data-driven model building an integral part of structural biology.</p>
</abstract>
<kwd-group>
<kwd>drug-discovery</kwd>
<kwd>model building</kwd>
<kwd>structural biology</kwd>
<kwd>cryo-EM atomic models</kwd>
<kwd>deep neural network</kwd>
</kwd-group>
<funding-group>
<award-group id="gs1">
<funding-source id="sp1">
<institution-wrap>
<institution>National Institute of General Medical Sciences</institution>
<institution-id institution-id-type="doi" vocab="open-funder-registry" vocab-identifier="10.13039/open_funder_registry">10.13039/100000057</institution-id>
</institution-wrap>
</funding-source>
<award-id rid="sp1">GM133598</award-id>
</award-group>
<funding-statement>The author(s) declare that financial support was received for the research and/or publication of this article. NIH R35GM133598 and NYU startup grant to AdG and NYU postdoctoral research and professional development support grant to HB.</funding-statement>
</funding-group>
<counts>
<fig-count count="4"/>
<table-count count="18"/>
<equation-count count="1"/>
<ref-count count="203"/>
<page-count count="43"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Structural Biology</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="s1">
<label>1</label>
<title>Introduction</title>
<p>Prominent architect Louis Sullivan coined the phrase &#x201c;<italic>form follows function</italic>&#x201d; asserting that an object&#x2019;s shape should primarily relate to its intended purpose. He applied this design philosophy to pioneer the modern skyscraper. The design principle quickly found applications in other domains besides architecture, including automobile design, product design, software engineering, and even describes the hierarchy of biological organization at cellular, tissue, organ, individual, and ecosystem levels. At the molecular level &#x201c;<italic>form follows function</italic>&#x201d; implies how a particular arrangement of atoms, residues, and higher order motifs in a biomacromolecule allows it to perform a specific function, e.g., enzyme active-site catalysis, membrane protein assisted transport of solutes, flow of genetic information, underscoring the need to accurately determine macromolecular structure to elucidate its function (<xref ref-type="bibr" rid="B21">Carugo and Djinovi&#x107;-Carugo, 2023</xref>). Unlike skyscrapers, which are static, biomolecules can dynamically adapt their <italic>forms</italic> by spatially rearranging their constituent elements to perform a diverse array of <italic>functions</italic> in rapidly changing cellular environments, the basis for allosteric mechanisms (<xref ref-type="bibr" rid="B8">Astore et al., 2024</xref>).</p>
<p>Structure determination of protein&#x2013;protein and protein&#x2013;nucleic acid complexes is crucial for understanding cellular processes and developing therapeutics for challenging disorders (<xref ref-type="bibr" rid="B7">Anderson, 2003</xref>; <xref ref-type="bibr" rid="B49">de Oliveira et al., 2021</xref>; <xref ref-type="bibr" rid="B12">Bansia et al., 2021</xref>). Single-particle cryo-electron microscopy (cryo-EM) has become the method of choice for determining high-resolution structures of biomacromolecules, ranging from small proteins to large complex assemblies in their native context (<xref ref-type="bibr" rid="B94">K&#xfc;hlbrandt, 2014</xref>; <xref ref-type="bibr" rid="B129">Nogales, 2016</xref>; <xref ref-type="bibr" rid="B48">de la Cruz and Eng, 2023</xref>; <xref ref-type="bibr" rid="B37">Chua et al., 2022</xref>). A three-dimensional <bold>(3D) density map</bold> in cryo-EM is represented as a 3D grid, with each <bold>voxel</bold> assigned to a density value. A <bold>voxel</bold> is the 3D equivalent of a two-dimensional (2D) pixel and represents a value in a 3D grid, analogous to how a pixel represents a value in a 2D grid. A 3D cryo-EM density map is reconstructed from thousands of 2D projection images of randomly oriented, flash-frozen biomolecules (single particles) embedded in vitreous ice, providing an approximation of the electron scattering potential of the biomolecule (<xref ref-type="bibr" rid="B178">Wang and Moore, 2017</xref>; <xref ref-type="bibr" rid="B118">Marques et al., 2019</xref>). These density maps provide the structural framework for building and refining models of biomacromolecules, revealing molecular mechanisms and conformational states that may be difficult to capture with other structural techniques (<xref ref-type="bibr" rid="B23">Chang et al., 2021</xref>). 
The structure of biomacromolecules is often described in terms of hierarchical levels of complexity and organization ranging from local, regular secondary structures such as &#x3b2;-sheets and &#x3b1;-helices to global, complex 3D arrangement of atoms capturing interactions between regions that may be far apart in the primary structure. The resolution of the density map determines the level of detail, from overall molecular shapes and secondary structures at lower resolutions to 3D arrangement of individual residues and atomic features at near-atomic resolution. Model building involves the construction of plausible models of the biomacromolecule within the interpretable density of the map while maintaining correct chemical, stereochemical, and geometrical properties of the biomacromolecule. The level of detail achievable in the model depends on the map resolution. Therefore, successful model building ultimately depends on the quality and resolution of the density maps. At modest to lower resolutions, model building becomes increasingly challenging, often leading to errors in models deposited in the Protein Data Bank (PDB) (<xref ref-type="bibr" rid="B190">Zardecki et al., 2016</xref>; <xref ref-type="bibr" rid="B138">Pintilie, 2023</xref>; <xref ref-type="bibr" rid="B147">Reggiano et al., 2023</xref>; <xref ref-type="bibr" rid="B44">Croll et al., 2021</xref>). Moreover, as the biomolecules studied by cryo-EM grow in size and complexity, a now common situation, maps frequently contain densities corresponding to regions without homologous templates in the PDB. This lack of structural precedent makes <italic>de novo</italic> atomic model reconstruction particularly difficult and time-consuming (<xref ref-type="bibr" rid="B39">Cianfrocco and Kellogg, 2020</xref>). 
<xref ref-type="fig" rid="F1">Figure 1</xref> shows that as of the end of the year 2024, the Electron Microscopy Data Bank (EMDB) (<xref ref-type="bibr" rid="B41">Consortium, 2023</xref>; <xref ref-type="bibr" rid="B96">Lawson et al., 2016</xref>) contained 9,329 entries, of which 6,877 (73.7%) fell within the model building range (0&#x2013;4.0 &#xc5;). However, only 5,791 entries had corresponding atomic models deposited in the PDB, representing just 62.1% of all EMDB submissions and 84.2% of those in the 0&#x2013;4.0 &#xc5; resolution range (<xref ref-type="bibr" rid="B96">Lawson et al., 2016</xref>) (<xref ref-type="fig" rid="F1">Figure 1</xref>). A similar trend can be observed for the year 2025, where the deposited models (5,571) in the PDB correspond to 62.6% of all EMDB submissions (8,893) and 83.2% of those in the 0&#x2013;4.0 &#xc5; resolution range (6,696) (<xref ref-type="bibr" rid="B96">Lawson et al., 2016</xref>) (<xref ref-type="fig" rid="F1">Figure 1</xref>). While the reasons for this discrepancy may be multifaceted, it underscores the persistent challenge in generating complete atomic models even for high-resolution data. As such, accurate model building of target macromolecular assemblies from density maps is one of the most demanding tasks in any structure determination pipeline, requiring labor-intensive manual inputs reliant on complex judgements, clearly necessitating development of advanced model building tools in structural biology.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>
<bold>(A)</bold> Yearly distribution of total EMDB entries and associated PDB entries for the last decade. <bold>(B)</bold> Distribution of EMDB entries by resolution per year. Data sourced from EMDataResource (<xref ref-type="bibr" rid="B96">Lawson et al., 2016</xref>).</p>
</caption>
<graphic xlink:href="fmolb-12-1613399-g001.tif">
<alt-text content-type="machine-generated">(A) Bar graph showing the yearly entries from 2014 to 2025 for EMDB and PDB. Both datasets show a marked increase each year. (B) Bar graph depicting EMDB entries from 2014 to 2025 by resolution categories: 0 to 4 &#xC5;, 4 to 5 &#xC5;, 5 to 10 &#xC5;, and over 10 &#xC5;.</alt-text>
</graphic>
</fig>
<p>Although methods for reconstructing models (<xref ref-type="bibr" rid="B174">Terwilliger et al., 2018</xref>; <xref ref-type="bibr" rid="B11">Baker et al., 2011</xref>; <xref ref-type="bibr" rid="B111">Lindert et al., 2009</xref>; <xref ref-type="bibr" rid="B52">DiMaio et al., 2011</xref>; <xref ref-type="bibr" rid="B170">Terashi and Kihara, 2018</xref>), which iteratively optimize and refine structures by minimizing an energy function, have accelerated cryo-EM structure determination, they often require expert, informed manual input to derive accurate models. Traditional machine learning algorithms have been used to develop methods aimed at automating structure determination from cryo-EM density maps. These approaches are generally rule-based or rely on statistical learning techniques to guide macromolecular modeling from cryo-EM maps (<xref ref-type="bibr" rid="B159">Si et al., 2012</xref>; <xref ref-type="bibr" rid="B113">Lingyu et al., 2012</xref>; <xref ref-type="bibr" rid="B27">Chen et al., 2016</xref>). SSELearner (<xref ref-type="bibr" rid="B159">Si et al., 2012</xref>) automatically identifies helices and &#x3b2;-strands by using <italic>Support Vector Machine</italic> (SVM) to classify voxels in cryo-EM density maps. RENNSH (<xref ref-type="bibr" rid="B113">Lingyu et al., 2012</xref>) only identifies &#x3b1;-helices using nested <italic>K Nearest Neighbors</italic> (KNN) classifiers to distinguish helix from non-helix voxels. Pathwalking (<xref ref-type="bibr" rid="B27">Chen et al., 2016</xref>) combines <italic>k-means clustering</italic>, an unsupervised machine learning algorithm, with a Traveling Salesman Problem (TSP) solver, a combinatorial optimization algorithm, to automatically trace the protein backbone directly from cryo-EM maps at 3.0&#x2013;6.0 &#xc5; resolution. 
However, the above methods are mostly limited to either identifying secondary structure elements or tracing the minimal protein backbone, thereby highlighting the need for effective atomic model-building methods from cryo-EM density maps to provide a complete modeling solution.</p>
<p>The resolution revolution in cryo-EM, driven by hardware and software advances, has led to the generation of large number of high-resolution density maps (<xref ref-type="bibr" rid="B48">de la Cruz and Eng, 2023</xref>; <xref ref-type="bibr" rid="B37">Chua et al., 2022</xref>; <xref ref-type="bibr" rid="B32">Cheng, 2018</xref>). These advances, combined with parallel developments in computational hardware, software, and machine learning methodologies, have facilitated the development of deep learning-based tools to automate critical steps in the cryo-EM structure determination workflow. Deep learning is a branch of machine learning that employs artificial neural networks, modeled after biological neural networks, for prediction and classification tasks, with the term &#x2018;deep&#x2019; referring to the use of multiple hidden layers in the network (<xref ref-type="bibr" rid="B70">Greener et al., 2022</xref>). Deep learning has revolutionized problem solving across science (<xref ref-type="bibr" rid="B98">LeCun et al., 2015</xref>), exemplified by AlphaFold2&#x2019;s Nobel Prize winning success in protein structure prediction (<xref ref-type="bibr" rid="B89">Jumper et al., 2021</xref>). Within the cryo-EM workflow itself, deep learning has already significantly improved steps like particle picking, 3D density map reconstruction and structural heterogeneity analysis (<xref ref-type="bibr" rid="B161">Si et al., 2022</xref>; <xref ref-type="bibr" rid="B51">DiIorio and Kulczyk, 2023</xref>; <xref ref-type="bibr" rid="B59">Farheen et al., 2025</xref>). Building on this momentum, a new wave of deep learning methods now targets the critical challenge of automating reconstruction of models from cryo-EM density maps (<xref ref-type="bibr" rid="B107">Li S. et al., 2025</xref>; <xref ref-type="bibr" rid="B65">Giri et al., 2023</xref>; <xref ref-type="bibr" rid="B196">Zhang C. 
et al., 2025</xref>; <xref ref-type="bibr" rid="B162">Si et al., 2023</xref>) &#x2013; the focus of this review.</p>
<p>We begin with a brief primer on key aspects of biomolecular structures, followed by an overview of the fundamentals of deep neural networks, with the aim of providing a general and accessible introduction for readers from diverse fields to the terminology and concepts used throughout the review. We then examine approximately 50 deep learning-based tools for modeling proteins, nucleic acids, and protein-nucleic acid complexes in cryo-EM density maps, analyzing their underlying architectures to identify common conceptual strategies, outlining their main stages from data preprocessing and training to feature learning from cryo-EM density maps, model building, and subsequent refinement. Recognizing the hierarchical organization of biomolecular structures, we classify these tools according to their ability to model the primary, secondary, tertiary, and quaternary structures of biomolecules. Tools for building atomic models in cryo-EM density maps are further grouped as <italic>de novo</italic>, where the atomic model is predicted directly from features learned from the cryo-EM density, or hybrid, where it is derived by integrating structural templates with these features. Subsequent sections address the assessment and validation of structural models built by these tools in cryo-EM density maps, the availability and applications of these tools, and their current limitations and potential future improvements. We complement the sections with figures and tables summarizing, for each tool, its training datasets, neural network architecture, prediction tasks, the types of biomolecules it builds, and its availability as servers or publicly accessible code. 
Our comprehensive review of deep learning-based methods for automated model building in cryo-EM density maps provides a timely, useful, and one-stop resource for structural biologists, researchers interested in applying these methods in cryo-EM workflows, and method developers aiming to design next-generation model-building tools.</p>
</sec>
<sec id="s2">
<label>2</label>
<title>A primer on biomacromolecular structure</title>
<p>Biomacromolecules are large, complex molecules typically formed by the polymerization of smaller repeating units called monomers. In biological systems, biomacromolecules are essential for life and most commonly include proteins, nucleic acids and carbohydrates. Nucleic acids - deoxyribonucleic acid (DNA) and ribonucleic acid (RNA) - store and transmit the genetic information essential for life. Proteins are the workhorses of biological systems, playing critical roles in virtually every cellular process including catalyzing biochemical reactions as enzymes, providing structural support, transporting molecules, and regulating cellular processes. The structure of biomacromolecules is often described in terms of hierarchical levels of complexity and organization. For proteins and nucleic acids, these are commonly referred to as <bold>primary, secondary, tertiary</bold>, and sometimes <bold>quaternary structures</bold>.</p>
<p>Proteins are linear biopolymers of repeating units called <bold>amino acids</bold>. There are 20 different types of amino acids commonly found in proteins. An amino acid has a central <bold>&#x3b1;-carbon (C&#x3b1;)</bold> atom to which four different groups are covalently attached: an amino group (-NH<sub>2</sub>), a carboxylic acid group (-COOH), a hydrogen atom (-H), and a <bold>side chain</bold> (-R group) unique to each amino acid. The side chain imparts distinct physicochemical properties to the amino acids (such as polar, non-polar, aromatic, aliphatic, acidic, basic) and thereby contributes to structural and functional diversity in proteins. Successive amino acids in a protein are covalently linked by <bold>peptide bonds</bold>, formed between the carboxyl group of one amino acid and the amino group of the next, creating the <bold>main chain or backbone (repeating -N-C&#x3b1;-C-)</bold>, to which distinctive side chains are attached (<xref ref-type="fig" rid="F2">Figure 2A</xref>). <bold>Polypeptides</bold> are polymers of a series of amino acids linked by peptide bonds, and each amino acid within a polypeptide chain is referred to as a <bold>residue</bold> (<xref ref-type="fig" rid="F2">Figure 2A</xref>). Proteins consist of one or more polypeptide chains. A polypeptide chain has directionality, with an amino-terminal (N-terminal), where the terminal residue has a free amino group, and a carboxy-terminal (C-terminal), where the terminal residue has a free carboxyl group. By convention, the sequence of amino acids in a polypeptide is written starting with the N-terminal residue. In successive amino acid residues within a polypeptide, the peptide bond is planar, restricting free rotation around the C&#x2013;N bond. 
However, free rotation is permitted around the N&#x2013;C&#x3b1; and C&#x3b1;&#x2013;C bonds, which are quantified by the <bold>backbone dihedral (torsion) angles &#x3c6; (phi) and &#x3c8; (psi)</bold>, respectively (<xref ref-type="fig" rid="F2">Figure 2A</xref>). These angles define the backbone conformation of a polypeptide, allowing proteins to fold into a variety of distinct structures. The polypeptide backbone is rich in hydrogen-bonding potential because each peptide bond contains both a hydrogen-bond donor (the NH group) and a hydrogen-bond acceptor (the CO group), allowing the backbone to stabilize structural elements within a protein. The <bold>primary structure</bold> of a protein refers to its linear <bold>sequence</bold> of amino acid <bold>residues</bold>, defined by the peptide bonds linking them along the polypeptide chain(s) (<xref ref-type="fig" rid="F2">Figure 2A</xref>). For any given segment of a polypeptide chain, <bold>secondary structure</bold> refers to the local spatial arrangement of its main-chain (backbone) atoms. The secondary structure of a polypeptide segment is completely determined when the backbone dihedral angles, &#x3c6; (phi) and &#x3c8; (psi), are known for each residue. When these angles adopt a repeating, consistent value throughout the segment, a recurring, periodic structural pattern arises, leading to regular secondary structures such as <bold>&#x3b1;-helices</bold> and <bold>&#x3b2;-strands</bold> (<xref ref-type="fig" rid="F2">Figure 2B</xref>). In an &#x3b1;-helix, the polypeptide backbone adopts a right-handed helical conformation, with amino acid side chains projecting outward from the helical axis. The &#x3b1;-helix is stabilized by intra-helix hydrogen bonds between the backbone carbonyl oxygen of residue <italic>i</italic> and the backbone amide hydrogen of residue <italic>i&#x2b;4</italic> (<xref ref-type="fig" rid="F2">Figure 2B</xref>). 
Compared to an &#x3b1;-helix, a &#x3b2;-strand represents an extended conformation of the polypeptide backbone. &#x3b2;-Sheets are composed of two or more &#x3b2;-strands arranged side-by-side in an antiparallel or parallel manner, having opposite or the same amino-to-carboxyl orientations, respectively (<xref ref-type="fig" rid="F2">Figure 2B</xref>). In &#x3b2;-strands, the side chains alternate above and below the plane of the backbone, giving &#x3b2;-sheets their characteristic pleated appearance. Unlike the intra-helix backbone hydrogen bonding in &#x3b1;-helices, &#x3b2;-sheets are stabilized by inter-strand hydrogen bonds between the backbones of adjacent &#x3b2;-strands (<xref ref-type="fig" rid="F2">Figure 2B</xref>). Random coils are examples of secondary structures in which no regular pattern exists. Turns or loops are secondary structure elements that connect successive runs of &#x3b1;-helices or &#x3b2;-strands, often facilitating a reversal of the polypeptide chain direction. An example is a &#x3b2;-turn, which connects the ends of two adjacent &#x3b2;-strands in an antiparallel &#x3b2;-sheet. Unlike secondary structure, which defines the local spatial arrangement of adjacent amino acid residues, <bold>tertiary structure</bold> describes the overall three-dimensional folding of the entire polypeptide chain, including interactions between residues that may be far apart in the primary structure and between different secondary structure elements (<xref ref-type="fig" rid="F2">Figure 2E</xref>). Thus, tertiary structure captures long-range interactions. Secondary structure elements such as &#x3b1;-helices and &#x3b2;-sheets can combine in specific ways through connecting segments to form supersecondary structures, or motifs, such as the helix&#x2013;turn&#x2013;helix. Tertiary structure is characterized by distinct overall folding patterns, or folds, that describe the three-dimensional arrangement of these motifs within a single polypeptide chain. 
A protein <bold>domain</bold> is a region of a polypeptide chain that can fold independently into a stable structure and can often move as a single unit relative to the rest of the chain (<xref ref-type="fig" rid="F2">Figure 2E</xref>). In large proteins, different domains may have distinct structural and functional properties. Many proteins are composed of more than one polypeptide chain, identical or different, associated non-covalently or through covalent disulfide bonds. Each polypeptide chain is called a subunit, and such proteins are referred to as multisubunit or multimeric proteins (<xref ref-type="fig" rid="F2">Figure 2E</xref>). The spatial arrangement and interactions of these subunits define <bold>quaternary structure</bold>, which can range from simple dimers composed of identical subunits to large complexes containing many different subunits (<xref ref-type="fig" rid="F2">Figure 2E</xref>).</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>Schematic illustrating the hierarchical organization of biomolecular structures (proteins and nucleic acids), as described in <xref ref-type="sec" rid="s2">Section 2</xref>. <bold>(A,C)</bold> Primary structure. <bold>(B,D)</bold> Secondary structure. <bold>(E)</bold> Tertiary and quaternary structure. The illustration was generated in PyMOL (<xref ref-type="bibr" rid="B50">DeLano and Lam, 2005</xref>) using the cryo-EM structure of AsCas12f-sgRNA-target DNA ternary complex (PDB ID: 8J12).</p>
</caption>
<graphic xlink:href="fmolb-12-1613399-g002.tif">
<alt-text content-type="machine-generated">Diagram illustrating the primary, secondary, tertiary and quaternary structures of proteins and nucleic acids. Protein structures are shown as polypeptides, alpha-helices, and beta-sheets. Nucleic acids are depicted as polynucleotides, DNA double helixes, and RNA stem-loops. The tertiary and quaternary structure shows a complex of protein-DNA/RNA with domains and subunits.</alt-text>
</graphic>
</fig>
<p>Nucleic acids - deoxyribonucleic acid (DNA) and ribonucleic acid (RNA) - are linear biopolymers of nucleotides. A nucleotide consists of three components&#x2013;a pentose <bold>sugar</bold>, a nitrogenous (nitrogen-containing) <bold>base</bold>, and a <bold>phosphate</bold> group (<xref ref-type="fig" rid="F2">Figure 2C</xref>). In RNA the sugar is D-ribose whereas in DNA the sugar is 2&#x2032;-deoxy-D-ribose which lacks the 2&#x2032;-hydroxyl group (<xref ref-type="fig" rid="F2">Figure 2D</xref>). In a nucleotide, the phosphate group is attached to the 5&#x2032; carbon of the sugar through a phosphoester bond, whereas the nitrogenous base is covalently attached to the 1&#x2032; carbon of the sugar via a glycosidic bond (<xref ref-type="fig" rid="F2">Figure 2C</xref>). A nucleotide without a phosphate group is called a nucleoside. In nucleic acids, there are five major nitrogenous bases, which are derivatives of either purines or pyrimidines (<xref ref-type="fig" rid="F2">Figure 2D</xref>). Adenine (A) and Guanine (G) are the major purine bases in both RNA and DNA (<xref ref-type="fig" rid="F2">Figure 2D</xref>). Among the three pyrimidine bases, Uracil (U) occurs in RNA, Thymine (T) is found in DNA while Cytosine (C) is present in both DNA and RNA (<xref ref-type="fig" rid="F2">Figure 2D</xref>). The successive nucleotides in nucleic acids are covalently linked through <bold>phosphodiester linkages</bold> where the phosphate group bridges the 3&#x2032; and 5&#x2032; positions of successive sugar moieties in the adjacent nucleotides, forming the sugar-phosphate backbone to which nitrogenous bases are attached as side groups (<xref ref-type="fig" rid="F2">Figure 2C</xref>). Nucleotides as part of nucleic acids are referred to as <bold>nucleotide residues</bold>. 
The <bold>primary structure</bold> of a nucleic acid refers to its linear sequence of nucleotide residues, distinguished by their nitrogenous bases, and defined by the phosphodiester bonds linking them along the polynucleotide chain. Polynucleotide chains in nucleic acids have directionality, with the 5&#x2032; end having a free phosphate group attached to the 5&#x2032; carbon of the terminal sugar, while the 3&#x2032; end has a free hydroxyl group on the 3&#x2032; carbon of the terminal sugar (<xref ref-type="fig" rid="F2">Figures 2C,D</xref>). The <bold>secondary structure</bold> of a nucleic acid refers to the regular, stable structures like double helices, hairpins, and loops formed by hydrogen bonding patterns between nitrogenous bases within a single polynucleotide chain or between two such chains. The most common form of DNA, known as B-DNA, consists of two helical polynucleotide chains wound around a common axis to form a right-handed double helix, with the nitrogenous bases positioned on the inside and the sugar&#x2013;phosphate backbone on the outside of the double helix (<xref ref-type="fig" rid="F2">Figure 2D</xref>). The two chains of the double helix have opposite directionality, making them antiparallel, and are held together by hydrogen bonding between complementary base pairs: A pairs with T and G pairs with C (<xref ref-type="fig" rid="F2">Figure 2D</xref>). This specific base pairing ensures that the two strands are complementary: whenever G occurs in one chain, C is found in the other and likewise for A and T. RNA molecules are mostly single-stranded and can fold into local double-helical regions or stem-loop secondary structures, also referred to as hairpin loops (<xref ref-type="fig" rid="F2">Figure 2D</xref>). Similar to DNA, G pairs with C, and A pairs with U in RNA. 
Self-complementary sequences in RNA form hairpin loops, consisting of a complementary base-paired stem and an unpaired loop at the end (<xref ref-type="fig" rid="F2">Figure 2D</xref>). The <bold>tertiary structure</bold> of a nucleic acid refers to the three-dimensional folding and spatial arrangement of its secondary structure elements, such as helices and hairpin loops, resulting in complex overall shapes (<xref ref-type="fig" rid="F2">Figure 2E</xref>). Common examples include supercoiled cellular DNA and tRNA (transfer RNA).</p>
</sec>
<sec id="s3">
<label>3</label>
<title>A primer on deep neural networks (DNNs)</title>
<p>This primer aims to provide a concise introduction to deep learning concepts and neural network architectures employed in automated model-building methods for cryo-EM. For a more comprehensive understanding of machine learning concepts, readers are referred to the reference (<xref ref-type="bibr" rid="B70">Greener et al., 2022</xref>).</p>
<sec id="s3-1">
<label>3.1</label>
<title>Artificial neurons and artificial neural networks</title>
<p>
<bold>Artificial neural networks (ANNs)</bold>, inspired by biological neural networks, are universal function approximators and can learn to model any mathematical function to a desired degree of accuracy (<xref ref-type="bibr" rid="B70">Greener et al., 2022</xref>). The fundamental information processing units of ANNs are interconnected <bold>artificial neurons</bold>, also inspired by biological neurons. A single artificial neuron is a mathematical function that processes and transforms input data (<xref ref-type="fig" rid="F3">Figure 3A</xref>). Each neuron receives one or more inputs (<italic>x</italic>
<sub>
<italic>i</italic>
</sub>), multiplies each input by a corresponding learnable weight term (<italic>w</italic>
<sub>
<italic>i</italic>
</sub>), sums these weighted inputs, adds a learnable bias term (<italic>b</italic>), and passes the result through a non-linear activation function (<italic>f</italic>) to produce its output (<italic>y</italic>) (<xref ref-type="fig" rid="F3">Figure 3A</xref>) (<xref ref-type="bibr" rid="B70">Greener et al., 2022</xref>). In other words, an artificial neuron computes a non-linear function of a weighted sum of its inputs.<disp-formula id="equ1">
<mml:math id="m1">
<mml:mrow>
<mml:mi>y</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>f</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>n</mml:mi>
</mml:munderover>
</mml:mstyle>
<mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>b</mml:mi>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>
<bold>(A)</bold> Illustration of an artificial neuron showing its inputs, weights, bias, activation function, and output. Schematic overview of representative neural network architectures used by deep learning tools for model building in cryo-EM: <bold>(B)</bold> Multilayer perceptron, <bold>(C)</bold> Convolutional neural network, <bold>(D)</bold> U-Net, <bold>(E)</bold> Graph neural network, and <bold>(F)</bold> Recurrent neural network. These architectures are described in <xref ref-type="sec" rid="s3">Section 3</xref>.</p>
</caption>
<graphic xlink:href="fmolb-12-1613399-g003.tif">
<alt-text content-type="machine-generated">(A) Diagram of an artificial neuron showing inputs, weights, bias, nonlinear activation function, and output. (B) Multilayer perceptron with input, hidden, and output layers depicted. (C) Convolutional neural network illustrating 3D convolution. (D) U-Net architecture demonstrating downsampling and upsampling paths with skip connections. (E) Graph neural network showing nodes, edges, and message passing operation. (F) Recurrent neural network highlighting input, hidden state, and output layers.</alt-text>
</graphic>
</fig>
<p>
<bold>Weights</bold> control the strength or importance of the connections between neurons, and the <bold>bias</bold> allows the activation function to shift its output, enabling the model to learn diverse patterns. The <bold>activation function</bold> introduces non-linearity, transforming the linear operations (weighted sums and biases) into non-linear ones. This allows the network to model complex relationships and effectively act as a universal function approximator. Non-linear patterns are inherent in most real-world data, and activation functions enable neural networks to capture this non-linear structure. In an ANN, artificial neurons are arranged in layers, with neurons in one layer connected to neurons in adjacent layers, while neurons within a layer do not communicate with each other. Layers correspond to different stages of computation in a neural network with the output of one layer serving as the input to the next.</p>
</sec>
<sec id="s3-2">
<label>3.2</label>
<title>Neural network architecture</title>
<p>A basic network typically consists of three types of layers: an input layer, one or more hidden layers, and an output layer (<xref ref-type="fig" rid="F3">Figure 3B</xref>). <bold>Deep learning</bold> (DL) is a branch of machine learning that employs ANNs for prediction and classification tasks, with the term &#x2018;<bold>deep</bold>&#x2019; referring to the use of <bold>multiple hidden layers</bold> in the network (<xref ref-type="fig" rid="F3">Figure 3B</xref>). Each neuron in the input layer typically corresponds to one input feature value calculated from the input data. Hidden layers, located between the input and output layers, process and transform the data: with multiple hidden layers, the network can learn hierarchical patterns and extract higher-level features. The output layer generates the network prediction, with the number of neurons determined by the specific task such as the number of classes in a classification problem. A mathematical function that quantifies the disagreement between the predicted output from a neural network and the ground truth values is known as <bold>loss function</bold> (<xref ref-type="bibr" rid="B70">Greener et al., 2022</xref>). Essentially, a loss function quantifies the error in predictions from a network. Weights and biases are the adjustable, learnable parameters of a neural network, and they are optimized during the network training process to minimize the chosen loss function. <bold>Backpropagation</bold> (<xref ref-type="bibr" rid="B98">LeCun et al., 2015</xref>) is a fundamental algorithm used to train neural networks. It employs the chain rule of calculus to compute the gradients of the loss function with respect to all weights and biases, indicating the direction and magnitude by which they should be adjusted to reduce the error. Subsequently, an <bold>optimization</bold> algorithm utilizes these gradients to update the weights and biases to minimize the loss function and improve network performance. 
TensorFlow (<xref ref-type="bibr" rid="B1">Abadi, 2016</xref>) and PyTorch (<xref ref-type="bibr" rid="B130">Paszke, 2019</xref>) are mainstream software platforms for training neural networks. Backpropagation&#x2013;optimization is an iterative process, a feedback loop involving the calculation of loss function gradients and subsequent parameter adjustments and is fundamental to the neural network learning process. However, the <bold>vanishing gradient problem</bold> may arise during the training of deep neural networks, where gradients become extremely small as they propagate through layers, making learning slow or ineffective.</p>
<p>Each neuron in a <bold>fully connected (FC)</bold> layer, also known as dense layer, is connected to every neuron in the previous layer, and its output is passed to every neuron in the next layer (<xref ref-type="fig" rid="F3">Figure 3B</xref>). A <bold>feedforward neural network (FNN)</bold> is a type of ANN where information flows in only one direction, from the input layer, through one or more hidden layers, to the output layer, without feedback loops or cycles (<xref ref-type="fig" rid="F3">Figure 3B</xref>). <bold>Multilayer perceptrons</bold> (MLPs) are a class of FNNs identified by at least three layers: an input layer, one or more hidden layers, and an output layer, with neurons typically connected in a dense or fully connected (FC) manner (<xref ref-type="fig" rid="F3">Figure 3B</xref>).</p>
<p>The need to handle the unique characteristics of different data modalities has driven the development of specialized neural network architectures (<xref ref-type="bibr" rid="B16">Bronstein, 2021</xref>) (<xref ref-type="table" rid="T1">Table 1</xref>). For example, the need for efficient local feature extraction in image processing led to the development of convolutional neural networks (CNNs) (<xref ref-type="bibr" rid="B97">LeCun and Bengio, 1998</xref>). Complex neural network architectures often incorporate fully connected layers and MLPs as sub-components. Neural network architectures commonly used in automated model-building methods for cryo-EM are described below.</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>Neural network architectures to handle specific data types.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Data type</th>
<th align="left">Structure</th>
<th align="left">Examples</th>
<th align="left">Architecture</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">
<bold>Grids</bold>
</td>
<td align="left">Regular arrays (1D, 2D, 3D) with fixed spatial relationships</td>
<td align="left">2D images, 3D cryo-EM density maps</td>
<td align="left">CNNs, U-Nets, Vision Transformers</td>
</tr>
<tr>
<td align="left">
<bold>Graphs</bold>
</td>
<td align="left">Irregular connectivity, nodes and edges define relationships</td>
<td align="left">Biomolecular structure, Interaction networks</td>
<td align="left">GNNs, Graph Transformers</td>
</tr>
<tr>
<td align="left">
<bold>Sequential</bold>
</td>
<td align="left">Ordered list of elements</td>
<td align="left">Protein/DNA sequences</td>
<td align="left">RNNs, LSTMs, Transformers</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s3-3">
<label>3.3</label>
<title>Grid-structured data and convolutional neural networks (CNNs)</title>
<p>Grid-structured data can be naturally represented as a regular, multi-dimensional array or grid, where each grid element has a fixed spatial or temporal relationship to its neighbors, thereby enabling spatial or temporal locality in the structure. Common examples include images, represented as 2D grids of pixels containing intensity values, and volumetric data such as 3D cryo-EM density maps, represented as 3D grids of voxels containing density values. <bold>Convolutional neural networks (CNNs)</bold> are specifically designed to process grid-structured data by exploiting inherent spatial or temporal locality in the data to detect complex patterns and extract hierarchical features (<xref ref-type="bibr" rid="B97">LeCun and Bengio, 1998</xref>; <xref ref-type="bibr" rid="B6">Alzubaidi et al., 2021</xref>). A CNN comprises one or more convolutional layers, where the values in the next layer are computed by applying learnable convolutional filters across the input grid, producing another grid-like layer as output (<xref ref-type="fig" rid="F3">Figure 3C</xref>). Each such output is known as a feature map, which contains specific features extracted from the input data (<xref ref-type="fig" rid="F3">Figure 3C</xref>). The convolution operation enforces local connectivity, such that each neuron in the convolutional layer connects only to a small, localized region of the input such as a patch of pixels in an image or a patch of voxels in a volumetric input (<xref ref-type="fig" rid="F3">Figure 3C</xref>). The extent of this local connectivity is referred to as the <bold>receptive field</bold> of a neuron in the convolutional layer. <bold>Dilated convolutions</bold> are convolution filters with spacing between their elements, which increases the receptive field. To extract a particular feature, the same filter is applied across the entire input grid. 
This parameter sharing reduces the number of trainable network parameters and allows CNNs to detect the same feature regardless of its location in the input. In cryo-EM, 3D CNNs are the go-to architecture for processing volumetric 3D density maps, as they can automatically and adaptively learn hierarchical patterns and features. 3D CNNs use 3D convolutional layers, where a 3D filter is applied across the volumetric input along all three spatial dimensions (x, y, z) producing a 3D output volume as a feature map that captures patterns across all dimensions (<xref ref-type="fig" rid="F3">Figure 3C</xref>). <bold>Pooling layers</bold> are typically inserted between successive convolutional layers to reduce computational load and improve network robustness. They downsample the feature maps generated by convolutional layers by reducing their spatial dimensions. This dimensionality reduction makes detected features more robust to variations in the input (e.g., translations) and decreases computational complexity. <bold>Max pooling</bold> is commonly used, where a filter slides over the feature map and selects the maximum value from each local region, reducing its size and emphasizing important features. A <bold>CNN classifier</bold>, located at the end of the CNN architecture after several convolutional and pooling layers, uses fully connected (FC) layers to classify the high-level features extracted from the input grid into specific categories. In an FC layer, each neuron is connected to all neurons of the previous layer, like a conventional multilayer perceptron (MLP) (<xref ref-type="fig" rid="F3">Figure 3B</xref>).</p>
<p>While traditional CNN architecture is well-suited for image classification, many tasks require pixel or voxel-wise classification, such as semantic segmentation, where a label is assigned to every pixel or voxel, and multi-scale feature extraction. The <bold>U-Net</bold> architecture, with its characteristic &#x2018;U&#x2019; shape, was specifically designed for this purpose (<xref ref-type="bibr" rid="B150">Ronneberger et al., 2015</xref>). The architecture of U-Nets consists of an encoder&#x2013;decoder structure with skip connections (<xref ref-type="fig" rid="F3">Figure 3D</xref>). The <bold>encoder (contracting path)</bold>, like a standard CNN, uses convolution and pooling (<bold>downsampling</bold>) layers to reduce spatial dimension, processing the grid-structured input to extract high-level feature maps and capture semantic information (<xref ref-type="fig" rid="F3">Figure 3D</xref>). The encoder leads to the <bold>bottleneck layer</bold> at the bottom of the &#x2018;U&#x2019;-shaped architecture, which contains an abstract, compact feature representation called the <bold>latent representation</bold> (<xref ref-type="fig" rid="F3">Figure 3D</xref>). The <bold>decoder (expanding path)</bold> then takes these features and uses deconvolution or transposed convolution <bold>(upsampling)</bold> layers to restore spatial dimension and reconstruct the output (<xref ref-type="fig" rid="F3">Figure 3D</xref>). A key feature of the U-Net is its use of <bold>skip connections</bold>, where the input to each layer (deconvolutional) in the decoder combines the output from the previous layer with the corresponding output from an encoder layer (convolutional), allowing information to bypass intermediate layers and preserve spatial details (<xref ref-type="fig" rid="F3">Figure 3D</xref>). Like 3D CNNs, 3D U-Nets are designed for segmentation of 3D grid-structured data (<xref ref-type="bibr" rid="B40">&#xc7;i&#xe7;ek, 2016</xref>). 
3D U-Nets excel at precise voxel-wise segmentation in cryo-EM density maps, accurately identifying backbone atoms, secondary structures, amino acid types, and nucleotides.</p>
<p>The vanishing gradient problem arises when training deep CNNs or U-Nets. <bold>Residual neural networks (ResNets)</bold> use residual connections to address this issue, facilitating the training of much deeper networks (<xref ref-type="bibr" rid="B73">He, 2016</xref>). Residual connections are a specific type of skip connection in which the input of a layer is added to its output. Early in training, if some layers are unnecessary, ResNet can learn to ignore them via skip connections, effectively allowing the network to &#x201c;skip&#x201d; over these layers and mitigate the vanishing gradient issue. ResNets can serve as a backbone architecture for feature extraction from cryo-EM density maps.</p>
<p>
<bold>Diffusion models</bold> are generative machine learning models that learn the underlying probability distribution of a dataset to create new data resembling the original (<xref ref-type="bibr" rid="B80">Ho, 2020</xref>). They operate by gradually adding noise to data (<bold>diffusion</bold>) and then learning to reverse this process to generate new data (<bold>denoising</bold>). The neural network architecture underlying the denoising step in diffusion models is often based on the U-Net architecture (<xref ref-type="fig" rid="F3">Figure 3D</xref>). In cryo-EM, diffusion models have shown strong performance in denoising density maps, thereby enhancing model building.</p>
</sec>
<sec id="s3-4">
<label>3.4</label>
<title>Graph-structured data and graph neural networks (GNNs)</title>
<p>A graph is a set of nodes (vertices) and edges (links), where each edge connects a pair of nodes (<xref ref-type="fig" rid="F3">Figure 3E</xref>). An attributed graph is a graph in which each node and edge is associated with one or more features, called attributes (<xref ref-type="fig" rid="F3">Figure 3E</xref>). <bold>Graph neural networks (GNNs)</bold> are neural network architectures specifically designed to operate on graph-structured data (<xref ref-type="bibr" rid="B16">Bronstein, 2021</xref>; <xref ref-type="bibr" rid="B177">Veli&#x10d;kovi&#x107;, 2023</xref>). Attributed graphs are commonly used in GNNs and other applications where each component of the graph carries additional information beyond just connectivity (<xref ref-type="fig" rid="F3">Figure 3E</xref>). In general, graphs do not have a canonical or fixed node ordering, and therefore graph operations are typically defined to be independent of node ordering. Accordingly, GNNs are designed to preserve the <bold>permutation symmetry</bold> of the input graph. A GNN accepts an attributed graph as input, performs learnable transformations on the node attributes while preserving permutation symmetry, and outputs a graph with updated node attributes but the same connectivity as the input graph (<xref ref-type="fig" rid="F3">Figure 3E</xref>). GNNs progressively transform node representations through <bold>message passing</bold> operations, where each node iteratively updates its features by aggregating and transforming the features of its neighboring nodes using learnable functions (<xref ref-type="fig" rid="F3">Figure 3E</xref>). <bold>Graph Convolutional Networks (GCNs)</bold> are neural networks designed for graph-structured data, performing convolution-like operations to aggregate and combine information from each node&#x2019;s neighbors. 
Biomacromolecules such as proteins and nucleic acids are naturally represented as graph-structured data, where atoms or residues serve as nodes, and the bonds or interactions connecting them serve as edges (<xref ref-type="bibr" rid="B15">Bengio, 2023</xref>). In an attributed graph representing a biomacromolecule, nodes (atoms or residues) can carry features such as element or residue type, charge, hydrophobicity, secondary structure, or 3D coordinates (<xref ref-type="bibr" rid="B15">Bengio, 2023</xref>). Edges (bonds or interactions) may include attributes like bond type, bond order, interaction type (e.g., hydrogen bond), distance, or interaction energy (<xref ref-type="bibr" rid="B15">Bengio, 2023</xref>). These attributes encode chemical, structural, and functional information, enabling richer representations for GNN tasks such as property prediction, structural analysis, or molecular modeling (<xref ref-type="bibr" rid="B15">Bengio, 2023</xref>).</p>
</sec>
<sec id="s3-5">
<label>3.5</label>
<title>Sequential data and recurrent neural networks (RNNs)</title>
<p>While grid and graph-structured input data are spatial in nature, many types of data operated on by machine learning models are sequential, such as text, speech, and time-series information. <bold>Recurrent neural networks (RNNs)</bold> are designed to process sequential data, where the order and context of information are important, and can be used to classify input sequences or predict sequence-dependent properties (<xref ref-type="fig" rid="F3">Figure 3F</xref>) (<xref ref-type="bibr" rid="B169">Sutskever, 2014</xref>). Their primary applications are in natural language processing (NLP), including speech recognition, language translation, and text generation (<xref ref-type="bibr" rid="B69">Graves, 2013</xref>). Unlike feedforward neural networks, which contain no loops or cycles, neurons in RNNs have recurrent connections that form directed cycles. Recurrent connections enable RNNs to maintain an internal or hidden state that captures information about previously processed inputs, giving RNNs a notion of memory and allowing information to cycle within the network (<xref ref-type="fig" rid="F3">Figure 3F</xref>). This mechanism enables RNNs to capture sequential and temporal dependencies in the input data. However, RNNs suffer from the vanishing gradient problem when trained on long sequences, which limits their ability to capture long-term dependencies. As a result, information from the early parts of a sequence may be lost by the time it reaches later steps. Gating mechanisms, implemented as special structures, were introduced in RNN architecture to address this problem. Standard RNNs update their hidden state at every step in the same way, causing early information to fade when sequences are long. Gates allow the network to decide what information to remember, forget, and output, enabling long-term memory. 
<bold>Long Short-Term Memory (LSTM)</bold> networks are gated RNNs consisting of a memory cell and three gates: input, forget, and output (<xref ref-type="bibr" rid="B81">Hochreiter and Schmidhuber, 1997</xref>). These gates control the flow of information into, out of, and within the memory cell, allowing it to retain important information over extended periods and enabling the network to effectively handle long-term dependencies in sequential data. LSTMs, often used in conjunction with other neural network architectures, have been applied to protein structure prediction and modeling (<xref ref-type="bibr" rid="B5">AlQuraishi, 2019</xref>).</p>
</sec>
<sec id="s3-6">
<label>3.6</label>
<title>Attention mechanisms and transformer networks</title>
<p>The sequential nature of input processing in RNNs makes it difficult to access specific parts of the sequence and to capture long-range dependencies when generating outputs. Attention mechanisms alleviate this limitation by allowing RNNs to focus on different parts of the input sequence (<xref ref-type="bibr" rid="B10">Bahdanau, 2014</xref>). <bold>Transformer networks</bold>, which achieved state-of-the-art results in NLP tasks, introduced the <bold>self-attention mechanism</bold>, enabling them to dynamically access all parts of the input sequence simultaneously and effectively capture long-range dependencies (<xref ref-type="bibr" rid="B176">Vaswani, 2017</xref>). Unlike the static filters of CNNs, self-attention dynamically computes filters based on the input, allowing Transformers to naturally process irregular and non-grid structured data. Unlike the attention used in RNNs, self-attention in Transformers analyzes all sequence elements in parallel and weighs their importance relative to each other, thus capturing both local and distant dependencies. In contrast to RNNs and LSTMs, Transformers enable parallel processing of sequential data during both training and inference, leading to significantly faster training times and making them well-suited for handling large-scale protein sequence datasets. Transformers generally adopt an encoder&#x2013;decoder architecture, where the encoder applies self-attention to learn relationships within the input sequence, and the decoder uses attention mechanisms to generate the output sequence while incorporating information from the encoder. In biological applications, Transformers model long-range interactions in protein sequences essential for protein structure prediction tasks (<xref ref-type="bibr" rid="B112">Ling, 2025</xref>). 
Originally designed for sequential data, Transformers have been adapted with domain-specific architectural modifications to process grid structured data (e.g., Vision Transformers (<xref ref-type="bibr" rid="B53">Dosovitskiy, 2020</xref>)) and graph structured data (e.g., Graph Transformers (<xref ref-type="bibr" rid="B126">M&#xfc;ller, 2023</xref>)), making them highly versatile across multiple domains.</p>
<p>The <bold>Swin-Conv-UNet</bold> was developed to improve performance in semantic segmentation (<xref ref-type="bibr" rid="B194">Zhang et al., 2023</xref>). It is a hybrid neural network architecture combining CNNs and transformer-based attention mechanisms within a U-Net framework. Like U-Net, it uses an encoder&#x2013;decoder structure with skip connections to capture multi-scale features and preserve spatial details (<xref ref-type="fig" rid="F3">Figure 3D</xref>). Swin Transformer blocks are integrated to model long-range dependencies, allowing the network to capture both local and global contextual information. The convolutional components (<bold>Conv</bold>) learn fine-grained local features, while the transformer components (<bold>Swin</bold>) model complex global relationships across multiple scales (<bold>UNet</bold>), making Swin-Conv-UNet particularly effective for high-resolution and structured data tasks.</p>
</sec>
</sec>
<sec id="s4">
<label>4</label>
<title>Deep learning-based automated model building in cryo-EM using a multi-modal approach</title>
<p>Deep learning methods for model building in cryo-EM density maps differ in the underlying neural network architecture, prediction task, target biomolecules, and the level of structural detail in the resulting models (<xref ref-type="table" rid="T2">Tables 2</xref>,<xref ref-type="table" rid="T3">3</xref>). However, most approaches follow a common multi-stage pipeline that transforms raw cryo-EM density maps into molecular models. Briefly, the representative steps are (<xref ref-type="fig" rid="F4">Figure 4</xref>): (1) preprocessing the input density map; (2) applying deep neural networks to learn features from the preprocessed density map that identify different aspects of a biomolecular structure; and (3) model building from the learned features followed by refinement.</p>
<table-wrap id="T2" position="float">
<label>TABLE 2</label>
<caption>
<p>Architecture of deep learning-based automated model building methods in cryo-EM.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th rowspan="2" align="center">S. No.</th>
<th rowspan="2" align="left">Method (year)</th>
<th rowspan="2" align="center">Resolution (&#x212b;)</th>
<th colspan="3" align="center">Multi-modal architecture</th>
<th rowspan="2" align="left">Final prediction task</th>
</tr>
<tr>
<th align="left">Feature learning</th>
<th align="left">Model building</th>
<th align="left">Post-processing/refinement</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td colspan="7" align="left">Primary structure</td>
</tr>
<tr>
<td align="left">1</td>
<td align="left">
<italic>findMySequence</italic> (<xref ref-type="bibr" rid="B36">Chojnowski et al., 2022</xref>) (2021)</td>
<td align="left">&#x2264;4.5</td>
<td align="left">MLP classifier</td>
<td align="left">Profile HMM, <italic>HMMER</italic> suite (<xref ref-type="bibr" rid="B54">Eddy, 2011</xref>)</td>
<td align="left">N/A</td>
<td align="left">Protein sequence identification</td>
</tr>
<tr>
<td align="left">2</td>
<td align="left">
<italic>checkMySequence</italic> (<xref ref-type="bibr" rid="B33">Chojnowski, 2022</xref>) (2022)</td>
<td align="left">&#x3c;4.0</td>
<td align="left">MLP classifier</td>
<td align="left">
<italic>findMySequence</italic> (<xref ref-type="bibr" rid="B36">Chojnowski et al., 2022</xref>), <italic>HMMER</italic> suite (<xref ref-type="bibr" rid="B54">Eddy, 2011</xref>)</td>
<td align="left">N/A</td>
<td align="left">Protein sequence-assignment validation</td>
</tr>
<tr>
<td align="left">3</td>
<td align="left">
<italic>doubleHelix</italic> (<xref ref-type="bibr" rid="B34">Chojnowski, 2023</xref>) (2023)</td>
<td align="left">&#x3c;5.0</td>
<td align="left">MLP classifier</td>
<td align="left">Brickworx (<xref ref-type="bibr" rid="B35">Chojnowski et al., 2015</xref>), INFERNAL suite (<xref ref-type="bibr" rid="B128">Nawrocki and Eddy, 2013</xref>), estimated base-type probabilities</td>
<td align="left">N/A</td>
<td align="left">Nucleic acid sequence identification, assignment and validation</td>
</tr>
<tr>
<td align="left">4</td>
<td align="left">
<italic>EMSequenceFinder</italic> (<xref ref-type="bibr" rid="B123">Mondal et al., 2025</xref>) (2024)</td>
<td align="left">3.0&#x2013;8.0</td>
<td align="left">3D CNN</td>
<td align="left">Traced or fitted backbone fragments, Bayesian scoring function</td>
<td align="left">N/A</td>
<td align="left">Amino acid residue sequence assignment</td>
</tr>
<tr>
<td colspan="7" align="left">Secondary structure</td>
</tr>
<tr>
<td align="left">5</td>
<td align="left">CNN-classifier (<xref ref-type="bibr" rid="B102">Li et al., 2016</xref>) (2017)</td>
<td align="left">5.0&#x2013;10.0</td>
<td align="left">3D CNN</td>
<td align="left">N/A</td>
<td align="left">N/A</td>
<td align="left">Protein SSEs: &#x3b1;-helix, &#x3b2;-sheet</td>
</tr>
<tr>
<td align="left">6</td>
<td align="left">Emap2sec (<xref ref-type="bibr" rid="B116">Maddhuri Venkata Subramaniya et al., 2019</xref>) (2019)</td>
<td align="left">5.0&#x2013;10.0</td>
<td align="left">3D CNN</td>
<td align="left">N/A</td>
<td align="left">N/A</td>
<td align="left">Protein SSEs: &#x3b1;-helix, &#x3b2;-sheet, others</td>
</tr>
<tr>
<td align="left">7</td>
<td align="left">Haruspex (<xref ref-type="bibr" rid="B124">Mostosi et al., 2020</xref>) (2020)</td>
<td align="left">&#x2264;4.0</td>
<td align="left">3D U-Net</td>
<td align="left">N/A</td>
<td align="left">N/A</td>
<td align="left">Protein SSEs: &#x3b1;-helix, &#x3b2;-sheet<break/>RNA/DNA (nucleotides)</td>
</tr>
<tr>
<td align="left">8</td>
<td align="left">Emap2sec&#x2b; (<xref ref-type="bibr" rid="B179">Wang et al., 2021</xref>) (2021)</td>
<td align="left">5.0&#x2013;10.0</td>
<td align="left">ResNet</td>
<td align="left">N/A</td>
<td align="left">N/A</td>
<td align="left">Protein SSEs: &#x3b1;-helix, &#x3b2;-sheet, others<break/>DNA/RNA (nucleotides)</td>
</tr>
<tr>
<td align="left">9</td>
<td align="left">EMNUSS (<xref ref-type="bibr" rid="B74">He and Huang, 2021a</xref>) (2021)</td>
<td align="left">2.0&#x2013;10.0</td>
<td align="left">Nested UNet (UNet&#x2b;&#x2b;)</td>
<td align="left">N/A</td>
<td align="left">N/A</td>
<td align="left">Protein SSEs: &#x3b1;-helix, &#x3b2;-sheet, others</td>
</tr>
<tr>
<td align="left">10</td>
<td align="left">DeepSSETracer (<xref ref-type="bibr" rid="B125">Mu et al., 2021</xref>) (2021)</td>
<td align="left">5.0&#x2013;10.0</td>
<td align="left">3D U-Net</td>
<td align="left">N/A</td>
<td align="left">N/A</td>
<td align="left">Protein SSEs: &#x3b1;-helix, &#x3b2;-sheet, others</td>
</tr>
<tr>
<td align="left">11</td>
<td align="left">HaPi (<xref ref-type="bibr" rid="B62">Garcia Condado et al., 2022</xref>) (2022)</td>
<td align="left">&#x2264;5.0</td>
<td align="left">3D CNN</td>
<td align="left">HandNet (CNN-based): Handedness of the map</td>
<td align="left">N/A</td>
<td align="left">Protein SSEs: right or left-handed &#x3b1;-helix</td>
</tr>
<tr>
<td align="left">12</td>
<td align="left">CryoSSESeg (<xref ref-type="bibr" rid="B155">Sazzed, 2024</xref>) (2024)</td>
<td align="left">5.0&#x2013;10.0</td>
<td align="left">3D U-Net</td>
<td align="left">N/A</td>
<td align="left">N/A</td>
<td align="left">Protein SSEs: &#x3b1;-helix, &#x3b2;-sheet, background</td>
</tr>
<tr>
<td align="left">13</td>
<td align="left">EMInfo (<xref ref-type="bibr" rid="B18">Cao et al., 2025</xref>) (2025)</td>
<td align="left">2.0&#x2013;10.0</td>
<td align="left">Nested U-Net (UNet&#x2b;&#x2b;)</td>
<td align="left">Utilized by EMBuild (<xref ref-type="bibr" rid="B76">He et al., 2022</xref>)</td>
<td align="left">N/A</td>
<td align="left">Protein SSEs: &#x3b1;-helix, &#x3b2;-strand, coil<break/>RNA/DNA (nucleotides)</td>
</tr>
<tr>
<td colspan="7" align="left">Tertiary and quaternary structure (<italic>de novo</italic>)</td>
</tr>
<tr>
<td align="left">14</td>
<td align="left">AAnchor (<xref ref-type="bibr" rid="B153">Rozanov and Wolfson, 2018</xref>) (2018)</td>
<td align="left">&#x2264;3.1</td>
<td align="left">3D CNN</td>
<td align="left">N/A</td>
<td align="left">N/A</td>
<td align="left">Amino acid types, coordinates (center of mass) and detection confidence</td>
</tr>
<tr>
<td align="left">15</td>
<td align="left">A<sup>2</sup>-Net (<xref ref-type="bibr" rid="B188">Xu et al., 2019</xref>) (2019)</td>
<td align="left">&#x2264;5.0</td>
<td align="left">3D CNN</td>
<td align="left">MCTS algorithm (<xref ref-type="bibr" rid="B157">Shen, 2018</xref>), KNN-Graph, Peptide Bond Recognition Network (CNN-based)</td>
<td align="left">N/A</td>
<td align="left">All-atom protein models</td>
</tr>
<tr>
<td align="left">16</td>
<td align="left">Cascaded-CNN (<xref ref-type="bibr" rid="B160">Si et al., 2020</xref>) (2020)</td>
<td align="left">2.6&#x2013;4.4</td>
<td align="left">3D CNN</td>
<td align="left">Path-walking algorithm, Graph and helix refinement, PULCHRA (<xref ref-type="bibr" rid="B152">Rotkiewicz and Skolnick, 2008</xref>), SCWRL4 (<xref ref-type="bibr" rid="B92">Krivov et al., 2009a</xref>)</td>
<td align="left">N/A</td>
<td align="left">Protein backbones with connected C&#x3b1; atoms</td>
</tr>
<tr>
<td align="left">17</td>
<td align="left">Structure Generator (<xref ref-type="bibr" rid="B100">Li, 2020</xref>) (2020)</td>
<td align="left">1.4&#x2013;1.8</td>
<td align="left">3D CNN based on ResNet</td>
<td align="left">GCN, LSTM</td>
<td align="left">N/A</td>
<td align="left">Protein atomic models</td>
</tr>
<tr>
<td align="left">18</td>
<td align="left">DeepTracer (<xref ref-type="bibr" rid="B137">Pfab et al., 2021</xref>) (2020)</td>
<td align="left">&#x2264;4.0</td>
<td align="left">3D U-Net</td>
<td align="left">Customized TSP and DP algorithms, Known geometric constraints, SCWRL4 (<xref ref-type="bibr" rid="B93">Krivov et al., 2009b</xref>)</td>
<td align="left">
<italic>phenix.real_space_refine</italic> (<xref ref-type="bibr" rid="B3">Afonine et al., 2018a</xref>)</td>
<td align="left">All-atom protein models</td>
</tr>
<tr>
<td align="left">19</td>
<td align="left">DeepMM (<xref ref-type="bibr" rid="B75">He and Huang, 2021b</xref>) (2021)</td>
<td align="left">2.5&#x2013;5.0</td>
<td align="left">Densely connected CNN (DenseNet)</td>
<td align="left">Smith-Waterman algorithm (<xref ref-type="bibr" rid="B164">Smith and Waterman, 1981</xref>), <italic>ctrip</italic> program from the Jackal modeling package (<xref ref-type="bibr" rid="B187">Xiang and Honig, 2001</xref>; <xref ref-type="bibr" rid="B134">Petrey et al., 2003</xref>)</td>
<td align="left">AMBER package (<xref ref-type="bibr" rid="B22">Case et al., 2025</xref>)</td>
<td align="left">All-atom protein models</td>
</tr>
<tr>
<td align="left">20</td>
<td align="left">SEGEM (<xref ref-type="bibr" rid="B28">Chen et al., 2021</xref>) (2021)</td>
<td align="left">2.0&#x2013;5.0</td>
<td align="left">3D CNN</td>
<td align="left">Backbone tracing and sequence assignment; scoring matrix, breadth-first search with pruning</td>
<td align="left">N/A</td>
<td align="left">Protein backbones with C&#x3b1; atoms aligned to sequence</td>
</tr>
<tr>
<td align="left">21</td>
<td align="left">SegmA (<xref ref-type="bibr" rid="B154">Rozanov and Wolfson, 2023</xref>) (2022)</td>
<td align="left">3.0&#x2013;3.4</td>
<td align="left">G-CNN, U-Net</td>
<td align="left">N/A</td>
<td align="left">N/A</td>
<td align="left">Amino acid types and locations<break/>Nucleotide locations</td>
</tr>
<tr>
<td align="left">22</td>
<td align="left">DeepTracer-2.0 (<xref ref-type="bibr" rid="B127">Nakamura et al., 2023</xref>) (2023)</td>
<td align="left">&#x2264;4.0</td>
<td align="left">3D U-Net</td>
<td align="left">Proteins: DeepTracer (<xref ref-type="bibr" rid="B137">Pfab et al., 2021</xref>)<break/>Nucleic acids: Known geometric constraints, Brickworx model (<xref ref-type="bibr" rid="B35">Chojnowski et al., 2015</xref>)</td>
<td align="left">N/A</td>
<td align="left">All-atom protein-DNA/RNA models</td>
</tr>
<tr>
<td align="left">23</td>
<td align="left">ModelAngelo (<xref ref-type="bibr" rid="B87">Jamali et al., 2024</xref>) (2023)</td>
<td align="left">&#x2264;4.0</td>
<td align="left">CNN</td>
<td align="left">GNN, HMM profile, <italic>HMMER</italic> (<xref ref-type="bibr" rid="B54">Eddy, 2011</xref>)</td>
<td align="left">GNN, <italic>phenix.real_space_refine</italic> (<xref ref-type="bibr" rid="B3">Afonine et al., 2018a</xref>)</td>
<td align="left">All-atom protein complex models<break/>Nucleic acid backbones</td>
</tr>
<tr>
<td align="left">24</td>
<td align="left">CryoREAD (<xref ref-type="bibr" rid="B180">Wang et al., 2023</xref>) (2023)</td>
<td align="left">2.0&#x2013;5.0</td>
<td align="left">3D U-Net</td>
<td align="left">VRP solver (<xref ref-type="bibr" rid="B143">Psaraftis, 1988</xref>), DP algorithm, CP solver (<xref ref-type="bibr" rid="B151">Rossi, 2006</xref>), known geometric constraints</td>
<td align="left">
<italic>phenix.real_space_refine</italic> (<xref ref-type="bibr" rid="B3">Afonine et al., 2018a</xref>), COOT (<xref ref-type="bibr" rid="B55">Emsley and Cowtan, 2004</xref>)</td>
<td align="left">All-atom RNA/DNA models</td>
</tr>
<tr>
<td align="left">25</td>
<td align="left">SMARTFold (<xref ref-type="bibr" rid="B105">Li et al., 2023</xref>) (2023)</td>
<td align="left">&#x3c;8.0</td>
<td align="left">3D U-Net</td>
<td align="left">EMformer (Transformer-based), AlphaFold2 (<xref ref-type="bibr" rid="B89">Jumper et al., 2021</xref>) inspired structure module</td>
<td align="left">N/A</td>
<td align="left">All-atom protein models</td>
</tr>
<tr>
<td align="left">26</td>
<td align="left">EMRNA (<xref ref-type="bibr" rid="B108">Li et al., 2025b</xref>) (2024)</td>
<td align="left">2.0&#x2013;6.0</td>
<td align="left">Swin-Conv-UNet (SCUNet)</td>
<td align="left">TSP algorithm (<xref ref-type="bibr" rid="B78">Helsgaun, 2000</xref>), Smith-Waterman algorithm (<xref ref-type="bibr" rid="B164">Smith and Waterman, 1981</xref>), Kabsch superposition (<xref ref-type="bibr" rid="B90">Kabsch, 1976</xref>)</td>
<td align="left">AMBER package (<xref ref-type="bibr" rid="B22">Case et al., 2025</xref>), <italic>phenix.real_space_refine</italic> (<xref ref-type="bibr" rid="B3">Afonine et al., 2018a</xref>)</td>
<td align="left">All-atom RNA models</td>
</tr>
<tr>
<td align="left">27</td>
<td align="left">Cryo2Struct (<xref ref-type="bibr" rid="B63">Giri and Cheng, 2024</xref>) (2024)</td>
<td align="left">1.0&#x2013;4.0</td>
<td align="left">3D Transformer</td>
<td align="left">HMM (<xref ref-type="bibr" rid="B144">Rabiner and Juang, 1986</xref>), customized Viterbi algorithm (<xref ref-type="bibr" rid="B61">Forney, 1973</xref>)</td>
<td align="left">
<italic>phenix.real_space_refine</italic> (<xref ref-type="bibr" rid="B3">Afonine et al., 2018a</xref>)</td>
<td align="left">Protein backbones with C&#x3b1; atoms aligned to sequence</td>
</tr>
<tr>
<td align="left">28</td>
<td align="left">EmodelX (<xref ref-type="bibr" rid="B30">Chen et al., 2024a</xref>) (2024)</td>
<td align="left">2.0&#x2013;4.0</td>
<td align="left">3D Residual U-Net</td>
<td align="left">PULCHRA (<xref ref-type="bibr" rid="B152">Rotkiewicz and Skolnick, 2008</xref>)</td>
<td align="left">
<italic>phenix.real_space_refine</italic> (<xref ref-type="bibr" rid="B3">Afonine et al., 2018a</xref>)</td>
<td align="left">All-atom protein complex models</td>
</tr>
<tr>
<td align="left">29</td>
<td align="left">EM2NA (<xref ref-type="bibr" rid="B106">Li et al., 2024</xref>) (2024)</td>
<td align="left">&#x2264;5</td>
<td align="left">Swin-Conv-UNet (SCUNet)</td>
<td align="left">VRP algorithm (<xref ref-type="bibr" rid="B79">Helsgaun, 2017</xref>), Smith-Waterman algorithm (<xref ref-type="bibr" rid="B164">Smith and Waterman, 1981</xref>), Arena algorithm (<xref ref-type="bibr" rid="B133">Perry et al., 2023</xref>)</td>
<td align="left">
<italic>phenix.real_space_refine</italic> (<xref ref-type="bibr" rid="B3">Afonine et al., 2018a</xref>)</td>
<td align="left">All-atom DNA/RNA models</td>
</tr>
<tr>
<td align="left">30</td>
<td align="left">CryFold (<xref ref-type="bibr" rid="B168">Su et al., 2025</xref>) (2024)</td>
<td align="left">2.0&#x2013;7.0</td>
<td align="left">3D U-Net</td>
<td align="left">Cry-Net (transformer-based modules): Cryformer (encoder) and Structure Module (decoder), HMM profile, Heuristic algorithm of ModelAngelo (<xref ref-type="bibr" rid="B87">Jamali et al., 2024</xref>)</td>
<td align="left">N/A</td>
<td align="left">All-atom protein models</td>
</tr>
<tr>
<td align="left">31</td>
<td align="left">DeepCryoRNA (<xref ref-type="bibr" rid="B101">Li and Chen, 2025</xref>) (2025)</td>
<td align="left">&#x3c;6.0</td>
<td align="left">MultiResUNet (based on U-Net)</td>
<td align="left">Known geometric constraints, Customized Gotoh algorithm (<xref ref-type="bibr" rid="B68">Gotoh, 1982</xref>)</td>
<td align="left">QRNAS software (<xref ref-type="bibr" rid="B166">Stasiewicz et al., 2019</xref>)</td>
<td align="left">All-atom RNA models</td>
</tr>
<tr>
<td align="left">32</td>
<td align="left">E3-CryoFold (<xref ref-type="bibr" rid="B182">Wang et al., 2025</xref>) (2025)</td>
<td align="left">1.0&#x2013;4.0</td>
<td align="left">3D and sequence Transformer</td>
<td align="left">SE (3) GNN</td>
<td align="left">SE (3) GNN</td>
<td align="left">All-atom protein complex models</td>
</tr>
<tr>
<td colspan="7" align="left">Tertiary and quaternary structure (hybrid)</td>
</tr>
<tr>
<td align="left">33</td>
<td align="left">SEGEM&#x2b;&#x2b; (<xref ref-type="bibr" rid="B28">Chen et al., 2021</xref>) (2021)</td>
<td align="left">2.0&#x2013;5.0</td>
<td align="left">3D CNN</td>
<td align="left">AlphaFold2 (<xref ref-type="bibr" rid="B89">Jumper et al., 2021</xref>), SEGEM (<xref ref-type="bibr" rid="B28">Chen et al., 2021</xref>)</td>
<td align="left">N/A</td>
<td align="left">All-atom protein models</td>
</tr>
<tr>
<td align="left">34</td>
<td align="left">CR-I-TASSER (<xref ref-type="bibr" rid="B193">Zhang et al., 2022</xref>) (2022)</td>
<td align="left">2.0&#x2013;15.0</td>
<td align="left">3D CNN</td>
<td align="left">LOMETS (<xref ref-type="bibr" rid="B198">Zheng et al., 2019</xref>), ResPRE (<xref ref-type="bibr" rid="B103">Li et al., 2019</xref>), REMC simulations, I-TASSER method (<xref ref-type="bibr" rid="B189">Yang et al., 2015</xref>)</td>
<td align="left">FG-MD (<xref ref-type="bibr" rid="B192">Zhang et al., 2011</xref>)</td>
<td align="left">All-atom protein models</td>
</tr>
<tr>
<td align="left">35</td>
<td align="left">DEMO-EM (<xref ref-type="bibr" rid="B202">Zhou et al., 2022</xref>) (2022)</td>
<td align="left">3.0&#x2013;10.0</td>
<td align="left">DomainDist (ResNet-based)</td>
<td align="left">D-I-TASSER (<xref ref-type="bibr" rid="B200">Zheng et al., 2025</xref>), L-BFGS (<xref ref-type="bibr" rid="B114">Liu and Nocedal, 1989</xref>), REMC simulation, FASPR (<xref ref-type="bibr" rid="B83">Huang et al., 2020</xref>)</td>
<td align="left">FG-MD (<xref ref-type="bibr" rid="B192">Zhang et al., 2011</xref>)</td>
<td align="left">All-atom protein models</td>
</tr>
<tr>
<td align="left">36</td>
<td align="left">DeepTracer-ID (<xref ref-type="bibr" rid="B24">Chang et al., 2022</xref>) (2022)</td>
<td align="left">&#x3c;4.2</td>
<td align="left">Utilizes DeepTracer (<xref ref-type="bibr" rid="B137">Pfab et al., 2021</xref>) (3D U-Net based)</td>
<td align="left">AlphaFold2 (<xref ref-type="bibr" rid="B89">Jumper et al., 2021</xref>), DeepTracer (<xref ref-type="bibr" rid="B137">Pfab et al., 2021</xref>), PyMOL-align (<xref ref-type="bibr" rid="B50">DeLano and Lam, 2005</xref>), PyMOL-cealign (<xref ref-type="bibr" rid="B158">Shindyalov and Bourne, 1998</xref>), FATCAT (<xref ref-type="bibr" rid="B104">Li et al., 2020</xref>)</td>
<td align="left">N/A</td>
<td align="left">All-atom protein models</td>
</tr>
<tr>
<td align="left">37</td>
<td align="left">EMBuild (<xref ref-type="bibr" rid="B76">He et al., 2022</xref>) (2022)</td>
<td align="left">4.0&#x2013;8.0</td>
<td align="left">Nested U-Net (UNet&#x2b;&#x2b;)</td>
<td align="left">AlphaFold2 (<xref ref-type="bibr" rid="B89">Jumper et al., 2021</xref>), SWORD (<xref ref-type="bibr" rid="B141">Postic et al., 2017</xref>), Fast Fourier Transform (FFT)-based global alignment (<xref ref-type="bibr" rid="B184">Wen et al., 2020</xref>), Bron&#x2013;Kerbosch algorithm</td>
<td align="left">
<italic>phenix.real_space_refine</italic> (<xref ref-type="bibr" rid="B3">Afonine et al., 2018a</xref>)</td>
<td align="left">All-atom protein complex model</td>
</tr>
<tr>
<td align="left">38</td>
<td align="left">FFF (<xref ref-type="bibr" rid="B29">Chen et al., 2023</xref>) (2023)</td>
<td align="left">1.0&#x2013;4.0</td>
<td align="left">RetinaNet (ResNet-based)</td>
<td align="left">AlphaFold2 (<xref ref-type="bibr" rid="B89">Jumper et al., 2021</xref>), TMD (<xref ref-type="bibr" rid="B156">Schlitter et al., 1994</xref>)</td>
<td align="left">MDFF (<xref ref-type="bibr" rid="B175">Trabuco et al., 2009</xref>)</td>
<td align="left">All-atom protein models</td>
</tr>
<tr>
<td align="left">39</td>
<td align="left">CrAI (<xref ref-type="bibr" rid="B117">Mallet et al., 2025</xref>) (2023)</td>
<td align="left">&#x3c;10.0</td>
<td align="left">3D U-Net</td>
<td align="left">Template Fabs and VHHs, Detection, localization and alignment of templates</td>
<td align="left">N/A</td>
<td align="left">Location and orientation (pose) of antibody fragments (Fabs and VHHs)</td>
</tr>
<tr>
<td align="left">40</td>
<td align="left">DeepMainmast (<xref ref-type="bibr" rid="B172">Terashi et al., 2024</xref>) (2023)</td>
<td align="left">2.5&#x2013;5.0</td>
<td align="left">UNet3&#x2b; (3D U-Net based)</td>
<td align="left">VRP solver (<xref ref-type="bibr" rid="B143">Psaraftis, 1988</xref>), Smith-Waterman algorithm (<xref ref-type="bibr" rid="B164">Smith and Waterman, 1981</xref>), AlphaFold2 (<xref ref-type="bibr" rid="B89">Jumper et al., 2021</xref>), CP solver (<xref ref-type="bibr" rid="B132">Perron and Lee, 2011</xref>) VESPER (<xref ref-type="bibr" rid="B72">Han et al., 2021</xref>), PULCHRA (<xref ref-type="bibr" rid="B152">Rotkiewicz and Skolnick, 2008</xref>)</td>
<td align="left">RosettaCM (<xref ref-type="bibr" rid="B165">Song et al., 2013</xref>)</td>
<td align="left">All-atom protein complex models</td>
</tr>
<tr>
<td align="left">41</td>
<td align="left">DEMO-EM2 (<xref ref-type="bibr" rid="B195">Zhang et al., 2024</xref>) (2024)</td>
<td align="left">3.0&#x2013;10.0</td>
<td align="left">Utilizes FUpred (<xref ref-type="bibr" rid="B199">Zheng et al., 2020</xref>) (ResNet-based)</td>
<td align="left">AlphaFold2 (<xref ref-type="bibr" rid="B89">Jumper et al., 2021</xref>), L-BFGS (<xref ref-type="bibr" rid="B114">Liu and Nocedal, 1989</xref>), FUpred (<xref ref-type="bibr" rid="B199">Zheng et al., 2020</xref>), DE algorithm (<xref ref-type="bibr" rid="B167">Storn and Price, 1997</xref>)</td>
<td align="left">Global domain optimization</td>
<td align="left">All-atom protein complex models</td>
</tr>
<tr>
<td align="left">42</td>
<td align="left">DeepTracer-Refine (<xref ref-type="bibr" rid="B31">Chen et al., 2024b</xref>) (2024)</td>
<td align="left">&#x2264;4.0</td>
<td align="left">Utilizes DeepTracer (<xref ref-type="bibr" rid="B137">Pfab et al., 2021</xref>) (3D U-Net based)</td>
<td align="left">AlphaFold2 (<xref ref-type="bibr" rid="B89">Jumper et al., 2021</xref>), PyMOL cealign (<xref ref-type="bibr" rid="B158">Shindyalov and Bourne, 1998</xref>), PyMOL align (<xref ref-type="bibr" rid="B50">DeLano and Lam, 2005</xref>), and Chimera MatchMaker (<xref ref-type="bibr" rid="B121">Meng et al., 2006</xref>)</td>
<td align="left">DeepTracer-Refine (<xref ref-type="bibr" rid="B31">Chen et al., 2024b</xref>)</td>
<td align="left">All-atom protein models</td>
</tr>
<tr>
<td align="left">43</td>
<td align="left">EmodelX (&#x2b;AF) (<xref ref-type="bibr" rid="B30">Chen et al., 2024a</xref>) (2024)</td>
<td align="left">2.0&#x2013;4.0</td>
<td align="left">3D Residual U-Net</td>
<td align="left">AlphaFold2 (<xref ref-type="bibr" rid="B89">Jumper et al., 2021</xref>), EmodelX (<xref ref-type="bibr" rid="B30">Chen et al., 2024a</xref>)</td>
<td align="left">EmodelX (<xref ref-type="bibr" rid="B30">Chen et al., 2024a</xref>)</td>
<td align="left">All-atom protein complex models</td>
</tr>
<tr>
<td align="left">44</td>
<td align="left">CryoJAM (<xref ref-type="bibr" rid="B20">Carrion et al., 2024</xref>) (2024)</td>
<td align="left">4.0&#x2013;8.0</td>
<td align="left">3D U-Net</td>
<td align="left">Known homolog structures, KD-tree (<xref ref-type="bibr" rid="B163">Skrodzki, 2019</xref>), PULCHRA (<xref ref-type="bibr" rid="B152">Rotkiewicz and Skolnick, 2008</xref>)</td>
<td align="left">N/A</td>
<td align="left">Automates protein homolog model fitting</td>
</tr>
<tr>
<td align="left">45</td>
<td align="left">DiffModeler (<xref ref-type="bibr" rid="B181">Wang et al., 2024</xref>) (2024)</td>
<td align="left">&#x2264;15.0</td>
<td align="left">Diffusion model</td>
<td align="left">AlphaFold2 (<xref ref-type="bibr" rid="B89">Jumper et al., 2021</xref>), SWORD2 (<xref ref-type="bibr" rid="B43">Cretin et al., 2022</xref>), VESPER algorithm (<xref ref-type="bibr" rid="B72">Han et al., 2021</xref>)</td>
<td align="left">N/A</td>
<td align="left">All-atom protein complex models<break/>All-atom protein-nucleic acid complex models (when integrated with CryoREAD)</td>
</tr>
<tr>
<td align="left">46</td>
<td align="left">DeepTracer-LowResEnhance (<xref ref-type="bibr" rid="B115">Ma and Si, 2025</xref>) (2024)</td>
<td align="left">2.5&#x2013;8.4</td>
<td align="left">Utilizes DeepTracer (<xref ref-type="bibr" rid="B137">Pfab et al., 2021</xref>) and CryoFEM (<xref ref-type="bibr" rid="B45">Dai et al., 2023</xref>) (3D U-Net based)</td>
<td align="left">AlphaFold2 (<xref ref-type="bibr" rid="B89">Jumper et al., 2021</xref>), DeepTracer (<xref ref-type="bibr" rid="B137">Pfab et al., 2021</xref>)</td>
<td align="left">DeepTracer (<xref ref-type="bibr" rid="B137">Pfab et al., 2021</xref>)</td>
<td align="left">All-atom protein models</td>
</tr>
<tr>
<td align="left">47</td>
<td align="left">Cryo2Struct2 (<xref ref-type="bibr" rid="B64">Giri and Cheng, 2025</xref>) (2025)</td>
<td align="left">1.0&#x2013;4.0</td>
<td align="left">3D SegFormer (Transformer-based)</td>
<td align="left">HMM (<xref ref-type="bibr" rid="B144">Rabiner and Juang, 1986</xref>), customized Viterbi algorithm (<xref ref-type="bibr" rid="B61">Forney, 1973</xref>), AlphaFold3 (<xref ref-type="bibr" rid="B2">Abramson et al., 2024</xref>)</td>
<td align="left">N/A</td>
<td align="left">All-atom protein complex models</td>
</tr>
<tr>
<td align="left">48</td>
<td align="left">DEMO-EMfit (<xref ref-type="bibr" rid="B17">Cai et al., 2025</xref>) (2025)</td>
<td align="left">3.0&#x2013;10.0</td>
<td align="left">Utilizes DiffModeler (<xref ref-type="bibr" rid="B181">Wang et al., 2024</xref>) (Diffusion model)</td>
<td align="left">Fast Fourier Transform (FFT), L-BFGS (<xref ref-type="bibr" rid="B114">Liu and Nocedal, 1989</xref>), FUpred (<xref ref-type="bibr" rid="B199">Zheng et al., 2020</xref>), DE algorithm (<xref ref-type="bibr" rid="B167">Storn and Price, 1997</xref>)</td>
<td align="left">Domain-level refinement</td>
<td align="left">Automates fitting of protein and protein-nucleic acid complex models</td>
</tr>
<tr>
<td align="left">49</td>
<td align="left">CryoDomain (<xref ref-type="bibr" rid="B46">Dai et al., 2025</xref>) (2025)</td>
<td align="left">4.0&#x2013;10.0</td>
<td align="left">Residual U-Net and Swin-Conv U-Net</td>
<td align="left">Density-atom embedding Database (DateDB)</td>
<td align="left">N/A</td>
<td align="left">Protein domain identification</td>
</tr>
<tr>
<td align="left">50</td>
<td align="left">DEMO-EMol (<xref ref-type="bibr" rid="B197">Zhang et al., 2025b</xref>) (2025)</td>
<td align="left">1.96&#x2013;12.77</td>
<td align="left">Utilizes EMNUSS (<xref ref-type="bibr" rid="B74">He and Huang, 2021a</xref>) (UNet&#x2b;&#x2b; based)</td>
<td align="left">L-BFGS (<xref ref-type="bibr" rid="B114">Liu and Nocedal, 1989</xref>), DEMO-EMfit (<xref ref-type="bibr" rid="B17">Cai et al., 2025</xref>), DE algorithm (<xref ref-type="bibr" rid="B201">Zhou et al., 2020</xref>)</td>
<td align="left">Domain-level flexible refinement</td>
<td align="left">All-atom protein-nucleic acid complex models</td>
</tr>
<tr>
<td align="left">51</td>
<td align="left">MICA (<xref ref-type="bibr" rid="B71">Gyawali et al., 2025</xref>) (2025)</td>
<td align="left">1.5&#x2013;4.0</td>
<td align="left">Multimodal encoder-decoder framework</td>
<td align="left">EmodelX (&#x2b;AF) (<xref ref-type="bibr" rid="B30">Chen et al., 2024a</xref>), AlphaFold3 (<xref ref-type="bibr" rid="B2">Abramson et al., 2024</xref>), PULCHRA (<xref ref-type="bibr" rid="B152">Rotkiewicz and Skolnick, 2008</xref>)</td>
<td align="left">
<italic>phenix.real_space_refine</italic> (<xref ref-type="bibr" rid="B3">Afonine et al., 2018a</xref>)</td>
<td align="left">All-atom protein complex models</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>
<bold>Abbreviations</bold> (described in text and Glossary section): Secondary structure elements (<bold>SSEs</bold>), Profile Hidden Markov Model (profile <bold>HMM</bold>), Traveling Salesman Problem (<bold>TSP</bold>) solver, Vehicle Routing Problem (<bold>VRP</bold>) solver, Monte Carlo Tree Search (<bold>MCTS</bold>), Special Euclidean Group of degree 3 (<bold>SE (3)</bold>), Dynamic Programming (<bold>DP</bold>), Constraint Programming (<bold>CP</bold>), Differential Evolution (<bold>DE</bold>), Replica Exchange Monte Carlo (<bold>REMC</bold>) simulation, Limited-memory Broyden&#x2013;Fletcher&#x2013;Goldfarb&#x2013;Shanno (<bold>L-BFGS</bold>).</p>
</fn>
</table-wrap-foot>
</table-wrap>
<table-wrap id="T3" position="float">
<label>TABLE 3</label>
<caption>
<p>Available cryo-EM structure modeling tools across resolutions and molecules.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th rowspan="2" align="left">Biomolecule</th>
<th rowspan="2" align="center">Primary structure</th>
<th rowspan="2" align="center">Secondary structure</th>
<th colspan="2" align="center">Tertiary and quaternary structure</th>
</tr>
<tr>
<th align="center">
<italic>De novo</italic>
</th>
<th align="center">Hybrid</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">
<bold>Proteins</bold>
</td>
<td align="left">
<italic>findMySequence</italic> (<xref ref-type="bibr" rid="B36">Chojnowski et al., 2022</xref>), <italic>checkMySequence</italic> (<xref ref-type="bibr" rid="B33">Chojnowski, 2022</xref>), <italic>EMSequenceFinder</italic> (<xref ref-type="bibr" rid="B123">Mondal et al., 2025</xref>)</td>
<td align="left">CNN-classifier (<xref ref-type="bibr" rid="B102">Li et al., 2016</xref>), Emap2sec (<xref ref-type="bibr" rid="B116">Maddhuri Venkata Subramaniya et al., 2019</xref>), EMNUSS (<xref ref-type="bibr" rid="B74">He and Huang, 2021a</xref>), DeepSSETracer (<xref ref-type="bibr" rid="B125">Mu et al., 2021</xref>), HaPi (<xref ref-type="bibr" rid="B62">Garcia Condado et al., 2022</xref>), CryoSSESeg (<xref ref-type="bibr" rid="B155">Sazzed, 2024</xref>)</td>
<td align="left">AAnchor (<xref ref-type="bibr" rid="B153">Rozanov and Wolfson, 2018</xref>), A<sup>2</sup>-Net (<xref ref-type="bibr" rid="B188">Xu et al., 2019</xref>), Cascaded-CNN (<xref ref-type="bibr" rid="B160">Si et al., 2020</xref>), Structure Generator (<xref ref-type="bibr" rid="B100">Li, 2020</xref>), DeepTracer (<xref ref-type="bibr" rid="B137">Pfab et al., 2021</xref>), DeepMM (<xref ref-type="bibr" rid="B75">He and Huang, 2021b</xref>), SEGEM (<xref ref-type="bibr" rid="B28">Chen et al., 2021</xref>), SegmA (<xref ref-type="bibr" rid="B154">Rozanov and Wolfson, 2023</xref>), SMARTFold (<xref ref-type="bibr" rid="B105">Li et al., 2023</xref>), Cryo2Struct (<xref ref-type="bibr" rid="B63">Giri and Cheng, 2024</xref>), EmodelX (<xref ref-type="bibr" rid="B30">Chen et al., 2024a</xref>), CryFold (<xref ref-type="bibr" rid="B168">Su et al., 2025</xref>), E3-CryoFold (<xref ref-type="bibr" rid="B182">Wang et al., 2025</xref>)</td>
<td align="left">SEGEM&#x2b;&#x2b; (<xref ref-type="bibr" rid="B28">Chen et al., 2021</xref>), CR-I-TASSER (<xref ref-type="bibr" rid="B193">Zhang et al., 2022</xref>), DEMO-EM (<xref ref-type="bibr" rid="B202">Zhou et al., 2022</xref>), DeepTracer-ID (<xref ref-type="bibr" rid="B24">Chang et al., 2022</xref>), EMBuild (<xref ref-type="bibr" rid="B76">He et al., 2022</xref>), FFF (<xref ref-type="bibr" rid="B29">Chen et al., 2023</xref>), CrAI (<xref ref-type="bibr" rid="B117">Mallet et al., 2025</xref>), DeepMainmast (<xref ref-type="bibr" rid="B172">Terashi et al., 2024</xref>), DEMO-EM2 (<xref ref-type="bibr" rid="B195">Zhang et al., 2024</xref>), DeepTracer-Refine (<xref ref-type="bibr" rid="B31">Chen et al., 2024b</xref>), EmodelX (&#x2b;AF) (<xref ref-type="bibr" rid="B30">Chen et al., 2024a</xref>), CryoJAM (<xref ref-type="bibr" rid="B20">Carrion et al., 2024</xref>), DeepTracer-LowResEnhance (<xref ref-type="bibr" rid="B115">Ma and Si, 2025</xref>), Cryo2Struct2 (<xref ref-type="bibr" rid="B64">Giri and Cheng, 2025</xref>), CryoDomain (<xref ref-type="bibr" rid="B46">Dai et al., 2025</xref>), MICA (<xref ref-type="bibr" rid="B71">Gyawali et al., 2025</xref>)</td>
</tr>
<tr>
<td align="center">
<bold>Nucleic acids</bold>
</td>
<td align="left">
<italic>doubleHelix</italic> (<xref ref-type="bibr" rid="B34">Chojnowski, 2023</xref>)</td>
<td align="left">
<italic>doubleHelix</italic> (<xref ref-type="bibr" rid="B34">Chojnowski, 2023</xref>)</td>
<td align="left">CryoREAD (<xref ref-type="bibr" rid="B180">Wang et al., 2023</xref>), EMRNA (<xref ref-type="bibr" rid="B108">Li et al., 2025b</xref>), EM2NA (<xref ref-type="bibr" rid="B106">Li et al., 2024</xref>), DeepCryoRNA (<xref ref-type="bibr" rid="B101">Li and Chen, 2025</xref>)</td>
<td align="left">N/A</td>
</tr>
<tr>
<td align="center">
<bold>Both</bold>
</td>
<td align="left">N/A</td>
<td align="left">Emap2sec&#x2b; (<xref ref-type="bibr" rid="B179">Wang et al., 2021</xref>), Haruspex (<xref ref-type="bibr" rid="B124">Mostosi et al., 2020</xref>), EMInfo (<xref ref-type="bibr" rid="B18">Cao et al., 2025</xref>)</td>
<td align="left">DeepTracer-2.0 (<xref ref-type="bibr" rid="B127">Nakamura et al., 2023</xref>), ModelAngelo (<xref ref-type="bibr" rid="B87">Jamali et al., 2024</xref>)</td>
<td align="left">DiffModeler (<xref ref-type="bibr" rid="B181">Wang et al., 2024</xref>), DEMO-EMfit (<xref ref-type="bibr" rid="B17">Cai et al., 2025</xref>), DEMO-EMol (<xref ref-type="bibr" rid="B197">Zhang et al., 2025b</xref>)</td>
</tr>
</tbody>
</table>
</table-wrap>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>Schematic overview of the representative steps used by deep learning-based tools for automated model building in cryo-EM, using a density map and the model of ryanodine receptor 1 as input and output, respectively. Illustrations of the deep neural networks (DNNs) are generated using NN-SVG (<xref ref-type="bibr" rid="B99">LeNail, 2019</xref>).</p>
</caption>
<graphic xlink:href="fmolb-12-1613399-g004.tif">
<alt-text content-type="machine-generated">Diagram depicting a multi-step approach used by deep-learning based automated model building methods in cryo-EM.</alt-text>
</graphic>
</fig>
<sec id="s4-1">
<label>4.1</label>
<title>Training datasets and preprocessing</title>
<p>Deep neural networks underlying automated model building methods in cryo-EM are trained and tested on large, labeled datasets comprising experimental and/or simulated density maps (<xref ref-type="table" rid="T4">Table 4</xref>). Since few experimental cryo-EM density maps were available in the EMDB (<xref ref-type="bibr" rid="B41">Consortium, 2023</xref>) during the early years (<xref ref-type="fig" rid="F1">Figure 1</xref>), the first deep learning-based methods for cryo-EM model building relied on simulated or synthetic maps, generated from PDB structures, for training and testing (<xref ref-type="table" rid="T4">Table 4</xref>). Utilities such as <italic>pdb2mrc</italic>, <italic>pdb2vol</italic>, or <italic>molmap</italic> from the EMAN2 package (<xref ref-type="bibr" rid="B14">Bell et al., 2018</xref>), the Situs package (<xref ref-type="bibr" rid="B186">Wriggers, 2010</xref>), or UCSF Chimera (<xref ref-type="bibr" rid="B135">Pettersen et al., 2004</xref>) can be used to simulate cryo-EM density maps from PDB models at different resolutions. As the cryo-EM resolution revolution increased the number of experimental cryo-EM density maps in the EMDB (<xref ref-type="fig" rid="F1">Figure 1</xref>), subsequent deep learning-based methods for cryo-EM model building began to utilize these maps for training and testing (<xref ref-type="table" rid="T4">Table 4</xref>). Simulated maps are still useful in cases where experimental density maps do not provide enough data to train deep learning models for a specific model-building task. Therefore, many cryo-EM model-building methods combine simulated and experimental density maps to train their deep neural networks (<xref ref-type="table" rid="T4">Table 4</xref>). Before training, the input cryo-EM density maps are first preprocessed using resampling and normalization. Resampling involves standardizing the varying voxel sizes of raw density maps to a uniform size. 
The resampled maps are then normalized to make density values comparable across different maps. Alternatively, publicly available datasets such as Cryo2Struct2Data (<xref ref-type="bibr" rid="B66">Giri et al., 2024</xref>) can be utilized. This dataset is a significant resource comprising 7,600 preprocessed cryo-EM density maps, in which voxels are labeled based on known atomic structures, making it suitable for training and testing deep learning-based model building methods in cryo-EM.</p>
<table-wrap id="T4" position="float">
<label>TABLE 4</label>
<caption>
<p>Characteristics of training datasets used by deep learning-based automated model building methods in cryo-EM. (&#x2a;size of train and test sets from the dataset size is not known; <sup>&#x23;</sup>number of cryo-EM structures is not known).</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Training data</th>
<th align="left">Method [ref.] (number of structures, resolution (&#x212b;))</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">
<bold>Experimental</bold>
</td>
<td align="left">
<italic>findMySequence</italic> (<xref ref-type="bibr" rid="B36">Chojnowski et al., 2022</xref>) (117 cryo-EM, &#x3c;4.0; 1000 X-ray, 2.0&#x2013;3.0), <italic>checkMySequence</italic> (<xref ref-type="bibr" rid="B33">Chojnowski, 2022</xref>) (796&#x2a;, &#x3c;4.0), <italic>doubleHelix</italic> (<xref ref-type="bibr" rid="B34">Chojnowski, 2023</xref>) (152 cryo-EM, &#x3c;3.5; 108 X-ray, &#x3c;3.5), <italic>EMSequenceFinder</italic> (<xref ref-type="bibr" rid="B123">Mondal et al., 2025</xref>) (3914, 3.0&#x2013;10.0), Haruspex (<xref ref-type="bibr" rid="B124">Mostosi et al., 2020</xref>) (293, &#x2264;4.0), DeepSSETracer (<xref ref-type="bibr" rid="B125">Mu et al., 2021</xref>) (1216, 5.0&#x2013;10.0), CryoSSESeg (<xref ref-type="bibr" rid="B155">Sazzed, 2024</xref>) (1268, 5.0&#x2013;10.0), EMInfo (<xref ref-type="bibr" rid="B18">Cao et al., 2025</xref>) (90, 2.0&#x2013;10.0), DeepTracer (<xref ref-type="bibr" rid="B137">Pfab et al., 2021</xref>) (1440, &#x2264;4.0), DeepTracer-ID (<xref ref-type="bibr" rid="B24">Chang et al., 2022</xref>) and DeepTracer-Refine (<xref ref-type="bibr" rid="B31">Chen et al., 2024b</xref>) (utilizes DeepTracer), SegmA (<xref ref-type="bibr" rid="B154">Rozanov and Wolfson, 2023</xref>) (1237, 3.0 - 3.4), DeepTracer-2.0 (<xref ref-type="bibr" rid="B127">Nakamura et al., 2023</xref>) (1733, &#x2264;4.0), ModelAngelo (<xref ref-type="bibr" rid="B87">Jamali et al., 2024</xref>) (&#x223c;3200, &#x3c;4.0), CryoREAD (<xref ref-type="bibr" rid="B180">Wang et al., 2023</xref>) (290, 2.0&#x2013;5.0), SMARTFold (<xref ref-type="bibr" rid="B105">Li et al., 2023</xref>) (8749, &#x3c;8.0), EMRNA (<xref ref-type="bibr" rid="B108">Li et al., 2025b</xref>) (284, 2.0&#x2013;6.0), Cryo2Struct (<xref ref-type="bibr" rid="B63">Giri and Cheng, 2024</xref>) and Cryo2Struct2 (<xref ref-type="bibr" rid="B64">Giri and Cheng, 2025</xref>) (6652, 1.0 - 4.0), EmodelX (<xref ref-type="bibr" rid="B30">Chen et al., 2024a</xref>) and EmodelX(&#x2b;AF) (<xref 
ref-type="bibr" rid="B30">Chen et al., 2024a</xref>) (1529, 2.0&#x2013;4.0), EM2NA (<xref ref-type="bibr" rid="B106">Li et al., 2024</xref>) (257, 2.0&#x2013;5.0), CryFold (<xref ref-type="bibr" rid="B168">Su et al., 2025</xref>) (5158, &#x3c;4.0), DeepCryoRNA (<xref ref-type="bibr" rid="B101">Li and Chen, 2025</xref>) (131, 2.0&#x2013;7.0), DEMO-EM (<xref ref-type="bibr" rid="B202">Zhou et al., 2022</xref>) (26,151<sup>&#x23;</sup>, 2.0 - 20.0), DEMO-EM2 (<xref ref-type="bibr" rid="B195">Zhang et al., 2024</xref>) (utilizes FUpred), FUpred (<xref ref-type="bibr" rid="B199">Zheng et al., 2020</xref>) (849<sup>&#x23;</sup> multi-domain and 1700<sup>&#x23;</sup> single-domain proteins, N/A), EMBuild (<xref ref-type="bibr" rid="B76">He et al., 2022</xref>) (209, 4.0&#x2013;8.0), FFF (<xref ref-type="bibr" rid="B29">Chen et al., 2023</xref>) (&#x223c;2400&#x2a;, 1.0 - 4.0), CrAI (<xref ref-type="bibr" rid="B117">Mallet et al., 2025</xref>) (1000, &#x3c;10.0), DeepMainmast (<xref ref-type="bibr" rid="B172">Terashi et al., 2024</xref>) (197, 2.5&#x2013;5.0), DeepTracer-LowResEnhance (<xref ref-type="bibr" rid="B115">Ma and Si, 2025</xref>) (utilizes DeepTracer and CryoFEM), CryoFEM (<xref ref-type="bibr" rid="B45">Dai et al., 2023</xref>) (1082, 2.5 - 5.0), DiffModeler (<xref ref-type="bibr" rid="B181">Wang et al., 2024</xref>) (230, 5.0&#x2013;10.0), DEMO-EMfit (<xref ref-type="bibr" rid="B17">Cai et al., 2025</xref>) (utilizes DiffModeler), DEMO-EMol (<xref ref-type="bibr" rid="B197">Zhang et al., 2025b</xref>) (156, 2.0&#x2013;5.0), CryoDomain (<xref ref-type="bibr" rid="B46">Dai et al., 2025</xref>) (200,000 domain density maps; 1.0 - 20.0), MICA (<xref ref-type="bibr" rid="B71">Gyawali et al., 2025</xref>) (440, 1.0&#x2013;4.0)</td>
</tr>
<tr>
<td align="left">
<bold>Simulated</bold>
</td>
<td align="left">CNN-classifier (<xref ref-type="bibr" rid="B102">Li et al., 2016</xref>) (15, 8.0), A<sup>2</sup>-Net (<xref ref-type="bibr" rid="B188">Xu et al., 2019</xref>) (1250, 3.0), Cascaded-CNN (<xref ref-type="bibr" rid="B160">Si et al., 2020</xref>) (7024&#x2a;, N/A), Structure Generator (<xref ref-type="bibr" rid="B100">Li, 2020</xref>) (18,515, 1.4&#x2013;1.8)</td>
</tr>
<tr>
<td align="left">
<bold>Experimental (E) and simulated (S)</bold>
</td>
<td align="left">Emap2sec (<xref ref-type="bibr" rid="B116">Maddhuri Venkata Subramaniya et al., 2019</xref>) (43 E, 5.0 - 10.0; 2000&#x2a;S, 6.0 and 10.0), Emap2sec&#x2b; (<xref ref-type="bibr" rid="B179">Wang et al., 2021</xref>) (84 E, 5.0 - 10.0; 755 S, 6.0 and 10.0), EMNUSS (<xref ref-type="bibr" rid="B74">He and Huang, 2021a</xref>) (120 E, 5.0&#x2013;9.5; 468 E, 2.0&#x2013;4.0; 1964 S, 6.0 and 10.0), HaPi (<xref ref-type="bibr" rid="B62">Garcia Condado et al., 2022</xref>) (261 E, &#x2264;5.0; 12,343 S, &#x2264;5.0), AAnchor (<xref ref-type="bibr" rid="B153">Rozanov and Wolfson, 2018</xref>) (24 E, 1.8&#x2013;3.1; &#x223c;3845 S, N/A), DeepMM (<xref ref-type="bibr" rid="B75">He and Huang, 2021b</xref>) (100 E, 2.0&#x2013;5.0; 100 S, 5.0), SEGEM (<xref ref-type="bibr" rid="B28">Chen et al., 2021</xref>) and SEGEM&#x2b;&#x2b; (<xref ref-type="bibr" rid="B28">Chen et al., 2021</xref>) (2088 E, 2.0&#x2013;5.0; 41,428 S, 3.0&#x2013;7.0), E3-CryoFold (<xref ref-type="bibr" rid="B182">Wang et al., 2025</xref>) (7000 E, 1.0&#x2013;4.0; 163,284 S, N/A), CR-I-TASSER (<xref ref-type="bibr" rid="B193">Zhang et al., 2022</xref>) (3600 E, 2.1&#x2013;10.0; 3088 S, 1.0&#x2013;15.0), CryoJAM (<xref ref-type="bibr" rid="B20">Carrion et al., 2024</xref>) (256 E, 4.0&#x2013;8.0; &#x3e;256 S, N/A)</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s4-2">
<label>4.2</label>
<title>Feature learning</title>
<p>During training, deep neural networks automatically learn hierarchical, increasingly abstract representations from preprocessed cryo-EM density maps, enabling them to identify key structural features for model building. Specifically, deep neural networks learn voxel-wise representations from preprocessed cryo-EM density maps that predict multiple structural features such as backbone atom positions, residue types, and secondary structures at each voxel, offering a preliminary, coarse-grained representation of the molecular structure that can guide subsequent model construction. Deep learning-based automated model building methods employ different neural network architectures for feature learning (<xref ref-type="table" rid="T2">Table 2</xref>), with specific details provided for each method in the following sections.</p>
</sec>
<sec id="s4-3">
<label>4.3</label>
<title>Model building</title>
<p>Since the voxel-wise representations learned by deep neural networks from preprocessed cryo-EM density maps provide only a coarse structural representation, they must be further processed to generate a complete atomic model. This broadly involves backbone tracing, sequence assignment, and full-atom reconstruction. The learned voxel representations serve as a basis for constructing an initial structural model. Many methods represent this as a graph where nodes correspond to residues, secondary structure elements, or small structural fragments, while edges denote spatial proximity or potential chemical bonds, which is then iteratively processed and refined. Backbone tracing generates chains and fragments by connecting predicted backbone atoms while incorporating stereochemical constraints. This is a complex and challenging step due to the factorial growth in the number of ways spatially distributed atoms can be connected, requiring the use of advanced optimization algorithms. Sequence assignment involves correctly threading the biomolecular sequence onto the traced backbone structures, followed by full-atom reconstruction including side chains. Details of the model building step for each method are summarized in <xref ref-type="table" rid="T2">Table 2</xref> and described in the following sections.</p>
</sec>
<sec id="s4-4">
<label>4.4</label>
<title>Refinement</title>
<p>Deep learning&#x2013;based automated model-building methods often employ established structural biology tools, including <italic>phenix.real_space_refine</italic> (<xref ref-type="bibr" rid="B3">Afonine et al., 2018a</xref>), molecular mechanics force fields, molecular dynamics flexible fitting (MDFF) (<xref ref-type="bibr" rid="B120">McGreevy et al., 2016</xref>) and COOT (<xref ref-type="bibr" rid="B55">Emsley and Cowtan, 2004</xref>), to further refine reconstructed atomic models. These tools utilize energy minimization techniques, apply physical and stereochemical constraints, and incorporate empirical knowledge-based restraints to refine models, thereby ensuring that the biomacromolecule retains correct chemical, stereochemical, and geometric properties including realistic bond lengths and angles as well as minimal steric clashes. The application of <italic>phenix.real_space_refine</italic> (<xref ref-type="bibr" rid="B3">Afonine et al., 2018a</xref>) significantly improves side-chain conformations and map&#x2013;model correlations. In some workflows, this step is followed by additional refinement using molecular dynamics (MD) simulations or energy minimization algorithms, which further optimize the atomic arrangement by simulating physical forces and interactions.</p>
</sec>
<sec id="s4-5">
<label>4.5</label>
<title>Deep learning-based automated model building methods</title>
<p>Biomacromolecules exhibit hierarchical levels of structural complexity and organization, ranging from the linear sequence of ordered residues to the complex three-dimensional spatial arrangement of atoms (<xref ref-type="sec" rid="s2">Section 2</xref>). In this section, we group deep learning-based automated model-building methods by their ability to predict (<xref ref-type="sec" rid="s4-5-1">Section 4.5.1</xref>) primary, (<xref ref-type="sec" rid="s4-5-2">Section 4.5.2</xref>) secondary, and (<xref ref-type="sec" rid="s4-5-3">Section 4.5.3</xref>) tertiary or quaternary structural aspects of biomacromolecules. Deep learning tools for building atomic models in cryo-EM density maps are further grouped as <italic>de novo</italic> (<xref ref-type="sec" rid="s4-5-3-1">Section 4.5.3.1</xref>), where the model is predicted directly from features learned from the cryo-EM density, or hybrid (<xref ref-type="sec" rid="s4-5-3-2">Section 4.5.3.2</xref>), where it is derived by integrating structural templates with these features. It should be noted that methods grouped under tertiary or quaternary structure also extract structural features at the secondary (such as &#x3b1;-helices and &#x3b2;-sheets) or primary structure level, but their ultimate prediction task is to determine the three-dimensional arrangement of residues and atoms. Methods grouped under secondary structure, in contrast, focus only on predicting secondary structure classes from the input cryo-EM density maps.</p>
<sec id="s4-5-1">
<label>4.5.1</label>
<title>Primary structure prediction</title>
<p>The primary structure of proteins and nucleic acids is essentially their linear sequence of residues covalently linked to each other along the polymer chain. Neural networks can predict residue type probabilities from cryo-EM reconstructions, enabling the assignment and validation of protein or nucleic acid sequences. Tools specifically designed for this task are described below.</p>
<p>
<bold>
<italic>findMySequence</italic>
</bold> is a computer program that identifies protein sequences from cryo-EM reconstructions and crystallographic data (<xref ref-type="bibr" rid="B36">Chojnowski et al., 2022</xref>). To achieve this, it uses machine-learning predicted residue-type probabilities to query sequence databases using <italic>HMMER</italic> suite (<xref ref-type="bibr" rid="B54">Eddy, 2011</xref>). To predict residue-type probabilities from cryo-EM map and main-chain models, <italic>findMySequence</italic> utilizes a neural network with two fully connected hidden layers offering improved performance over a previous support-vector machine-based classifier. To identify a sequence that matches the predicted residue-type probabilities, the probabilities are first converted into a multiple sequence alignment (MSA), then into a profile Hidden Markov Model (profile-HMM) using the <italic>HMMER</italic> suite. This profile-HMM is then used to query a sequence database to find plausible matches (<xref ref-type="boxed-text" rid="dBox1">Box 1</xref>: Glossary section). To build side chains for an input main-chain model fragment, <italic>findMySequence</italic> considers all possible alignments of the fragment to the target sequence and selects the most plausible one based on the predicted residue-type probabilities to assign residue types to the fragment.</p>
<boxed-text id="dBox1" position="float">
<label>BOX 1</label>
<caption>
<title>Glossary section.</title>
</caption>
<p>
<bold>SE(3)</bold>: Special Euclidean Group of degree three is a group of all rigid motions in 3D space and combines rotations and translations. It describes transformations that preserve shape and size of objects.</p>
<p>
<bold>Monte Carlo Tree Search (MCTS)</bold>: MCTS is a powerful algorithmic framework used for decision making in sequential decision processes. MCTS builds a search tree by simulating many possible moves and selecting the most promising path.</p>
<p>
<bold>Profile Hidden Markov Model (profile HMM)</bold>: A profile hidden Markov model (profile HMM) is a probabilistic model constructed from a multiple sequence alignment (MSA) of related protein or nucleic acid sequences. It captures both conservation and variability in the alignment by defining match states for conserved positions, insert and delete states for regions where extra residues or gaps are likely, and probabilities that describe which residues are observed at each aligned column. The model is parameterized by two types of probabilities: transition probabilities, which specify the likelihood of moving between match, insert, and delete states as the sequence progresses along the profile, and emission probabilities, which define the likelihood of observing a specific residue from a match or insert state. To assess whether a new sequence belongs to the family, the model computes the probability that the sequence could be generated by traversing the profile&#x2019;s states, emitting the observed residues along the way. The higher this probability, the more likely the new sequence is a member of that family.</p>
<p>
<bold>Dynamic Programming</bold>: Algorithms designed for solving optimization problems by breaking them into simpler, smaller and overlapping subproblems.</p>
<p>
<bold>Constraint Programming</bold>: Techniques developed for solving combinatorial problems where the solution must satisfy a set of constraints.</p>
<p>
<bold>Mean-Shift Algorithm</bold>: Non-parametric clustering technique to locate the highest density points of a distribution or simply identify dense regions in a dataset.</p>
<p>
<bold>Traveling Salesman Problem (TSP) Solver</bold>: The goal of the solver is to find the shortest possible route that visits a set of points exactly once and returns to the starting point as a way of solving the well-known combinatorial optimization TSP problem.</p>
<p>
<bold>Vehicle Routing Problem (VRP) Solver</bold>: The goal of the solver is to optimize routes for multiple vehicles visiting a set of locations or points while satisfying a set of constraints. VRP is a generalization of TSP.</p>
</boxed-text>
<p>
<bold>
<italic>checkMySequence</italic>
</bold> is a new, automated tool designed to quickly and accurately detect register shifts in protein models built into cryo-EM density maps including large macromolecular complexes (<xref ref-type="bibr" rid="B33">Chojnowski, 2022</xref>). To detect register shifts, <italic>checkMySequence</italic> first identifies a reference sequence for each protein chain in the input model. This step uses a protocol from the <italic>findMySequence</italic> program (<xref ref-type="bibr" rid="B36">Chojnowski et al., 2022</xref>), employing a neural network classifier to generate residue-type probability profiles from the cryo-EM density map and input model. These probabilities are then used with the <italic>HMMER</italic> suite (<xref ref-type="bibr" rid="B54">Eddy, 2011</xref>) to query sequence databases, scoring plausible matches. Once reference sequences are established, <italic>checkMySequence</italic> assigns fragments of the input model to them, identifying areas where this new assignment conflicts with the original sequence assignment in the input model. If a discrepancy is found, the method suggests a more plausible sequence assignment.</p>
<p>
<bold>
<italic>doubleHelix</italic>
</bold> is a computer program designed for the assignment, identification, and validation of nucleic acid sequences in structures determined using both cryo-EM and X-ray crystallography (<xref ref-type="bibr" rid="B34">Chojnowski, 2023</xref>). <italic>doubleHelix</italic> combines neural network classifiers to identify nucleobases with a sequence-independent secondary structure assignment approach for comprehensive nucleic acid analysis. The neural network classifiers estimate the likelihood of a nucleotide being a purine or a pyrimidine based on a provided backbone model and its corresponding density map. Each neural network classifier is built with two fully connected hidden layers. After estimating purine and pyrimidine probabilities using the neural network, <italic>doubleHelix</italic> then identifies base pairs in RNA and DNA models by aligning recurring nucleic acid structural motifs to the target model with base-pairing restraints derived from the backbone conformation. This process involves superimposing small search-fragments of known secondary structures onto the input model using an algorithm from the <italic>Brickworx</italic> program (<xref ref-type="bibr" rid="B35">Chojnowski et al., 2015</xref>). For sequence identification, <italic>doubleHelix</italic> leverages the INFERNAL suite (<xref ref-type="bibr" rid="B128">Nawrocki and Eddy, 2013</xref>) to find the most plausible sequence in a database, using the input model&#x2019;s residue-type probabilities and secondary structure. Similarly, it uses these estimated base-type probabilities to assign RNA or DNA models to a target sequence.</p>
<p>
<bold>
<italic>EMSequenceFinder</italic>
</bold> is a deep learning method that assigns amino acid sequences to backbone fragments traced in cryo-EM density maps (<xref ref-type="bibr" rid="B123">Mondal et al., 2025</xref>). The method requires a cryo-EM density map with a resolution better than 8.0 &#xc5;, the corresponding backbone traces fitted or traced into the map (specifying N, C&#x3b1;, C, and O atom positions), and one or more protein sequences. <italic>EMSequenceFinder</italic> quantifies the density map fit of traces using a 3D Convolutional Neural Network (CNN), trained on a large dataset of cryo-EM density maps. The network determines the likelihood of each residue type by extracting features from the side chain voxel intensities. These extracted features are then combined with additional input, such as map resolution and the secondary structure type of the trace containing the residue, to output the probability for each amino acid. <italic>EMSequenceFinder</italic> then assigns protein sequences to backbone fragments by identifying the best-scoring threading among all possible alignments of a sequence to the backbone trace. It achieves this using a Bayesian scoring function that ranks the 20 standard amino acid types at each backbone position, considering the fit to the density map (quantified above using a 3D CNN), map resolution, and secondary structure propensity. The output is the best-scoring threading for each input backbone trace.</p>
</sec>
<sec id="s4-5-2">
<label>4.5.2</label>
<title>Secondary structure prediction</title>
<p>Secondary structure refers to local, stable, and regular conformations of the polymer backbone, such as &#x3b1;-helices and &#x3b2;-sheets in proteins, and regular helical segments in nucleic acids. Deep learning tools specifically designed to predict voxel-wise probabilities of secondary structure types from the cryo-EM density maps are described below in an approximate chronological order.</p>
<p>
<bold>CNN-classifier</bold> is a 3D convolutional neural network (3D CNN) designed to automatically detect secondary structures of proteins in cryo-EM density maps (<xref ref-type="bibr" rid="B102">Li et al., 2016</xref>). It predicts the probability of a voxel belonging to specific protein secondary structure elements (SSEs), such as &#x3b1;-helices, &#x3b2;-sheets, or background. By using 3D convolutions, the model effectively captures 3D spatial information within the protein structures, learning discriminative features automatically from cryo-EM density maps. To enhance efficiency, the CNN-classifier incorporates multiple deconvolution operations at various intermediate layers to generate feature maps of the same dimension, which are then summed to create a multi-scale representation. The CNN-classifier combines inception learning and residual learning with dilated convolutions. The inception component uses multiple convolutional layers with different filter sizes to create diverse paths between network hidden layers. This increases the number of trainable parameters at each step without increasing the overall computational complexity. Residual learning employs shortcut identity mappings to simulate nonlinear relationships between input and output layers, allowing the network to achieve accurate results without added computational cost. Dilated convolutions, integrated in the inception module, expand the network&#x2019;s receptive field, allowing it to capture information across various scales without losing image resolution.</p>
<p>
<bold>Emap2sec</bold> is a deep learning method designed to identify protein secondary structures (&#x3b1;-helices, &#x3b2;-sheets, and other structures) directly from cryo-EM density maps with resolutions between 5 and 10 &#xc5; (<xref ref-type="bibr" rid="B116">Maddhuri Venkata Subramaniya et al., 2019</xref>). It employs a 3D convolutional neural network (3D CNN) to assign a secondary structure to each grid point. The core of Emap2sec is a two-phase stacked neural network where the phase 1 network analyzes the normalized density of a single voxel and outputs probability values for the three secondary structure classes (&#x3b1;-helix, &#x3b2;-sheet and other structure; defined as structure that is neither &#x3b1;-helix nor &#x3b2;-sheet). The phase 2 network then refines these initial predictions by incorporating contextual information from neighboring voxels. Ultimately, each voxel is assigned to the secondary structure class with the highest probability among the three types.</p>
<p>
<bold>Emap2sec&#x2b;</bold>, the successor of Emap2sec, is designed to identify DNA or RNA in addition to protein secondary structures directly from cryo-EM maps at 5&#x2013;10 &#xc5; resolution (<xref ref-type="bibr" rid="B179">Wang et al., 2021</xref>). It employs a deep residual convolutional neural network (ResNet), framing the task as a classification problem rather than segmentation, owing to prior success with Emap2sec and the expectation that classification would perform better than segmentation at intermediate resolutions. Emap2sec&#x2b; classifies individual voxels from cryo-EM density maps into one of four structural categories - DNA/RNA or a protein secondary structure (&#x3b1;-helices, &#x3b2;-sheets, other structures). This classification occurs through a two-phase neural network. In Phase 1, an input voxel undergoes five independent evaluations: four binary classifiers each determining the probability of a specific structure at the voxel (DNA/RNA, &#x3b1;-helix, &#x3b2;-sheet or other structures), and a fifth multi-class classifier providing probabilities for all four categories. The probability values from these Phase 1 classifiers are then fed into the Phase 2 network, which refines the final probability assignments of the four structure classes for the central voxel by considering the structural predictions of neighboring voxels, which effectively smooths the structural assignment across the entire cryo-EM density map.</p>
<p>
<bold>Haruspex</bold> is a deep learning tool that leverages convolutional neural networks to automatically identify and annotate protein secondary structure elements and RNA/DNA within cryo-EM density maps at an average map resolution of 4.0 &#xc5; or better (<xref ref-type="bibr" rid="B124">Mostosi et al., 2020</xref>). Haruspex utilizes a U-Net-style convolutional neural network. It processes voxel segments through multiple convolutional and pooling layers and extracts features to identify different structural elements, which are later combined to restore spatial detail. The final output provides a probability for each voxel, indicating whether it belongs to an &#x3b1;-helix, &#x3b2;-strand, nucleotide, or is unassigned. This essentially annotates the cryo-EM density map with the locations of these key biomolecular structures.</p>
<p>
<bold>EMNUSS</bold> utilizes a nested 3D U-Net architecture (UNet&#x2b;&#x2b;) to annotate secondary structures in cryo-EM density maps, effective at both intermediate and high resolutions (<xref ref-type="bibr" rid="B74">He and Huang, 2021a</xref>). This design, which starts with an encoder subnetwork followed by a decoder subnetwork, concatenates the outputs of these subnetworks, after which a final convolution layer assigns secondary structure classifications. For each voxel in the annotated region, EMNUSS provides three channels containing probabilities that indicate whether the voxel is close to an &#x3b1;-helix residue, a &#x3b2;-strand residue, or a coil residue. Compared to the U-Net architecture, UNet&#x2b;&#x2b; incorporates dense skip connections on its skip pathways, which significantly enhances gradient flow.</p>
<p>
<bold>DeepSSETracer</bold> employs a neural network architecture of end-to-end convolution operations, adapted from the 3D U-Net architecture (<xref ref-type="bibr" rid="B40">&#xc7;i&#xe7;ek, 2016</xref>), to detect secondary structures within cryo-EM component maps for individual chains at medium resolution, rather than analyzing an entire cryo-EM density map, which may contain multiple chains (<xref ref-type="bibr" rid="B125">Mu et al., 2021</xref>). This architecture allows DeepSSETracer to process density maps of varying sizes, predicting the probability of each voxel belonging to an &#x3b1;-helix, &#x3b2;-sheet, or other structure. The tool is integrated with ChimeraX (<xref ref-type="bibr" rid="B122">Meng et al., 2023</xref>) in a software bundle that combines the secondary structure prediction with the visualization capabilities of ChimeraX.</p>
<p>Cryo-EM reconstruction can yield two equally consistent, but mirror-image, 3D density maps. Since proteins have a specific handedness, only one reconstruction is correct. Currently, biologists must manually determine this by inspecting the rotations of &#x3b1;-helices within the map, a task that becomes challenging at lower resolutions as helices lose their distinct handedness. <bold>HaPi</bold> (Handedness Pipeline), uses two 3D convolutional neural networks (CNNs) to automatically determine handedness of the cryo-EM density map for resolutions up to 5.0 &#xc5; (<xref ref-type="bibr" rid="B62">Garcia Condado et al., 2022</xref>). The first network, <italic>AlphaVolNet</italic>, identifies the location of &#x3b1;-helices throughout the entire map by processing each non-background voxel. The second network, <italic>HandNet</italic>, then evaluates the overall handedness of the map using a consensus strategy that averages the individual handedness predictions.</p>
<p>
<bold>CryoSSESeg</bold> is a convolutional neural network (CNN) framework designed to identify the organization of protein secondary structure elements within medium-resolution cryo-EM density maps (<xref ref-type="bibr" rid="B155">Sazzed, 2024</xref>). CryoSSESeg works by first isolating individual protein chains from an atomic model and then using their coordinates to extract and mask the corresponding regions in the cryo-EM density map. This chain-based cropping ensures that entire secondary structures are present within the image, which helps the model learn more effectively. The STRIDE secondary structure annotation tool (<xref ref-type="bibr" rid="B77">Heinig and Frishman, 2004</xref>) is then used to label individual voxels within the density map. The network is an adaptation of the 3D U-Net model (<xref ref-type="bibr" rid="B18">Cao et al., 2025</xref>), featuring a downsampling path (contracting path) that condenses spatial information, a bottleneck layer that further compresses data and enhances feature representation, and an upsampling path (expansive path) that recovers spatial detail. The final layer outputs three channels, each corresponding to one of three classes: &#x3b1;-helix, &#x3b2;-sheet, or background.</p>
<p>
<bold>EMInfo</bold> is a deep learning method that uses a 3D UNet&#x2b;&#x2b; architecture to automatically detect protein secondary structures and nucleic acid locations in cryo-EM density maps (<xref ref-type="bibr" rid="B18">Cao et al., 2025</xref>). UNet&#x2b;&#x2b; is a supervised encoder-decoder network comprising downsampling, upsampling, and skip connections. This design allows UNet&#x2b;&#x2b; to implement multi-scale feature extraction from input maps without significantly increasing computational cost. EMInfo outputs a four-channel probability for each voxel representing the likelihood of it belonging to different structural categories. For each voxel, the channel with the highest probability is selected as its predicted category, ultimately generating a structure annotation map that directly corresponds to the input density map. EMInfo achieves accurate structural annotation by ignoring background voxels, those with a density value below a specified contour level, ensuring that structural regions are not misclassified as background.</p>
</sec>
<sec id="s4-5-3">
<label>4.5.3</label>
<title>Tertiary and quaternary structure prediction</title>
<p>Tertiary and quaternary structures refer to the three-dimensional arrangement of atoms in a single polymer chain or multiple chains (subunits), respectively. This structural level captures interactions between residues that may be far apart in the sequence. Deep learning tools specifically designed to build 3D atomic models in the input cryo-EM density map are discussed below. They are further grouped as <italic>de novo</italic> (4.5.3.1), where the model is predicted directly from features learned from the cryo-EM density, or hybrid (4.5.3.2), where it is derived by integrating structural templates with these features.</p>
<sec id="s4-5-3-1">
<label>4.5.3.1</label>
<title>
<italic>De novo</italic> model building</title>
<p>In this approach, deep learning tools perform model building directly from voxel-wise backbone (and sometimes sidechain) atom, secondary structure and residue type probabilities learned from the cryo-EM density map to generate 3D atomic models.</p>
<p>
<bold>AAnchor</bold> (amino-acid anchor) is a deep learning method designed to identify and precisely locate high-confidence anchor amino acid residues in high-resolution cryo-EM density maps (at resolutions 3.1 &#xc5; or better) (<xref ref-type="bibr" rid="B153">Rozanov and Wolfson, 2018</xref>). AAnchor uses a detection algorithm to first generate candidate locations for amino acids, filtering out those likely to be inaccurate, and a classification convolutional neural network (CNN) to classify these candidate volume cubes into one of 21 labels (20 amino acids plus none). The detected anchor residues can be crucial for various local <italic>de novo</italic> modeling tasks, including accurately positioning secondary structures, modeling loops, and facilitating general fragment-based modeling.</p>
<p>
<underline>
<italic>Feature learning</italic>
</underline>: <bold>A<sup>2</sup>-Net</bold> takes a cryo-EM density map and a protein sequence as input and determines the 3D structure of the protein (<xref ref-type="bibr" rid="B188">Xu et al., 2019</xref>). The method employs deep convolutional neural networks (CNNs) to detect amino acids by learning the conformational densities of individual amino acids within the density volume. To further enhance detection, prior knowledge of protein sequences is incorporated by designing a sequence-guided neighbor loss during training. A<sup>2</sup>-Net processes 3D density volumes to generate 3D feature volumes and uses a localization network (locNet) and a recognition network (recNet) to detect and classify amino acids in 3D space and estimate their poses, which determine the 3D coordinates of atoms in each amino acid. In locNet, a 3D Region Proposal Network (RPN) uses 3D convolutional layers to propose amino acid locations. The RPN classifies valid proposals and estimates their coordinates. In recNet, amino acid proposals are then fed to a new Aspect-ratio Preserved Region of Interest (APRoI) layer (a specialized type of pooling layer), which extracts Regions of Interest (RoI) into fixed cubic volumes, which are further processed by 3D convolutional layers to predict the amino acid category. For atomic-level detail, PoseNet, a 3D stacked hourglass network (a type of fully convolutional neural network designed for volumetric data, structured in an encoder&#x2013;decoder style with skip connections), regresses the 3D coordinates of each atom in amino acids, completing the pose estimation. <underline>
<italic>Model building</italic>
</underline>: To construct protein main chains and thus the full molecular structure, A<sup>2</sup>-Net uses Monte Carlo Tree Search (MCTS) algorithm (<xref ref-type="bibr" rid="B157">Shen, 2018</xref>) (<xref ref-type="boxed-text" rid="dBox1">Box 1</xref>: Glossary section) which iteratively builds a search tree to effectively search and thread candidate amino acid proposals. To make MCTS search computationally feasible with many proposals, a K-nearest neighbors (KNN) graph is created which connects spatially close amino acid proposals, narrowing the search space for the MCTS. This search process is further optimized by implementing tree pruning which incorporates a convolutional neural network (CNN)-based Peptide Bond Recognition Network (PBNet) to predict peptide bonds between proposals which reduces the edges in the KNN-graph.</p>
<p>
<underline>
<italic>Feature learning</italic>
</underline>: <bold>Cascaded-CNN (C-CNN)</bold> is a model building tool that comprises a series of convolutional neural networks (CNNs), each predicting a specific component of protein structure from the input cryo-EM density maps (<xref ref-type="bibr" rid="B160">Si et al., 2020</xref>). The overall goal of C-CNN is to accurately predict C&#x3b1; atom locations using information from the intermediate secondary structure elements (SSEs) and backbone predictions. C-CNN leverages the fully connected network design and dilated convolutions to classify a full 3D image in a single pass. Dilated convolutional layers increase the receptive field while preserving the size of the input image. C-CNN comprises three feedforward neural networks, taking cryo-EM density maps as input. SSE CNN predicts &#x3b1;-helices, &#x3b2;-sheets, or loops/turns for each voxel and outputs a confidence map for each SSE. These three SSE maps along with input density maps serve as input to the Backbone CNN to predict whether each voxel is part of the backbone structure of the protein or not and outputs two backbone confidence maps. C&#x3b1;-Atom CNN then predicts C&#x3b1; atom locations by taking all the previous maps - input density map, SSE and backbone confidence maps - and classifies if a voxel is part of a C&#x3b1; atom or not, producing two C&#x3b1; atom confidence maps. Each voxel receives a confidence value, with its final classification determined by the highest confidence among the output maps. <underline>
<italic>Model building</italic>
</underline>: The confidence maps from the C-CNN are post-processed using a path-walking technique to generate protein backbone structure with precise C&#x3b1; atom locations. The path-walking algorithm navigates high-confidence backbone regions, connecting C&#x3b1; atoms using a novel tabu-search algorithm that scores movements based on the location&#x2019;s local density prediction confidence and distance, and incorporates backbone torsion angles and known geometrical parameters of secondary structures as weights. The algorithm continues tracing until no suitable C&#x3b1; atoms can be found or previously processed areas are encountered. The output is a PDB file of disconnected C&#x3b1; atom traces. The disconnected traces, representing partial protein backbones, are further refined through path combination and backbone refinement. First, the traces are converted into a graph where C&#x3b1; atoms are nodes and connections are edges. The path combination step then merges these disjoint graphs into a single, fully connected representation of the protein&#x2019;s backbone. After this, a backbone refinement step removes false-positive connections, leaving only accurate C&#x3b1; node and edge connections. Further improvements to the &#x3b1;-helix SSEs of the backbone trace are done using a helix-refinement algorithm. Finally, a novel quality assessment-based combinatorial algorithm is used to map protein sequences onto the reconstructed C&#x3b1; traces, generating full-atom protein structures. This algorithm also reconstructs side-chain atoms using PULCHRA (<xref ref-type="bibr" rid="B152">Rotkiewicz and Skolnick, 2008</xref>) and SCWRL4 (<xref ref-type="bibr" rid="B92">Krivov et al., 2009a</xref>), based on the C&#x3b1; coordinates of each segment.</p>
<p>
<underline>
<italic>Feature learning</italic>
</underline>: <bold>Structure Generator</bold> is a fully automated, template-free deep learning method for protein model building in cryo-EM density maps (<xref ref-type="bibr" rid="B100">Li, 2020</xref>). It uses RotamerNet, a 3D convolutional neural network (CNN) built on the ResNet architecture, to output the predicted amino acid and rotamer identity, along with the proposed coordinates for its C&#x3b1; atom. RotamerNet analyzes the density profiles to propose a set of candidate amino acids and their 3D locations, unconstrained by the known protein sequence. <underline>
<italic>Model building</italic>
</underline>: Structure Generator uses a graph convolutional network (GCN) to create an embedding from initial rotamer-based amino acid identities and predicted candidate 3D C&#x3b1; locations. Following this, a bidirectional long short-term memory (LSTM) module processes this embedding to order and label the candidate identities and atomic positions, ensuring consistency with the input protein sequence to ultimately generate a structural model. The graph convolutional network (GCN) efficiently encodes the output from RotamerNet as a graph, while a bidirectional Long Short-Term Memory (LSTM) module accurately decodes this information to generate a directed amino acid chain.</p>
<p>
<underline>
<italic>Feature learning</italic>
</underline>: <bold>DeepTracer</bold> is a fully automated deep learning method designed for the rapid <italic>de novo</italic> structure determination of multi-chain proteins directly from high-resolution cryo-EM density maps (<xref ref-type="bibr" rid="B137">Pfab et al., 2021</xref>). Its core is a specialized convolutional neural network (CNN) made up of four connected U-Nets. From the preprocessed cryo-EM density maps, each U-Net predicts a distinct aspect of the protein structure (atoms, backbone, secondary structure elements, and amino acid types). The Atoms U-Net predicts if a voxel contains either a C&#x3b1; atom, a nitrogen (N) atom, a carbon (C) atom, or no atom (four output channels). The Backbone U-Net determines if each voxel belongs to the protein backbone, side chains, or non-protein regions (three output channels). The Secondary Structure U-Net recognizes loops, &#x3b1;-helices, &#x3b2;-sheets and no structure (four output channels). The Amino Acid Type U-Net determines the specific type of amino acid at each voxel (21 output channels, 20 standard amino acids plus no amino acid). <underline>
<italic>Model building</italic>
</underline>: To create an initial model structure, DeepTracer first determines disconnected chains using the output of the Backbone U-Net by identifying connected areas of backbone voxels, with each disconnected area being designated as a separate chain. It then calculates the precise 3D coordinates of each C&#x3b1; atom using output C&#x3b1; channels of the Atoms U-Net. For connecting the C&#x3b1; atoms into continuous chains, DeepTracer uses a modified traveling salesman problem (TSP) algorithm (<xref ref-type="boxed-text" rid="dBox1">Box 1</xref>: Glossary section). Instead of simply minimizing distance (as in a traditional TSP), DeepTracer uses a custom confidence function to determine the likelihood of two atoms being connected and the overall goal of the TSP algorithm becomes maximizing the sum of these confidence scores. DeepTracer then uses a custom dynamic programming (DP) algorithm (<xref ref-type="boxed-text" rid="dBox1">Box 1</xref>: Glossary section) for protein sequence alignment, aligning segments of the predicted amino acid sequence with the known amino acid sequence of the target protein. Based on the alignment, the initially predicted amino acid types are updated for greater accuracy. DeepTracer then builds the complete protein backbone by adding carbon (C) and nitrogen (N) atoms to the previously placed C&#x3b1; atoms. For this task, it uses U-Net provided confidence maps for C and N atoms and applies molecular mechanics principles specific to peptide chains including planar peptide geometry, to ensure chemically accurate placement. At the final step, DeepTracer predicts sidechains, aiming to accurately position the side-chain atoms for each amino acid. This is achieved using SCWRL4 (<xref ref-type="bibr" rid="B93">Krivov et al., 2009b</xref>), an automated tool that takes the complete protein backbone and amino acid types as input and outputs a sterically plausible protein structure.</p>
<p>
<underline>
<italic>Feature learning</italic>
</underline>: <bold>DeepTracer-2.0</bold> enhances the capabilities of DeepTracer by incorporating the identification of nucleic acids alongside amino acids from cryo-EM density maps (<xref ref-type="bibr" rid="B127">Nakamura et al., 2023</xref>). DeepTracer-2.0 achieves this through an initial segmentation step that separates the cryo-EM map into distinct macromolecular densities. Following this, the pipeline employs two specialized U-Net architectures: an amino acid U-Net for protein backbone and C&#x3b1; atom determination, and a newly integrated nucleotide U-Net for identifying phosphate (P) and sugar carbon atoms in nucleic acid regions. This nucleotide U-Net, distinct from the amino acid U-Net due to the differing molecular structures of proteins and nucleic acids, predicts the structural aspects of nucleic acids. The Atoms U-Net, with four output channels, identifies whether each input voxel contains a phosphate (P) atom, sugar carbon atoms (C1&#x2032;, C4&#x2032;), or no atoms. Concurrently, its Backbone U-Net, with three output channels, determines if a voxel belongs to the sugar-phosphate backbone, the nitrogenous base, or neither. Both U-Nets are optimized for defining the DNA/RNA phosphate backbone. <underline>
<italic>Model building</italic>
</underline>: DeepTracer-2.0 post-processing phase refines the predictions from nucleotide U-Net, the predicted phosphate (P) and carbon atom (C1&#x2032;, C4&#x2032;) positions, to build an accurate sugar-phosphate backbone consistent with DNA/RNA biological principles. This involves reducing spurious phosphate predictions and connecting the remaining ones based on characteristic DNA/RNA geometry, considering the influence of sugar puckers, and utilizing pseudotorsion angles to simplify backbone construction. Finally, the refined phosphate atoms and cryo-EM density map data are fed to Brickworx model (<xref ref-type="bibr" rid="B35">Chojnowski et al., 2015</xref>), which completes the nucleotide modeling by identifying matching double-stranded helical motifs for DNA or extending to recurrent RNA motifs, including single-stranded segments, ensuring the final structure adheres to known nucleic acid conformations. The nucleotide post-processing step allows DeepTracer-2.0 to model the complete nucleotide structure from the cryo-EM density map and sequence data. After independent post-processing to complete each structure, the predicted protein and DNA/RNA models are combined, ultimately yielding a comprehensive model of the entire macromolecular complex.</p>
<p>
<underline>
<italic>Feature learning</italic>
</underline>: <bold>DeepMM</bold> uses a multi-task Densely Connected Convolutional Network (DenseNet) architecture to construct all-atom models from near-atomic resolution cryo-EM density maps (<xref ref-type="bibr" rid="B75">He and Huang, 2021b</xref>). Compared to CNNs, DenseNets, which connect each layer to all subsequent layers in a feed-forward fashion within each dense block, alleviate the vanishing-gradient problem, and encourage feature reuse while reducing the number of parameters. DeepMM features two embedded DenseNets. DenseNet A simultaneously predicts the probability of main-chain atoms (N, C and C&#x3b1;) and C&#x3b1; positions for each voxel, creating a 3D probability map. From this map, local dense points (LDPs) are then identified using mean-shift algorithm (<xref ref-type="bibr" rid="B19">Carreira-Perpinan, 2006</xref>) and are used by a main-chain tracing algorithm, MAINMAST (<xref ref-type="bibr" rid="B170">Terashi and Kihara, 2018</xref>), to generate possible main-chain paths. MAINMAST connects LDPs to form a minimum spanning tree (MST), in which total distance of connected points is minimized, and iteratively refines this tree structure using a tabu search method (<xref ref-type="bibr" rid="B67">Glover, 1986</xref>) and the longest path within the refined tree is ultimately traced as the main-chain path. DenseNet B then predicts the amino acid and secondary structure types for each main-chain local dense point (LDP) on these main-chain paths. <underline>
<italic>Model building</italic>
</underline>: After DeepMM determines the C&#x3b1; probability, amino acid type, and secondary structure for each main-chain point on the main-chain path, the protein&#x2019;s target sequence is aligned to these main-chain paths using Smith-Waterman dynamic programming (DP) algorithm (<xref ref-type="bibr" rid="B164">Smith and Waterman, 1981</xref>), which evaluates the match between the sequence and the main-chain path using scoring matrices for both amino acid and secondary structure types. The resulting C&#x3b1; models are then ranked based on their alignment scores. Finally, the top-ranked C&#x3b1; models are used to construct the complete all-atom protein structures with the <italic>ctrip</italic> program from the Jackal modeling package (<xref ref-type="bibr" rid="B187">Xiang and Honig, 2001</xref>; <xref ref-type="bibr" rid="B134">Petrey et al., 2003</xref>) and refined using the AMBER package (<xref ref-type="bibr" rid="B22">Case et al., 2025</xref>).</p>
<p>
<underline>
<italic>Feature learning</italic>
</underline>: <bold>SEGEM</bold> is an automated method that quickly and accurately builds protein backbone structures from cryo-EM density maps (<xref ref-type="bibr" rid="B28">Chen et al., 2021</xref>). SEGEM employs 3D convolutional neural networks to predict both C&#x3b1; locations and their amino acid types simultaneously from cryo-EM density maps. The CNN model employed for this task involves an initial image preprocessing step. The sampled sub-images are fed into three separate CNN models. Each model has a specific prediction task: one for C&#x3b1; identification which generates a predicted C&#x3b1; probability density map, another for amino acid type prediction, and a third for secondary structure prediction. Non-Maximum Suppression (NMS) algorithm is applied to pick out local maximum C&#x3b1; probability density voxels as predicted C&#x3b1; sites. These sites then have their amino acid types predicted, a vital step for accurately assigning them to the overall protein sequence. <underline>
<italic>Model building</italic>
</underline>: At the model construction step, SEGEM uses a highly parallel pipeline to efficiently match CNN predicted sites to the native protein sequence using a score matrix. Specifically, C&#x3b1; local tracing connects predicted C&#x3b1;s with its neighbors to form continuous traces. Next, these traces are assigned to protein sequence segments by calculating a matching score, which leverages predicted amino acid types to create an amino acid scoring matrix. For any unassigned segments, protein threading using a breadth-first search with pruning strategy is employed for faster processing ultimately yielding a complete model of C&#x3b1; coordinates aligned to the protein sequence.</p>
<p>
<underline>
<italic>Feature learning</italic>
</underline>: <bold>SegmA</bold> is a novel deep neural method for cryo-EM density map visualization and protein modeling (<xref ref-type="bibr" rid="B154">Rozanov and Wolfson, 2023</xref>). It works by performing residue type segmentation, labeling and color-coding voxels in a cryo-EM density map based on whether they represent specific amino acids or nucleic acids. This color-coded visualization helps with both manual and automated modeling. Beyond visualization, SegmA can also predict amino acid centers of mass, score how well a protein template fits a map, and assist in <italic>de novo</italic> modeling of protein complexes. SegmA consists of a cascade of convolutional neural networks (CNNs) and group rotational equivariant CNNs (G-CNNs) to label voxels in a cryo-EM density map to one of the following categories: 20 amino acids, nucleotide, background or unconfident (uncertain). A G-CNN is rotation equivariant, unlike traditional CNNs which are only translation equivariant, meaning the feature maps transform accordingly when the input is rotated or translated. The Classification Net (CLF-NET), a G-CNN, performs an initial labeling of the processed volume. Its output is then passed to the Segmentation Net (SEG-NET), a U-Net CNN with contraction path (encoder) and an expansion path (decoder), which performs the final labeling of the voxels. Lastly, the Confidence Net (CNF-NET), another G-CNN, evaluates results from the SEG-NET assigning a binary confidence label to each voxel and only reporting the correct ones.</p>
<p>
<underline>
<italic>Feature learning</italic>
</underline>: <bold>ModelAngelo</bold> is a deep-learning tool that automates atomic model building in cryo-EM density maps at resolutions better than 4.0 &#xc5; (<xref ref-type="bibr" rid="B87">Jamali et al., 2024</xref>; <xref ref-type="bibr" rid="B86">Jamali, 2022</xref>). It generates protein models comparable to those built by human experts and creates highly accurate backbones for nucleic acid models. Additionally, ModelAngelo also identifies protein chains in cryo-EM density maps, facilitating visual exploration of proteomes. ModelAngelo employs a modified feature-pyramid network (<xref ref-type="bibr" rid="B109">Lin, 2017</xref>), which is a convolutional neural network (CNN), to predict the approximate positions of protein and nucleic acid residues within the cryo-EM map. Specifically, it determines whether each voxel in the map contains a C&#x3b1; atom of an amino acid, a phosphorus (P) atom of a nucleic acid residue, or neither. This process effectively initializes the graph representation, in which each residue is a node, and edges are formed between each residue and its nearest neighbors, by identifying potential residue positions. <underline>
<italic>Model building</italic>
</underline>: ModelAngelo employs a graph neural network (GNN) to optimize residue positions and orientations, predict amino/nucleic acid identity, and determine side chain/base torsion angles. This GNN comprises three modules - a cryo-EM module, a sequence module, and an invariant point attention (IPA) module - where each module refines a node-associated residue feature vector by integrating new information, progressively extracting more detail from the various inputs. The cryo-EM module within the GNN extracts and integrates information from the cryo-EM density map to refine residue representations. It allows the GNN, using convolutional neural networks (CNNs), to analyze the cryo-EM density around each C&#x3b1; atom and the density connecting it to neighboring nodes, thereby updating its internal representation. It uses a cross-attention mechanism to allow each residue to exchange information with its 20 closest neighbors and is driven by how connected the cryo-EM density appears between them. Unlike self-attention, which dynamically accesses all parts of the same input sequence and effectively captures long-range dependencies, cross-attention enables information flow between different data modalities. Concurrently, the module extracts a cubic section of the cryo-EM density map around the current residue position, which is processed by another CNN. The features extracted from this CNN are then combined with the output from the cross-attention. This combined information is used to predict amino and nucleic acid identities and outputs the updated residue feature vector. The sequence module, implemented as a Transformer module within ModelAngelo, integrates sequence information into the GNN. It performs cross-attention for each residue with the user-provided amino acid sequences, which are embedded using the pre-trained ESM-1b protein language model (<xref ref-type="bibr" rid="B149">Rives et al., 2021</xref>). 
The output from this cross-attention is then used in two ways: a dedicated MLP (multi-layer perceptron) generates predictions for amino and nucleic acid identities, and a second MLP generates the updated residue feature vector of the sequence module. The IPA module in the GNN integrates geometric information from the nodes in the graph. It allows the model to learn the topology of neighboring residues, such as secondary structure, by assessing their spatial relationships. ModelAngelo generates the complete atomic model by post-processing residue feature vectors. These vectors serve as input to two separate MLPs that predict position and orientation for each residue, along with torsion angles for amino acid side chains and nucleic acid bases. Predictions for amino/nucleic acid identities, derived from the cryo-EM and sequence modules, are averaged to create probability distributions. These probabilities form a Hidden Markov Model (HMM) profile (<xref ref-type="boxed-text" rid="dBox1">Box 1</xref>: Glossary section), which is then used with <italic>HMMER</italic> (<xref ref-type="bibr" rid="B54">Eddy, 2011</xref>) to search against input sequences. The parameters of the profile HMM are estimated from ModelAngelo predictions. Residues matching the sequence are updated, and separate chains are connected based on sequence alignment and proximity, chains shorter than four residues are removed. Finally, a complete atomic model is generated by integrating the predicted positions and orientations of each residue with their corresponding amino acid or nucleic base torsion angles, utilizing idealized geometries to ensure structural accuracy. The refined coordinates are then fed back into the GNN for three additional recycling iterations to further improve the accuracy of the model. ModelAngelo optimizes atomic positions using an L-BFGS optimizer (<xref ref-type="bibr" rid="B114">Liu and Nocedal, 1989</xref>). 
This final relaxation step removes unnatural side-chain distances and steric clashes.</p>
<p>
<underline>
<italic>Feature learning</italic>
</underline>: <bold>CryoREAD</bold> employs a two-stage deep neural network for reconstructing nucleic acid structures from cryo-EM density maps at resolutions from 2.0 to 5.0 &#xc5; (<xref ref-type="bibr" rid="B180">Wang et al., 2023</xref>). The Stage 1 network uses a cascaded, two-stage U-Net architecture, concatenating two 3D U-shape-based convolutional network (UNet) models with full-scale skip connections. The first of these U-Nets focuses on detection of sugar, phosphate, base, and protein, while the second U-Net specifically predicts individual base types (A, C, G, and T/U), leveraging information passed from the first U-Net&#x2019;s encoder. The Stage 2 network then refines these initial probabilities (protein, phosphate, sugar, base, and the four base types) and generates more accurate outputs. CryoREAD applies the mean-shift algorithm (<xref ref-type="bibr" rid="B19">Carreira-Perpinan, 2006</xref>) to cluster grid points that exceed specific probability thresholds to identify representative sugar nodes which are then connected into a graph. Edges are established between sugar nodes based on their probability and inter-node distance. <underline>
<italic>Model building</italic>
</underline>: CryoREAD uses vehicle routing problem (VRP) solver (<xref ref-type="bibr" rid="B143">Psaraftis, 1988</xref>) (<xref ref-type="boxed-text" rid="dBox1">Box 1</xref>: Glossary section) to trace the nucleic acid backbone(s) from the predicted sugar graph. Unlike the traveling salesman problem (TSP) solver, VRP employs multiple &#x201c;vehicles&#x201d; to visit nodes, allowing for the identification of multiple, non-overlapping paths within the graph. This approach is well-suited for cryo-EM density maps that may contain multiple nucleic acid chains, as the VRP solver aims to maximize visited nodes while minimizing total route costs. CryoREAD assigns nucleic acid sequences to the sugar nodes within the traced paths using two sub-steps: assigning base sequence fragments to paths and assembling them. Initially, sugar backbone paths are segmented and these segments are then aligned with the nucleic acid sequence using a dynamic programming (DP) algorithm (<xref ref-type="boxed-text" rid="dBox1">Box 1</xref>: Glossary section) to identify the top candidate sequence fragments. Subsequently, a constraint programming (CP) solver (<xref ref-type="bibr" rid="B151">Rossi, 2006</xref>) assembles these assigned sequence fragments. This solver aims to maximize the combined probability score of sugar nodes with their assigned bases while ensuring consistency across overlapping path segments and the nucleic acid sequences. At this point, the model comprises the sugar backbone and bases linked to representative sugar nodes. The final step involves incorporating nearby phosphate and base nodes that meet specific distance criteria into the sugar nodes. 
The output models are then refined using a two-step process: an initial refinement of predicted RNA/DNA regions using <italic>phenix.real_space_refine</italic> (<xref ref-type="bibr" rid="B3">Afonine et al., 2018a</xref>), followed by all-atom refinement in COOT (<xref ref-type="bibr" rid="B55">Emsley and Cowtan, 2004</xref>).</p>
<p>
<underline>
<italic>Feature learning</italic>
</underline>: <bold>SMARTFold</bold> is a deep learning protein structure prediction model that integrates cryo-EM density map features with sequence alignment features to accurately predict protein folds and outputs a full atomic structure requiring no additional post-processing steps (<xref ref-type="bibr" rid="B105">Li et al., 2023</xref>). SMARTFold first feeds the protein sequence into the AlphaFold-Multimer (<xref ref-type="bibr" rid="B58">Evans et al., 2022</xref>) data pipeline to generate initial MSA and residue pair representations. Simultaneously, from the raw cryo-EM density map, a 3D U-Net extracts a representative point cloud, which captures a backbone confidence map from the sparsely populated 3D EM density map. From this map, support points are sampled along predicted high-confidence backbone areas. The geometric features of these support points are then extracted and embedded into a point-pair representation. To maintain the relationship between these support points and the protein residues, a point-residue pair representation is introduced. Finally, these geometric features are integrated with the sequence alignment features as input for the protein folding prediction. <underline>
<italic>Model building</italic>
</underline>: SMARTFold introduced EMformer, a novel module that integrates geometric features with sequence alignment features to predict protein structure. Within EMformer, MSA, residue pair, point-residue pair, and point pair representations all exchange and update their information. Finally, an AlphaFold2-inspired (<xref ref-type="bibr" rid="B89">Jumper et al., 2021</xref>) structure module predicts the atomic structure. Like AlphaFold2 (<xref ref-type="bibr" rid="B89">Jumper et al., 2021</xref>), these learned representations can be recycled to further enhance model performance.</p>
<p>
<underline>
<italic>Feature learning</italic>
</underline>: <bold>EMRNA</bold> is a deep learning-based method designed to automatically and accurately determine full-length, all-atom RNA structures directly from cryo-EM density maps (resolutions ranging from 2.0 to 6.0 &#xc5;) and RNA sequence as inputs (<xref ref-type="bibr" rid="B108">Li T. et al., 2025</xref>). EMRNA utilizes a Swin-Conv-UNet (SCUNet) deep learning architecture (<xref ref-type="bibr" rid="B194">Zhang et al., 2023</xref>) to predict the probability of RNA phosphate (P), C4&#x2032;, and N1/N9 atom positions, along with their corresponding nucleotide types, for every voxel in the RNA cryo-EM density map. The SCUNet architecture is built with three encoder, one transition, and three decoder Swin-Conv (SC) blocks, linked by skip connections. The &#x201c;Swin&#x201d; component&#x2014;a shifted window transformer&#x2014;excels at nonlocal modeling while the &#x201c;Conv&#x201d; part, a convolutional network, provides efficient local modeling. This combination gives the SC block a significant advantage over traditional convolutional neural networks, as it can effectively capture both local and long-range structural information from the cryo-EM density maps. The local maxima, identified using a mean-shift algorithm within the predicted P and C4&#x2032; probability maps, are then used to identify main-chain points (MCPs), which represent potential atom locations. <underline>
<italic>Model building</italic>
</underline>: EMRNA constructs the RNA backbone by threading SCUNet derived MCPs into multiple backbone traces by solving a traveling salesman problem (TSP) (<xref ref-type="bibr" rid="B78">Helsgaun, 2000</xref>) (<xref ref-type="boxed-text" rid="dBox1">Box 1</xref>: Glossary section), with diverse trace types sampled from P, C4&#x2032;, or combined positions. These traces are then scored by aligning them with the RNA sequence using a Smith&#x2013;Waterman dynamic programming algorithm (<xref ref-type="bibr" rid="B164">Smith and Waterman, 1981</xref>) and incorporating predicted secondary structure information, C4&#x2032; probabilities and nucleotide type assignments to remove incorrect paths. The most probable C4&#x2032; trace is selected via sequence alignment, after which P atoms are placed along it and N1/N9 positions are located from their respective probability maps. Once the coarse-grained backbones are established, EMRNA constructs the full-atom RNA structure. It does this by rigidly aligning A, U, G, and C nucleotide coordinates, extracted from ideal A&#x2013;U and G&#x2013;C pairings, onto the backbone using Kabsch superposition (<xref ref-type="bibr" rid="B90">Kabsch, 1976</xref>), based on their P, C4&#x2032;, and N1/N9 atoms. The process then detects possible base pairings using inter-C4&#x2032; distances, followed by detecting helices and further refinement of the base-pair conformations. The output model is energy minimized using the AMBER package (<xref ref-type="bibr" rid="B22">Case et al., 2025</xref>) and further refined using <italic>phenix.real_space_refine</italic> (<xref ref-type="bibr" rid="B3">Afonine et al., 2018a</xref>).</p>
<p>
<underline>
<italic>Feature learning</italic>
</underline>: Unlike EMRNA which is specific to RNA, <bold>EM2NA</bold> works with cryo-EM density maps of protein-DNA/RNA or multi-chain DNA/RNA complexes at &#x3c; 5.0 &#xc5; resolutions and uses deep learning to automatically build all-atom nucleic acid structures including DNA (<xref ref-type="bibr" rid="B106">Li et al., 2024</xref>). EM2NA is built on a two-stage Swin-Conv-UNet (SCUNet) network architecture (<xref ref-type="bibr" rid="B194">Zhang et al., 2023</xref>). The SCUNet uniquely combines Swin Transformer blocks, which excel at non-local modeling, with Convolutional Network blocks, known for their efficient local modeling. This hybrid approach allows EM2NA to leverage both local and non-local learning capabilities, outperforming traditional CNNs. In stage-1 SCUNet, EM2NA processes raw cryo-EM maps to segment and detect DNA/RNA regions, distinguishing them from protein and background. The identified nucleic acid density is then fed into the stage-2 SCUNet. Here, the network predicts nucleotide information, including the precise positions of P, C4&#x2032;, and N1 or N9 atoms, as well as their corresponding nucleotide types at each voxel. Finally, the backbone atom probabilities generated by the stage-2 SCUNet are converted into 3D points by detecting the local maxima using a mean-shift algorithm. <underline>
<italic>Model building</italic>
</underline>: Unlike EMRNA which uses a TSP solver, EM2NA employs a Vehicle Routing Problem (VRP) algorithm (<xref ref-type="bibr" rid="B79">Helsgaun, 2017</xref>) to trace SCUNet generated initial P and C4&#x2032; points into multiple backbone paths. Since VRP allows multiple paths for traveling, it is ideal for constructing multi-chain DNA/RNA structures. Determining the correct direction for each path is straightforward, leveraging the known nucleotide geometries. Once the backbone paths are established, a Smith-Waterman algorithm (<xref ref-type="bibr" rid="B164">Smith and Waterman, 1981</xref>) is used to assign sequences to each backbone. This assignment is further refined by considering base pairing in double helices and helical geometry. Finally, with the built DNA/RNA backbone paths and assigned nucleotide types, the full-atom DNA/RNA structure is built by aligning template nucleotide conformations onto the P-C4&#x2032;-N1/N9 backbone using the Arena algorithm (<xref ref-type="bibr" rid="B133">Perry et al., 2023</xref>).</p>
<p>
<underline>
<italic>Feature learning</italic>
</underline>: <bold>Cryo2Struct</bold> automatically generates atomic protein structures from medium and high-resolution cryo-EM density maps and corresponding amino acid sequences (<xref ref-type="bibr" rid="B63">Giri and Cheng, 2024</xref>). It initiates this process by using two distinct 3D transformer-based deep learning models to classify each voxel in the cryo-EM density map. One model identifies backbone atom types (C&#x3b1;, C, N, or no atom), while the other predicts the amino acid type (20 standard amino acids and the absence of an amino acid or unknown amino acid). These models are trained as sequence-to-sequence predictors, leveraging a transformer-encoder to capture long-range voxel-voxel dependencies and a skip-connected decoder (like a U-Net) for feature integration and classification. The training was conducted on the extensive Cryo2StructData dataset (<xref ref-type="bibr" rid="B66">Giri et al., 2024</xref>), which contains 6,652 cryo-EM maps for training and 740 for validation, followed by blind testing on two separate datasets. After prediction, a clustering strategy is applied to group spatially close C&#x3b1; voxel predictions within a 2.0 &#xc5; radius, selecting the centrally located voxel to represent the final C&#x3b1; atom and eliminate redundancy. <underline>
<italic>Model building</italic>
</underline>: To connect the predicted C&#x3b1; atoms into protein chains and accurately assign their amino acid types, Cryo2Struct employs an innovative Hidden Markov Model (HMM) (<xref ref-type="boxed-text" rid="dBox1">Box 1</xref>: Glossary section) where each predicted C&#x3b1; atom represents a hidden state. These hidden states are fully connected, with transition probabilities determined by the spatial distance between corresponding C&#x3b1; atoms. The likelihood of each hidden state emitting a specific amino acid is based on the predicted probabilities for that C&#x3b1; atom. Next, a customized Viterbi algorithm (<xref ref-type="bibr" rid="B61">Forney, 1973</xref>) aligns sequence of the target protein (or sequence for each chain for multi-chain proteins) to this HMM. This generates the most probable path of hidden states (C&#x3b1; atoms), and the path for the aligned chain represents the connected C&#x3b1; atoms and thus the backbone structure of the protein. For multi-chain proteins, these individual chain paths, combined with their aligned sequences, create the complete atomic backbone. The customized Viterbi algorithm ensures that each C&#x3b1; position is used only once in the aligned path. This is crucial because each C&#x3b1; corresponds to a single amino acid in the protein sequence. This HMM-based approach excels at assigning every amino acid of the protein to a C&#x3b1; position, assuming enough predicted C&#x3b1; atoms are present enabling Cryo2Struct to build very complete structural models from density maps.</p>
<p>
<underline>
<italic>Feature learning</italic>
</underline>: <bold>EModelX</bold> is a method that constructs protein complex structure models from cryo-EM density maps and protein sequences using cross modal alignment (<xref ref-type="bibr" rid="B30">Chen S. et al., 2024</xref>). It normalizes the cryo-EM density map and feeds it into a multi-task 3D residual U-Net. This U-Net incorporates skip-connections, which help maintain resolution despite max-pooling and address the vanishing gradient issue common in deep networks. The network predicts the distributions of C&#x3b1; atoms, backbone atoms, and amino acid types. C&#x3b1; candidates are identified from the predicted C&#x3b1; distribution using point-cloud clustering and non-maximum suppression (NMS). <underline>
<italic>Model building</italic>
</underline>: EModelX uses predicted distributions of C&#x3b1; atoms and amino acid types to sample C&#x3b1; traces and generate sequence profiles from cryo-EM density maps. A C&#x3b1;-sequence aligning score matrix is created, and high-confidence alignments are used to build an initial model, incorporating connectivity and sequence registration. Unmodeled gaps are then filled using a sequence-guiding C&#x3b1; threading algorithm to build the C&#x3b1; backbone model of the protein complex, followed by full atom construction using the PULCHRA tool (<xref ref-type="bibr" rid="B152">Rotkiewicz and Skolnick, 2008</xref>), which is further refined using <italic>phenix.real_space_refine</italic> (<xref ref-type="bibr" rid="B3">Afonine et al., 2018a</xref>). <bold>EModelX(&#x2b;AF)</bold>, which combines EModelX with AlphaFold2 (<xref ref-type="bibr" rid="B89">Jumper et al., 2021</xref>), can perform template-based modeling and refine AlphaFold&#x2019;s incorrectly folded structures. C&#x3b1; traces are sampled from both the EModelX predicted C&#x3b1; atoms and the AlphaFold2 predicted structure. The comparison of the structural similarity of these traces further improves the C&#x3b1;-sequence alignment score and enhances sequence-guiding C&#x3b1; threading.</p>
<p>
<underline>
<italic>Feature learning</italic>
</underline>: <bold>CryFold</bold> (<xref ref-type="bibr" rid="B168">Su et al., 2025</xref>) introduces a novel approach for <italic>de novo</italic> model building for cryo-EM density maps, leveraging key advancements from AlphaFold2 (<xref ref-type="bibr" rid="B89">Jumper et al., 2021</xref>) and ModelAngelo (<xref ref-type="bibr" rid="B87">Jamali et al., 2024</xref>). CryFold accelerates automated protein model building and produces more complete models while reducing the requirement for map resolution using a two-step process. First, a 3D convolution-based network U-Net takes the cryo-EM density map as input and outputs a probability map, with each voxel representing the likelihood of containing a C&#x3b1; atom. The U-Net architecture features a bottleneck structure in its encoder to preserve spatial information during downsampling. Its decoder utilizes the Res2Net architecture to extract rich semantic information during upsampling. These two types of information are then combined to generate the final C&#x3b1; atom probability map. The initial C&#x3b1; atom coordinates are then refined using the mean-shift algorithm to obtain a precise set of C&#x3b1; atom coordinates. <underline>
<italic>Model building</italic>
</underline>: CryFold then constructs the complete all-atom structure from the input density map, amino acid sequences, and predicted C&#x3b1; atoms using an enhanced transformer network called Cry-Net. Cry-Net comprises of two transformer-based modules. Cryformer (encoder) transforms the density map into initial node and edge representations guided by the predicted C&#x3b1; atom positions. The Structure Module (decoder) generates all-atom positions from these refined representations. Cry-Net iteratively updates these key protein structure representations through local attention mechanism leveraging spatial restraints from the density map. Cryformer processes initial node and edge representations along with ESM-2 sequence embeddings (<xref ref-type="bibr" rid="B110">Lin et al., 2023</xref>). Its core components include sequence attention, which integrates protein sequence information into node features via a cross-attention mechanism using sequence embeddings from ESM-2 (<xref ref-type="bibr" rid="B110">Lin et al., 2023</xref>). Node and edge attention update their respective representations through self-attention. A 3D Rotary Position Embedding (3D-RoPE) effectively encodes each node&#x2019;s positional information into all attention calculations, leveraging the inherent spatial constraints from the density map. Cryformer then assigns each node to one of the 20 amino acids by using information from the original node representation (density map of the node), updated node representation (incorporating neighboring node information), and sequence data from the sequence attention layer. A separate multi-layer perceptron (MLP) generates amino acid probability vectors for all nodes. The Structure Module decodes the updated representations from Cryformer into an all-atom structure. It predicts backbone frames and torsion angles using Cryformer updated node and edge representations. 
The module employs a self-attention mechanism on node features, restricted to a node and its nearest neighbors using constraints from the density map, with 3D-RoPE integrating positional information into attention scores. Finally, all-atom positions for each node are generated from the predicted backbone frame, backbone and side-chain torsion angles, and amino acid type. These positions undergo a post-processing step, similar to ModelAngelo (<xref ref-type="bibr" rid="B87">Jamali et al., 2024</xref>).</p>
<p>
<underline>
<italic>Feature learning</italic>
</underline>: <bold>DeepCryoRNA</bold> is a novel deep learning-based method designed for automated reconstruction of RNA 3D structures from protein-free cryo-EM density maps at resolutions 6 &#xc5; or better (<xref ref-type="bibr" rid="B101">Li and Chen, 2025</xref>). DeepCryoRNA employs a MultiResUNet neural network (<xref ref-type="bibr" rid="B84">Ibtehaz and Rahman, 2020</xref>), a variant of the U-Net architecture, to predict 18 types of RNA atoms (12 backbone, six base) from preprocessed cryo-EM density maps. The encoder-decoder structure of MultiResUNet employs a multi-resolution design, allowing it to integrate both local and global information for superior image segmentation. After prediction, atoms are clustered to remove redundancy from neighboring voxel predictions. <underline>
<italic>Model building</italic>
</underline>: DeepCryoRNA constructs nucleotides based on the clustered atoms predicted from MultiResUNet by factoring in atom classes and pairwise atomic distances. It identifies nucleotide types by analyzing base atom classes and their quantities. The process then links neighboring nucleotides into short chains, and these short chains are further connected to form complete long chains. Multiple complete chains can be derived, representing various connection pathways. The model uses a modified Gotoh algorithm (<xref ref-type="bibr" rid="B68">Gotoh, 1982</xref>) for global sequence alignments to match these complete long chains with native RNA sequences. DeepCryoRNA then selects the top 10 alignment results to assign native chain information to the corresponding complete long chains, yielding 10 all-atom RNA structures. These structures undergo post-processing, including energy minimization ultimately generating refined RNA 3D structures. The all-atom RNA structures are then refined using QRNAS software (<xref ref-type="bibr" rid="B166">Stasiewicz et al., 2019</xref>), to fix broken bonds and resolve steric clashes between atoms.</p>
<p>
<underline>
<italic>Feature learning</italic>
</underline>: <bold>E3-CryoFold</bold> is an efficient, end-to-end deep learning method that takes a cryo-EM density map and corresponding protein sequence as input and provides a one-shot inference to output the complete atomic structure (<xref ref-type="bibr" rid="B182">Wang et al., 2025</xref>). E3-CryoFold concurrently uses 3D and sequence transformers to extract features from density maps and protein sequences, respectively. While self-attention captures long-range dependencies within each modality, cross-attention modules integrate information between them. In E3-CryoFold, cross-attention is used to integrate spatially contextualized information from density maps into the sequence representation facilitating the integration of information from both modalities. To ensure this integration, E3-CryoFold embeds both modalities into a shared hidden space using 3D and sequence encoders. <underline>
<italic>Model building</italic>
</underline>: E3-CryoFold constructs the final 3D atomic models using an SE (3)-equivariant (<xref ref-type="boxed-text" rid="dBox1">Box 1</xref>: Glossary section) graph neural network, SE (3) GNN, which is conditioned on the extracted combined spatial-sequential features. SE (3)-equivariant GNN is a graph neural network for 3D data that ensures predictions transform consistently when the input is rotated or translated in 3D space. E3-CryoFold reconstructs the protein backbone by first initializing random coordinates and building a k-nearest neighbors (kNN) graph to define local spatial relationships between residues. Node embeddings are derived from integrated spatial and sequence features which are generated by combining spatial information from the cryo-EM density map with sequence information. These embeddings capture both local and global protein features, allowing the model to utilize the inherent relationship between a protein&#x2019;s sequence and its 3D structure. Each residue&#x2019;s local frame (orientation and position) is iteratively updated by an SE (3) GNN, which aggregates relative rotation and translation information from neighbors. This process ensures the backbone reconstruction respects geometric relationships and spatial transformations, ultimately allowing the recovery of 3D coordinates for all backbone atoms.</p>
</sec>
<sec id="s4-5-3-2">
<label>4.5.3.2</label>
<title>Hybrid model building</title>
<p>In this approach, deep learning tools generally integrate voxel-wise backbone (and sometimes sidechain) atom, secondary structure and residue type probabilities learned from the cryo-EM density map with template structures or fragments (such as those predicted by AlphaFold2 (<xref ref-type="bibr" rid="B89">Jumper et al., 2021</xref>) or derived from the PDB (<xref ref-type="bibr" rid="B190">Zardecki et al., 2016</xref>)) to accomplish model building.</p>
<p>
<underline>
<italic>Feature learning</italic>
</underline>: <bold>SEGEM&#x2b;&#x2b;</bold>, an enhanced version of the SEGEM method integrates AlphaFold2 (AF2) (<xref ref-type="bibr" rid="B89">Jumper et al., 2021</xref>) protein structure prediction algorithm with data from cryo-EM density maps (<xref ref-type="bibr" rid="B28">Chen et al., 2021</xref>). This allows SEGEM&#x2b;&#x2b; to not only identify accurately folded regions within AF2 structures by utilizing SEGEM predicted C&#x3b1; probability densities from cryo-EM density maps, but also to correct incorrectly folded areas through protein threading on the cryo-EM map itself. <underline>
<italic>Model building</italic>
</underline>: SEGEM&#x2b;&#x2b; calculates a confidence score for each C&#x3b1; in the AF2 structure, based on its alignment with the SEGEM predicted C&#x3b1; probability density map. This allows SEGEM&#x2b;&#x2b; to identify high-confidence, correctly folded AF2 structure fragments. These reliable fragments then serve as an improved base model, guiding subsequent protein threading on the predicted C&#x3b1; probability map to build a more accurate final protein structure. In essence, SEGEM&#x2b;&#x2b; leverages strong AF2 predictions to refine its cryo-EM model, while simultaneously using cryo-EM data to validate and correct any inaccuracies in the AF2 predictions.</p>
<p>
<underline>
<italic>Feature learning</italic>
</underline>: <bold>CR-I-TASSER</bold> (cryo-EM iterative threading assembly refinement) is a hybrid method that combines deep neural-network learning with I-TASSER assembly simulations to automate cryo-EM structure determination (<xref ref-type="bibr" rid="B193">Zhang et al., 2022</xref>). It uses multithreading algorithms to identify templates from the Protein Data Bank (PDB) (<xref ref-type="bibr" rid="B190">Zardecki et al., 2016</xref>), aiding structural assembly. CR-I-TASSER employs a 3D convolutional neural network (CNN) with a residual architecture to create sequence-order-independent C&#x3b1; atom trace models from cryo-EM density maps to improve threading template quality. <underline>
<italic>Model building</italic>
</underline>: CR-I-TASSER employs deep learning-based template refinement and regeneration, and density map-guided structural reassembly simulations. Using local meta-threading server (LOMETS) (<xref ref-type="bibr" rid="B198">Zheng et al., 2019</xref>), CR-I-TASSER derives threading templates from the PDB. The 3D CNN predicted C&#x3b1; conformation then refines threading templates through multiple heuristic iterative algorithms that align query and template sequences with the C&#x3b1; conformation for template reselection and C&#x3b1; trace regeneration. Finally, guided by cryo-EM density map correlations and deep-learning derived template restraints, the iterative threading assembly refinement method (I-TASSER) method (<xref ref-type="bibr" rid="B189">Yang et al., 2015</xref>) assembles full atomic structures which are then refined using fragment-guided molecular dynamics (<xref ref-type="bibr" rid="B192">Zhang et al., 2011</xref>).</p>
<p>
<underline>
<italic>Model prediction</italic>
</underline>: <bold>DEMO-EM</bold> (domain enhanced modeling using cryo-electron microscopy) is an automated method designed to assemble accurate full-length structural models of multi-domain proteins from cryo-EM density maps by integrating single-domain modeling and deep residual network learning techniques with progressive domain assembly and refinement procedure (<xref ref-type="bibr" rid="B202">Zhou et al., 2022</xref>). DEMO-EM uses D-I-TASSER (<xref ref-type="bibr" rid="B200">Zheng et al., 2025</xref>), which incorporates deep learning-based spatial restraints (including inter-residue contact and hydrogen-bonding potentials) into its iterative threading assembly simulations to generate an initial structural model for each domain. Meanwhile, inter-domain distances are predicted by DomainDist, a deep convolutional neural-network architecture with ResNet basic blocks. DomainDist guides the assembly of domain orientations by providing inter-domain distance maps. Each individual domain model generated by D-I-TASSER is independently fitted to the density map using a quasi-Newton optimization algorithm, Limited-memory Broyden&#x2013;Fletcher&#x2013;Goldfarb&#x2013;Shanno (L-BFGS) (<xref ref-type="bibr" rid="B114">Liu and Nocedal, 1989</xref>). Since L-BFGS is a local optimization method, simulations are initiated from multiple starting positions to identify the best location and orientation of the domain with the highest correlation with the density map. The initial full-length models are then optimized through a two-step assembly and refinement process. Model-density correlations primarily guide the domain assembly and refinement simulations. Following a rigid-body Replica Exchange Monte Carlo (REMC) simulation, the top scoring model from this stage is then refined further by flexible assembly, which incorporates atom, segment, and domain-level refinements using REMC simulation guided by the density correlation and inter-domain distance profiles. 
Finally, the lowest-energy model undergoes side-chain repacking with FASPR (<xref ref-type="bibr" rid="B83">Huang et al., 2020</xref>) to create the final model which is refined by fragment-guided molecule dynamics (FG-MD) simulations (<xref ref-type="bibr" rid="B192">Zhang et al., 2011</xref>). DEMO-EM can also assemble domain structures generated by any method other than D-I-TASSER. <bold>DEMO-EM2</bold>, an improved version of DEMO-EM, is an automated method for constructing protein complex models from cryo-EM density maps (<xref ref-type="bibr" rid="B195">Zhang et al., 2024</xref>). Unlike DEMO-EM, which is designed for multi-domain proteins, DEMO-EM2 focuses specifically on assembling protein complexes through an iterative assembly procedure. Instead of using D-I-TASSER, DEMO-EM2 employs AlphaFold2 (<xref ref-type="bibr" rid="B89">Jumper et al., 2021</xref>), due to its outstanding performance in protein structure prediction, to derive model of each individual chain. DEMO-EM2 incorporates several advancements over DEMO-EM including preprocessing the density map to reduce interference from noise during chain or domain fitting and using a differential evolution (DE) algorithm (<xref ref-type="bibr" rid="B167">Storn and Price, 1997</xref>) in addition to the quasi-Newton optimization, preventing it from getting trapped in local optima. Further, it also masks out density map regions that have already been matched with chain models, ensuring different chains do not align the same areas.</p>
<p>
<underline>
<italic>Feature learning</italic>
</underline>: <bold>DeepTracer-ID</bold> is a server-based, <italic>de novo</italic> protein identification method that uses high-resolution cryo-EM density maps (better than 4.2 &#xc5; resolution) to identify candidate proteins within a user-specified organism without requiring additional information (<xref ref-type="bibr" rid="B24">Chang et al., 2022</xref>). It achieves this by using DeepTracer (<xref ref-type="bibr" rid="B137">Pfab et al., 2021</xref>), a deep learning method, to automatically generate a protein backbone model from the input cryo-EM density map. <underline>
<italic>Model building</italic>
</underline>: DeepTracer generated protein backbone model is used by DeepTracer-ID to search against the library of AlphaFold2 (<xref ref-type="bibr" rid="B89">Jumper et al., 2021</xref>) predictions for all proteins in the given organism using three different alignment algorithms. PyMOL-align (<xref ref-type="bibr" rid="B50">DeLano and Lam, 2005</xref>) considers both sequence and structural similarities and is the default option. PyMOL-cealign (<xref ref-type="bibr" rid="B158">Shindyalov and Bourne, 1998</xref>) is ideal for proteins with low or no sequence similarity, or when side-chain densities in the cryo-EM map are not well-resolved. FATCAT (<xref ref-type="bibr" rid="B104">Li et al., 2020</xref>) specializes in flexible protein structure comparison, simultaneously optimizing alignment and minimizing rigid-body movements. It can be particularly useful for mitigating errors in AlphaFold2 predictions, and for smaller proteins or those where local environment dictates their 3D structure.</p>
<p>
<underline>
<italic>Feature learning</italic>
</underline>: <bold>EMBuild</bold> is an automated, deep learning-based method designed to construct multi-chain protein complex models directly from intermediate-resolution cryo-EM density maps (<xref ref-type="bibr" rid="B76">He et al., 2022</xref>). EMBuild employs a nested U-Net (UNet&#x2b;&#x2b;) architecture with dense skip connections to predict a precise main-chain probability map from the input cryo-EM density map, and AlphaFold2 (<xref ref-type="bibr" rid="B89">Jumper et al., 2021</xref>) to predict 3D structures of the input protein sequences. The main-chain probability map assigns a probability to each grid point, indicating the likelihood of a main-chain atom being present in that vicinity. Instead of directly fitting protein chains to the raw cryo-EM density, EMBuild uses the accurate main-chain probability map with more precise location information for main-chain atoms, which significantly improves the precision of subsequent chain fitting. <underline>
<italic>Model building</italic>
</underline>: EMBuild aligns each AlphaFold2 (<xref ref-type="bibr" rid="B89">Jumper et al., 2021</xref>) predicted structures of individual protein chains to the main-chain probability map using a Fast Fourier Transform (FFT)-based global alignment (<xref ref-type="bibr" rid="B184">Wen et al., 2020</xref>). To account for potential deviations between the input protein chain model and ground truth structure, a semi-flexible domain refinement strategy is then employed: each chain is first rigidly fitted, and then its individual structural domains are locally refined. For each fitted protein chain, EMBuild calculates a main-chain match score to quantify its fit to the probability map. With all individual chain fitting results, the final protein complex structure is assembled by identifying the optimal combination of fitted chains. This is achieved through an iterative Bron&#x2013;Kerbosch maximum clique algorithm, which selects combinations with the highest total main-chain match score while preventing severe atomic clashes between chains. Unassembled chains are then iteratively integrated into the complex through further fitting and the complex is refined using <italic>phenix.real_space_refine</italic> (<xref ref-type="bibr" rid="B3">Afonine et al., 2018a</xref>). Structural category annotations from EMInfo (<xref ref-type="bibr" rid="B18">Cao et al., 2025</xref>) have been shown to improve the modeling accuracy of EMBuild. EMBuild treats all density voxels equally during fitting, which can lead to inaccuracies in fitting at protein regions containing a mixture of &#x3b1;-helices, &#x3b2;-sheets, and coils. By incorporating secondary structure details from EMInfo, <bold>EMBuild &#x2b; EMInfo</bold> can accurately fit protein fragments by matching them to density voxels of the corresponding secondary structure type.</p>
<p>
<underline>
<italic>Feature learning</italic>
</underline>: <bold>FFF</bold> (&#x201c;Fragment-guided Flexible Fitting&#x201d;) uses a deep-learning-based multi-level recognition network to capture diverse structural features from cryo-EM density maps (<xref ref-type="bibr" rid="B29">Chen et al., 2023</xref>). Inspired by RetinaNet (ResNet-based), but adapted with 3D convolutions, this network not only predicts a voxel-wise backbone probability map but also unifies four distinct coarse-grained tasks: C&#x3b1; atom detection, C&#x3b1; location prediction, pseudo-peptide vector (PPV) estimation, and amino acid (AA) classification. The network&#x2019;s backbone (BB) component identifies whether each voxel is part of the protein backbone. The C&#x3b1; detection module then estimates the likelihood of a grid cell containing a C&#x3b1; atom. For cells likely to have a C&#x3b1; atom, the amino acid classification module predicts the specific type of amino acid. Finally, the pseudo-peptide vector (PPV) estimation module determines vectors connecting a C&#x3b1; atom to its subsequent C&#x3b1; atoms. <underline>
<italic>Model building</italic>
</underline>: FFF uses the extracted pseudo-peptide vectors (PPVs) to generate and recognize protein structural fragments. This involves connecting the C&#x3b1; atoms of residues into fragments, a process guided by selecting neighboring atoms based on the estimated PPVs and the known protein sequence. Once the protein fragments and backbone maps are identified, targeted molecular dynamics (TMD) (<xref ref-type="bibr" rid="B156">Schlitter et al., 1994</xref>) is used to refine and update an initial structure, aligning it with the recognized fragments. Next, molecular dynamics flexible fitting (MDFF) (<xref ref-type="bibr" rid="B175">Trabuco et al., 2009</xref>) updates the entire backbone conformation of the initial structure to match the predicted backbone map, yielding the complete protein structure of the target protein. During this MDFF step, positional restraints are added to the atoms initially selected in the TMD phase, preventing significant deviations.</p>
<p>
<underline>
<italic>Feature learning</italic>
</underline>: <bold>CrAI</bold> is an automatic deep learning method that detects and aligns antibodies (Fabs and VHHs) within cryo-EM density maps at resolutions up to 10.0 &#xc5; (<xref ref-type="bibr" rid="B117">Mallet et al., 2025</xref>). The core of CrAI is a customized 3D U-Net architecture, trained on a curated dataset. It uses a unique representation of antibody structures to facilitate the learning process, framing the task as a special instance of 3D object detection. To prevent redundant predictions of overlapping objects, CrAI employs a Non-Maximal Suppression (NMS) algorithm, a crucial post-processing technique to refine the output of object detection models. <underline>
<italic>Model prediction</italic>
</underline>: After detection, CrAI fits pre-classified Fab or VHH templates to the predicted locations and poses, providing a structural model of antibodies (Fabs and VHHs) within cryo-EM density maps.</p>
<p>
<underline>
<italic>Feature learning</italic>
</underline>: <bold>DeepMainmast</bold> is a method for protein structure modeling from cryo-EM density maps at resolutions between 2.5 and 5.0 &#xc5; (<xref ref-type="bibr" rid="B172">Terashi et al., 2024</xref>). It achieves this by combining protein main-chain tracing using deep learning with structure modeling from AlphaFold2 (<xref ref-type="bibr" rid="B89">Jumper et al., 2021</xref>). DeepMainmast utilizes Emap2sf (Emap to structural features), a deep-learning method having a U-shaped network (UNet) architecture with skip connections (<xref ref-type="bibr" rid="B82">Huang, 2020</xref>). The network, consisting of three encoder blocks and two decoder blocks built upon a three-dimensional convolutional layer (Conv3d), outputs probability values for 20 amino acid types and backbone atoms (N, C&#x3b1;, C) at each grid point in the density map which is required for subsequent C&#x3b1;-tracing. Local dense points (LDPs) are created by clustering grid points with a high probability for C&#x3b1; using the mean shift algorithm (<xref ref-type="bibr" rid="B19">Carreira-Perpinan, 2006</xref>). <underline>
<italic>Model building</italic>
</underline>: DeepMainmast reconstructs protein structures from cryo-EM density maps through a multi-stage process. It starts by connecting LDPs, identified from high C&#x3b1; probability regions, into C&#x3b1; paths using a VRP (Vehicle Routing Problem) solver which efficiently finds optimal routes by minimizing costs based on distance and main-chain atom probabilities. Once C&#x3b1; paths are established, they are aligned with the target protein sequence using the Smith-Waterman algorithm (<xref ref-type="bibr" rid="B164">Smith and Waterman, 1981</xref>), with matching scores determined by DAQ (AA) scores (<xref ref-type="bibr" rid="B171">Terashi et al., 2022</xref>) calculated from Emap2sf output. This generates numerous C&#x3b1; fragments, with the entire process repeated across various parameter combinations (C&#x3b1; probability cutoff, number of VRP vehicles, and cost function parameters). A key innovation in DeepMainmast is the integration of AlphaFold2 (AF2) models, specifically the top-ranked one based on pLDDT scores. AF2 models contribute by providing additional C&#x3b1; fragments to fill gaps in low-density regions and also serve as global structures for fitting to the density map. Next, C&#x3b1; protein models are assembled from these combined fragment libraries using a constraint programming (CP) solver (<xref ref-type="bibr" rid="B132">Perron and Lee, 2011</xref>). This solver optimizes fragment combinations to maximize the total DAQ score while preventing steric clashes, ensuring consistent amino acid positioning, and maintaining consistent C&#x3b1;-C&#x3b1; distances. In parallel, AF2 models are also directly superimposed onto the density map using the structure fitting program VESPER (<xref ref-type="bibr" rid="B72">Han et al., 2021</xref>). 
Finally, these refined C&#x3b1; models are converted into full-atom structures using PULCHRA (<xref ref-type="bibr" rid="B152">Rotkiewicz and Skolnick, 2008</xref>), with any missing regions subsequently filled and refined by RosettaCM (<xref ref-type="bibr" rid="B165">Song et al., 2013</xref>).</p>
<p>
<underline>
<italic>Model prediction</italic>
</underline>: <bold>DeepTracer-Refine</bold> (<xref ref-type="bibr" rid="B31">Chen J. et al., 2024</xref>) improves protein structure prediction by combining DeepTracer (<xref ref-type="bibr" rid="B137">Pfab et al., 2021</xref>) (a map-to-model method) with AlphaFold2 (<xref ref-type="bibr" rid="B89">Jumper et al., 2021</xref>) (a sequence-to-model method). It splits AlphaFold structures into compact domains, identifying optimal separation points based on AlphaFold&#x2019;s predicted Local Distance Difference Test (pLDDT) metric, which estimates the confidence level for each residue in its prediction. Each of these smaller domains then undergoes rigid body alignment using a selection of algorithms, including PyMOL cealign (<xref ref-type="bibr" rid="B158">Shindyalov and Bourne, 1998</xref>), PyMOL align (<xref ref-type="bibr" rid="B50">DeLano and Lam, 2005</xref>), and Chimera MatchMaker (<xref ref-type="bibr" rid="B121">Meng et al., 2006</xref>), with the best fit chosen for maximum residue coverage. This iterative alignment process continuously updates AlphaFold&#x2019;s residue locations as each domain is aligned to DeepTracer&#x2019;s prediction, ultimately providing a more refined and accurate protein structure. <underline>
<italic>Model prediction</italic>
</underline>: <bold>DeepTracer-LowResEnhance</bold> (<xref ref-type="bibr" rid="B115">Ma and Si, 2025</xref>) is a computational method that enhances low-resolution cryo-EM density maps by integrating structural predictions from AlphaFold2 (<xref ref-type="bibr" rid="B89">Jumper et al., 2021</xref>) with a deep learning-based map refinement strategy. The input sequence is first processed by AlphaFold2 to generate an initial 3D structure, which is then used to generate a simulated map using ChimeraX (<xref ref-type="bibr" rid="B122">Meng et al., 2023</xref>). Both the simulated map and the original cryo-EM map are then fed into the CryoFEM (<xref ref-type="bibr" rid="B45">Dai et al., 2023</xref>) module which averages these maps, splits them into chunks, and uses a UNet-based deep neural network to reconstruct a refined map. This integration leverages AlphaFold&#x2019;s accurate sequence-based structural predictions with cryo-EM data, significantly enhancing model quality in low-resolution cases. Finally, DeepTracer (<xref ref-type="bibr" rid="B137">Pfab et al., 2021</xref>) generates a high-accuracy 3D protein structure model from the refined cryo-EM map.</p>
<p>
<underline>
<italic>Feature learning</italic>
</underline>: <bold>CryoJAM</bold> is a deep learning-based tool designed to automate and enhance the challenging process of fitting large protein complexes into medium-resolution cryo-EM density maps, thereby accelerating their structural modeling (<xref ref-type="bibr" rid="B20">Carrion et al., 2024</xref>). The 3D convolutional neural network (CNN) of CryoJAM leverages a U-Net architecture and a novel composite loss function that incorporates both Fourier-shell correlation (FSC) and Root Mean Squared Error (RMSE). FSC serves as a proxy for the quality of fit in Fourier space, while RMSE directly optimizes atomic accuracy in real space. The UNet-based architecture of CryoJAM handles both 3D volumetric cryo-EM densities and homolog structures and its outputs represent the adjusted homolog coordinates. <underline>
<italic>Model building</italic>
</underline>: CryoJAM generates a volume representing predicted C&#x3b1; atom locations within the cryo-EM density highlighting backbone density. Since this output is a continuous volume, CryoJAM employs a post-processing workflow to derive discrete all-atom coordinates. This involves using a KD-tree (<xref ref-type="bibr" rid="B163">Skrodzki, 2019</xref>) to select the top C&#x3b1; voxel activations, binarizing the volume, and outputting 3D coordinates. A greedy matching algorithm aligns these selected C&#x3b1; atoms to their closest counterparts in the input structure for adjustment. Finally, these C&#x3b1; traces are processed by PULCHRA (<xref ref-type="bibr" rid="B152">Rotkiewicz and Skolnick, 2008</xref>) which can construct physically realistic all-atom structures from only C&#x3b1; coordinates.</p>
<p>
<underline>
<italic>Feature learning</italic>
</underline>: <bold>DiffModeler</bold> is a fully automated method designed to model large protein complex structures, effectively fitting them into cryo-EM density maps with resolutions up to approximately 15.0 &#xc5; (<xref ref-type="bibr" rid="B181">Wang et al., 2024</xref>). It employs a diffusion model to trace protein backbones by capturing local density patterns representing protein backbones in low resolution cryo-EM density maps. It then integrates this diffusion model enhanced map with AlphaFold2 (<xref ref-type="bibr" rid="B89">Jumper et al., 2021</xref>) predicted structures for accurate structure fitting, thereby enhancing the extraction of structural information from intermediate-resolution cryo-EM density maps. A diffusion model is a generative model within a probabilistic framework, trained to generate data samples that closely resemble the underlying data distribution. The conditional diffusion model of DiffModeler starts with random Gaussian noise and a cryo-EM density map as inputs. The model employs encoder-decoder network architecture (based on U-Net). The encoder first processes a cryo-EM density box, computing and embedding its hidden features. The decoder then begins with random Gaussian noise and iteratively refines its density estimates, moving closer to the ground-truth traced backbone. The entire encoder-decoder network is optimized by comparing the predicted and ground-truth traced backbones. Once trained, the conditional diffusion model generates a refined, traced backbone based on the input cryo-EM density. <underline>
<italic>Model building</italic>
</underline>: DiffModeler fits AlphaFold2 (<xref ref-type="bibr" rid="B89">Jumper et al., 2021</xref>) predicted single-chain protein structures into the diffusion model enhanced map using the VESPER algorithm (<xref ref-type="bibr" rid="B72">Han et al., 2021</xref>). VESPER aligns each subunit with the diffused map and generates the top 100 candidate poses. A subsequent assembly phase then uses a greedy algorithm to combine suitable poses from these subunits, thereby constructing the complete protein complex structure. Additionally, DiffModeler splits the multi-domain AlphaFold2 structures into individual domains using SWORD2 (<xref ref-type="bibr" rid="B43">Cretin et al., 2022</xref>) and uses these domains in the fitting process to mitigate inaccuracies, particularly in AlphaFold2 models where domain orientations are incorrect despite accurate individual domains. <bold>DMcloud</bold> is a local structure fitting tool for medium to low resolution cryo-EM density maps (<xref ref-type="bibr" rid="B173">Terashi et al., 2025</xref>). It fits structures by converting molecular models and cryo-EM density maps into point clouds for precise local alignments and iterative refinements to address erroneous AlphaFold2 models that have accurate local structural details, but their global conformation is inaccurate.</p>
<p>
<underline>
<italic>Feature learning</italic>
</underline>: <bold>Cryo2struct2</bold> (<xref ref-type="bibr" rid="B64">Giri and Cheng, 2025</xref>) is a deep learning model that combines sequence-based features from a Protein Language Model (ESM) (<xref ref-type="bibr" rid="B110">Lin et al., 2023</xref>) with cryo-EM density maps to derive templates for AlphaFold3 (<xref ref-type="bibr" rid="B2">Abramson et al., 2024</xref>) structure predictions. This integration allows Cryo2Struct2 to generate more accurate atomic models, especially for large proteins with flexible or complex conformations and those with regions of low-resolution and missing density. Unlike Cryo2Struct (<xref ref-type="bibr" rid="B63">Giri and Cheng, 2024</xref>) which uses two separate 3D transformer models to predict atom types and amino acid types respectively, its successor Cryo2Struct2 uses a unified deep learning model with a shared transformer encoder to extract features from cryo-EM density maps. This encoder feeds into two specialized decoders for atom-type and amino acid-type predictions. The architecture of the model is based on 3D SegFormer (<xref ref-type="bibr" rid="B131">Perera et al., 2024</xref>) and is designed to integrate ESM protein language model embeddings with map features. This is done by transforming the ESM embeddings via a multi-layer perceptron (MLP) and adding them to the multi-scale feature representations from the map, ensuring sequence-level information is incorporated. The transformer encoder, utilizing an efficient self-attention mechanism, captures hierarchical features from the density map, while each decoder predicts its specific labels (atom types or amino acid types) for every voxel. The atom-type decoder directly predicts labels (C&#x3b1;, N, C, or no atom), while the amino acid-type decoder, benefiting from the atom-type features as an auxiliary input, predicts 21 different amino acid classes (20 standard amino acids plus the absence of an amino acid or unknown amino acid). 
Cryo2Struct2 is also trained on Cryo2StructData dataset (<xref ref-type="bibr" rid="B66">Giri et al., 2024</xref>) and uses two clustering thresholds: 2 &#xc5; and 3 &#xc5; for clustering predicted C&#x3b1; voxels. <underline>
<italic>Model building</italic>
</underline>: Cryo2Struct2 uses a two-step process to generate accurate protein models. First, predicted atom and amino acid type probabilities are used to build a Hidden Markov Model (HMM) (<xref ref-type="boxed-text" rid="dBox1">Box 1</xref>: Glossary section). This HMM, processed by a modified Viterbi algorithm (<xref ref-type="bibr" rid="B61">Forney, 1973</xref>) (similar to Cryo2Struct (<xref ref-type="bibr" rid="B63">Giri and Cheng, 2024</xref>)), aligns the protein amino acid sequence to the predicted and clustered C&#x3b1; voxel coordinates to construct an initial 3D atomic protein backbone. These initial structures then serve as templates for advanced structure prediction capabilities of AlphaFold3 (<xref ref-type="bibr" rid="B2">Abramson et al., 2024</xref>) and guide AlphaFold3 to generate structures that are consistent with the cryo-EM density. To obtain structurally meaningful templates, the query protein sequence is aligned to the template sequences derived from Cryo2Struct2 generated structural predictions. The templates allow AlphaFold3 to refine the structure, incorporate prior structural information, and ultimately improve the accuracy of the final atomic model while maintaining consistency with experimental cryo-EM density data.</p>
<p>
<underline>
<italic>Feature learning</italic>
</underline>: <bold>DEMO-EMfit</bold> is a method for fitting atomic structures of protein and protein-nucleic acid complexes into cryo-EM and cryo-ET maps (<xref ref-type="bibr" rid="B17">Cai et al., 2025</xref>). It integrates deep learning-based backbone map extraction from cryo-EM density map with a global-local structural pose search and optimization. Since DEMO-EMfit utilizes the correlation between the cryo-EM density map and the backbone atoms of the structure during fitting procedure, it first extracts key structural features from input density maps. For this, it leverages DiffModeler (<xref ref-type="bibr" rid="B181">Wang et al., 2024</xref>), a deep learning method based on diffusion model, to generate a backbone density map from input cryo-EM density map that exclusively contains backbone atom information. <underline>
<italic>Model building</italic>
</underline>: DEMO-EMfit employs Fast Fourier Transform (FFT) and Limited-memory Broyden&#x2013;Fletcher&#x2013;Goldfarb&#x2013;Shanno (L-BFGS) (<xref ref-type="bibr" rid="B114">Liu and Nocedal, 1989</xref>) algorithms for global and local searches to determine the optimal structure pose. Initially, an FFT-based global search generates raw poses by exhaustively exploring possible structure orientations in Fourier space, evaluating them using the density correlation coefficient between the structure and the map. The top-scoring poses from this global search then undergoes a local search using the L-BFGS algorithm for refinement. Finally, domain-level optimization is applied to further refine the fitted model, addressing potential domain-level biases.</p>
<p>
<underline>
<italic>Feature learning</italic>
</underline>: <bold>DEMO-EMol</bold> is an improved server for accurately assembling protein-nucleic acid complex structures from cryo-EM density maps (<xref ref-type="bibr" rid="B197">Zhang Z. et al., 2025</xref>). It integrates deep learning-based map segmentation of protein and nucleic acid regions with an iterative structure fitting and assembly process guided by map constraints. DEMO-EMol begins by segmenting protein and nucleic acid regions from the input density map using the U-Net&#x2b;&#x2b; architecture from EMNUSS (<xref ref-type="bibr" rid="B74">He and Huang, 2021a</xref>) with the training dataset obtained from the first stage dataset of EM2NA (<xref ref-type="bibr" rid="B106">Li et al., 2024</xref>), another deep learning tool specific for modeling nucleic acids in cryo-EM maps. These separate protein and nucleic acid density maps are then used to iteratively fit and assemble their respective chain models. <underline>
<italic>Model building</italic>
</underline>: DEMO-EMol independently fits protein and nucleic acid chain models into their respective segmented maps by sequentially optimizing their poses using the L-BFGS algorithm (<xref ref-type="bibr" rid="B114">Liu and Nocedal, 1989</xref>). Since L-BFGS is a local optimization method, multiple initial poses are explored for each chain. To enhance accuracy and reduce the search space, a map masking strategy is employed, masking map regions already matched. The L-BFGS optimization uses a composite scoring function that integrates global and local model-to-map correlation coefficients (<xref ref-type="bibr" rid="B17">Cai et al., 2025</xref>) along with Fourier Shell Correlation (FSC). Once all chain models are fitted, DEMO-EMol constructs the final complex model by identifying the optimal combination of all chain poses using a differential evolution algorithm (<xref ref-type="bibr" rid="B201">Zhou et al., 2020</xref>), followed by a domain-level flexible refinement where the positions and orientations of all protein domains are simultaneously optimized.</p>
<p>
<underline>
<italic>Feature learning</italic>
</underline>: <bold>CryoDomain</bold> is a deep neural network that identifies protein domains from low-resolution cryo-EM density maps by leveraging a dual-tower network architecture: the DensityTower and the AtomTower (<xref ref-type="bibr" rid="B46">Dai et al., 2025</xref>). Each tower undergoes self-supervised pre-training on its respective modality - raw cryo-EM density maps for the DensityTower and atomic structures for the AtomTower - to extract modality-specific features. CryoDomain then simultaneously learns embeddings from both protein domain density maps and atomic structures within a shared, low-dimensional space. The network integrates these two modalities into a unified representation through an alignment process. The DensityTower (U-Net-like architecture) network comprises a Residual U-Net, a Swin-Conv U-Net, a Conv module, and a Compress Module. The Residual U-Net and a Swin-Conv U-Net progressively learn both local and non-local spatial features from cryo-EM density maps. The Conv module reconstructs the map, and the Compress Module projects it into a density map embedding. In the AtomTower network, an AtomEncoder, Structure Module, and Fusion Module collectively extract an atomic structure embedding from the input atomic structure. During training, density map embedding is aligned with its corresponding atomic structure embedding. <underline>
<italic>Model building</italic>
</underline>: CryoDomain effectively transfers knowledge from rich atomic structure datasets to sparse density map datasets by integrating these two modalities through cross-modal alignment. This alignment maximizes similarity between embeddings of the same domain types while minimizing similarity between different ones, creating a unified low-dimensional representation. After alignment, CryoDomain constructs a Density-atom embedding Database (DateDB) of atomic structure embeddings, enabling protein domain identification from density maps through embedding retrieval.</p>
<p>
<underline>
<italic>Feature learning</italic>
</underline>: <bold>MICA</bold> (<xref ref-type="bibr" rid="B71">Gyawali et al., 2025</xref>) is a deep learning method that combines cryo-EM density maps with AlphaFold3 (<xref ref-type="bibr" rid="B2">Abramson et al., 2024</xref>) predicted structures to create more accurate protein models in cryo-EM density maps of resolution 1.0&#x2013;4.0 &#xc5;. Unlike other methods that use predicted structures at the end in the post-processing step, MICA integrates AlphaFold3 predicted structures with cryo-EM density maps at both input and output levels. This allows MICA to integrate the strengths of both data types, the experimental accuracy of cryo-EM maps and the completeness of AlphaFold3 predictions and compensate for low-resolution areas in maps or inaccuracies in AlphaFold3 predictions of large protein complexes. At the input stage, MICA combines cryo-EM density maps and AlphaFold3 predicted structures through representation learning before passing them to a deep learning network to build protein structures. Its deep learning architecture processes the fused representation of cryo-EM density maps and AlphaFold3 structures using a multi-task encoder-decoder system with a feature pyramid network (FPN) (<xref ref-type="bibr" rid="B109">Lin, 2017</xref>) to predict the locations of backbone atoms, C&#x3b1; atoms, and amino acid types. The predicted C&#x3b1; candidates are refined using DBSCAN clustering strategy (<xref ref-type="bibr" rid="B56">Ester, 1996</xref>) and non-maximum suppression algorithm to output C&#x3b1; atoms which along with their amino acid type predictions are used as input for the backbone tracing to build an initial protein backbone model. <underline>
<italic>Model building</italic>
</underline>: MICA uses the backbone tracing procedure of EModelX (&#x2b;AF) (<xref ref-type="bibr" rid="B30">Chen S. et al., 2024</xref>) to build an initial backbone model from the predicted backbone atoms, C&#x3b1; atoms and amino acid types. It starts by identifying high-confidence C&#x3b1; atoms, linking them as protein chains and assigning their amino acid types followed by filling in any gaps in the initial backbone model by using information from AlphaFold3 (<xref ref-type="bibr" rid="B2">Abramson et al., 2024</xref>) structures. This C&#x3b1; backbone model is then converted into a full-atom model using PULCHRA (<xref ref-type="bibr" rid="B152">Rotkiewicz and Skolnick, 2008</xref>) and refined using <italic>phenix.real_space_refine</italic> (<xref ref-type="bibr" rid="B3">Afonine et al., 2018a</xref>).</p>
</sec>
</sec>
</sec>
</sec>
<sec id="s5">
<label>5</label>
<title>Assessment and validation</title>
<p>Assessing the accuracy of the predicted models is the critical final step in the model building pipeline (<xref ref-type="bibr" rid="B203">Zhu et al., 2025</xref>). The assessment methods can be broadly classified into three categories: <bold>predicted-target structure assessment, map-model assessment and model quality assessment</bold>. Evaluation metrics for assessing the accuracy of <bold>predicted protein models against target structures</bold> fundamentally rely on the alignment of C&#x3b1; atoms, as these atoms represent the backbone of individual residues and thus their spatial arrangement. Evaluation metrics commonly used to quantify the percentage of correctly paired C&#x3b1; atoms are recall, sequence recall, precision, F1-score, C&#x3b1; matching score, C&#x3b1; sequence match score, C&#x3b1; quality score and TM-score. <bold>Recall</bold> measures the percentage of residues where the predicted C&#x3b1; atom is within 3.0 &#xc5; of the deposited model (<xref ref-type="bibr" rid="B86">Jamali, 2022</xref>). <bold>Sequence recall</bold> measures the percentage of residues where the predicted C&#x3b1; atom is within 3 &#xc5; of the deposited model and the predicted amino acid type is correct (<xref ref-type="bibr" rid="B86">Jamali, 2022</xref>). <bold>Precision</bold> is the percentage of predicted C&#x3b1; atoms that fall within 3 &#xc5; of a C&#x3b1; atom in the deposited map (<xref ref-type="bibr" rid="B86">Jamali, 2022</xref>). The <bold>F1 score</bold> is the harmonic mean of precision and recall for C&#x3b1; atoms (<xref ref-type="bibr" rid="B86">Jamali, 2022</xref>). This metric offers a balanced assessment, considering both the specificity and sensitivity of predicted C&#x3b1; atom positions. The <bold>C&#x3b1; match score</bold> is the percentage of C&#x3b1; atoms (residues) in a predicted model that are within 3.0 &#xc5; of their corresponding residues in the true structure (<xref ref-type="bibr" rid="B174">Terwilliger et al., 2018</xref>). 
The <bold>sequence match score</bold> indicates the percentage of aligned residues that possess the identical amino acid type as their corresponding counterparts in the true structure (<xref ref-type="bibr" rid="B174">Terwilliger et al., 2018</xref>). The <bold>C&#x3b1; quality score</bold> is calculated by multiplying the C&#x3b1; match score by the ratio of the total predicted residues to the total residues in the experimental structure (<xref ref-type="bibr" rid="B63">Giri and Cheng, 2024</xref>). A standard <bold>TM-score</bold> (<xref ref-type="bibr" rid="B191">Zhang and Skolnick, 2004</xref>) quantifies the structural similarity between a predicted model and its corresponding known structure. The <bold>normalized TM-score</bold> (<xref ref-type="bibr" rid="B63">Giri and Cheng, 2024</xref>) is the TM-score of the atomic models, adjusted by the length of the known structure. For the deep learning-based model building methods discussed, their performance metrics are available in their respective publications. However, note that the test datasets used for evaluation often differ between these methods. The existing evaluation metrics have certain limitations because they often ignore chain-level correspondence and, in the case of TM-score, do not account for residue identity. Improved evaluation metrics such as <bold>TMRR-score</bold> (<xref ref-type="bibr" rid="B196">Zhang C. et al., 2025</xref>) have been introduced that combines TM-score with residue-recall, to measure both structural and residue type similarities. A recent study (<xref ref-type="bibr" rid="B196">Zhang C. et al., 2025</xref>) comprehensively benchmarked state-of-the-art model-building approaches using the TMRR-score and 50 cryo-EM density maps across various resolutions. This assessment evaluated how well predicted models aligned from atomic to intermediate resolutions, their runtime efficiency, and the benefits of integrating structure prediction techniques.</p>
<p>
<bold>Model validation methods</bold> use validation criteria such as Ramachandran plot outliers, all-atom clash scores, deviations in bonding geometry, and rotamer preferences to assess the quality and accuracy of the macromolecular structures. MolProbity (<xref ref-type="bibr" rid="B47">Davis et al., 2007</xref>), frequently used and incorporated into the PDB validation reports, is a comprehensive web server for validating the quality of 3D structures, including proteins, nucleic acids, and their complexes. It provides detailed all-atom contact analysis to identify steric clashes and offers updated diagnostics for dihedral angles, hydrogen bonds, and van der Waals contacts at molecular interfaces. MolProbity combines multiple geometric parameters into a single, overall MolProbity score, where lower values indicate higher model quality. Specifically, this score is a log-weighted combination of the clashscore, the percentage of Ramachandran plot outliers, and the percentage of side chain rotamer outliers. MolProbity is unique in offering all-atom contact analysis and its use of highly accurate, up-to-date Ramachandran and rotamer distributions. Another metric, CaBLAM (<xref ref-type="bibr" rid="B142">Prisant et al., 2020</xref>) (Calpha-Based Low-resolution Annotation Method) assesses backbone geometry of proteins by detecting C&#x3b1;-geometry outliers to identify areas of probable secondary structure. CaBLAM is designed for low-resolution structures, where complex errors or ambiguities can render highly sensitive conformational analyses, like Ramachandran analysis, difficult or impossible to interpret. Among the deep-learning based model validation methods, the DAQ (<xref ref-type="bibr" rid="B171">Terashi et al., 2022</xref>) (Deep-learning-based Amino-acid-wise Quality) score has been computed for all the PDB entries from cryo-EM maps in the resolution range 2.5 &#xc5; - 5.0 &#xc5;. DAQ score assesses the local model quality at the residue level. 
A key advantage of DAQ is its ability to identify regions with incorrect amino acid assignments (e.g., sequence shifts), even when the backbone is accurately modeled. The predicted local distance difference test (pLDDT) score is a valuable tool for assessing AlphaFold (<xref ref-type="bibr" rid="B89">Jumper et al., 2021</xref>) predicted models, especially since AlphaFold predicted models are now commonly used in cryo-EM model building. pLDDT is a per-residue measure of local confidence for predicted protein structures, scaled from 0 to 100. Higher scores indicate greater confidence and typically a more accurate prediction.</p>
<p>
<bold>Map-model scores</bold> quantitatively assess how well a structure model fits the experimental cryo-EM density map. Several software packages such as TEMPy (<xref ref-type="bibr" rid="B42">Cragnolini et al., 2021</xref>), CCP-EM (<xref ref-type="bibr" rid="B88">Joseph et al., 2022</xref>) and Phenix (<xref ref-type="bibr" rid="B4">Afonine et al., 2018b</xref>) offer tools to calculate cross-correlation scores to evaluate how well a structural model fits its cryo-EM map. Recent advancements have introduced local map-model fitting scores like Strudel score (<xref ref-type="bibr" rid="B85">Istrate et al., 2021</xref>), EMRinger (<xref ref-type="bibr" rid="B13">Barad et al., 2015</xref>), Q-scores (<xref ref-type="bibr" rid="B140">Pintilie et al., 2020</xref>), DeepQs (<xref ref-type="bibr" rid="B60">Feng et al., 2024</xref>), FSC-Q (<xref ref-type="bibr" rid="B146">Ram&#xed;rez-Aportela et al., 2021</xref>), and MEDIC (<xref ref-type="bibr" rid="B147">Reggiano et al., 2023</xref>), which provide more granular insights into model quality. 3D-Strudel (<xref ref-type="bibr" rid="B85">Istrate et al., 2021</xref>) is a model-dependent tool for validating map features in cryo-EM structures ranging from 2 to 4 &#xc5; resolution. It calculates a Strudel score, which is the linear correlation coefficient between the experimental map values around a target residue and the values from a rotamer-specific map-motif obtained from the 3D-Strudel motif library. EMRinger (<xref ref-type="bibr" rid="B13">Barad et al., 2015</xref>) evaluates how well side-chain conformation of a residue in a model aligns with the cryo-EM map compared to other common rotamers. Q-score (<xref ref-type="bibr" rid="B140">Pintilie et al., 2020</xref>), now included in the PDB validation reports, quantitatively assesses the resolvability of individual atoms by comparing their density profiles to an ideal Gaussian reference profile of the atom. 
DeepQs (<xref ref-type="bibr" rid="B60">Feng et al., 2024</xref>) uses a 3D Vision Transformer (ViT) to estimate local Q-scores in cryo-EM maps up to 5.0 &#xc5;. FSC-Q (<xref ref-type="bibr" rid="B146">Ram&#xed;rez-Aportela et al., 2021</xref>) quantifies local resolution differences between a cryo-EM map and an atomic model through a localized Fourier Shell Correlation (FSC) analysis. It compares the local resolutions derived from small sub-maps of both the experimental map and a model-derived map. An FSC-Q value near zero indicates strong support for atoms by the map. MEDIC (Model Error Detection in Cryo-EM) (<xref ref-type="bibr" rid="B147">Reggiano et al., 2023</xref>) is a robust statistical model that identifies local backbone errors in cryo-EM protein structures. It combines local fit-to-density with deep-learning-derived structural information, including energy metrics and a predicted error score from a machine learning model trained to distinguish native from decoy structures.</p>
</sec>
<sec id="s6">
<label>6</label>
<title>Availability and applications</title>
<p>Most of the tools we have discussed are open source, with their source code publicly accessible on repositories like GitHub and Zenodo (<xref ref-type="bibr" rid="B57">European Organization For Nuclear, 2013</xref>) (links provided in <xref ref-type="table" rid="T5">Table 5</xref>). These platforms provide detailed information on everything, from software and hardware dependencies to specific local installation instructions for these tools. This ensures users can successfully set up and run these programs on their own computing systems, provided they meet the necessary computing software and hardware requirements. Beyond local installations, many of these tools are also readily available on web-based or cloud-based computational platforms. This includes services like Cosmic Cryo-EM (<xref ref-type="bibr" rid="B38">Cianfrocco, 2017</xref>), Code Ocean (<xref ref-type="bibr" rid="B25">Cheifet, 2021</xref>), and Google Colab, as well as dedicated servers (links provided in <xref ref-type="table" rid="T5">Table 5</xref>). These platforms are particularly beneficial for users who may not possess the robust computational infrastructure required for local execution, such as high-performance GPUs. On these cloud-based environments, users can simply upload their necessary inputs, such as a density map or a sequence, and receive the processed output, often a built model, without needing to manage the underlying computational resources themselves. Deep learning&#x2013;based automated model-building methods have become an integral part of the structural biologist&#x2019;s toolkit, enabling applications that span from contaminant identification in heterogeneous cryo-EM datasets to the structure determination of physiologically important macromolecular complexes, as well as method development across diverse areas of structural biology. Cryo-EM studies where these tools have had notable contributions are discussed in the Supporting Information.</p>
<table-wrap id="T5" position="float">
<label>TABLE 5</label>
<caption>
<p>Availability of deep learning-based automated model building methods in cryo-EM.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">S. No.</th>
<th align="left">Method</th>
<th align="left">Open source</th>
<th align="left">Source code availability</th>
<th align="left">Web-based computational platform</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td colspan="5" align="left">Primary structure</td>
</tr>
<tr>
<td align="left">1</td>
<td align="left">
<italic>findMySequence</italic> (<xref ref-type="bibr" rid="B36">Chojnowski et al., 2022</xref>) (2021)</td>
<td align="left">Yes</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://gitlab.com/gchojnowski/findmysequence">https://gitlab.com/gchojnowski/findmysequence</ext-link>
</td>
<td align="left">N/A</td>
</tr>
<tr>
<td align="left">2</td>
<td align="left">
<italic>checkMySequence</italic> (<xref ref-type="bibr" rid="B33">Chojnowski, 2022</xref>) (2022)</td>
<td align="left">Yes</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://gitlab.com/gchojnowski/checkmysequence">https://gitlab.com/gchojnowski/checkmysequence</ext-link>
</td>
<td align="left">N/A</td>
</tr>
<tr>
<td align="left">3</td>
<td align="left">
<italic>doubleHelix</italic> (<xref ref-type="bibr" rid="B34">Chojnowski, 2023</xref>) (2023)</td>
<td align="left">Yes</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://gitlab.com/gchojnowski/doublehelix">https://gitlab.com/gchojnowski/doublehelix</ext-link>
</td>
<td align="left">N/A</td>
</tr>
<tr>
<td align="left">4</td>
<td align="left">
<italic>EMSequenceFinder</italic> (<xref ref-type="bibr" rid="B123">Mondal et al., 2025</xref>) (2024)</td>
<td align="left">Yes</td>
<td align="left">available as part of IMP distribution <ext-link ext-link-type="uri" xlink:href="https://integrativemodeling.org/">https://integrativemodeling.org/</ext-link>
</td>
<td align="left">N/A</td>
</tr>
<tr>
<td colspan="5" align="left">Secondary structure</td>
</tr>
<tr>
<td align="left">5</td>
<td align="left">CNN-classifier (<xref ref-type="bibr" rid="B102">Li et al., 2016</xref>) (2017)</td>
<td align="left">No</td>
<td align="left">N/A</td>
<td align="left">N/A</td>
</tr>
<tr>
<td align="left">6</td>
<td align="left">Emap2sec (<xref ref-type="bibr" rid="B116">Maddhuri Venkata Subramaniya et al., 2019</xref>) (2019)</td>
<td align="left">Yes</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://github.com/kiharalab/Emap2sec">https://github.com/kiharalab/Emap2sec</ext-link>
</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://em.kiharalab.org/algorithm/emap2sec">https://em.kiharalab.org/algorithm/emap2sec</ext-link>
<break/>
<ext-link ext-link-type="uri" xlink:href="https://colab.research.google.com/github/kiharalab/Emap2sec/blob/master/Emap2sec.ipynb">https://colab.research.google.com/github/kiharalab/Emap2sec/blob/master/Emap2sec.ipynb</ext-link>
<break/>
<ext-link ext-link-type="uri" xlink:href="https://codeocean.com/capsule/4439990/tree/v3">https://codeocean.com/capsule/4439990/tree/v3</ext-link>
</td>
</tr>
<tr>
<td align="left">7</td>
<td align="left">Haruspex (<xref ref-type="bibr" rid="B124">Mostosi et al., 2020</xref>) (2020)</td>
<td align="left">Yes</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://github.com/thorn-lab/haruspex">https://github.com/thorn-lab/haruspex</ext-link>
</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://colab.research.google.com/github/thorn-lab/haruspex/blob/master/colab/Haruspex.ipynb">https://colab.research.google.com/github/thorn-lab/haruspex/blob/master/colab/Haruspex.ipynb</ext-link>
</td>
</tr>
<tr>
<td align="left">8</td>
<td align="left">Emap2sec&#x2b; (<xref ref-type="bibr" rid="B179">Wang et al., 2021</xref>) (2021)</td>
<td align="left">Yes</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://github.com/kiharalab/Emap2secPlus">https://github.com/kiharalab/Emap2secPlus</ext-link>
</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://em.kiharalab.org/algorithm/emap2sec+">https://em.kiharalab.org/algorithm/emap2sec&#x2b;</ext-link> <ext-link ext-link-type="uri" xlink:href="https://codeocean.com/capsule/9499750/tree/v1">https://codeocean.com/capsule/9499750/tree/v1</ext-link>
<break/>
<ext-link ext-link-type="uri" xlink:href="https://github.com/kiharalab/Emap2secPlus/blob/master/Emap2sec%2B.ipynb">https://github.com/kiharalab/Emap2secPlus/blob/master/Emap2sec%2B.ipynb</ext-link>
</td>
</tr>
<tr>
<td align="left">9</td>
<td align="left">EMNUSS (<xref ref-type="bibr" rid="B74">He and Huang, 2021a</xref>) (2021)</td>
<td align="left">Yes</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="http://huanglab.phys.hust.edu.cn/EMNUSS/">http://huanglab.phys.hust.edu.cn/EMNUSS/</ext-link>
</td>
<td align="left">N/A</td>
</tr>
<tr>
<td align="left">10</td>
<td align="left">DeepSSETracer (<xref ref-type="bibr" rid="B125">Mu et al., 2021</xref>) (2021)</td>
<td align="left">Yes</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://www.cs.odu.edu/%7Ebioinfo/B2I_Tools/">https://www.cs.odu.edu/~bioinfo/B2I_Tools/</ext-link>
</td>
<td align="left">N/A</td>
</tr>
<tr>
<td align="left">11</td>
<td align="left">HaPi (<xref ref-type="bibr" rid="B62">Garcia Condado et al., 2022</xref>) (2022)</td>
<td align="left">Yes</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://github.com/JGarciaCondado/EMapHandedness">https://github.com/JGarciaCondado/EMapHandedness</ext-link>
</td>
<td align="left">N/A</td>
</tr>
<tr>
<td align="left">12</td>
<td align="left">CryoSSESeg (<xref ref-type="bibr" rid="B155">Sazzed, 2024</xref>) (2024)</td>
<td align="left">No</td>
<td align="left">N/A</td>
<td align="left">N/A</td>
</tr>
<tr>
<td align="left">13</td>
<td align="left">EMInfo (<xref ref-type="bibr" rid="B18">Cao et al., 2025</xref>) (2025)</td>
<td align="left">Yes</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="http://huanglab.phys.hust.edu.cn/EMInfo/">http://huanglab.phys.hust.edu.cn/EMInfo/</ext-link>
</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://sbgrid.org/software/titles/eminfo">https://sbgrid.org/software/titles/eminfo</ext-link>
</td>
</tr>
<tr>
<td colspan="5" align="left">Tertiary and quaternary structure (<italic>de novo</italic>)</td>
</tr>
<tr>
<td align="left">14</td>
<td align="left">AAnchor (<xref ref-type="bibr" rid="B153">Rozanov and Wolfson, 2018</xref>) (2018)</td>
<td align="left">No</td>
<td align="left">N/A</td>
<td align="left">N/A</td>
</tr>
<tr>
<td align="left">15</td>
<td align="left">A<sup>2</sup>-Net (<xref ref-type="bibr" rid="B188">Xu et al., 2019</xref>) (2019)</td>
<td align="left">No</td>
<td align="left">N/A</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://cryonet.ai/">https://cryonet.ai/</ext-link>
</td>
</tr>
<tr>
<td align="left">16</td>
<td align="left">Cascaded-CNN (<xref ref-type="bibr" rid="B160">Si et al., 2020</xref>) (2020)</td>
<td align="left">Yes</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://github.com/DrDongSi/Ca-Backbone-Prediction">https://github.com/DrDongSi/Ca-Backbone-Prediction</ext-link>
</td>
<td align="left">N/A</td>
</tr>
<tr>
<td align="left">17</td>
<td align="left">Structure Generator (<xref ref-type="bibr" rid="B100">Li, 2020</xref>) (2020)</td>
<td align="left">Yes</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://github.com/liponan/structure-generator">https://github.com/liponan/structure-generator</ext-link>
</td>
<td align="left">N/A</td>
</tr>
<tr>
<td align="left">18</td>
<td align="left">DeepTracer (<xref ref-type="bibr" rid="B137">Pfab et al., 2021</xref>) (2020)</td>
<td align="left">No</td>
<td align="left">N/A</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://deeptracer.uw.edu/home">https://deeptracer.uw.edu/home</ext-link>
</td>
</tr>
<tr>
<td align="left">19</td>
<td align="left">DeepMM (<xref ref-type="bibr" rid="B75">He and Huang, 2021b</xref>) (2021)</td>
<td align="left">Yes</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://github.com/JiahuaHe/DeepMM">https://github.com/JiahuaHe/DeepMM</ext-link>
</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://hpc.nih.gov/apps/DeepMM.html">https://hpc.nih.gov/apps/DeepMM.html</ext-link>
</td>
</tr>
<tr>
<td align="left">20</td>
<td align="left">SEGEM (<xref ref-type="bibr" rid="B28">Chen et al., 2021</xref>) (2021)</td>
<td align="left">No</td>
<td align="left">N/A</td>
<td align="left">N/A</td>
</tr>
<tr>
<td align="left">21</td>
<td align="left">SegmA (<xref ref-type="bibr" rid="B154">Rozanov and Wolfson, 2023</xref>) (2022)</td>
<td align="left">Yes</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://github.com/Mark-Rozanov/SegmA_3A/tree/master">https://github.com/Mark-Rozanov/SegmA_3A/tree/master</ext-link>
</td>
<td align="left">N/A</td>
</tr>
<tr>
<td align="left">22</td>
<td align="left">DeepTracer-2.0 [96] (2023)</td>
<td align="left">No</td>
<td align="left">N/A</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://deeptracer.uw.edu/home">https://deeptracer.uw.edu/home</ext-link>
</td>
</tr>
<tr>
<td align="left">23</td>
<td align="left">ModelAngelo (<xref ref-type="bibr" rid="B87">Jamali et al., 2024</xref>) (2023)</td>
<td align="left">Yes</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://github.com/3dem/model-angelo">https://github.com/3dem/model-angelo</ext-link>
</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://cosmic-cryoem.org/tools/modelangelo/">https://cosmic-cryoem.org/tools/modelangelo/</ext-link>
</td>
</tr>
<tr>
<td align="left">24</td>
<td align="left">CryoREAD (<xref ref-type="bibr" rid="B180">Wang et al., 2023</xref>) (2023)</td>
<td align="left">Yes</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://github.com/kiharalab/CryoREAD">https://github.com/kiharalab/CryoREAD</ext-link>
</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://em.kiharalab.org/algorithm/CryoREAD">https://em.kiharalab.org/algorithm/CryoREAD</ext-link>
<break/>
<ext-link ext-link-type="uri" xlink:href="https://colab.research.google.com/github/kiharalab/CryoREAD/blob/main/CryoREAD.ipynb">https://colab.research.google.com/github/kiharalab/CryoREAD/blob/main/CryoREAD.ipynb</ext-link>
</td>
</tr>
<tr>
<td align="left">25</td>
<td align="left">SMARTFold (<xref ref-type="bibr" rid="B105">Li et al., 2023</xref>) (2023)</td>
<td align="left">No</td>
<td align="left">N/A</td>
<td align="left">N/A</td>
</tr>
<tr>
<td align="left">26</td>
<td align="left">EMRNA (<xref ref-type="bibr" rid="B108">Li et al., 2025b</xref>) (2024)</td>
<td align="left">Yes</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="http://huanglab.phys.hust.edu.cn/EMRNA/">http://huanglab.phys.hust.edu.cn/EMRNA/</ext-link>
</td>
<td align="left">N/A</td>
</tr>
<tr>
<td align="left">27</td>
<td align="left">Cryo2Struct (<xref ref-type="bibr" rid="B63">Giri and Cheng, 2024</xref>) (2024)</td>
<td align="left">Yes</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://github.com/jianlin-cheng/Cryo2Struct">https://github.com/jianlin-cheng/Cryo2Struct</ext-link>
</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://codeocean.com/capsule/3954150/tree/v1">https://codeocean.com/capsule/3954150/tree/v1</ext-link>
</td>
</tr>
<tr>
<td align="left">28</td>
<td align="left">EmodelX (<xref ref-type="bibr" rid="B30">Chen et al., 2024a</xref>) (2024)</td>
<td align="left">Yes</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://github.com/biomed-AI/EModelX/">https://github.com/biomed-AI/EModelX/</ext-link>
</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://bio-web1.nscc-gz.cn/app/EModelX">https://bio-web1.nscc-gz.cn/app/EModelX</ext-link>
<break/>
<ext-link ext-link-type="uri" xlink:href="https://colab.research.google.com/github/biomed-AI/EModelX/blob/main/minimal_example.ipynb">https://colab.research.google.com/github/biomed-AI/EModelX/blob/main/minimal_example.ipynb</ext-link>
</td>
</tr>
<tr>
<td align="left">29</td>
<td align="left">EM2NA (<xref ref-type="bibr" rid="B106">Li et al., 2024</xref>) (2024)</td>
<td align="left">Yes</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="http://huanglab.phys.hust.edu.cn/em2na/download.php">http://huanglab.phys.hust.edu.cn/em2na/download.php</ext-link>
</td>
<td align="left">N/A</td>
</tr>
<tr>
<td align="left">30</td>
<td align="left">CryFold (<xref ref-type="bibr" rid="B168">Su et al., 2025</xref>) (2024)</td>
<td align="left">Yes</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://github.com/SBQ-1999/CryFold">https://github.com/SBQ-1999/CryFold</ext-link>
</td>
<td align="left">N/A</td>
</tr>
<tr>
<td align="left">31</td>
<td align="left">DeepCryoRNA (<xref ref-type="bibr" rid="B101">Li and Chen, 2025</xref>) (2025)</td>
<td align="left">Yes</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://github.com/Vfold-RNA/DeepCryoRNA">https://github.com/Vfold-RNA/DeepCryoRNA</ext-link>
</td>
<td align="left">N/A</td>
</tr>
<tr>
<td align="left">32</td>
<td align="left">E3-CryoFold (<xref ref-type="bibr" rid="B182">Wang et al., 2025</xref>) (2025)</td>
<td align="left">Yes</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://github.com/A4Bio/E3-CryoFold">https://github.com/A4Bio/E3-CryoFold</ext-link>
</td>
<td align="left">N/A</td>
</tr>
<tr>
<td colspan="5" align="left">Tertiary and quaternary structure (hybrid)</td>
</tr>
<tr>
<td align="left">33</td>
<td align="left">SEGEM&#x2b;&#x2b; (<xref ref-type="bibr" rid="B28">Chen et al., 2021</xref>) (2021)</td>
<td align="left">No</td>
<td align="left">N/A</td>
<td align="left">N/A</td>
</tr>
<tr>
<td align="left">34</td>
<td align="left">CR-I-TASSER (<xref ref-type="bibr" rid="B193">Zhang et al., 2022</xref>) (2022)</td>
<td align="left">Yes</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://zhanggroup.org/CR-I-TASSER/download.html">https://zhanggroup.org/CR-I-TASSER/download.html</ext-link>
</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://zhanggroup.org/CR-I-TASSER/">https://zhanggroup.org/CR-I-TASSER/</ext-link>
</td>
</tr>
<tr>
<td align="left">35</td>
<td align="left">DEMO-EM (<xref ref-type="bibr" rid="B202">Zhou et al., 2022</xref>) (2022)</td>
<td align="left">Yes</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://zhanggroup.org/DEMO-EM/standalone_package/">https://zhanggroup.org/DEMO-EM/standalone_package/</ext-link>
</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://zhanggroup.org/DEMO-EM/">https://zhanggroup.org/DEMO-EM/</ext-link>
</td>
</tr>
<tr>
<td align="left">36</td>
<td align="left">DeepTracer-ID (<xref ref-type="bibr" rid="B24">Chang et al., 2022</xref>) (2022)</td>
<td align="left">No</td>
<td align="left">N/A</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://deeptracer.uw.edu/deeptracerid-new-job">https://deeptracer.uw.edu/deeptracerid-new-job</ext-link>
</td>
</tr>
<tr>
<td align="left">37</td>
<td align="left">EMBuild (<xref ref-type="bibr" rid="B76">He et al., 2022</xref>) (2022)</td>
<td align="left">Yes</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="http://huanglab.phys.hust.edu.cn/EMBuild/">http://huanglab.phys.hust.edu.cn/EMBuild/</ext-link>
</td>
<td align="left">N/A</td>
</tr>
<tr>
<td align="left">38</td>
<td align="left">FFF (<xref ref-type="bibr" rid="B29">Chen et al., 2023</xref>) (2023)</td>
<td align="left">No</td>
<td align="left">N/A</td>
<td align="left">N/A</td>
</tr>
<tr>
<td align="left">39</td>
<td align="left">CrAI (<xref ref-type="bibr" rid="B117">Mallet et al., 2025</xref>) (2023)</td>
<td align="left">Yes</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://github.com/Vincentx15/crIA-EM">https://github.com/Vincentx15/crIA-EM</ext-link>
</td>
<td align="left">N/A</td>
</tr>
<tr>
<td align="left">40</td>
<td align="left">DeepMainmast (<xref ref-type="bibr" rid="B172">Terashi et al., 2024</xref>) (2023)</td>
<td align="left">Yes</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://github.com/kiharalab/DeepMainMast">https://github.com/kiharalab/DeepMainMast</ext-link>
</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://em.kiharalab.org/algorithm/DeepMainMast">https://em.kiharalab.org/algorithm/DeepMainMast</ext-link>
<break/>
<ext-link ext-link-type="uri" xlink:href="https://codeocean.com/capsule/0749800/tree/v5">https://codeocean.com/capsule/0749800/tree/v5</ext-link>
<break/>
<ext-link ext-link-type="uri" xlink:href="https://colab.research.google.com/github/kiharalab/DeepMainMast/blob/main/DeepMainMast.ipynb">https://colab.research.google.com/github/kiharalab/DeepMainMast/blob/main/DeepMainMast.ipynb</ext-link>
</td>
</tr>
<tr>
<td align="left">41</td>
<td align="left">DEMO-EM2 [139] (2024)</td>
<td align="left">Yes</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://zhanggroup.org/DEMO-EM/DEMO-EM2/">https://zhanggroup.org/DEMO-EM/DEMO-EM2/</ext-link>
</td>
<td align="left">N/A</td>
</tr>
<tr>
<td align="left">42</td>
<td align="left">DeepTracer-Refine (<xref ref-type="bibr" rid="B31">Chen et al., 2024b</xref>) (2024)</td>
<td align="left">No</td>
<td align="left">N/A</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://deeptracer.uw.edu/home">https://deeptracer.uw.edu/home</ext-link>
</td>
</tr>
<tr>
<td align="left">43</td>
<td align="left">EmodelX (&#x2b;AF) (<xref ref-type="bibr" rid="B30">Chen et al., 2024a</xref>) (2024)</td>
<td align="left">Yes</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://github.com/biomed-AI/EModelX/">https://github.com/biomed-AI/EModelX/</ext-link>
</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://bio-web1.nscc-gz.cn/app/EModelX">https://bio-web1.nscc-gz.cn/app/EModelX</ext-link>
<break/>
<ext-link ext-link-type="uri" xlink:href="https://colab.research.google.com/github/biomed-AI/EModelX/blob/main/minimal_example.ipynb">https://colab.research.google.com/github/biomed-AI/EModelX/blob/main/minimal_example.ipynb</ext-link>
</td>
</tr>
<tr>
<td align="left">44</td>
<td align="left">CryoJAM (<xref ref-type="bibr" rid="B20">Carrion et al., 2024</xref>) (2024)</td>
<td align="left">Yes</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://github.com/jtcarrion/CryoJAM">https://github.com/jtcarrion/CryoJAM</ext-link>
</td>
<td align="left">N/A</td>
</tr>
<tr>
<td align="left">45</td>
<td align="left">DiffModeler (<xref ref-type="bibr" rid="B181">Wang et al., 2024</xref>) (2024)</td>
<td align="left">Yes</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://github.com/kiharalab/DiffModeler">https://github.com/kiharalab/DiffModeler</ext-link>
</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://em.kiharalab.org/algorithm/DiffModeler">https://em.kiharalab.org/algorithm/DiffModeler</ext-link>
<break/>
<ext-link ext-link-type="uri" xlink:href="https://em.kiharalab.org/algorithm/DiffModeler(seq)">https://em.kiharalab.org/algorithm/DiffModeler(seq)</ext-link>
<break/>
<ext-link ext-link-type="uri" xlink:href="https://em.kiharalab.org/algorithm/ComplexModeler">https://em.kiharalab.org/algorithm/ComplexModeler</ext-link>
</td>
</tr>
<tr>
<td align="left">46</td>
<td align="left">DeepTracer-LowResEnhance (<xref ref-type="bibr" rid="B115">Ma and Si, 2025</xref>) (2024)</td>
<td align="left">No</td>
<td align="left">N/A</td>
<td align="left">N/A</td>
</tr>
<tr>
<td align="left">47</td>
<td align="left">Cryo2Struct2 [165] (2025)</td>
<td align="left">Yes</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://github.com/BioinfoMachineLearning/Cryo2Strut2">https://github.com/BioinfoMachineLearning/Cryo2Strut2</ext-link>
</td>
<td align="left">N/A</td>
</tr>
<tr>
<td align="left">48</td>
<td align="left">DEMO-EMfit (<xref ref-type="bibr" rid="B17">Cai et al., 2025</xref>) (2025)</td>
<td align="left">Yes</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://github.com/xiaogenz/DEMO-EMfit">https://github.com/xiaogenz/DEMO-EMfit</ext-link>
</td>
<td align="left">N/A</td>
</tr>
<tr>
<td align="left">49</td>
<td align="left">CryoDomain (<xref ref-type="bibr" rid="B46">Dai et al., 2025</xref>) (2025)</td>
<td align="left">No</td>
<td align="left">N/A</td>
<td align="left">N/A</td>
</tr>
<tr>
<td align="left">50</td>
<td align="left">DEMO-EMol (<xref ref-type="bibr" rid="B197">Zhang et al., 2025b</xref>) (2025)</td>
<td align="left">Yes</td>
<td align="left">N/A</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://zhanggroup.org/DEMO-EMol/">https://zhanggroup.org/DEMO-EMol/</ext-link>
</td>
</tr>
<tr>
<td align="left">51</td>
<td align="left">MICA (<xref ref-type="bibr" rid="B71">Gyawali et al., 2025</xref>) (2025)</td>
<td align="left">Yes</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://github.com/jianlin-cheng/MICA">https://github.com/jianlin-cheng/MICA</ext-link>
</td>
<td align="left">N/A</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s7">
<label>7</label>
<title>Limitations and future directions</title>
<p>In general, the performance of deep learning-based automated model-building methods declines as the resolution of cryo-EM density maps decreases, with accurate models typically obtained only within the resolution ranges represented in their training datasets. Cryo-EM density maps often exhibit heterogeneous resolution distribution within a single reconstruction. Thus, even when the global resolution of the map falls within the preferred resolution range of a specific method, regions of lower local resolutions in the map may lie outside this range, resulting in inaccurate or incomplete structural models in those areas. Moreover, modeling efforts focus on interpretable, high-resolution regions, while lower-resolution and uninterpretable areas remain poorly characterized and lack structural labels. As the map resolution declines, backbone tracing and sequence registration become challenging for <italic>de novo</italic> approaches and may require assistance from external structure prediction methods. For hybrid methods, the accuracy of the output models may be affected by the quality of the structural templates integrated with voxel-wise structural predictions during model generation. Cryo-EM density maps contain information about the conformational heterogeneity and dynamic regions of biomolecules. However, most deep learning&#x2013;based automated model-building methods are limited in their ability to handle conformational heterogeneity and produce static models from inherently dynamic cryo-EM density map inputs. CryoBoltz (<xref ref-type="bibr" rid="B145">Raghu, 2025</xref>) uses global&#x2013;local constraints derived from input density maps to guide the sampling process of Boltz-1 (<xref ref-type="bibr" rid="B185">Wohlwend, 2025</xref>), an open-source diffusion-based protein structure prediction model. This enables CryoBoltz to generate structural ensembles that capture the underlying conformational heterogeneity of the maps. 
Since simulated maps lack the complexity present in experimental cryo-EM maps, such as heterogeneous resolution, conformational heterogeneity, and complex noise, deep learning-based methods trained on these simulated maps may not perform well on experimental cryo-EM maps even when the input map resolution is within the resolution range of such methods. The strengths and limitations of different deep learning architectures (<xref ref-type="sec" rid="s3">Section 3</xref>) directly influence the performance of automated model building tools that utilize them. This means that if a particular deep learning architecture has inherent limitations for a given task, those same limitations will be evident in the tools built upon it. Conversely, a neural network architecture with specific advantages for certain prediction tasks will lead to more effective tools.</p>
<p>
<bold>
<italic>findMySequence</italic>
</bold> (<xref ref-type="bibr" rid="B36">Chojnowski et al., 2022</xref>), a tool to identify protein sequences in crystallography and cryo-EM maps, depends on the accuracy of the traced backbone; fragmented or mistraced models often found in low-resolution maps may affect sequence identification. While the approach scales to multimeric assemblies, it requires manual selection of intermediate fragments in an iterative modeling process. Nevertheless, the application is very fast for the majority of cases. The performance of <bold>
<italic>checkMySequence</italic>
</bold> (<xref ref-type="bibr" rid="B33">Chojnowski, 2022</xref>), which automatically detects register-shift errors in protein models built into cryo-EM maps, depends on the quality of the input map and model, specifically on map preprocessing and local map resolution. Further, it uses relatively long test fragments at lower local map resolutions, which may result in missing short, local register-shift errors. Nevertheless, checkMySequence yields useful results where manual residue-by-residue validation is difficult. The accuracy of <bold>
<italic>doubleHelix</italic>
</bold> (<xref ref-type="bibr" rid="B34">Chojnowski, 2023</xref>) for nucleic-acid sequence identification, assignment, and validation in cryo-EM maps depends on the accuracy of input nucleic-acid models. Further, the performance of its neural-network classifier is influenced by map quality, and since nucleic acid regions in cryo-EM maps are typically poorly resolved, this may affect accurate sequence assignment. Moreover, the signal in density maps is often limited to distinguishing only two nucleobase types - purines and pyrimidines - complicating sequence assignment. Nevertheless, doubleHelix can successfully assign sequences to models built in cryo-EM maps at local resolutions as low as 5.0 &#xc5;. The performance of <bold>
<italic>EMSequenceFinder</italic>
</bold> (<xref ref-type="bibr" rid="B123">Mondal et al., 2025</xref>) to accurately assign an amino acid sequence to backbone fragments is dependent on the accuracy of the backbone traced in the input cryo-EM density map. Reliable backbone traces can be generated by using methods listed in <xref ref-type="table" rid="T2">Table 2</xref>. Further, the program at present only considers fragments with &#x3b1;-helical and &#x3b2;-strand backbones; other secondary structure elements, such as coils and loops, will be considered in the future. Like other methods, its prediction accuracy declines as the map resolution deteriorates. Map preprocessing such as denoising and density modification could improve prediction accuracy by reducing noise.</p>
<p>
<bold>CNN-classifier</bold> (<xref ref-type="bibr" rid="B102">Li et al., 2016</xref>), one of the early deep learning methods developed to detect protein secondary structures in medium resolution cryo-EM density maps, is trained on a limited number of simulated cryo-EM maps (<xref ref-type="table" rid="T4">Table 4</xref>) which may limit its performance on experimental cryo-EM density maps for secondary structure detection. <bold>Emap2sec</bold> (<xref ref-type="bibr" rid="B116">Maddhuri Venkata Subramaniya et al., 2019</xref>) can only analyze protein maps and, while it detects density regions of secondary structure, it does not actually place the &#x3b1;-helices and &#x3b2;-strands within these detected density regions. Its successor, <bold>Emap2sec&#x2b;</bold> (<xref ref-type="bibr" rid="B179">Wang et al., 2021</xref>) can detect both protein secondary structure elements and nucleic acids in cryo-EM density maps and also improves the accuracy of protein secondary structure detection. <bold>Haruspex</bold> (<xref ref-type="bibr" rid="B124">Mostosi et al., 2020</xref>), a method for identifying protein secondary structures and nucleic acids in cryo-EM density maps, may incorrectly label semi-helical structures, &#x3b2;-hairpin turns, and polyproline type II (PII) helices as &#x3b1;-helices and loosely parallel structures that lack the typical hydrogen-bond pattern as &#x3b2;-strands. In future versions of Haruspex, it will predict additional classes like &#x3b2;-turns, polyproline helices, and membrane detergent regions to reduce the number of misidentified secondary structure elements and improve the overall accuracy of the method. The performance of <bold>EMNUSS</bold> (<xref ref-type="bibr" rid="B74">He and Huang, 2021a</xref>) is sensitive to map resolution, especially at middle-resolution (5.0&#x2013;9.5 &#xc5;) as EMNUSS was trained on a small number of maps (120 maps) at that resolution range. 
Further, incorrect predictions may occur on unusual density volumes as the network of EMNUSS is not trained on lower-resolution maps. Therefore, more maps are needed for training to improve the performance and robustness of EMNUSS. <bold>DeepSSETracer</bold> (<xref ref-type="bibr" rid="B125">Mu et al., 2021</xref>), a tool to identify protein secondary structures in medium resolution maps, is designed to operate on component maps with a maximum size of 100 voxels in any dimension and requires prior segmentation of the cryo-EM density map containing multiple chains. As such its detection performance depends on the quality of the segmented maps. <bold>HaPi</bold> (<xref ref-type="bibr" rid="B62">Garcia Condado et al., 2022</xref>) struggles to determine the handedness of structures lacking clear or sufficient &#x3b1;-helices. This difficulty increases with lower resolutions, particularly as the resolution approaches 5.0 &#xc5;, where less information regarding handedness is encoded. The minimum resolution for determining the hand of an &#x3b1;-helix is its pitch, which on average is 5.4 &#xc5;. Above this resolution, an &#x3b1;-helix appears as a cylinder, which possesses no apparent handedness. <bold>CryoSSESeg</bold> (<xref ref-type="bibr" rid="B155">Sazzed, 2024</xref>), a tool to detect protein secondary structures in medium-resolution cryo-EM maps, segments the entire density of a protein chain so that the model can learn from complete secondary structures and avoid artifacts of cutting secondary structures as seen in patch-based segmentation. However, it is important to note that this approach may require a huge amount of memory when working with large protein chains. 
<bold>EMInfo</bold> (<xref ref-type="bibr" rid="B18">Cao et al., 2025</xref>), an automated tool for predicting protein secondary structure elements and nucleic acids in cryo-EM density maps, may struggle to accurately predict structural categories in several cases such as at the terminal ends of nucleic acids with weak density signals, in lower-resolution density regions, at the interfaces between different macromolecule categories and may identify coils with strands.</p>
<p>The main factor limiting the detection accuracy of <bold>AAnchor</bold> (<xref ref-type="bibr" rid="B153">Rozanov and Wolfson, 2018</xref>) at high resolution maps (2.2 &#xc5;) is the limited experimental cryo-EM data used (3 maps, 1.8 - 2.3 &#xc5; range) for the training of the AAnchor algorithm. Given that the number of high resolution experimental cryo-EM maps has increased since then (<xref ref-type="fig" rid="F1">Figure 1</xref>), the current version of AAnchor may provide improved results. The large-scale dataset of <bold>A<sup>2</sup>-Net</bold> (<xref ref-type="bibr" rid="B188">Xu et al., 2019</xref>), a tool for amino acid determination in a cryo-EM density map, is derived from simulated cryo-EM densities and could potentially benefit from including experimental maps in its training. Trained on a larger set of simulated cryo-EM maps, <bold>Cascaded-CNN</bold> (<xref ref-type="bibr" rid="B160">Si et al., 2020</xref>), a tool for predicting protein backbone structures in high-resolution maps, could potentially improve its performance by including experimental cryo-EM maps in its training data. Further, the threshold value selected to normalize input maps before processing for model building, a common step in many methods, may affect final structure prediction. The authors of Cascaded-CNN have developed a method that automatically estimates the correct threshold value for density maps to alleviate this issue (<xref ref-type="bibr" rid="B136">Pfab and Si, 2020</xref>). <bold>Structure Generator</bold> (<xref ref-type="bibr" rid="B100">Li, 2020</xref>), a template-free method to build protein structures in cryo-EM maps, is also trained on simulated cryo-EM density profiles of proteins (<xref ref-type="table" rid="T4">Table 4</xref>), and its performance may vary on experimental cryo-EM maps. 
Due to the noise in experimental cryo-EM density maps, models created by <bold>DeepTracer</bold> (<xref ref-type="bibr" rid="B137">Pfab et al., 2021</xref>) may sometimes show poor fit to the density map including misplaced side-chains, geometric and connectivity errors thus requiring downstream model rebuilding and refinement using tools such as molecular dynamics flexible fitting (MDFF) (<xref ref-type="bibr" rid="B175">Trabuco et al., 2009</xref>) and <italic>phenix_real_space_refine</italic> (<xref ref-type="bibr" rid="B3">Afonine et al., 2018a</xref>). Furthermore, DeepTracer does not build structures for other macromolecules such as nucleic acids in cryo-EM density maps. <bold>DeepTracer-2.0</bold> (<xref ref-type="bibr" rid="B127">Nakamura et al., 2023</xref>) extends DeepTracer&#x2019;s capability to model protein-DNA/RNA macromolecular complexes from the cryo-EM density maps. The performance of DeepTracer-2.0 to accurately model protein-DNA/RNA complexes depends upon accurate segmentation to extract the density maps for separate macromolecules. As such its performance decreases on low resolution cryo-EM density maps, often observed for nucleic acids, due to the challenges in accurately segmenting such maps. As increasingly high-resolution density maps become available for protein-DNA/RNA complexes, its performance will improve. DeepTracer-2.0 also depends on Brickworx model (<xref ref-type="bibr" rid="B35">Chojnowski et al., 2015</xref>) for postprocessing and building models of DNA/RNA from the predicted voxels which could be time consuming. Future work will make DeepTracer-2.0 less reliant on third-party software for post-processing. <bold>DeepMM</bold> (<xref ref-type="bibr" rid="B75">He and Huang, 2021b</xref>) can introduce errors or uncertainties into the built models in cryo-EM density maps that have low overall resolution or low-resolution regions. 
Because DeepMM is designed for single chains, it relies on Segger (<xref ref-type="bibr" rid="B139">Pintilie et al., 2010</xref>) to segment the original map into separate regions to model multi-chain complexes. As such its performance on multi-chain complexes is dependent on the quality of the map segmentation. <bold>SEGEM</bold> (<xref ref-type="bibr" rid="B28">Chen et al., 2021</xref>) showed lower amino acid prediction accuracy for the experimental test set than the simulated set due to the varying local resolution and noise in experimental maps, which makes it difficult to normalize them for convolutional neural network (CNN) training. <bold>SEGEM&#x2b;&#x2b;</bold> (<xref ref-type="bibr" rid="B28">Chen et al., 2021</xref>), which combines SEGEM and AlphaFold2, also struggles to build complete models in cryo-EM density maps with varying local resolution and noise impacting its ability to identify native C&#x3b1; sites. Nevertheless, performance of SEGEM and SEGEM&#x2b;&#x2b; is expected to improve with advancements in 3D image semantic segmentation and cryo-EM image processing. <bold>SegmA</bold> (<xref ref-type="bibr" rid="B154">Rozanov and Wolfson, 2023</xref>), a tool designed for residue type segmentation of a cryo-EM density map, may mislabel amino acid voxels as nucleotides at protein-nucleic acid interfaces and regions with low resolutions. Further, it may also struggle to accurately label amino acids with similar properties as they appear similar in cryo-EM maps. Like other methods, its performance is sensitive to map resolution and degrades as the resolution lowers for amino acids. Nevertheless, SegmA is a powerful tool to distinguish between protein residues and nucleotides. The resolution of input cryo-EM maps affects the model building performance of <bold>ModelAngelo</bold> (<xref ref-type="bibr" rid="B87">Jamali et al., 2024</xref>). 
The quality of the initial graph generated by the convolutional neural network (CNN) and the accuracy of amino acid classification, both crucial for mapping sequences to the main chain, are significantly affected by low-resolution cryo-EM data. Poor amino acid classifications can lead to errors in sequence assignments and subsequent incorrect chain assignments, especially in complexes with many similar sequences. Protein structure prediction methods, such as AlphaFold2, can be integrated with the features derived from low-resolution cryo-EM maps to automate model building as demonstrated by some of the methods listed in <xref ref-type="table" rid="T3">Table 3</xref>. For nucleic acids, assigning the correct sequence of nucleobases (the equivalent of side chains in RNA or DNA) to the predicted backbone becomes challenging especially at resolutions around 3.5 &#xc5; as it becomes difficult to differentiate between individual purines (adenine from guanine) or pyrimidines (cytosine from thymine/uracil). <italic>doubleHelix</italic>, a neural network classifier to identify nucleobases, can be combined with ModelAngelo predicted nucleic acid backbone to automate the model building of complete nucleic acid structures. While <bold>CryoREAD</bold> (<xref ref-type="bibr" rid="B180">Wang et al., 2023</xref>) accurately identifies nucleobase positions, here also base-type detection remains less precise, especially at lower resolutions, affecting accuracy of sequence matching. Nevertheless, CryoREAD provides accurate nucleic acid structures even from lower-resolution maps for manual sequence assignment. For large structures, where the final model from CryoREAD might have backbone gaps or incorrect base-pairing, manual refinement with tools like COOT is required. CryoREAD aims to improve nucleic acid structure modeling by integrating secondary structure information for nucleic acids predicted with high accuracy. 
While <bold>SMARTFold</bold> (<xref ref-type="bibr" rid="B105">Li et al., 2023</xref>) creates more complete and accurate atomic structures compared with other state-of-the-art methods, its memory intensive architecture limits its use to model protein sequences with a maximum of 2,500 residues. In addition, the multiple sequence alignment (MSA) search is time-consuming. Future versions of SMARTFold will address these issues with a smaller model that uses smaller feature channels to reduce memory use and a protein language model to replace the time-consuming MSA search. <bold>EMRNA</bold> (<xref ref-type="bibr" rid="B108">Li T. et al., 2025</xref>) performance can be inconsistent in low-resolution maps (4.0&#x2013;6.0 &#xc5;) as it is challenging to build RNA models at these resolutions. For large RNA molecules, EMRNA accurately places RNA backbone fragments but struggles to correctly order these fragments sequentially, often leading to models with a high root-mean-square deviation (RMSD). Since many of these models still retain the correct overall fold, minor manual adjustments can often fix them. However, third party software is often required to build base atoms to achieve high accuracy. Unlike <bold>EM2NA</bold> (<xref ref-type="bibr" rid="B106">Li et al., 2024</xref>), which can automatically segment and identify DNA/RNA regions in raw cryo-EM maps, EMRNA requires users to properly mask the map around the RNA being modeled for optimal performance. It is challenging for EM2NA to automatically assign the correct nucleotide type for the built model, often requiring prior knowledge and expert intervention to complete the modeling. EM2NA may struggle to recognize non-standard helical geometry of nucleic acids including structures like bulges or flipped-out nucleotides that can sometimes occur in protein-nucleic acid complexes. Nevertheless, EM2NA accelerates model building significantly compared to manually starting from scratch. 
While <bold>Cryo2Struct</bold> (<xref ref-type="bibr" rid="B63">Giri and Cheng, 2024</xref>) can correctly identify most C&#x3b1; atoms and build highly accurate atomic models, creating comprehensive and accurate models of large protein structures from density maps alone remains a significant challenge. This is primarily because resolution heterogeneity in cryo-EM maps limits the resolvability of every residue of a protein. Even prediction errors for a few C&#x3b1; atoms, in noisy cryo-EM maps, can limit the accurate prediction of long, continuous stretches of polypeptide chains. Supplementing cryo-EM density maps with additional inputs, such as AlphaFold2 protein structures and symmetry of the multi-chain protein complexes can produce more accurate and complete predictions. <bold>Cryo2Struct2</bold> (<xref ref-type="bibr" rid="B64">Giri and Cheng, 2025</xref>) improves Cryo2Struct by integrating cryo-EM data with advanced structure prediction capabilities of AlphaFold3. <bold>EModelX</bold> (<xref ref-type="bibr" rid="B30">Chen S. et al., 2024</xref>), a method for building protein complex structures in cryo-EM maps, could be extended to include modeling of DNA/RNA-protein assemblies and small molecules. <bold>EModelX(&#x2b;AF)</bold> (<xref ref-type="bibr" rid="B30">Chen S. et al., 2024</xref>), which combines EModelX and AlphaFold, effectively handles both low-resolution cryo-EM density maps and inaccurate AlphaFold predictions while modeling protein complexes in such density maps. Though <bold>CryFold</bold> (<xref ref-type="bibr" rid="B168">Su et al., 2025</xref>) is less reliant on map resolution for model building, modeling at low resolutions remains challenging due to the difficulty in accurately identifying protein side chains in these regions. <bold>DeepCryoRNA</bold> (<xref ref-type="bibr" rid="B101">Li and Chen, 2025</xref>), which only models RNA in the cryo-EM density maps, may have variable performance for maps with resolutions below 4.5 &#xc5;. 
Future iterations of DeepCryoRNA are anticipated to extend to modeling DNA or DNA/RNA-protein complexes in cryo-EM maps like CryoREAD, EM2NA and DeepTracer. One of the main limitations of <bold>E3-CryoFold</bold> (<xref ref-type="bibr" rid="B182">Wang et al., 2025</xref>) is that it currently only models the residue backbone without considering side chains. Side chain modeling of E3-CryoFold derived protein backbone can potentially be achieved using tools like SCWRL4 (<xref ref-type="bibr" rid="B92">Krivov et al., 2009a</xref>), similar to the approach taken by DeepTracer. Further, E3-CryoFold can result in inconsistent root-mean-square deviation (RMSD) between predicted and target structures due to lack of constraints during generation of atom coordinates. Incorporating atom coordinate information, derived directly from density maps, with the E3-CryoFold predictions can help with this issue.</p>
<p>The accuracy of 3D convolutional neural network (3D-CNN) and therefore <bold>CR-I-TASSER</bold> (<xref ref-type="bibr" rid="B193">Zhang et al., 2022</xref>) to predict C&#x3b1; trace decreases as resolution deteriorates. CR-I-TASSER also struggles to model target structures containing loops and disordered regions. Future iterations of CR-I-TASSER will combine 3D-CNN with multiple sequence alignment (MSA) to improve accuracy of its C&#x3b1; trace and model prediction. Although CR-I-TASSER is currently limited to model monomer proteins that require manual segmentation of cryo-EM maps, the CR-I-TASSER pipeline will be extended to model larger protein&#x2013;protein/protein&#x2013;nucleic acid complexes. <bold>DEMO-EM</bold> (<xref ref-type="bibr" rid="B202">Zhou et al., 2022</xref>) pipeline, which assembles multi-domain proteins, requires manual segmentation of maps and could be improved by including automatic segmentation techniques to reduce computational time. Another limitation of DEMO-EM is that the initial domain models, which are created by D-I-TASSER independent of the cryo-EM density data, can be inaccurate and can negatively affect the quality of the final models. Future versions of DEMO-EM will improve the accuracy of the final models by using cryo-EM density data restraints to guide the initial domain model generation. <bold>DEMO-EM2</bold> (<xref ref-type="bibr" rid="B195">Zhang et al., 2024</xref>), which constructs protein complex models, improves many aspects of DEMO-EM such as preprocessing density maps to suppress noise effects on fitting protein models and using advanced strategies to prevent the algorithm from getting stuck in a local optimum. However, DEMO-EM2 can be further improved by using deep learning techniques, to identify distinct domain regions and predict distances and orientations between protein chains and domains directly from density maps, and use them as constraints to guide the assembly of protein complexes. 
<bold>DEMO-EMol</bold> (<xref ref-type="bibr" rid="B197">Zhang Z. et al., 2025</xref>) server, which assembles protein-nucleic acid complex structures from cryo-EM maps, aims to create an end-to-end model-to-map fitting method by using deep learning, and improve fitting efficiency by matching local and global geometric features. Additionally, it will add explicit energy terms that retain base pairing in output nucleic acid models thus improving their accuracy. <bold>DEMO-EMfit</bold> (<xref ref-type="bibr" rid="B17">Cai et al., 2025</xref>), which fits atomic structures into cryo-EM density maps, outlines several points for potential improvement in its current version. To improve both the accuracy and efficiency of fitting, end-to-end deep learning approaches can be used to directly learn structural poses from point clouds extracted from cryo-EM density maps. For better evaluation of fitting quality, a combination of physics-based Gaussian mixture models and DOT scores from VESPER may be more reliable than traditional correlation coefficient metrics. <bold>DeepTracer-ID</bold> (<xref ref-type="bibr" rid="B24">Chang et al., 2022</xref>), which can identify component proteins directly in cryo-EM maps, may struggle with modeling small proteins (&#x3c;100 amino acids) at a resolution of 4.2 &#xc5; or better. Small proteins often exist within larger complexes, yet it is rare for them to achieve near-atomic resolution in cryo-EM studies. This lack of high-quality data means there is a scarcity of detailed experimental structural information for these proteins requiring input from protein prediction methods such as AlphaFold. <bold>DeepTracer-Refine</bold> (<xref ref-type="bibr" rid="B31">Chen J. et al., 2024</xref>), which uses DeepTracer predicted structures to refine AlphaFold predictions, is limited to docking domains from AlphaFold structures in the cryo-EM density maps and at present cannot refine the structure at residue level. 
Further, DeepTracer-Refine struggles with AlphaFold predictions where domains are incorrectly folded as it is designed to only correct inaccurate domain arrangements in AlphaFold predictions. Future development of DeepTracer-Refine will explore residue-level refinement of the backbone. <bold>DeepTracer-LowResEnhance</bold> (<xref ref-type="bibr" rid="B115">Ma and Si, 2025</xref>), which extends DeepTracer&#x2019;s ability to build models in low-resolution cryo-EM maps, is currently limited to proteins and future investigation will explore its application to model DNA/RNA structures from low-resolution cryo-EM maps. <bold>EMBuild</bold> (<xref ref-type="bibr" rid="B76">He et al., 2022</xref>) accurately builds protein complex models into cryo-EM maps when provided with the accurate predictions of individual chains at the fragment or domain level. Its performance suffers when regions in the input structures, such as those from AlphaFold2, are inaccurate requiring removal of inaccurate or disordered parts of the predicted structures based on their pLDDT values. Like other methods, <bold>FFF</bold> (<xref ref-type="bibr" rid="B29">Chen et al., 2023</xref>) is sensitive to map resolution. Its performance degrades as resolution decreases and can lead to errors in the protein sequence alignment during the fragment recognition stage ultimately affecting subsequent structure-fitting steps. Although <bold>CrAI</bold> (<xref ref-type="bibr" rid="B117">Mallet et al., 2025</xref>) is a state-of-the-art method for detecting antibodies in cryo-EM maps, it performs better on Fabs than on VHHs. This is because VHHs are smaller, have non-canonical binding modes, and less training data is available for them. Despite these challenges, CrAI still outperforms other methods at detecting VHHs. Despite successful prediction, CrAI may have less precision as it relies on a template instead of using a ground truth structure. 
The performance of <bold>DeepMainmast</bold> (<xref ref-type="bibr" rid="B172">Terashi et al., 2024</xref>) decreases as the local map resolution becomes lower requiring AlphaFold2 models in low resolution regions of map to correct inaccurate backbone tracing in the density. DeepMainmast also tends to model residues in helices more accurately than &#x3b2;-strands and loops. <bold>CryoJAM</bold> (<xref ref-type="bibr" rid="B20">Carrion et al., 2024</xref>), an automated tool for fitting protein homologs into cryo-EM maps, may struggle to identify small differences between very similar homolog structures. It also faces significant challenges with low-resolution maps, or those containing membrane or detergent density. Diffusion model of <bold>DiffModeler</bold> (<xref ref-type="bibr" rid="B181">Wang et al., 2024</xref>) struggles to generate accurate backbone traces for low resolution regions limiting subsequent structure fitting to regions with higher local resolution. Further, DiffModeler&#x2019;s pipeline is currently limited to model protein complexes but will extend its capabilities to model protein/DNA/RNA complexes in future. Also, accuracy of DiffModeler is influenced by the quality of the initial AlphaFold2 models it uses, fitting individual protein domains rather than entire proteins could help overcome the inaccuracies of the initial models. DiffModeler can potentially be used for cryo-electron tomography (cryo-ET) with maps at a resolution of 15 &#xc5; or better. The performance of <bold>CryoDomain</bold> (<xref ref-type="bibr" rid="B46">Dai et al., 2025</xref>), a tool to identify protein domains in low resolution cryo-EM maps, may be influenced by the quality and accuracy of the initial segmentation of the map into its individual components. Although CryoDomain may sometimes retrieve incorrect domain types, resulting in false positives, it is more accurate and robust across a wide range of resolutions compared to other methods. 
<bold>MICA</bold> (<xref ref-type="bibr" rid="B71">Gyawali et al., 2025</xref>) struggles with some large protein complexes when cryo-EM data is noisy or has missing density, and when AlphaFold3 predictions do not align well with the experimental maps. Future efforts will focus on enhancing sequence and chain registration, considering symmetry of protein complex during sequence registration, and integrating advanced side-chain prediction algorithms directly into the deep learning framework of MICA.</p>
</sec>
<sec sec-type="conclusion" id="s8">
<label>8</label>
<title>Conclusion</title>
<p>Deep learning has undeniably accelerated and automated every aspect of the cryo-EM structure determination pipeline. This review presented a comprehensive and up-to-date survey of the current landscape of deep learning-based methods (&#x223c;50) for automated model building into cryo-EM density maps. We outlined common conceptual strategies across diverse methods and summarized key aspects of these tools, including their training datasets, neural network architectures, prediction tasks, the types of biomolecules they build, and their availability as servers or publicly accessible code. By discussing the capabilities and limitations of available methods, we hope this synthesis will serve as a valuable resource and stimulate future improvements in the field.</p>
<p>The resolution revolution in cryo-EM now increasingly enables high-resolution structure determination of biomolecular complexes with interpretable density for bound small molecules and drugs. However, most of the current deep learning-based model building methods focus primarily on building the biomacromolecule itself. This highlights a pressing need for developing robust deep learning tools for automatically identifying and building the bound small molecules (<xref ref-type="bibr" rid="B91">Karolczak et al., 2024</xref>), thereby significantly accelerating structure-based drug-discovery efforts. Addressing this requires overcoming challenges related to training data. While deep learning-based methods train on large datasets, their application is often limited by the availability of map-model pairs that perfectly capture experimental reality. Real cryo-EM maps exhibit conformational heterogeneity, which the associated model does not capture. This discrepancy hinders the development of deep learning-based methods capable of modeling true conformational ensembles (<xref ref-type="bibr" rid="B26">Chen, 2025</xref>; <xref ref-type="bibr" rid="B119">Matsumoto et al., 2021</xref>; <xref ref-type="bibr" rid="B183">Wankowicz et al., 2024</xref>) or accurately representing ligand binding states (<xref ref-type="bibr" rid="B148">Riley et al., 2021</xref>). Therefore, efforts are required to curate experimental datasets of conformationally-resolved experimental map-model pairs for training deep-learning-based methods (<xref ref-type="bibr" rid="B9">Astore et al., 2025</xref>).</p>
<p>Looking ahead, as neural network architectures continue to evolve rapidly, multi-modal approaches that integrate different architectures, diverse data types, and geometric deep learning (<xref ref-type="bibr" rid="B16">Bronstein, 2021</xref>; <xref ref-type="bibr" rid="B15">Bengio, 2023</xref>) to learn from multi-dimensional data show significant promise for capturing diverse structural features of biomolecules. Deep learning-based automated model building for cryo-electron tomography (cryo-ET) datasets represent a crucial next step, holding the potential to identify and build macromolecules directly within their native cellular environment and ushering in a new era of <italic>in situ</italic> structural biology (<xref ref-type="bibr" rid="B95">Kyrilis et al., 2024</xref>).</p>
</sec>
</body>
<back>
<sec sec-type="author-contributions" id="s9">
<title>Author contributions</title>
<p>HB: Conceptualization, Methodology, Investigation, Data curation, Formal analysis, Validation, Visualization, Resources, Writing &#x2013; original draft, Writing &#x2013; review and editing, Supervision, Project administration. AdG: Writing &#x2013; review and editing, Funding acquisition, Project administration, Resources.</p>
</sec>
<sec sec-type="COI-statement" id="s11">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="ai-statement" id="s12">
<title>Generative AI statement</title>
<p>The author(s) declare that Generative AI was used in the creation of this manuscript for grammatical checking of sentences.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p>
</sec>
<sec sec-type="disclaimer" id="s13">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec sec-type="supplementary-material" id="s14">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fmolb.2025.1613399/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fmolb.2025.1613399/full&#x23;supplementary-material</ext-link>
</p>
<supplementary-material xlink:href="DataSheet1.pdf" id="SM1" mimetype="application/pdf" xmlns:xlink="http://www.w3.org/1999/xlink"/>
</sec>
<fn-group>
<fn fn-type="custom" custom-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/565194/overview">Edward T. Eng</ext-link>, New York Structural Biology Center, United States</p>
</fn>
<fn fn-type="custom" custom-type="reviewed-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2726584/overview">Yao Zhang</ext-link>, Michigan State University, United States</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3041426/overview">Dominique Stephens</ext-link>, James Madison University, United States</p>
</fn>
</fn-group>
<ref-list>
<title>References</title>
<ref id="B1">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Abadi</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2016</year>). &#x201c;<article-title>TensorFlow: a system for large-scale machine learning</article-title>,&#x201d; in <source>Proceedings of the 12th USENIX conference on operating systems design and implementation</source>. <publisher-loc>Berkeley, CA</publisher-loc>: <publisher-name>USENIX Association</publisher-name>.</mixed-citation>
</ref>
<ref id="B2">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Abramson</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Adler</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Dunger</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Evans</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Green</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Pritzel</surname>
<given-names>A.</given-names>
</name>
<etal/>
</person-group> (<year>2024</year>). <article-title>Accurate structure prediction of biomolecular interactions with AlphaFold 3</article-title>. <source>Nature</source> <volume>630</volume>, <fpage>493</fpage>&#x2013;<lpage>500</lpage>. <pub-id pub-id-type="doi">10.1038/s41586-024-07487-w</pub-id>
<pub-id pub-id-type="pmid">38718835</pub-id>
</mixed-citation>
</ref>
<ref id="B3">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Afonine</surname>
<given-names>P. V.</given-names>
</name>
<name>
<surname>Poon</surname>
<given-names>B. K.</given-names>
</name>
<name>
<surname>Read</surname>
<given-names>R. J.</given-names>
</name>
<name>
<surname>Sobolev</surname>
<given-names>O. V.</given-names>
</name>
<name>
<surname>Terwilliger</surname>
<given-names>T. C.</given-names>
</name>
<name>
<surname>Urzhumtsev</surname>
<given-names>A.</given-names>
</name>
<etal/>
</person-group> (<year>2018a</year>). <article-title>Real-space refinement in PHENIX for cryo-EM and crystallography</article-title>. <source>Acta Crystallogr. Sect. D.</source> <volume>74</volume>, <fpage>531</fpage>&#x2013;<lpage>544</lpage>. <pub-id pub-id-type="doi">10.1107/S2059798318006551</pub-id>
<pub-id pub-id-type="pmid">29872004</pub-id>
</mixed-citation>
</ref>
<ref id="B4">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Afonine</surname>
<given-names>P. V.</given-names>
</name>
<name>
<surname>Klaholz</surname>
<given-names>B. P.</given-names>
</name>
<name>
<surname>Moriarty</surname>
<given-names>N. W.</given-names>
</name>
<name>
<surname>Poon</surname>
<given-names>B. K.</given-names>
</name>
<name>
<surname>Sobolev</surname>
<given-names>O. V.</given-names>
</name>
<name>
<surname>Terwilliger</surname>
<given-names>T. C.</given-names>
</name>
<etal/>
</person-group> (<year>2018b</year>). <article-title>New tools for the analysis and validation of cryo-EM maps and atomic models</article-title>. <source>Acta Crystallogr. Sect. D.</source> <volume>74</volume>, <fpage>814</fpage>&#x2013;<lpage>840</lpage>. <pub-id pub-id-type="doi">10.1107/S2059798318009324</pub-id>
<pub-id pub-id-type="pmid">30198894</pub-id>
</mixed-citation>
</ref>
<ref id="B5">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>AlQuraishi</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>End-to-End differentiable learning of protein structure</article-title>. <source>Cell Syst.</source> <volume>8</volume>, <fpage>292</fpage>&#x2013;<lpage>301.e3</lpage>. <pub-id pub-id-type="doi">10.1016/j.cels.2019.03.006</pub-id>
<pub-id pub-id-type="pmid">31005579</pub-id>
</mixed-citation>
</ref>
<ref id="B6">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Alzubaidi</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Humaidi</surname>
<given-names>A. J.</given-names>
</name>
<name>
<surname>Al-Dujaili</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Duan</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Al-Shamma</surname>
<given-names>O.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Review of deep learning: concepts, CNN architectures, challenges, applications, future directions</article-title>. <source>J. Big Data</source> <volume>8</volume>, <fpage>53</fpage>. <pub-id pub-id-type="doi">10.1186/s40537-021-00444-8</pub-id>
<pub-id pub-id-type="pmid">33816053</pub-id>
</mixed-citation>
</ref>
<ref id="B7">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Anderson</surname>
<given-names>A. C.</given-names>
</name>
</person-group> (<year>2003</year>). <article-title>The process of structure-based drug design</article-title>. <source>Chem. and Biol.</source> <volume>10</volume>, <fpage>787</fpage>&#x2013;<lpage>797</lpage>. <pub-id pub-id-type="doi">10.1016/j.chembiol.2003.09.002</pub-id>
<pub-id pub-id-type="pmid">14522049</pub-id>
</mixed-citation>
</ref>
<ref id="B8">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Astore</surname>
<given-names>M. A.</given-names>
</name>
<name>
<surname>Pradhan</surname>
<given-names>A. S.</given-names>
</name>
<name>
<surname>Thiede</surname>
<given-names>E. H.</given-names>
</name>
<name>
<surname>Hanson</surname>
<given-names>S. M.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Protein dynamics underlying allosteric regulation</article-title>. <source>Curr. Opin. Struct. Biol.</source> <volume>84</volume>, <fpage>102768</fpage>. <pub-id pub-id-type="doi">10.1016/j.sbi.2023.102768</pub-id>
<pub-id pub-id-type="pmid">38215528</pub-id>
</mixed-citation>
</ref>
<ref id="B9">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Astore</surname>
<given-names>M. A.</given-names>
</name>
<name>
<surname>Woollard</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Silva-S&#xe1;nchez</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Kopylov</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Dao Duc</surname>
<given-names>K.</given-names>
</name>
<etal/>
</person-group> (<year>2025</year>). <article-title>The inaugural Flatiron Institute Cryo-EM conformational heterogeneity challenge</article-title>. <source>bioRxiv</source> <volume>2025</volume>. <pub-id pub-id-type="doi">10.1101/2025.07.18.665582</pub-id>
<pub-id pub-id-type="pmid">41280101</pub-id>
</mixed-citation>
</ref>
<ref id="B10">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bahdanau</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Neural machine translation by jointly learning to align and translate</article-title>. <source>CoRR</source> abs/1409.0473. <pub-id pub-id-type="doi">10.48550/arXiv.1409.0473</pub-id>
</mixed-citation>
</ref>
<ref id="B11">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Baker</surname>
<given-names>M. L.</given-names>
</name>
<name>
<surname>Abeysinghe</surname>
<given-names>S. S.</given-names>
</name>
<name>
<surname>Schuh</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Coleman</surname>
<given-names>R. A.</given-names>
</name>
<name>
<surname>Abrams</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Marsh</surname>
<given-names>M. P.</given-names>
</name>
<etal/>
</person-group> (<year>2011</year>). <article-title>Modeling protein structure at near atomic resolutions with gorgon</article-title>. <source>J. Struct. Biol.</source> <volume>174</volume>, <fpage>360</fpage>&#x2013;<lpage>373</lpage>. <pub-id pub-id-type="doi">10.1016/j.jsb.2011.01.015</pub-id>
<pub-id pub-id-type="pmid">21296162</pub-id>
</mixed-citation>
</ref>
<ref id="B12">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bansia</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Mahanta</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Yennawar</surname>
<given-names>N. H.</given-names>
</name>
<name>
<surname>Ramakumar</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Small glycols discover cryptic pockets on proteins for fragment-based approaches</article-title>. <source>J. Chem. Inf. Model.</source> <volume>61</volume>, <fpage>1322</fpage>&#x2013;<lpage>1333</lpage>. <pub-id pub-id-type="doi">10.1021/acs.jcim.0c01126</pub-id>
<pub-id pub-id-type="pmid">33570386</pub-id>
</mixed-citation>
</ref>
<ref id="B13">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Barad</surname>
<given-names>B. A.</given-names>
</name>
<name>
<surname>Echols</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>R. Y. R.</given-names>
</name>
<name>
<surname>Cheng</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>DiMaio</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Adams</surname>
<given-names>P. D.</given-names>
</name>
<etal/>
</person-group> (<year>2015</year>). <article-title>EMRinger: side chain&#x2013;directed model and map validation for 3D cryo-electron microscopy</article-title>. <source>Nat. Methods</source> <volume>12</volume>, <fpage>943</fpage>&#x2013;<lpage>946</lpage>. <pub-id pub-id-type="doi">10.1038/nmeth.3541</pub-id>
<pub-id pub-id-type="pmid">26280328</pub-id>
</mixed-citation>
</ref>
<ref id="B14">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bell</surname>
<given-names>J. M.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Durmaz</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Fluty</surname>
<given-names>A. C.</given-names>
</name>
<name>
<surname>Ludtke</surname>
<given-names>S. J.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>New software tools in EMAN2 inspired by EMDatabank map challenge</article-title>. <source>J. Struct. Biol.</source> <volume>204</volume>, <fpage>283</fpage>&#x2013;<lpage>290</lpage>. <pub-id pub-id-type="doi">10.1016/j.jsb.2018.09.002</pub-id>
<pub-id pub-id-type="pmid">30189321</pub-id>
</mixed-citation>
</ref>
<ref id="B15">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bengio</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>A hitchhiker&#x27;s guide to geometric GNNs for 3D atomic systems</article-title>.</mixed-citation>
</ref>
<ref id="B16">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bronstein</surname>
<given-names>M. M.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Geometric deep learning: grids, groups, graphs, geodesics, and gauges</article-title>.</mixed-citation>
</ref>
<ref id="B17">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cai</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>G.</given-names>
</name>
<etal/>
</person-group> (<year>2025</year>). <article-title>Fitting atomic structures into Cryo-EM maps by coupling deep learning-enhanced map processing with global-local optimization</article-title>. <source>J. Chem. Inf. Model.</source> <volume>65</volume>, <fpage>3800</fpage>&#x2013;<lpage>3811</lpage>. <pub-id pub-id-type="doi">10.1021/acs.jcim.5c00004</pub-id>
<pub-id pub-id-type="pmid">40152222</pub-id>
</mixed-citation>
</ref>
<ref id="B18">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cao</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>He</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>S. Y.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>Deciphering protein secondary structures and nucleic acids in Cryo-EM maps using deep learning</article-title>. <source>J. Chem. Inf. Model.</source> <volume>65</volume>, <fpage>1641</fpage>&#x2013;<lpage>1652</lpage>. <pub-id pub-id-type="doi">10.1021/acs.jcim.4c01971</pub-id>
<pub-id pub-id-type="pmid">39838545</pub-id>
</mixed-citation>
</ref>
<ref id="B19">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Carreira-Perpinan</surname>
<given-names>M. A.</given-names>
</name>
</person-group> (<year>2006</year>). &#x201c;<article-title>Acceleration strategies for Gaussian mean-shift image segmentation</article-title>,&#x201d; in <source>2006 IEEE Computer Society conference on computer vision and pattern recognition (CVPR&#x27;06)</source>.</mixed-citation>
</ref>
<ref id="B20">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Carrion</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Manjrekar</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Mikulevica</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>CryoJAM: automating protein homolog fitting in medium resolution Cryo-EM density maps</article-title>. <source>bioRxiv</source> <volume>2024</volume>. <pub-id pub-id-type="doi">10.1101/2024.07.10.602952</pub-id>
</mixed-citation>
</ref>
<ref id="B21">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Carugo</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Djinovi&#x107;-Carugo</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Structural biology: a golden era</article-title>. <source>PLOS Biol.</source> <volume>21</volume>, <fpage>e3002187</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pbio.3002187</pub-id>
<pub-id pub-id-type="pmid">37384774</pub-id>
</mixed-citation>
</ref>
<ref id="B22">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Case</surname>
<given-names>D. A.</given-names>
</name>
<name>
<surname>Cerutti</surname>
<given-names>D. S.</given-names>
</name>
<name>
<surname>Cruzeiro</surname>
<given-names>V. W. D.</given-names>
</name>
<name>
<surname>Darden</surname>
<given-names>T. A.</given-names>
</name>
<name>
<surname>Duke</surname>
<given-names>R. E.</given-names>
</name>
<name>
<surname>Ghazimirsaeed</surname>
<given-names>M.</given-names>
</name>
<etal/>
</person-group> (<year>2025</year>). <article-title>Recent developments in amber biomolecular simulations</article-title>. <source>J. Chem. Inf. Model.</source> <volume>65</volume>, <fpage>7835</fpage>&#x2013;<lpage>7843</lpage>. <pub-id pub-id-type="doi">10.1021/acs.jcim.5c01063</pub-id>
<pub-id pub-id-type="pmid">40728386</pub-id>
</mixed-citation>
</ref>
<ref id="B23">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chang</surname>
<given-names>W.-H.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>S. H.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>H. H.</given-names>
</name>
<name>
<surname>Chung</surname>
<given-names>S. C.</given-names>
</name>
<name>
<surname>Tu</surname>
<given-names>I. P.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Cryo-EM analyses permit visualization of structural polymorphism of biological macromolecules</article-title>. <source>Front. Bioinforma.</source> <volume>1</volume>, <fpage>788308</fpage>. <pub-id pub-id-type="doi">10.3389/fbinf.2021.788308</pub-id>
<pub-id pub-id-type="pmid">36303748</pub-id>
</mixed-citation>
</ref>
<ref id="B24">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Connolly</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Meng</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Su</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Cvirkaite-Krupovic</surname>
<given-names>V.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>DeepTracer-ID: <italic>de novo</italic> protein identification from cryo-EM maps</article-title>. <source>Biophysical J.</source> <volume>121</volume>, <fpage>2840</fpage>&#x2013;<lpage>2848</lpage>. <pub-id pub-id-type="doi">10.1016/j.bpj.2022.06.025</pub-id>
<pub-id pub-id-type="pmid">35769006</pub-id>
</mixed-citation>
</ref>
<ref id="B25">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cheifet</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Promoting reproducibility with Code Ocean</article-title>. <source>Genome Biol.</source> <volume>22</volume>, <fpage>65</fpage>. <pub-id pub-id-type="doi">10.1186/s13059-021-02299-x</pub-id>
<pub-id pub-id-type="pmid">33608018</pub-id>
</mixed-citation>
</ref>
<ref id="B26">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>Building molecular model series from heterogeneous CryoEM structures using Gaussian mixture models and deep neural networks</article-title>. <source>Commun. Biol.</source> <volume>8</volume>, <fpage>798</fpage>. <pub-id pub-id-type="doi">10.1038/s42003-025-08202-9</pub-id>
<pub-id pub-id-type="pmid">40415012</pub-id>
</mixed-citation>
</ref>
<ref id="B27">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Baldwin</surname>
<given-names>P. R.</given-names>
</name>
<name>
<surname>Ludtke</surname>
<given-names>S. J.</given-names>
</name>
<name>
<surname>Baker</surname>
<given-names>M. L.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>
<italic>De novo</italic> modeling in cryo-EM density maps with Pathwalking</article-title>. <source>J. Struct. Biol.</source> <volume>196</volume>, <fpage>289</fpage>&#x2013;<lpage>298</lpage>. <pub-id pub-id-type="doi">10.1016/j.jsb.2016.06.004</pub-id>
<pub-id pub-id-type="pmid">27436409</pub-id>
</mixed-citation>
</ref>
<ref id="B28">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>SEGEM: a fast and accurate automated protein backbone structure modeling method for Cryo-EM</article-title>. <source>IEEE Int. Conf. Bioinforma. Biomed. (BIBM)</source>, <fpage>24</fpage>&#x2013;<lpage>31</lpage>. <pub-id pub-id-type="doi">10.1109/bibm52615.2021.9669647</pub-id>
</mixed-citation>
</ref>
<ref id="B29">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>FFF: Fragment-Guided flexible fitting for building complete protein structures</article-title>. <source>IEEE/CVF Conf. Comput. Vis. Pattern Recognit. (CVPR)</source>, <fpage>19776</fpage>&#x2013;<lpage>19785</lpage>. <pub-id pub-id-type="doi">10.1109/cvpr52729.2023.01894</pub-id>
</mixed-citation>
</ref>
<ref id="B30">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Fang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2024a</year>). <article-title>Protein complex structure modeling by cross-modal alignment between cryo-EM maps and protein sequences</article-title>. <source>Nat. Commun.</source> <volume>15</volume>, <fpage>8808</fpage>. <pub-id pub-id-type="doi">10.1038/s41467-024-53116-5</pub-id>
<pub-id pub-id-type="pmid">39394203</pub-id>
</mixed-citation>
</ref>
<ref id="B31">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Zia</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Luo</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Meng</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Hou</surname>
<given-names>J.</given-names>
</name>
<etal/>
</person-group> (<year>2024b</year>). <article-title>Enhancing cryo-EM structure prediction with DeepTracer and AlphaFold2 integration</article-title>. <source>Briefings Bioinforma.</source> <volume>25</volume>, <fpage>bbae118</fpage>. <pub-id pub-id-type="doi">10.1093/bib/bbae118</pub-id>
<pub-id pub-id-type="pmid">38609330</pub-id>
</mixed-citation>
</ref>
<ref id="B32">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cheng</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Single-particle cryo-EM&#x2014;How did it get here and where will it go</article-title>. <source>Science</source> <volume>361</volume>, <fpage>876</fpage>&#x2013;<lpage>880</lpage>. <pub-id pub-id-type="doi">10.1126/science.aat4346</pub-id>
<pub-id pub-id-type="pmid">30166484</pub-id>
</mixed-citation>
</ref>
<ref id="B33">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chojnowski</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Sequence-assignment validation in cryo-EM models with checkMySequence</article-title>. <source>Acta Crystallogr. Sect. D.</source> <volume>78</volume>, <fpage>806</fpage>&#x2013;<lpage>816</lpage>. <pub-id pub-id-type="doi">10.1107/S2059798322005009</pub-id>
<pub-id pub-id-type="pmid">35775980</pub-id>
</mixed-citation>
</ref>
<ref id="B34">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chojnowski</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>DoubleHelix: nucleic acid sequence identification, assignment and validation tool for cryo-EM and crystal structure models</article-title>. <source>Nucleic Acids Res.</source> <volume>51</volume>, <fpage>8255</fpage>&#x2013;<lpage>8269</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkad553</pub-id>
<pub-id pub-id-type="pmid">37395405</pub-id>
</mixed-citation>
</ref>
<ref id="B35">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chojnowski</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Wale&#x144;</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Pi&#x105;tkowski</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Potrzebowski</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Bujnicki</surname>
<given-names>J. M.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Brickworx builds recurrent RNA and DNA structural motifs into medium- and low-resolution electron-density maps</article-title>. <source>Acta Crystallogr. Sect. D.</source> <volume>71</volume>, <fpage>697</fpage>&#x2013;<lpage>705</lpage>. <pub-id pub-id-type="doi">10.1107/S1399004715000383</pub-id>
<pub-id pub-id-type="pmid">25760616</pub-id>
</mixed-citation>
</ref>
<ref id="B36">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chojnowski</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Simpkin</surname>
<given-names>A. J.</given-names>
</name>
<name>
<surname>Leonardo</surname>
<given-names>D. A.</given-names>
</name>
<name>
<surname>Seifert-Davila</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Vivas-Ruiz</surname>
<given-names>D. E.</given-names>
</name>
<name>
<surname>Keegan</surname>
<given-names>R. M.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>findMySequence: a neural-network-based approach for identification of unknown proteins in X-ray crystallography and cryo-EM</article-title>. <source>IUCrJ</source> <volume>9</volume>, <fpage>86</fpage>&#x2013;<lpage>97</lpage>. <pub-id pub-id-type="doi">10.1107/S2052252521011088</pub-id>
<pub-id pub-id-type="pmid">35059213</pub-id>
</mixed-citation>
</ref>
<ref id="B37">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chua</surname>
<given-names>E. Y. D.</given-names>
</name>
<name>
<surname>Mendez</surname>
<given-names>J. H.</given-names>
</name>
<name>
<surname>Rapp</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Ilca</surname>
<given-names>S. L.</given-names>
</name>
<name>
<surname>Tan</surname>
<given-names>Y. Z.</given-names>
</name>
<name>
<surname>Maruthi</surname>
<given-names>K.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>Better, faster, cheaper: recent advances in cryo&#x2013;electron microscopy</article-title>. <source>Annu. Rev. Biochem.</source> <volume>91</volume>, <fpage>1</fpage>&#x2013;<lpage>32</lpage>. <pub-id pub-id-type="doi">10.1146/annurev-biochem-032620-110705</pub-id>
<pub-id pub-id-type="pmid">35320683</pub-id>
</mixed-citation>
</ref>
<ref id="B38">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Cianfrocco</surname>
<given-names>M. A.</given-names>
</name>
</person-group> (<year>2017</year>). &#x201c;<article-title>COSMIC2: a science gateway for cryo-electron microscopy structure determination</article-title>,&#x201d; in <source>Practice and experience in advanced research computing 2017: sustainability, success and impact</source>. <publisher-loc>New York, NY</publisher-loc>: <publisher-name>Association for Computing Machinery</publisher-name>.</mixed-citation>
</ref>
<ref id="B39">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cianfrocco</surname>
<given-names>M. A.</given-names>
</name>
<name>
<surname>Kellogg</surname>
<given-names>E. H.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>What could Go wrong? A practical guide to single-particle Cryo-EM: from biochemistry to atomic models</article-title>. <source>J. Chem. Inf. Model.</source> <volume>60</volume>, <fpage>2458</fpage>&#x2013;<lpage>2469</lpage>. <pub-id pub-id-type="doi">10.1021/acs.jcim.9b01178</pub-id>
<pub-id pub-id-type="pmid">32078321</pub-id>
</mixed-citation>
</ref>
<ref id="B40">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>&#xc7;i&#xe7;ek</surname>
<given-names>&#xd6;.</given-names>
</name>
</person-group> (<year>2016</year>). &#x201c;<article-title>3D U-Net: learning dense volumetric segmentation from sparse annotation</article-title>,&#x201d; in <source>Medical image computing and computer-assisted intervention &#x2013; MICCAI 2016: 19th international conference, Athens, Greece, October 17-21, 2016, proceedings, part II</source>. <publisher-name>Springer-Verlag</publisher-name>.</mixed-citation>
</ref>
<ref id="B41">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<collab>The wwPDB Consortium</collab>
</person-group> (<year>2023</year>). <article-title>EMDB&#x2014;The electron microscopy Data bank</article-title>. <source>Nucleic Acids Res.</source> <volume>52</volume>, <fpage>D456</fpage>&#x2013;<lpage>D465</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkad1019</pub-id>
<pub-id pub-id-type="pmid">37994703</pub-id>
</mixed-citation>
</ref>
<ref id="B42">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cragnolini</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Sahota</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Joseph</surname>
<given-names>A. P.</given-names>
</name>
<name>
<surname>Sweeney</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Malhotra</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Vasishtan</surname>
<given-names>D.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>TEMPy2: a Python library with improved 3D electron microscopy density-fitting and validation workflows</article-title>. <source>Acta Crystallogr. Sect. D.</source> <volume>77</volume>, <fpage>41</fpage>&#x2013;<lpage>47</lpage>. <pub-id pub-id-type="doi">10.1107/S2059798320014928</pub-id>
<pub-id pub-id-type="pmid">33404524</pub-id>
</mixed-citation>
</ref>
<ref id="B43">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cretin</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Galochkina</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Vander Meersche</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>de Brevern</surname>
<given-names>A. G.</given-names>
</name>
<name>
<surname>Postic</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Gelly</surname>
<given-names>J. C.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>SWORD2: hierarchical analysis of protein 3D structures</article-title>. <source>Nucleic Acids Res.</source> <volume>50</volume>, <fpage>W732</fpage>&#x2013;<lpage>W738</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkac370</pub-id>
<pub-id pub-id-type="pmid">35580056</pub-id>
</mixed-citation>
</ref>
<ref id="B44">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Croll</surname>
<given-names>T. I.</given-names>
</name>
<name>
<surname>Diederichs</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Fischer</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Fyfe</surname>
<given-names>C. D.</given-names>
</name>
<name>
<surname>Gao</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Horrell</surname>
<given-names>S.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Making the invisible enemy visible</article-title>. <source>Nat. Struct. and Mol. Biol.</source> <volume>28</volume>, <fpage>404</fpage>&#x2013;<lpage>408</lpage>. <pub-id pub-id-type="doi">10.1038/s41594-021-00593-7</pub-id>
<pub-id pub-id-type="pmid">33972785</pub-id>
</mixed-citation>
</ref>
<ref id="B45">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dai</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Yoo</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>Q.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Integrating AlphaFold and deep learning for atomistic interpretation of cryo-EM maps</article-title>. <source>Briefings Bioinforma.</source> <volume>24</volume>, <fpage>bbad405</fpage>. <pub-id pub-id-type="doi">10.1093/bib/bbad405</pub-id>
<pub-id pub-id-type="pmid">37982712</pub-id>
</mixed-citation>
</ref>
<ref id="B46">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dai</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Dong</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Fu</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Q. C.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>CryoDomain: Sequence-free protein domain identification from low-resolution Cryo-EM density maps</article-title>. <source>Proc. AAAI Conf. Artif. Intell.</source> <volume>39</volume>, <fpage>119</fpage>&#x2013;<lpage>127</lpage>. <pub-id pub-id-type="doi">10.1609/aaai.v39i1.31987</pub-id>
</mixed-citation>
</ref>
<ref id="B47">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Davis</surname>
<given-names>I. W.</given-names>
</name>
<name>
<surname>Leaver-Fay</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>V. B.</given-names>
</name>
<name>
<surname>Block</surname>
<given-names>J. N.</given-names>
</name>
<name>
<surname>Kapral</surname>
<given-names>G. J.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>X.</given-names>
</name>
<etal/>
</person-group> (<year>2007</year>). <article-title>MolProbity: all-atom contacts and structure validation for proteins and nucleic acids</article-title>. <source>Nucleic Acids Res.</source> <volume>35</volume>, <fpage>W375</fpage>&#x2013;<lpage>W383</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkm216</pub-id>
<pub-id pub-id-type="pmid">17452350</pub-id>
</mixed-citation>
</ref>
<ref id="B48">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>de la Cruz</surname>
<given-names>M. J.</given-names>
</name>
<name>
<surname>Eng</surname>
<given-names>E. T.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Scaling up cryo-EM for biology and chemistry: the journey from niche technology to mainstream method</article-title>. <source>Structure</source> <volume>31</volume>, <fpage>1487</fpage>&#x2013;<lpage>1498</lpage>. <pub-id pub-id-type="doi">10.1016/j.str.2023.09.009</pub-id>
<pub-id pub-id-type="pmid">37820731</pub-id>
</mixed-citation>
</ref>
<ref id="B49">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>de Oliveira</surname>
<given-names>T. M.</given-names>
</name>
<name>
<surname>van Beek</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Shilliday</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Debreczeni</surname>
<given-names>J. &#xc9;.</given-names>
</name>
<name>
<surname>Phillips</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Cryo-EM: the resolution revolution and drug discovery</article-title>. <source>SLAS Discov.</source> <volume>26</volume>, <fpage>17</fpage>&#x2013;<lpage>31</lpage>. <pub-id pub-id-type="doi">10.1177/2472555220960401</pub-id>
<pub-id pub-id-type="pmid">33016175</pub-id>
</mixed-citation>
</ref>
<ref id="B50">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>DeLano</surname>
<given-names>W. L.</given-names>
</name>
<name>
<surname>Lam</surname>
<given-names>J. W.</given-names>
</name>
</person-group> (<year>2005</year>). &#x201c;<article-title>PyMOL: a communications tool for computational models</article-title>,&#x201d; <volume>1155</volume>. <publisher-name>AMER CHEMICAL SOC</publisher-name>.</mixed-citation>
</ref>
<ref id="B51">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>DiIorio</surname>
<given-names>M. C.</given-names>
</name>
<name>
<surname>Kulczyk</surname>
<given-names>A. W.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Novel artificial intelligence-based approaches for <italic>ab initio</italic> structure determination and atomic model building for cryo-electron microscopy</article-title>. <source>Micromachines</source> <volume>14</volume>, <fpage>1674</fpage>. <pub-id pub-id-type="doi">10.3390/mi14091674</pub-id>
<pub-id pub-id-type="pmid">37763837</pub-id>
</mixed-citation>
</ref>
<ref id="B52">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>DiMaio</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Leaver-Fay</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Bradley</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Baker</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Andr&#xe9;</surname>
<given-names>I.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>Modeling symmetric macromolecular structures in Rosetta3</article-title>. <source>PLoS One</source> <volume>6</volume>, <fpage>e20450</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pone.0020450</pub-id>
<pub-id pub-id-type="pmid">21731614</pub-id>
</mixed-citation>
</ref>
<ref id="B53">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dosovitskiy</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>An image is worth 16x16 words: transformers for image recognition at Scale</article-title>.</mixed-citation>
</ref>
<ref id="B54">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Eddy</surname>
<given-names>S. R.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>Accelerated profile HMM searches</article-title>. <source>PLOS Comput. Biol.</source> <volume>7</volume>, <fpage>e1002195</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pcbi.1002195</pub-id>
<pub-id pub-id-type="pmid">22039361</pub-id>
</mixed-citation>
</ref>
<ref id="B55">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Emsley</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Cowtan</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2004</year>). <article-title>Coot: model-building tools for molecular graphics</article-title>. <source>Acta Crystallogr. Sect. D.</source> <volume>60</volume>, <fpage>2126</fpage>&#x2013;<lpage>2132</lpage>. <pub-id pub-id-type="doi">10.1107/S0907444904019158</pub-id>
<pub-id pub-id-type="pmid">15572765</pub-id>
</mixed-citation>
</ref>
<ref id="B56">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Ester</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>1996</year>). &#x201c;<article-title>A density-based algorithm for discovering clusters in large spatial databases with noise</article-title>,&#x201d; in <source>Proceedings of the second international conference on knowledge discovery and data mining</source>. <publisher-loc>Washington, DC</publisher-loc>: <publisher-name>AAAI Press (Association for the Advancement of Artificial Intelligence)</publisher-name>.</mixed-citation>
</ref>
<ref id="B57">
<mixed-citation publication-type="book">
<collab>European Organization For Nuclear Research</collab> (<year>2013</year>). <source>OpenAIRE</source>. <publisher-name>Zenodo</publisher-name>.</mixed-citation>
</ref>
<ref id="B58">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Evans</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>O&#x2019;Neill</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Pritzel</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Antropova</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Senior</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Green</surname>
<given-names>T.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>Protein complex prediction with AlphaFold-Multimer</article-title>. <source>bioRxiv</source> <volume>2021</volume>. <pub-id pub-id-type="doi">10.1101/2021.10.04.463034</pub-id>
</mixed-citation>
</ref>
<ref id="B59">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Farheen</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Terashi</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Zhu</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Kihara</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>AI-based methods for biomolecular structure modeling for Cryo-EM</article-title>. <source>Curr. Opin. Struct. Biol.</source> <volume>90</volume>, <fpage>102989</fpage>. <pub-id pub-id-type="doi">10.1016/j.sbi.2025.102989</pub-id>
<pub-id pub-id-type="pmid">39864242</pub-id>
</mixed-citation>
</ref>
<ref id="B60">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Feng</surname>
<given-names>M.-F.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Y. X.</given-names>
</name>
<name>
<surname>Shen</surname>
<given-names>H. B.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>DeepQs: local quality assessment of cryo-EM density map by deep learning map-model fit score</article-title>. <source>J. Struct. Biol.</source> <volume>216</volume>, <fpage>108059</fpage>. <pub-id pub-id-type="doi">10.1016/j.jsb.2023.108059</pub-id>
<pub-id pub-id-type="pmid">38160703</pub-id>
</mixed-citation>
</ref>
<ref id="B61">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Forney</surname>
<given-names>G. D.</given-names>
</name>
</person-group> (<year>1973</year>). <article-title>The viterbi algorithm</article-title>. <source>Proc. IEEE</source> <volume>61</volume>, <fpage>268</fpage>&#x2013;<lpage>278</lpage>. <pub-id pub-id-type="doi">10.1109/PROC.1973.9030</pub-id>
</mixed-citation>
</ref>
<ref id="B62">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Garcia Condado</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Mu&#xf1;oz-Barrutia</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Sorzano</surname>
<given-names>C. O. S.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Automatic determination of the handedness of single-particle maps of macromolecules solved by CryoEM</article-title>. <source>J. Struct. Biol.</source> <volume>214</volume>, <fpage>107915</fpage>. <pub-id pub-id-type="doi">10.1016/j.jsb.2022.107915</pub-id>
<pub-id pub-id-type="pmid">36341955</pub-id>
</mixed-citation>
</ref>
<ref id="B63">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Giri</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Cheng</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title><italic>De novo</italic> atomic protein structure modeling for cryoEM density maps using 3D transformer and HMM</article-title>. <source>Nat. Commun.</source> <volume>15</volume>, <fpage>5511</fpage>. <pub-id pub-id-type="doi">10.1038/s41467-024-49647-6</pub-id>
<pub-id pub-id-type="pmid">38951555</pub-id>
</mixed-citation>
</ref>
<ref id="B64">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Giri</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Cheng</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>Atomic protein structure modeling from Cryo-EM using multi-modal deep learning and AlphaFold3</article-title>. <source>bioRxiv</source> <volume>2025</volume>. <pub-id pub-id-type="doi">10.1101/2025.03.16.643561</pub-id>
</mixed-citation>
</ref>
<ref id="B65">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Giri</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Roy</surname>
<given-names>R. S.</given-names>
</name>
<name>
<surname>Cheng</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Deep learning for reconstructing protein structures from cryo-EM density maps: recent advances and future directions</article-title>. <source>Curr. Opin. Struct. Biol.</source> <volume>79</volume>, <fpage>102536</fpage>. <pub-id pub-id-type="doi">10.1016/j.sbi.2023.102536</pub-id>
<pub-id pub-id-type="pmid">36773336</pub-id>
</mixed-citation>
</ref>
<ref id="B66">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Giri</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Cheng</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Cryo2StructData: a large labeled Cryo-EM density map dataset for AI-based modeling of protein structures</article-title>. <source>Sci. Data</source> <volume>11</volume>, <fpage>458</fpage>. <pub-id pub-id-type="doi">10.1038/s41597-024-03299-9</pub-id>
<pub-id pub-id-type="pmid">38710720</pub-id>
</mixed-citation>
</ref>
<ref id="B67">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Glover</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>1986</year>). <article-title>Future paths for integer programming and links to artificial intelligence</article-title>. <source>Comput. and Operations Res.</source> <volume>13</volume>, <fpage>533</fpage>&#x2013;<lpage>549</lpage>. <pub-id pub-id-type="doi">10.1016/0305-0548(86)90048-1</pub-id>
</mixed-citation>
</ref>
<ref id="B68">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gotoh</surname>
<given-names>O.</given-names>
</name>
</person-group> (<year>1982</year>). <article-title>An improved algorithm for matching biological sequences</article-title>. <source>J. Mol. Biol.</source> <volume>162</volume>, <fpage>705</fpage>&#x2013;<lpage>708</lpage>. <pub-id pub-id-type="doi">10.1016/0022-2836(82)90398-9</pub-id>
<pub-id pub-id-type="pmid">7166760</pub-id>
</mixed-citation>
</ref>
<ref id="B69">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Graves</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Generating sequences with recurrent neural networks</article-title>.</mixed-citation>
</ref>
<ref id="B70">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Greener</surname>
<given-names>J. G.</given-names>
</name>
<name>
<surname>Kandathil</surname>
<given-names>S. M.</given-names>
</name>
<name>
<surname>Moffat</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Jones</surname>
<given-names>D. T.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>A guide to machine learning for biologists</article-title>. <source>Nat. Rev. Mol. Cell Biol.</source> <volume>23</volume>, <fpage>40</fpage>&#x2013;<lpage>55</lpage>. <pub-id pub-id-type="doi">10.1038/s41580-021-00407-0</pub-id>
<pub-id pub-id-type="pmid">34518686</pub-id>
</mixed-citation>
</ref>
<ref id="B71">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gyawali</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Dhakal</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Cheng</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>Multimodal deep learning integration of cryo-EM and AlphaFold3 for high-accuracy protein structure determination</article-title>. <source>bioRxiv</source> <volume>2025</volume>, <fpage>2025.07.03.663071</fpage>. <pub-id pub-id-type="doi">10.1101/2025.07.03.663071</pub-id>
<pub-id pub-id-type="pmid">40631196</pub-id>
</mixed-citation>
</ref>
<ref id="B72">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Han</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Terashi</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Christoffer</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Kihara</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>VESPER: global and local cryo-EM map alignment using local density vectors</article-title>. <source>Nat. Commun.</source> <volume>12</volume>, <fpage>2090</fpage>. <pub-id pub-id-type="doi">10.1038/s41467-021-22401-y</pub-id>
<pub-id pub-id-type="pmid">33828103</pub-id>
</mixed-citation>
</ref>
<ref id="B73">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>He</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2016</year>). &#x201c;<article-title>Deep residual learning for image recognition</article-title>,&#x201d; in <source>2016 IEEE conference on computer vision and pattern recognition (CVPR)</source>.</mixed-citation>
</ref>
<ref id="B74">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>He</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>S.-Y.</given-names>
</name>
</person-group> (<year>2021a</year>). <article-title>EMNUSS: a deep learning framework for secondary structure annotation in cryo-EM maps</article-title>. <source>Briefings Bioinforma.</source> <volume>22</volume>, <fpage>bbab156</fpage>. <pub-id pub-id-type="doi">10.1093/bib/bbab156</pub-id>
<pub-id pub-id-type="pmid">33954706</pub-id>
</mixed-citation>
</ref>
<ref id="B75">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>He</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>S.-Y.</given-names>
</name>
</person-group> (<year>2021b</year>). <article-title>Full-length <italic>de novo</italic> protein structure determination from cryo-EM maps using deep learning</article-title>. <source>Bioinformatics</source> <volume>37</volume>, <fpage>3480</fpage>&#x2013;<lpage>3490</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btab357</pub-id>
<pub-id pub-id-type="pmid">33978686</pub-id>
</mixed-citation>
</ref>
<ref id="B76">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>He</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Cao</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>S. Y.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Model building of protein complexes from intermediate-resolution cryo-EM maps with deep learning-guided automatic assembly</article-title>. <source>Nat. Commun.</source> <volume>13</volume>, <fpage>4066</fpage>. <pub-id pub-id-type="doi">10.1038/s41467-022-31748-9</pub-id>
<pub-id pub-id-type="pmid">35831370</pub-id>
</mixed-citation>
</ref>
<ref id="B77">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Heinig</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Frishman</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2004</year>). <article-title>STRIDE: a web server for secondary structure assignment from known atomic coordinates of proteins</article-title>. <source>Nucleic Acids Res.</source> <volume>32</volume>, <fpage>W500</fpage>&#x2013;<lpage>W502</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkh429</pub-id>
<pub-id pub-id-type="pmid">15215436</pub-id>
</mixed-citation>
</ref>
<ref id="B78">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Helsgaun</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2000</year>). <article-title>An effective implementation of the Lin&#x2013;Kernighan traveling salesman heuristic</article-title>. <source>Eur. J. Operational Res.</source> <volume>126</volume>, <fpage>106</fpage>&#x2013;<lpage>130</lpage>. <pub-id pub-id-type="doi">10.1016/S0377-2217(99)00284-2</pub-id>
</mixed-citation>
</ref>
<ref id="B79">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Helsgaun</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>An extension of the lin-kernighan-helsgaun TSP solver for constrained traveling salesman and vehicle routing problems</article-title>.</mixed-citation>
</ref>
<ref id="B80">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Ho</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2020</year>). &#x201c;<article-title>Denoising diffusion probabilistic models</article-title>,&#x201d; in <source>Proceedings of the 34th international conference on neural information processing systems</source>. <publisher-loc>Red Hook, NY</publisher-loc>: <publisher-name>Curran Associates Inc</publisher-name>.</mixed-citation>
</ref>
<ref id="B81">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hochreiter</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Schmidhuber</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>1997</year>). <article-title>Long short-term memory</article-title>. <source>Neural Comput.</source> <volume>9</volume>, <fpage>1735</fpage>&#x2013;<lpage>1780</lpage>. <pub-id pub-id-type="doi">10.1162/neco.1997.9.8.1735</pub-id>
<pub-id pub-id-type="pmid">9377276</pub-id>
</mixed-citation>
</ref>
<ref id="B82">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Huang</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2020</year>). &#x201c;<article-title>UNet 3&#x2b;: a full-scale connected UNet for medical image segmentation</article-title>,&#x201d; in <source>Icassp 2020 - 2020 IEEE international conference on acoustics, speech and signal processing (ICASSP)</source>.</mixed-citation>
</ref>
<ref id="B83">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Huang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Pearce</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>FASPR: an open-source tool for fast and accurate protein side-chain packing</article-title>. <source>Bioinformatics</source> <volume>36</volume>, <fpage>3758</fpage>&#x2013;<lpage>3765</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btaa234</pub-id>
<pub-id pub-id-type="pmid">32259206</pub-id>
</mixed-citation>
</ref>
<ref id="B84">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ibtehaz</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Rahman</surname>
<given-names>M. S.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>MultiResUNet: rethinking the U-Net architecture for multimodal biomedical image segmentation</article-title>. <source>Neural Netw.</source> <volume>121</volume>, <fpage>74</fpage>&#x2013;<lpage>87</lpage>. <pub-id pub-id-type="doi">10.1016/j.neunet.2019.08.025</pub-id>
<pub-id pub-id-type="pmid">31536901</pub-id>
</mixed-citation>
</ref>
<ref id="B85">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Istrate</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Murshudov</surname>
<given-names>G. N.</given-names>
</name>
<name>
<surname>Patwardhan</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Kleywegt</surname>
<given-names>G. J.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>3D-Strudel - a novel model-dependent map-feature validation method for high-resolution cryo-EM structures</article-title>. <source>bioRxiv</source> <volume>2021</volume>, <fpage>2016</fpage>. <pub-id pub-id-type="doi">10.1101/2021.12.16.472999</pub-id>
</mixed-citation>
</ref>
<ref id="B86">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Jamali</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2022</year>). &#x201c;<article-title>A graph neural network approach to automated model building in Cryo-EM maps</article-title>,&#x201d; in <source>International conference on learning representations</source>.</mixed-citation>
</ref>
<ref id="B87">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jamali</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>K&#xe4;ll</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Brown</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Kimanius</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Scheres</surname>
<given-names>S. H. W.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Automated model building and protein identification in cryo-EM maps</article-title>. <source>Nature</source> <volume>628</volume>, <fpage>450</fpage>&#x2013;<lpage>457</lpage>. <pub-id pub-id-type="doi">10.1038/s41586-024-07215-4</pub-id>
<pub-id pub-id-type="pmid">38408488</pub-id>
</mixed-citation>
</ref>
<ref id="B88">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Joseph</surname>
<given-names>A. P.</given-names>
</name>
<name>
<surname>Olek</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Malhotra</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Cowtan</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Burnley</surname>
<given-names>T.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>Atomic model validation using the CCP-EM software suite</article-title>. <source>Acta Crystallogr. Sect. D.</source> <volume>78</volume>, <fpage>152</fpage>&#x2013;<lpage>161</lpage>. <pub-id pub-id-type="doi">10.1107/S205979832101278X</pub-id>
<pub-id pub-id-type="pmid">35102881</pub-id>
</mixed-citation>
</ref>
<ref id="B89">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jumper</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Evans</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Pritzel</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Green</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Figurnov</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Ronneberger</surname>
<given-names>O.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Highly accurate protein structure prediction with AlphaFold</article-title>. <source>Nature</source> <volume>596</volume>, <fpage>583</fpage>&#x2013;<lpage>589</lpage>. <pub-id pub-id-type="doi">10.1038/s41586-021-03819-2</pub-id>
<pub-id pub-id-type="pmid">34265844</pub-id>
</mixed-citation>
</ref>
<ref id="B90">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kabsch</surname>
<given-names>W.</given-names>
</name>
</person-group> (<year>1976</year>). <article-title>A solution for the best rotation to relate two sets of vectors</article-title>. <source>Acta Crystallogr. Sect. A</source> <volume>32</volume>, <fpage>922</fpage>&#x2013;<lpage>923</lpage>. <pub-id pub-id-type="doi">10.1107/S0567739476001873</pub-id>
</mixed-citation>
</ref>
<ref id="B91">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Karolczak</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Przyby&#x142;owska</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Szewczyk</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Taisner</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Heumann</surname>
<given-names>J. M.</given-names>
</name>
<name>
<surname>Stowell</surname>
<given-names>M. H. B.</given-names>
</name>
<etal/>
</person-group> (<year>2024</year>). <article-title>Ligand identification in CryoEM and X-ray maps using deep learning</article-title>. <source>Bioinformatics</source> <volume>41</volume>, <fpage>btae749</fpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btae749</pub-id>
<pub-id pub-id-type="pmid">39700427</pub-id>
</mixed-citation>
</ref>
<ref id="B92">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Krivov</surname>
<given-names>G. G.</given-names>
</name>
<name>
<surname>Shapovalov</surname>
<given-names>M. V.</given-names>
</name>
<name>
<surname>Dunbrack</surname>
<given-names>R. L.</given-names>
<suffix>Jr</suffix>
</name>
</person-group> (<year>2009a</year>). <article-title>Improved prediction of protein side-chain conformations with SCWRL4</article-title>. <source>Proteins Struct. Funct. Bioinforma.</source> <volume>77</volume>, <fpage>778</fpage>&#x2013;<lpage>795</lpage>. <pub-id pub-id-type="doi">10.1002/prot.22488</pub-id>
<pub-id pub-id-type="pmid">19603484</pub-id>
</mixed-citation>
</ref>
<ref id="B93">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Krivov</surname>
<given-names>G. G.</given-names>
</name>
<name>
<surname>Shapovalov</surname>
<given-names>M. V.</given-names>
</name>
<name>
<surname>Dunbrack</surname>
<given-names>R. L.</given-names>
<suffix>Jr</suffix>
</name>
</person-group> (<year>2009b</year>). <article-title>Improved prediction of protein side-chain conformations with SCWRL4</article-title>. <source>Proteins</source> <volume>77</volume>, <fpage>778</fpage>&#x2013;<lpage>795</lpage>. <pub-id pub-id-type="doi">10.1002/prot.22488</pub-id>
<pub-id pub-id-type="pmid">19603484</pub-id>
</mixed-citation>
</ref>
<ref id="B94">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>K&#xfc;hlbrandt</surname>
<given-names>W.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Biochemistry. The resolution revolution</article-title>. <source>Science</source> <volume>343</volume>, <fpage>1443</fpage>&#x2013;<lpage>1444</lpage>. <pub-id pub-id-type="doi">10.1126/science.1251652</pub-id>
<pub-id pub-id-type="pmid">24675944</pub-id>
</mixed-citation>
</ref>
<ref id="B95">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kyrilis</surname>
<given-names>F. L.</given-names>
</name>
<name>
<surname>Low</surname>
<given-names>J. K. K.</given-names>
</name>
<name>
<surname>Mackay</surname>
<given-names>J. P.</given-names>
</name>
<name>
<surname>Kastritis</surname>
<given-names>P. L.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Structural biology in cellulo: minding the gap between conceptualization and realization</article-title>. <source>Curr. Opin. Struct. Biol.</source> <volume>87</volume>, <fpage>102843</fpage>. <pub-id pub-id-type="doi">10.1016/j.sbi.2024.102843</pub-id>
<pub-id pub-id-type="pmid">38788606</pub-id>
</mixed-citation>
</ref>
<ref id="B96">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lawson</surname>
<given-names>C. L.</given-names>
</name>
<name>
<surname>Patwardhan</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Baker</surname>
<given-names>M. L.</given-names>
</name>
<name>
<surname>Hryc</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Garcia</surname>
<given-names>E. S.</given-names>
</name>
<name>
<surname>Hudson</surname>
<given-names>B. P.</given-names>
</name>
<etal/>
</person-group> (<year>2016</year>). <article-title>EMDataBank unified data resource for 3DEM</article-title>. <source>Nucleic Acids Res.</source> <volume>44</volume>, <fpage>D396</fpage>&#x2013;<lpage>D403</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkv1126</pub-id>
<pub-id pub-id-type="pmid">26578576</pub-id>
</mixed-citation>
</ref>
<ref id="B97">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>LeCun</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Bengio</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>1998</year>). &#x201c;<article-title>Convolutional networks for images, speech, and time series</article-title>,&#x201d; in <source>The handbook of brain theory and neural networks</source> (<publisher-name>MIT Press</publisher-name>), <fpage>255</fpage>&#x2013;<lpage>258</lpage>.</mixed-citation>
</ref>
<ref id="B98">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>LeCun</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Bengio</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Hinton</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Deep learning</article-title>. <source>Nature</source> <volume>521</volume>, <fpage>436</fpage>&#x2013;<lpage>444</lpage>. <pub-id pub-id-type="doi">10.1038/nature14539</pub-id>
<pub-id pub-id-type="pmid">26017442</pub-id>
</mixed-citation>
</ref>
<ref id="B99">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>LeNail</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>NN-SVG: Publication-Ready neural network Architecture schematics</article-title>. <source>J. Open Source Softw.</source> <volume>4</volume>, <fpage>747</fpage>. <pub-id pub-id-type="doi">10.21105/joss.00747</pub-id>
</mixed-citation>
</ref>
<ref id="B100">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>P.-N.</given-names>
</name>
</person-group> (<year>2020</year>). &#x201c;<article-title>Sequence-guided protein structure determination using graph convolutional and recurrent networks</article-title>,&#x201d; in <source>2020 IEEE 20th international conference on bioinformatics and bioengineering (BIBE)</source>. <publisher-name>IEEE Computer Society</publisher-name>.</mixed-citation>
</ref>
<ref id="B101">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>S.-J.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>DeepCryoRNA: deep learning-based RNA structure reconstruction from cryo-EM maps</article-title>. <source>bioRxiv</source> <volume>2025</volume>, <fpage>2005</fpage>. <pub-id pub-id-type="doi">10.1101/2025.04.05.647396</pub-id>
</mixed-citation>
</ref>
<ref id="B102">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Si</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Zeng</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Ji</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>He</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Deep convolutional neural networks for detecting secondary structures in protein density maps from cryo-electron microscopy</article-title>. <source>IEEE Int. Conf. Bioinforma. Biomed. (BIBM)</source> <volume>2016</volume>, <fpage>41</fpage>&#x2013;<lpage>46</lpage>. <pub-id pub-id-type="doi">10.1109/BIBM.2016.7822490</pub-id>
<pub-id pub-id-type="pmid">29770260</pub-id>
</mixed-citation>
</ref>
<ref id="B103">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Hu</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>D. J.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>ResPRE: high-accuracy protein contact prediction by coupling precision matrix with deep residual neural networks</article-title>. <source>Bioinformatics</source> <volume>35</volume>, <fpage>4647</fpage>&#x2013;<lpage>4655</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btz291</pub-id>
<pub-id pub-id-type="pmid">31070716</pub-id>
</mixed-citation>
</ref>
<ref id="B104">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Jaroszewski</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Iyer</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Sedova</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Godzik</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>FATCAT 2.0: towards a better understanding of the structural diversity of proteins</article-title>. <source>Nucleic Acids Res.</source> <volume>48</volume>, <fpage>W60</fpage>&#x2013;<lpage>W64</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkaa443</pub-id>
<pub-id pub-id-type="pmid">32469061</pub-id>
</mixed-citation>
</ref>
<ref id="B105">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Guo</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Meng</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Ni</surname>
<given-names>X.</given-names>
</name>
<etal/>
</person-group> (<year>2023</year>). <article-title>An end-to-end approach for protein folding by integrating Cryo-EM maps and sequence evolution</article-title>. <source>bioRxiv</source> <volume>2023</volume>, <fpage>565403</fpage>. <pub-id pub-id-type="doi">10.1101/2023.11.02.565403</pub-id>
</mixed-citation>
</ref>
<ref id="B106">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Cao</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>He</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>S. Y.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Automated detection and <italic>de novo</italic> structure modeling of nucleic acids from cryo-EM maps</article-title>. <source>Nat. Commun.</source> <volume>15</volume>, <fpage>9367</fpage>. <pub-id pub-id-type="doi">10.1038/s41467-024-53721-4</pub-id>
<pub-id pub-id-type="pmid">39477926</pub-id>
</mixed-citation>
</ref>
<ref id="B107">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Terashi</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Kihara</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2025a</year>). <article-title>Advancing structure modeling from cryo-EM maps with deep learning</article-title>. <source>Biochem. Soc. Trans.</source> <volume>53</volume>, <fpage>259</fpage>&#x2013;<lpage>265</lpage>. <pub-id pub-id-type="doi">10.1042/bst20240784</pub-id>
<pub-id pub-id-type="pmid">39927816</pub-id>
</mixed-citation>
</ref>
<ref id="B108">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>He</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Cao</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Xiao</surname>
<given-names>Y.</given-names>
</name>
<etal/>
</person-group> (<year>2025b</year>). <article-title>All-atom RNA structure determination from cryo-EM maps</article-title>. <source>Nat. Biotechnol.</source> <volume>43</volume>, <fpage>97</fpage>&#x2013;<lpage>105</lpage>. <pub-id pub-id-type="doi">10.1038/s41587-024-02149-8</pub-id>
<pub-id pub-id-type="pmid">38396075</pub-id>
</mixed-citation>
</ref>
<ref id="B109">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Lin</surname>
<given-names>T. Y.</given-names>
</name>
</person-group> (<year>2017</year>). &#x201c;<article-title>Feature Pyramid networks for object detection</article-title>,&#x201d; in <source>2017 IEEE conference on computer vision and pattern recognition (CVPR)</source>.</mixed-citation>
</ref>
<ref id="B110">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lin</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Akin</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Rao</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Hie</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Zhu</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Lu</surname>
<given-names>W.</given-names>
</name>
<etal/>
</person-group> (<year>2023</year>). <article-title>Evolutionary-scale prediction of atomic-level protein structure with a language model</article-title>. <source>Science</source> <volume>379</volume>, <fpage>1123</fpage>&#x2013;<lpage>1130</lpage>. <pub-id pub-id-type="doi">10.1126/science.ade2574</pub-id>
<pub-id pub-id-type="pmid">36927031</pub-id>
</mixed-citation>
</ref>
<ref id="B111">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lindert</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Staritzbichler</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>W&#xf6;tzel</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Karaka&#x15f;</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Stewart</surname>
<given-names>P. L.</given-names>
</name>
<name>
<surname>Meiler</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>EM-Fold: <italic>de novo</italic> Folding of &#x3b1;-Helical Proteins Guided by Intermediate-Resolution Electron Microscopy Density Maps</article-title>. <source>Structure</source> <volume>17</volume>, <fpage>990</fpage>&#x2013;<lpage>1003</lpage>. <pub-id pub-id-type="doi">10.1016/j.str.2009.06.001</pub-id>
<pub-id pub-id-type="pmid">19604479</pub-id>
</mixed-citation>
</ref>
<ref id="B112">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ling</surname>
<given-names>X.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>Transformers in Protein: a Survey</article-title>.</mixed-citation>
</ref>
<ref id="B113">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ma</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Reisert</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Burkhardt</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>RENNSH: a novel &#x3b1;-helix identification approach for intermediate resolution electron density maps</article-title>. <source>IEEE/ACM Trans. Comput. Biol. Bioinforma.</source> <volume>9</volume>, <fpage>228</fpage>&#x2013;<lpage>239</lpage>. <pub-id pub-id-type="doi">10.1109/TCBB.2011.52</pub-id>
</mixed-citation>
</ref>
<ref id="B114">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>D. C.</given-names>
</name>
<name>
<surname>Nocedal</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>1989</year>). <article-title>On the limited memory BFGS method for large scale optimization</article-title>. <source>Math. Program.</source> <volume>45</volume>, <fpage>503</fpage>&#x2013;<lpage>528</lpage>. <pub-id pub-id-type="doi">10.1007/BF01589116</pub-id>
</mixed-citation>
</ref>
<ref id="B115">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ma</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Si</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>Beyond current boundaries: integrating deep learning and AlphaFold for enhanced protein structure prediction from low-resolution cryo-EM maps</article-title>. <source>Comput. Biol. Chem.</source> <volume>119</volume>, <fpage>108494</fpage>. <pub-id pub-id-type="doi">10.1016/j.compbiolchem.2025.108494</pub-id>
<pub-id pub-id-type="pmid">40460619</pub-id>
</mixed-citation>
</ref>
<ref id="B116">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Maddhuri Venkata Subramaniya</surname>
<given-names>S. R.</given-names>
</name>
<name>
<surname>Terashi</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Kihara</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Protein secondary structure detection in intermediate-resolution cryo-EM maps using deep learning</article-title>. <source>Nat. Methods</source> <volume>16</volume>, <fpage>911</fpage>&#x2013;<lpage>917</lpage>. <pub-id pub-id-type="doi">10.1038/s41592-019-0500-1</pub-id>
<pub-id pub-id-type="pmid">31358979</pub-id>
</mixed-citation>
</ref>
<ref id="B117">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mallet</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Rapisarda</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Minoux</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Ovsjanikov</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>Finding antibodies in cryo-EM maps with CrAI</article-title>. <source>Bioinformatics</source> <volume>41</volume>, <fpage>btaf157</fpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btaf157</pub-id>
<pub-id pub-id-type="pmid">40203077</pub-id>
</mixed-citation>
</ref>
<ref id="B118">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Marques</surname>
<given-names>M. A.</given-names>
</name>
<name>
<surname>Purdy</surname>
<given-names>M. D.</given-names>
</name>
<name>
<surname>Yeager</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>CryoEM maps are full of potential</article-title>. <source>Curr. Opin. Struct. Biol.</source> <volume>58</volume>, <fpage>214</fpage>&#x2013;<lpage>223</lpage>. <pub-id pub-id-type="doi">10.1016/j.sbi.2019.04.006</pub-id>
<pub-id pub-id-type="pmid">31400843</pub-id>
</mixed-citation>
</ref>
<ref id="B119">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Matsumoto</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Ishida</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Araki</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Kato</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Terayama</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Okuno</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Extraction of protein dynamics information from cryo-EM maps using deep learning</article-title>. <source>Nat. Mach. Intell.</source> <volume>3</volume>, <fpage>153</fpage>&#x2013;<lpage>160</lpage>. <pub-id pub-id-type="doi">10.1038/s42256-020-00290-y</pub-id>
</mixed-citation>
</ref>
<ref id="B120">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>McGreevy</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Teo</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Singharoy</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Schulten</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Advances in the molecular dynamics flexible fitting method for cryo-EM modeling</article-title>. <source>Methods</source> <volume>100</volume>, <fpage>50</fpage>&#x2013;<lpage>60</lpage>. <pub-id pub-id-type="doi">10.1016/j.ymeth.2016.01.009</pub-id>
<pub-id pub-id-type="pmid">26804562</pub-id>
</mixed-citation>
</ref>
<ref id="B121">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Meng</surname>
<given-names>E. C.</given-names>
</name>
<name>
<surname>Pettersen</surname>
<given-names>E. F.</given-names>
</name>
<name>
<surname>Couch</surname>
<given-names>G. S.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>C. C.</given-names>
</name>
<name>
<surname>Ferrin</surname>
<given-names>T. E.</given-names>
</name>
</person-group> (<year>2006</year>). <article-title>Tools for integrated sequence-structure analysis with UCSF Chimera</article-title>. <source>BMC Bioinforma.</source> <volume>7</volume>, <fpage>339</fpage>. <pub-id pub-id-type="doi">10.1186/1471-2105-7-339</pub-id>
<pub-id pub-id-type="pmid">16836757</pub-id>
</mixed-citation>
</ref>
<ref id="B122">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Meng</surname>
<given-names>E. C.</given-names>
</name>
<name>
<surname>Goddard</surname>
<given-names>T. D.</given-names>
</name>
<name>
<surname>Pettersen</surname>
<given-names>E. F.</given-names>
</name>
<name>
<surname>Couch</surname>
<given-names>G. S.</given-names>
</name>
<name>
<surname>Pearson</surname>
<given-names>Z. J.</given-names>
</name>
<name>
<surname>Morris</surname>
<given-names>J. H.</given-names>
</name>
<etal/>
</person-group> (<year>2023</year>). <article-title>UCSF ChimeraX: tools for structure building and analysis</article-title>. <source>Protein Sci.</source> <volume>32</volume>, <fpage>e4792</fpage>. <pub-id pub-id-type="doi">10.1002/pro.4792</pub-id>
<pub-id pub-id-type="pmid">37774136</pub-id>
</mixed-citation>
</ref>
<ref id="B123">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mondal</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Kumar</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Satler</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Ramachandran</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Saltzberg</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Chemmama</surname>
<given-names>I.</given-names>
</name>
<etal/>
</person-group> (<year>2025</year>). <article-title>Recognizing amino acid sidechains in a medium-resolution cryo-electron density map</article-title>. <source>Protein Sci.</source> <volume>34</volume>, <fpage>e70217</fpage>. <pub-id pub-id-type="doi">10.1002/pro.70217</pub-id>
<pub-id pub-id-type="pmid">40719420</pub-id>
</mixed-citation>
</ref>
<ref id="B124">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mostosi</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Schindelin</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Kollmannsberger</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Thorn</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Haruspex: a neural network for the automatic identification of oligonucleotides and protein secondary structure in Cryo-Electron microscopy maps</article-title>. <source>Angew. Chem. Int. Ed.</source> <volume>59</volume>, <fpage>14788</fpage>&#x2013;<lpage>14795</lpage>. <pub-id pub-id-type="doi">10.1002/anie.202000421</pub-id>
<pub-id pub-id-type="pmid">32187813</pub-id>
</mixed-citation>
</ref>
<ref id="B125">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Sazzed</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Alshammari</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>He</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>A tool for segmentation of secondary structures in 3D Cryo-EM density map components using deep convolutional neural networks</article-title>. <source>Front. Bioinforma.</source> <volume>1</volume>, <fpage>710119</fpage>. <pub-id pub-id-type="doi">10.3389/fbinf.2021.710119</pub-id>
<pub-id pub-id-type="pmid">36303800</pub-id>
</mixed-citation>
</ref>
<ref id="B126">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>M&#xfc;ller</surname>
<given-names>L.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Attending to graph transformers</article-title>.</mixed-citation>
</ref>
<ref id="B127">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Nakamura</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Meng</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Hou</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Cao</surname>
<given-names>R.</given-names>
</name>
<etal/>
</person-group> (<year>2023</year>). <article-title>Fast and automated protein-DNA/RNA macromolecular complex modeling from cryo-EM maps</article-title>. <source>Briefings Bioinforma.</source> <volume>24</volume>, <fpage>bbac632</fpage>. <pub-id pub-id-type="doi">10.1093/bib/bbac632</pub-id>
<pub-id pub-id-type="pmid">36682003</pub-id>
</mixed-citation>
</ref>
<ref id="B128">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Nawrocki</surname>
<given-names>E. P.</given-names>
</name>
<name>
<surname>Eddy</surname>
<given-names>S. R.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Infernal 1.1: 100-fold faster RNA homology searches</article-title>. <source>Bioinformatics</source> <volume>29</volume>, <fpage>2933</fpage>&#x2013;<lpage>2935</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btt509</pub-id>
<pub-id pub-id-type="pmid">24008419</pub-id>
</mixed-citation>
</ref>
<ref id="B129">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Nogales</surname>
<given-names>E.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>The development of cryo-EM into a mainstream structural biology technique</article-title>. <source>Nat. Methods</source> <volume>13</volume>, <fpage>24</fpage>&#x2013;<lpage>27</lpage>. <pub-id pub-id-type="doi">10.1038/nmeth.3694</pub-id>
<pub-id pub-id-type="pmid">27110629</pub-id>
</mixed-citation>
</ref>
<ref id="B130">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Paszke</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2019</year>). &#x201c;<article-title>PyTorch: an imperative style, high-performance deep learning library</article-title>,&#x201d; in <source>Proceedings of the 33rd international conference on neural information processing systems</source> (<publisher-loc>Red Hook, NY</publisher-loc>: <publisher-name>Curran Associates Inc</publisher-name>).</mixed-citation>
</ref>
<ref id="B131">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Perera</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Navard</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Yilmaz</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>SegFormer3D: an efficient transformer for 3D medical image segmentation</article-title>, <fpage>4981</fpage>&#x2013;<lpage>4988</lpage>. <pub-id pub-id-type="doi">10.1109/cvprw63382.2024.00503</pub-id>
</mixed-citation>
</ref>
<ref id="B132">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Perron</surname>
<given-names>L.</given-names>
</name>
</person-group> (<year>2011</year>). &#x201c;<article-title>Operations research and constraint programming at Google</article-title>,&#x201d; in <source>Principles and Practice of Constraint Programming &#x2013; CP 2011</source>. Editor <person-group person-group-type="editor">
<name>
<surname>Lee</surname>
<given-names>J.</given-names>
</name>
</person-group> (<publisher-loc>Berlin Heidelberg</publisher-loc>: <publisher-name>Springer</publisher-name>).</mixed-citation>
</ref>
<ref id="B133">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Perry</surname>
<given-names>Z. R.</given-names>
</name>
<name>
<surname>Pyle</surname>
<given-names>A. M.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Arena: rapid and accurate reconstruction of full atomic RNA structures from coarse-grained models</article-title>. <source>J. Mol. Biol.</source> <volume>435</volume>, <fpage>168210</fpage>. <pub-id pub-id-type="doi">10.1016/j.jmb.2023.168210</pub-id>
<pub-id pub-id-type="pmid">37479079</pub-id>
</mixed-citation>
</ref>
<ref id="B134">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Petrey</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Xiang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Tang</surname>
<given-names>C. L.</given-names>
</name>
<name>
<surname>Xie</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Gimpelev</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Mitros</surname>
<given-names>T.</given-names>
</name>
<etal/>
</person-group> (<year>2003</year>). <article-title>Using multiple structure alignments, fast model building, and energetic analysis in fold recognition and homology modeling</article-title>. <source>Proteins Struct. Funct. Bioinforma.</source> <volume>53</volume>, <fpage>430</fpage>&#x2013;<lpage>435</lpage>. <pub-id pub-id-type="doi">10.1002/prot.10550</pub-id>
<pub-id pub-id-type="pmid">14579332</pub-id>
</mixed-citation>
</ref>
<ref id="B135">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pettersen</surname>
<given-names>E. F.</given-names>
</name>
<name>
<surname>Goddard</surname>
<given-names>T. D.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>C. C.</given-names>
</name>
<name>
<surname>Couch</surname>
<given-names>G. S.</given-names>
</name>
<name>
<surname>Greenblatt</surname>
<given-names>D. M.</given-names>
</name>
<name>
<surname>Meng</surname>
<given-names>E. C.</given-names>
</name>
<etal/>
</person-group> (<year>2004</year>). <article-title>UCSF Chimera&#x2014;A visualization system for exploratory research and analysis</article-title>. <source>J. Comput. Chem.</source> <volume>25</volume>, <fpage>1605</fpage>&#x2013;<lpage>1612</lpage>. <pub-id pub-id-type="doi">10.1002/jcc.20084</pub-id>
<pub-id pub-id-type="pmid">15264254</pub-id>
</mixed-citation>
</ref>
<ref id="B136">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pfab</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Si</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Automated threshold selection for Cryo-EM density maps</article-title>. <source>bioRxiv</source>, <fpage>657395</fpage>. <pub-id pub-id-type="doi">10.1101/657395</pub-id>
</mixed-citation>
</ref>
<ref id="B137">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pfab</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Phan</surname>
<given-names>N. M.</given-names>
</name>
<name>
<surname>Si</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>DeepTracer for fast <italic>de novo</italic> cryo-EM protein structure modeling and special studies on CoV-related complexes</article-title>. <source>Proc. Natl. Acad. Sci.</source> <volume>118</volume>, <fpage>e2017525118</fpage>. <pub-id pub-id-type="doi">10.1073/pnas.2017525118</pub-id>
<pub-id pub-id-type="pmid">33361332</pub-id>
</mixed-citation>
</ref>
<ref id="B138">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pintilie</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Diagnosing and treating issues in cryo-EM map-derived models</article-title>. <source>Structure</source> <volume>31</volume>, <fpage>759</fpage>&#x2013;<lpage>761</lpage>. <pub-id pub-id-type="doi">10.1016/j.str.2023.06.009</pub-id>
<pub-id pub-id-type="pmid">37419099</pub-id>
</mixed-citation>
</ref>
<ref id="B139">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pintilie</surname>
<given-names>G. D.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Goddard</surname>
<given-names>T. D.</given-names>
</name>
<name>
<surname>Chiu</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Gossard</surname>
<given-names>D. C.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>Quantitative analysis of cryo-EM density map segmentation by watershed and scale-space filtering, and fitting of structures by alignment to regions</article-title>. <source>J. Struct. Biol.</source> <volume>170</volume>, <fpage>427</fpage>&#x2013;<lpage>438</lpage>. <pub-id pub-id-type="doi">10.1016/j.jsb.2010.03.007</pub-id>
<pub-id pub-id-type="pmid">20338243</pub-id>
</mixed-citation>
</ref>
<ref id="B140">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pintilie</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Su</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Schmid</surname>
<given-names>M. F.</given-names>
</name>
<name>
<surname>Chiu</surname>
<given-names>W.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Measurement of atom resolvability in cryo-EM maps with Q-scores</article-title>. <source>Nat. Methods</source> <volume>17</volume>, <fpage>328</fpage>&#x2013;<lpage>334</lpage>. <pub-id pub-id-type="doi">10.1038/s41592-020-0731-1</pub-id>
<pub-id pub-id-type="pmid">32042190</pub-id>
</mixed-citation>
</ref>
<ref id="B141">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Postic</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Ghouzam</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Chebrek</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Gelly</surname>
<given-names>J. C.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>An ambiguity principle for assigning protein structural domains</article-title>. <source>Sci. Adv.</source> <volume>3</volume>, <fpage>e1600552</fpage>. <pub-id pub-id-type="doi">10.1126/sciadv.1600552</pub-id>
<pub-id pub-id-type="pmid">28097215</pub-id>
</mixed-citation>
</ref>
<ref id="B142">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Prisant</surname>
<given-names>M. G.</given-names>
</name>
<name>
<surname>Williams</surname>
<given-names>C. J.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>V. B.</given-names>
</name>
<name>
<surname>Richardson</surname>
<given-names>J. S.</given-names>
</name>
<name>
<surname>Richardson</surname>
<given-names>D. C.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>New tools in MolProbity validation: CaBLAM for CryoEM backbone, UnDowser to rethink &#x201c;waters,&#x201d; and NGL Viewer to recapture online 3D graphics</article-title>. <source>Protein Sci.</source> <volume>29</volume>, <fpage>315</fpage>&#x2013;<lpage>329</lpage>. <pub-id pub-id-type="doi">10.1002/pro.3786</pub-id>
<pub-id pub-id-type="pmid">31724275</pub-id>
</mixed-citation>
</ref>
<ref id="B143">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Psaraftis</surname>
<given-names>H. N.</given-names>
</name>
</person-group> (<year>1988</year>). &#x201c;<article-title>Dynamic vehicle routing problems</article-title>,&#x201d; in <source>Vehicle routing: methods and studies</source> (<publisher-name>North-Holland</publisher-name>), <fpage>223</fpage>&#x2013;<lpage>248</lpage>.</mixed-citation>
</ref>
<ref id="B144">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rabiner</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Juang</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>1986</year>). <article-title>An introduction to hidden Markov models</article-title>. <source>IEEE ASSP Mag.</source> <volume>3</volume>, <fpage>4</fpage>&#x2013;<lpage>16</lpage>. <pub-id pub-id-type="doi">10.1109/MASSP.1986.1165342</pub-id>
</mixed-citation>
</ref>
<ref id="B145">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Raghu</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>Multiscale guidance of AlphaFold3 with heterogeneous cryo-EM data</article-title>. <source>ArXiv abs/2506.04490</source>. <pub-id pub-id-type="doi">10.48550/arXiv.2506.04490</pub-id>
</mixed-citation>
</ref>
<ref id="B146">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ram&#xed;rez-Aportela</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Maluenda</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Fonseca</surname>
<given-names>Y. C.</given-names>
</name>
<name>
<surname>Conesa</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Marabini</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Heymann</surname>
<given-names>J. B.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>FSC-Q: a CryoEM map-to-atomic model quality validation based on the local Fourier shell correlation</article-title>. <source>Nat. Commun.</source> <volume>12</volume>, <fpage>42</fpage>. <pub-id pub-id-type="doi">10.1038/s41467-020-20295-w</pub-id>
<pub-id pub-id-type="pmid">33397925</pub-id>
</mixed-citation>
</ref>
<ref id="B147">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Reggiano</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Lugmayr</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Farrell</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Marlovits</surname>
<given-names>T. C.</given-names>
</name>
<name>
<surname>DiMaio</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Residue-level error detection in cryoelectron microscopy models</article-title>. <source>Structure</source> <volume>31</volume>, <fpage>860</fpage>&#x2013;<lpage>869.e4</lpage>. <pub-id pub-id-type="doi">10.1016/j.str.2023.05.002</pub-id>
<pub-id pub-id-type="pmid">37253357</pub-id>
</mixed-citation>
</ref>
<ref id="B148">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Riley</surname>
<given-names>B. T.</given-names>
</name>
<name>
<surname>Wankowicz</surname>
<given-names>S. A.</given-names>
</name>
<name>
<surname>de Oliveira</surname>
<given-names>S. H. P.</given-names>
</name>
<name>
<surname>van Zundert</surname>
<given-names>G. C. P.</given-names>
</name>
<name>
<surname>Hogan</surname>
<given-names>D. W.</given-names>
</name>
<name>
<surname>Fraser</surname>
<given-names>J. S.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>qFit 3: protein and ligand multiconformer modeling for X-ray crystallographic and single-particle cryo-EM density maps</article-title>. <source>Protein Sci.</source> <volume>30</volume>, <fpage>270</fpage>&#x2013;<lpage>285</lpage>. <pub-id pub-id-type="doi">10.1002/pro.4001</pub-id>
<pub-id pub-id-type="pmid">33210433</pub-id>
</mixed-citation>
</ref>
<ref id="B149">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rives</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Meier</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Sercu</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Goyal</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>J.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Biological structure and function emerge from scaling unsupervised learning to 250 million protein sequences</article-title>. <source>Proc. Natl. Acad. Sci. U.S.A.</source> <volume>118</volume>, <fpage>e2016239118</fpage>. <pub-id pub-id-type="doi">10.1073/pnas.2016239118</pub-id>
<pub-id pub-id-type="pmid">33876751</pub-id>
</mixed-citation>
</ref>
<ref id="B150">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Ronneberger</surname>
<given-names>O.</given-names>
</name>
</person-group> (<year>2015</year>). &#x201c;<article-title>U-Net: Convolutional networks for biomedical image segmentation</article-title>,&#x201d; in <source>Medical image computing and computer-assisted intervention &#x2013; MICCAI 2015</source>. Editor <person-group person-group-type="editor">
<name>
<surname>Navab</surname>
<given-names>N.</given-names>
</name>
</person-group> (<publisher-name>Springer International Publishing</publisher-name>).</mixed-citation>
</ref>
<ref id="B151">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Rossi</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2006</year>). <source>Handbook of constraint programming</source>. <publisher-name>Elsevier Science Inc</publisher-name>.</mixed-citation>
</ref>
<ref id="B152">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rotkiewicz</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Skolnick</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>Fast procedure for reconstruction of full-atom protein models from reduced representations</article-title>. <source>J. Comput. Chem.</source> <volume>29</volume>, <fpage>1460</fpage>&#x2013;<lpage>1465</lpage>. <pub-id pub-id-type="doi">10.1002/jcc.20906</pub-id>
<pub-id pub-id-type="pmid">18196502</pub-id>
</mixed-citation>
</ref>
<ref id="B153">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rozanov</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Wolfson</surname>
<given-names>H. J.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>AAnchor: CNN guided detection of anchor amino acids in high resolution cryo-EM density maps</article-title>. <source>IEEE Int. Conf. Bioinforma. Biomed. (BIBM)</source>, <fpage>88</fpage>&#x2013;<lpage>91</lpage>. <pub-id pub-id-type="doi">10.1109/bibm.2018.8621288</pub-id>
</mixed-citation>
</ref>
<ref id="B154">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rozanov</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Wolfson</surname>
<given-names>H. J.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>SegmA: Residue Segmentation of cryo-EM density maps</article-title>. <source>IEEE Int. Conf. Bioinforma. Biomed. (BIBM)</source>, <fpage>2191</fpage>&#x2013;<lpage>2196</lpage>. <pub-id pub-id-type="doi">10.1109/bibm58861.2023.10385980</pub-id>
</mixed-citation>
</ref>
<ref id="B155">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sazzed</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Determining protein secondary structures in heterogeneous medium-resolution Cryo-EM images using CryoSSESeg</article-title>. <source>ACS Omega</source> <volume>9</volume>, <fpage>26409</fpage>&#x2013;<lpage>26416</lpage>. <pub-id pub-id-type="doi">10.1021/acsomega.4c02608</pub-id>
<pub-id pub-id-type="pmid">38911779</pub-id>
</mixed-citation>
</ref>
<ref id="B156">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Schlitter</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Engels</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Kr&#xfc;ger</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>1994</year>). <article-title>Targeted molecular dynamics: a new approach for searching pathways of conformational transitions</article-title>. <source>J. Mol. Graph.</source> <volume>12</volume>, <fpage>84</fpage>&#x2013;<lpage>89</lpage>. <pub-id pub-id-type="doi">10.1016/0263-7855(94)80072-3</pub-id>
<pub-id pub-id-type="pmid">7918256</pub-id>
</mixed-citation>
</ref>
<ref id="B157">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Shen</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2018</year>). &#x201c;<article-title>M-Walk: learning to walk over graphs using Monte Carlo tree search</article-title>,&#x201d; in <source>Proceedings of the 32nd international conference on neural information processing systems</source>. <publisher-loc>Red Hook, NY</publisher-loc>: <publisher-name>Curran Associates Inc</publisher-name>.</mixed-citation>
</ref>
<ref id="B158">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shindyalov</surname>
<given-names>I. N.</given-names>
</name>
<name>
<surname>Bourne</surname>
<given-names>P. E.</given-names>
</name>
</person-group> (<year>1998</year>). <article-title>Protein structure alignment by incremental combinatorial extension (CE) of the optimal path</article-title>. <source>Protein Eng.</source> <volume>11</volume>, <fpage>739</fpage>&#x2013;<lpage>747</lpage>. <pub-id pub-id-type="doi">10.1093/protein/11.9.739</pub-id>
<pub-id pub-id-type="pmid">9796821</pub-id>
</mixed-citation>
</ref>
<ref id="B159">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Si</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Ji</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Nasr</surname>
<given-names>K. A.</given-names>
</name>
<name>
<surname>He</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>A machine learning approach for the identification of protein secondary structure elements from Electron cryo-microscopy density maps</article-title>. <source>Biopolymers</source> <volume>97</volume>, <fpage>698</fpage>&#x2013;<lpage>708</lpage>. <pub-id pub-id-type="doi">10.1002/bip.22063</pub-id>
<pub-id pub-id-type="pmid">22696406</pub-id>
</mixed-citation>
</ref>
<ref id="B160">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Si</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Moritz</surname>
<given-names>S. A.</given-names>
</name>
<name>
<surname>Pfab</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Hou</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Cao</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>L.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Deep learning to predict protein backbone structure from high-resolution Cryo-EM density maps</article-title>. <source>Sci. Rep.</source> <volume>10</volume>, <fpage>4282</fpage>. <pub-id pub-id-type="doi">10.1038/s41598-020-60598-y</pub-id>
<pub-id pub-id-type="pmid">32152330</pub-id>
</mixed-citation>
</ref>
<ref id="B161">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Si</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Nakamura</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Tang</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Guan</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Hou</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Firozi</surname>
<given-names>A.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>Artificial intelligence advances for <italic>de novo</italic> molecular structure modeling in cryo-electron microscopy</article-title>. <source>WIREs Comput. Mol. Sci.</source> <volume>12</volume>, <fpage>e1542</fpage>. <pub-id pub-id-type="doi">10.1002/wcms.1542</pub-id>
</mixed-citation>
</ref>
<ref id="B162">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Si</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Nakamura</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Chang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Guan</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Smart <italic>de novo</italic> Macromolecular Structure Modeling from Cryo-EM Maps</article-title>. <source>J. Mol. Biol.</source> <volume>435</volume>, <fpage>167967</fpage>. <pub-id pub-id-type="doi">10.1016/j.jmb.2023.167967</pub-id>
<pub-id pub-id-type="pmid">36681181</pub-id>
</mixed-citation>
</ref>
<ref id="B163">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Skrodzki</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>The k-d tree data structure and a proof for neighborhood computation in expected logarithmic time</article-title>. <source>
<italic>ArXiv</italic> abs/1903.04936</source>. <pub-id pub-id-type="doi">10.48550/arXiv.1903.04936</pub-id>
</mixed-citation>
</ref>
<ref id="B164">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Smith</surname>
<given-names>T. F.</given-names>
</name>
<name>
<surname>Waterman</surname>
<given-names>M. S.</given-names>
</name>
</person-group> (<year>1981</year>). <article-title>Identification of common molecular subsequences</article-title>. <source>J. Mol. Biol.</source> <volume>147</volume>, <fpage>195</fpage>&#x2013;<lpage>197</lpage>. <pub-id pub-id-type="doi">10.1016/0022-2836(81)90087-5</pub-id>
<pub-id pub-id-type="pmid">7265238</pub-id>
</mixed-citation>
</ref>
<ref id="B165">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Song</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>DiMaio</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>R. Y. R.</given-names>
</name>
<name>
<surname>Kim</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Miles</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Brunette</surname>
<given-names>T.</given-names>
</name>
<etal/>
</person-group> (<year>2013</year>). <article-title>High-Resolution comparative modeling with RosettaCM</article-title>. <source>Structure</source> <volume>21</volume>, <fpage>1735</fpage>&#x2013;<lpage>1742</lpage>. <pub-id pub-id-type="doi">10.1016/j.str.2013.08.005</pub-id>
<pub-id pub-id-type="pmid">24035711</pub-id>
</mixed-citation>
</ref>
<ref id="B166">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Stasiewicz</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Mukherjee</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Nithin</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Bujnicki</surname>
<given-names>J. M.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>QRNAS: software tool for refinement of nucleic acid structures</article-title>. <source>BMC Struct. Biol.</source> <volume>19</volume>, <fpage>5</fpage>. <pub-id pub-id-type="doi">10.1186/s12900-019-0103-1</pub-id>
<pub-id pub-id-type="pmid">30898165</pub-id>
</mixed-citation>
</ref>
<ref id="B167">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Storn</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Price</surname>
<given-names>K. V.</given-names>
</name>
</person-group> (<year>1997</year>). <article-title>Differential evolution &#x2013; a simple and efficient heuristic for global optimization over continuous spaces</article-title>. <source>J. Glob. Optim.</source> <volume>11</volume>, <fpage>341</fpage>&#x2013;<lpage>359</lpage>. <pub-id pub-id-type="doi">10.1023/a:1008202821328</pub-id>
</mixed-citation>
</ref>
<ref id="B168">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Su</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Peng</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Amunts</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>Improved model building for cryo-EM maps using local attention and 3D rotary position embedding</article-title>. <source>bioRxiv</source> <volume>2024</volume>, <fpage>2013</fpage>. <pub-id pub-id-type="doi">10.1101/2024.11.13.623164</pub-id>
</mixed-citation>
</ref>
<ref id="B169">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Sutskever</surname>
<given-names>I.</given-names>
</name>
</person-group> (<year>2014</year>). &#x201c;<article-title>Sequence to sequence learning with neural networks</article-title>,&#x201d; in <source>Proceedings of the 28th international conference on neural information processing systems - volume 2</source>. <publisher-name>MIT Press</publisher-name>.</mixed-citation>
</ref>
<ref id="B170">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Terashi</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Kihara</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>
<italic>De novo</italic> main-chain modeling for EM maps using MAINMAST</article-title>. <source>Nat. Commun.</source> <volume>9</volume>, <fpage>1618</fpage>. <pub-id pub-id-type="doi">10.1038/s41467-018-04053-7</pub-id>
<pub-id pub-id-type="pmid">29691408</pub-id>
</mixed-citation>
</ref>
<ref id="B171">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Terashi</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Maddhuri Venkata Subramaniya</surname>
<given-names>S. R.</given-names>
</name>
<name>
<surname>Tesmer</surname>
<given-names>J. J. G.</given-names>
</name>
<name>
<surname>Kihara</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Residue-wise local quality estimation for protein models from cryo-EM maps</article-title>. <source>Nat. Methods</source> <volume>19</volume>, <fpage>1116</fpage>&#x2013;<lpage>1125</lpage>. <pub-id pub-id-type="doi">10.1038/s41592-022-01574-4</pub-id>
<pub-id pub-id-type="pmid">35953671</pub-id>
</mixed-citation>
</ref>
<ref id="B172">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Terashi</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Prasad</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Nakamura</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Kihara</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>DeepMainmast: integrated protocol of protein structure modeling for cryo-EM with deep learning and structure prediction</article-title>. <source>Nat. Methods</source> <volume>21</volume>, <fpage>122</fpage>&#x2013;<lpage>131</lpage>. <pub-id pub-id-type="doi">10.1038/s41592-023-02099-0</pub-id>
<pub-id pub-id-type="pmid">38066344</pub-id>
</mixed-citation>
</ref>
<ref id="B173">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Terashi</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhu</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Kihara</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>DMcloud: macromolecular structure modeling with local structure fitting for medium to low resolution Cryo-EM maps</article-title>. <source>Microsc. Microanal.</source> <volume>31</volume>, <fpage>ozaf048.1083</fpage>&#x2013;<lpage>1083</lpage>. <pub-id pub-id-type="doi">10.1093/mam/ozaf048.1083</pub-id>
</mixed-citation>
</ref>
<ref id="B174">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Terwilliger</surname>
<given-names>T. C.</given-names>
</name>
<name>
<surname>Adams</surname>
<given-names>P. D.</given-names>
</name>
<name>
<surname>Afonine</surname>
<given-names>P. V.</given-names>
</name>
<name>
<surname>Sobolev</surname>
<given-names>O. V.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>A fully automatic method yielding initial models from high-resolution cryo-electron microscopy maps</article-title>. <source>Nat. Methods</source> <volume>15</volume>, <fpage>905</fpage>&#x2013;<lpage>908</lpage>. <pub-id pub-id-type="doi">10.1038/s41592-018-0173-1</pub-id>
<pub-id pub-id-type="pmid">30377346</pub-id>
</mixed-citation>
</ref>
<ref id="B175">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Trabuco</surname>
<given-names>L. G.</given-names>
</name>
<name>
<surname>Villa</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Schreiner</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Harrison</surname>
<given-names>C. B.</given-names>
</name>
<name>
<surname>Schulten</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>Molecular dynamics flexible fitting: a practical guide to combine cryo-electron microscopy and X-ray crystallography</article-title>. <source>Methods</source> <volume>49</volume>, <fpage>174</fpage>&#x2013;<lpage>180</lpage>. <pub-id pub-id-type="doi">10.1016/j.ymeth.2009.04.005</pub-id>
<pub-id pub-id-type="pmid">19398010</pub-id>
</mixed-citation>
</ref>
<ref id="B176">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Vaswani</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2017</year>). &#x201c;<article-title>Attention is all you need</article-title>,&#x201d; in <source>Proceedings of the 31st international conference on neural information processing systems</source>. <publisher-loc>Red Hook, NY</publisher-loc>: <publisher-name>Curran Associates Inc</publisher-name>.</mixed-citation>
</ref>
<ref id="B177">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Veli&#x10d;kovi&#x107;</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Everything is connected: graph neural networks</article-title>. <source>Curr. Opin. Struct. Biol.</source> <volume>79</volume>, <fpage>102538</fpage>. <pub-id pub-id-type="doi">10.1016/j.sbi.2023.102538</pub-id>
<pub-id pub-id-type="pmid">36764042</pub-id>
</mixed-citation>
</ref>
<ref id="B178">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Moore</surname>
<given-names>P. B.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>On the interpretation of electron microscopic maps of biological macromolecules</article-title>. <source>Protein Sci.</source> <volume>26</volume>, <fpage>122</fpage>&#x2013;<lpage>129</lpage>. <pub-id pub-id-type="doi">10.1002/pro.3060</pub-id>
<pub-id pub-id-type="pmid">27706888</pub-id>
</mixed-citation>
</ref>
<ref id="B179">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Alnabati</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Aderinwale</surname>
<given-names>T. W.</given-names>
</name>
<name>
<surname>Maddhuri Venkata Subramaniya</surname>
<given-names>S. R.</given-names>
</name>
<name>
<surname>Terashi</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Kihara</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Detecting protein and DNA/RNA structures in cryo-EM maps of intermediate resolution using deep learning</article-title>. <source>Nat. Commun.</source> <volume>12</volume>, <fpage>2302</fpage>. <pub-id pub-id-type="doi">10.1038/s41467-021-22577-3</pub-id>
<pub-id pub-id-type="pmid">33863902</pub-id>
</mixed-citation>
</ref>
<ref id="B180">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Terashi</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Kihara</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>CryoREAD: <italic>de novo</italic> structure modeling for nucleic acids in cryo-EM maps using deep learning</article-title>. <source>Nat. Methods</source> <volume>20</volume>, <fpage>1739</fpage>&#x2013;<lpage>1747</lpage>. <pub-id pub-id-type="doi">10.1038/s41592-023-02032-5</pub-id>
<pub-id pub-id-type="pmid">37783885</pub-id>
</mixed-citation>
</ref>
<ref id="B181">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Zhu</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Terashi</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Taluja</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Kihara</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>DiffModeler: large macromolecular structure modeling for cryo-EM maps using a diffusion model</article-title>. <source>Nat. Methods</source> <volume>21</volume>, <fpage>2307</fpage>&#x2013;<lpage>2317</lpage>. <pub-id pub-id-type="doi">10.1038/s41592-024-02479-0</pub-id>
<pub-id pub-id-type="pmid">39433880</pub-id>
</mixed-citation>
</ref>
<ref id="B182">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Tan</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Gao</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>S. Z.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>End-to-end cryo-EM complex structure determination with high accuracy and ultra-fast speed</article-title>. <source>Nat. Mach. Intell.</source> <volume>7</volume>, <fpage>1091</fpage>&#x2013;<lpage>1103</lpage>. <pub-id pub-id-type="doi">10.1038/s42256-025-01056-0</pub-id>
</mixed-citation>
</ref>
<ref id="B183">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wankowicz</surname>
<given-names>S. A.</given-names>
</name>
<name>
<surname>Ravikumar</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Sharma</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Riley</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Raju</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Hogan</surname>
<given-names>D. W.</given-names>
</name>
<etal/>
</person-group> (<year>2024</year>). <article-title>Automated multiconformer model building for X-ray crystallography and cryo-EM</article-title>. <source>eLife</source> <volume>12</volume>, <fpage>RP90606</fpage>. <pub-id pub-id-type="doi">10.7554/eLife.90606</pub-id>
<pub-id pub-id-type="pmid">38904665</pub-id>
</mixed-citation>
</ref>
<ref id="B184">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wen</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>He</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>S. Y.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Topology-independent and global protein structure alignment through an FFT-based algorithm</article-title>. <source>Bioinformatics</source> <volume>36</volume>, <fpage>478</fpage>&#x2013;<lpage>486</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btz609</pub-id>
<pub-id pub-id-type="pmid">31384919</pub-id>
</mixed-citation>
</ref>
<ref id="B185">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Wohlwend</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2025</year>). <source>Boltz-1 Democratizing Biomolecular Interaction Modeling</source>. <publisher-loc>Cold Spring Harbor, NY</publisher-loc>: <publisher-name>bioRxiv</publisher-name>, <fpage>2024.11.19.624167</fpage>. <pub-id pub-id-type="doi">10.1101/2024.11.19.624167</pub-id>
</mixed-citation>
</ref>
<ref id="B186">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wriggers</surname>
<given-names>W.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>Using Situs for the integration of multi-resolution structures</article-title>. <source>Biophys. Rev.</source> <volume>2</volume>, <fpage>21</fpage>&#x2013;<lpage>27</lpage>. <pub-id pub-id-type="doi">10.1007/s12551-009-0026-3</pub-id>
<pub-id pub-id-type="pmid">20174447</pub-id>
</mixed-citation>
</ref>
<ref id="B187">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xiang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Honig</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2001</year>). <article-title>Extending the accuracy limits of prediction for side-chain conformations</article-title>. <source>J. Mol. Biol.</source> <volume>311</volume>, <fpage>421</fpage>&#x2013;<lpage>430</lpage>. <pub-id pub-id-type="doi">10.1006/jmbi.2001.4865</pub-id>
<pub-id pub-id-type="pmid">11478870</pub-id>
</mixed-citation>
</ref>
<ref id="B188">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xu</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Shi</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Q. C.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>A2-Net: molecular structure estimation from Cryo-EM density volumes</article-title>. <source>Proc. AAAI Conf. Artif. Intell.</source> <volume>33</volume>, <fpage>1230</fpage>&#x2013;<lpage>1237</lpage>. <pub-id pub-id-type="doi">10.1609/aaai.v33i01.33011230</pub-id>
</mixed-citation>
</ref>
<ref id="B189">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Yan</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Roy</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Poisson</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>The I-TASSER Suite: protein structure and function prediction</article-title>. <source>Nat. Methods</source> <volume>12</volume>, <fpage>7</fpage>&#x2013;<lpage>8</lpage>. <pub-id pub-id-type="doi">10.1038/nmeth.3213</pub-id>
<pub-id pub-id-type="pmid">25549265</pub-id>
</mixed-citation>
</ref>
<ref id="B190">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zardecki</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Dutta</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Goodsell</surname>
<given-names>D. S.</given-names>
</name>
<name>
<surname>Voigt</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Burley</surname>
<given-names>S. K.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>RCSB protein data bank: a resource for chemical, biochemical, and structural explorations of large and small biomolecules</article-title>. <source>J. Chem. Educ.</source> <volume>93</volume>, <fpage>569</fpage>&#x2013;<lpage>575</lpage>. <pub-id pub-id-type="doi">10.1021/acs.jchemed.5b00404</pub-id>
</mixed-citation>
</ref>
<ref id="B191">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Skolnick</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2004</year>). <article-title>Scoring function for automated assessment of protein structure template quality</article-title>. <source>Proteins Struct. Funct. Bioinforma.</source> <volume>57</volume>, <fpage>702</fpage>&#x2013;<lpage>710</lpage>. <pub-id pub-id-type="doi">10.1002/prot.20264</pub-id>
<pub-id pub-id-type="pmid">15476259</pub-id>
</mixed-citation>
</ref>
<ref id="B192">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Liang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>Atomic-Level protein structure refinement using fragment-guided molecular dynamics conformation sampling</article-title>. <source>Structure</source> <volume>19</volume>, <fpage>1784</fpage>&#x2013;<lpage>1795</lpage>. <pub-id pub-id-type="doi">10.1016/j.str.2011.09.022</pub-id>
<pub-id pub-id-type="pmid">22153501</pub-id>
</mixed-citation>
</ref>
<ref id="B193">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Freddolino</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>CR-I-TASSER: assemble protein structures from cryo-EM density maps using deep convolutional neural networks</article-title>. <source>Nat. Methods</source> <volume>19</volume>, <fpage>195</fpage>&#x2013;<lpage>204</lpage>. <pub-id pub-id-type="doi">10.1038/s41592-021-01389-9</pub-id>
<pub-id pub-id-type="pmid">35132244</pub-id>
</mixed-citation>
</ref>
<ref id="B194">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Liang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Cao</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Tang</surname>
<given-names>H.</given-names>
</name>
<etal/>
</person-group> (<year>2023</year>). <article-title>Practical blind image denoising <italic>via</italic> Swin-Conv-UNet and data synthesis</article-title>. <source>Mach. Intell. Res.</source> <volume>20</volume>, <fpage>822</fpage>&#x2013;<lpage>836</lpage>. <pub-id pub-id-type="doi">10.1007/s11633-023-1466-0</pub-id>
</mixed-citation>
</ref>
<ref id="B195">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Cai</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Zheng</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Freddolino</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>G.</given-names>
</name>
<etal/>
</person-group> (<year>2024</year>). <article-title>DEMO-EM2: assembling protein complex structures from cryo-EM maps through intertwined chain and domain fitting</article-title>. <source>Briefings Bioinforma.</source> <volume>25</volume>, <fpage>bbae113</fpage>. <pub-id pub-id-type="doi">10.1093/bib/bbae113</pub-id>
<pub-id pub-id-type="pmid">38517699</pub-id>
</mixed-citation>
</ref>
<ref id="B196">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Condon</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Dao Duc</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2025a</year>). <article-title>A comprehensive survey and benchmark of deep learning-based methods for atomic model building from cryo-electron microscopy density maps</article-title>. <source>Briefings Bioinforma.</source> <volume>26</volume>, <fpage>bbaf322</fpage>. <pub-id pub-id-type="doi">10.1093/bib/bbaf322</pub-id>
<pub-id pub-id-type="pmid">40645653</pub-id>
</mixed-citation>
</ref>
<ref id="B197">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Peng</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>X.</given-names>
</name>
</person-group> (<year>2025b</year>). <article-title>DEMO-EMol: modeling protein-nucleic acid complex structures from cryo-EM maps by coupling chain assembly with map segmentation</article-title>. <source>Nucleic Acids Res.</source> <volume>53</volume>, <fpage>W228</fpage>&#x2013;<lpage>W237</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkaf416</pub-id>
<pub-id pub-id-type="pmid">40366028</pub-id>
</mixed-citation>
</ref>
<ref id="B198">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zheng</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Wuyun</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Pearce</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>LOMETS2: improved meta-threading server for fold-recognition and structure-based function annotation for distant-homology proteins</article-title>. <source>Nucleic Acids Res.</source> <volume>47</volume>, <fpage>W429</fpage>&#x2013;<lpage>W436</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkz384</pub-id>
<pub-id pub-id-type="pmid">31081035</pub-id>
</mixed-citation>
</ref>
<ref id="B199">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zheng</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Wuyun</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Pearce</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>FUpred: detecting protein domains through deep-learning-based contact map prediction</article-title>. <source>Bioinformatics</source> <volume>36</volume>, <fpage>3749</fpage>&#x2013;<lpage>3757</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btaa217</pub-id>
<pub-id pub-id-type="pmid">32227201</pub-id>
</mixed-citation>
</ref>
<ref id="B200">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zheng</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Wuyun</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Peng</surname>
<given-names>C.</given-names>
</name>
<etal/>
</person-group> (<year>2025</year>). <article-title>Deep-learning-based single-domain and multidomain protein structure prediction with D-I-TASSER</article-title>. <source>Nat. Biotechnol.</source> <pub-id pub-id-type="doi">10.1038/s41587-025-02654-4</pub-id>
<pub-id pub-id-type="pmid">40410405</pub-id>
</mixed-citation>
</ref>
<ref id="B201">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhou</surname>
<given-names>X. G.</given-names>
</name>
<name>
<surname>Peng</surname>
<given-names>C. X.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>G. J.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Underestimation-Assisted global-local cooperative differential evolution and the application to protein structure prediction</article-title>. <source>IEEE Trans. Evol. Comput.</source> <volume>24</volume>, <fpage>536</fpage>&#x2013;<lpage>550</lpage>. <pub-id pub-id-type="doi">10.1109/tevc.2019.2938531</pub-id>
<pub-id pub-id-type="pmid">33603321</pub-id>
</mixed-citation>
</ref>
<ref id="B202">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhou</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Zheng</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Progressive assembly of multi-domain protein structures from cryo-EM density maps</article-title>. <source>Nat. Comput. Sci.</source> <volume>2</volume>, <fpage>265</fpage>&#x2013;<lpage>275</lpage>. <pub-id pub-id-type="doi">10.1038/s43588-022-00232-1</pub-id>
<pub-id pub-id-type="pmid">35844960</pub-id>
</mixed-citation>
</ref>
<ref id="B203">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhu</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Terashi</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Farheen</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Nakamura</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Kihara</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>AI-based quality assessment methods for protein structure models from cryo-EM</article-title>. <source>Curr. Res. Struct. Biol.</source> <volume>9</volume>, <fpage>100164</fpage>. <pub-id pub-id-type="doi">10.1016/j.crstbi.2025.100164</pub-id>
<pub-id pub-id-type="pmid">39996138</pub-id>
</mixed-citation>
</ref>
</ref-list>
</back>
</article>