<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.3" xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Bioinform.</journal-id>
<journal-title>Frontiers in Bioinformatics</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Bioinform.</abbrev-journal-title>
<issn pub-type="epub">2673-7647</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1268899</article-id>
<article-id pub-id-type="doi">10.3389/fbinf.2023.1268899</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Bioinformatics</subject>
<subj-group>
<subject>Technology and Code</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Blob-B-Gone: a lightweight framework for removing blob artifacts from 2D/3D MINFLUX single-particle tracking data</article-title>
<alt-title alt-title-type="left-running-head">Vogler et al.</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fbinf.2023.1268899">10.3389/fbinf.2023.1268899</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Vogler</surname>
<given-names>Bela T. L.</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<xref ref-type="fn" rid="fn1">
<sup>&#x2020;</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2392915/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Reina</surname>
<given-names>Francesco</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="fn" rid="fn1">
<sup>&#x2020;</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Eggeling</surname>
<given-names>Christian</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>Leibniz Institute of Photonic Technology e.V., Member of the Leibniz Centre for Photonics in Infection Research (LPI)</institution>, <addr-line>Jena</addr-line>, <country>Germany</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Institute of Applied Optics and Biophysics, Faculty of Physics and Astronomy, Friedrich Schiller University Jena</institution>, <addr-line>Jena</addr-line>, <country>Germany</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>Jena Center for Soft Matter, Friedrich Schiller University Jena</institution>, <addr-line>Jena</addr-line>, <country>Germany</country>
</aff>
<aff id="aff4">
<sup>4</sup>
<institution>Abbe Center of Photonics, Friedrich Schiller University Jena</institution>, <addr-line>Jena</addr-line>, <country>Germany</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/240852/overview">Thomas Pengo</ext-link>, University of Minnesota Twin Cities, United States</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/122116/overview">Paolo Bianchini</ext-link>, Italian Institute of Technology (IIT), Italy</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2423887/overview">Jacqueline Leung</ext-link>, National Institute of Allergy and Infectious Diseases (NIH), United States</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Bela T. L. Vogler, <email>bela.vogler@uni-jena.de</email>
</corresp>
<fn fn-type="other" id="fn1">
<label>
<sup>&#x2020;</sup>
</label>
<p>ORCID: Bela T. L. Vogler, <ext-link ext-link-type="uri" xlink:href="http://orcid.org/0000-0002-5598-5738">orcid.org/0000-0002-5598-5738</ext-link>; Francesco Reina, <ext-link ext-link-type="uri" xlink:href="http://orcid.org/0000-0001-6752-9089">orcid.org/0000-0001-6752-9089</ext-link>
</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>22</day>
<month>11</month>
<year>2023</year>
</pub-date>
<pub-date pub-type="collection">
<year>2023</year>
</pub-date>
<volume>3</volume>
<elocation-id>1268899</elocation-id>
<history>
<date date-type="received">
<day>28</day>
<month>07</month>
<year>2023</year>
</date>
<date date-type="accepted">
<day>07</day>
<month>11</month>
<year>2023</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2023 Vogler, Reina and Eggeling.</copyright-statement>
<copyright-year>2023</copyright-year>
<copyright-holder>Vogler, Reina and Eggeling</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>In this study, we introduce Blob-B-Gone, a lightweight framework to computationally differentiate and eventually remove dense isotropic localization accumulations (blobs) caused by artifactually immobilized particles in MINFLUX single-particle tracking (SPT) measurements. This approach uses purely geometrical features extracted from MINFLUX-detected single-particle trajectories, which are treated as point clouds of localizations. Employing <italic>k-means&#x2b;&#x2b;</italic> clustering, we perform single-shot separation of the feature space to rapidly extract blobs from the dataset without the need for training. We automatically annotate the resulting sub-sets and, finally, evaluate our results by means of principal component analysis (PCA), highlighting a clear separation in the feature space. We demonstrate our approach using two- and three-dimensional simulations of freely diffusing particles and blob artifacts based on parameters extracted from hand-labeled MINFLUX tracking data of fixed 23-nm bead samples and two-dimensional diffusing quantum dots on model lipid membranes. Applying Blob-B-Gone, we achieve a clear distinction between blob-like and other trajectories, represented in F1 scores of 0.998 (2D) and 1.0 (3D) as well as 0.995 (balanced) and 0.994 (imbalanced). This framework can be straightforwardly applied to similar situations, where discerning between blob and elongated time traces is desirable. Given a number of localizations sufficient to express geometric features, the method can operate on any generic point clouds presented to it, regardless of its origin.</p>
</abstract>
<kwd-group>
<kwd>artifact removal</kwd>
<kwd>MINFLUX</kwd>
<kwd>single-particle tracking</kwd>
<kwd>clustering</kwd>
<kwd>annotation</kwd>
<kwd>point clouds</kwd>
<kwd>geometry</kwd>
</kwd-group>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Computational BioImaging</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="s1">
<title>1 Introduction</title>
<p>When performing single-particle tracking (SPT) experiments using optical microscopy, the vast majority of methodologies require some form of labeling to enable single-molecule detection, be it fluorescent tags or some form of highly scattering tag for scattering-based detection (<xref ref-type="bibr" rid="B10">Manzo and Garcia-Parajo, 2015</xref>). In a concrete experimental setting, the labeling procedure inevitably leads to several side effects, such as unbound tags or cross-linked reporters being present as immobile objects in the field of view. These artifacts may, in turn, lead to large accumulations of localizations in small regions, hereinafter defined as blobs, which may disturb the final detection. This is especially unwanted in cases where the diffusing species of interest undergoes transient confinements or trapping, rendering the detection of these behaviors especially complex.</p>
<p>Recently developed MINFLUX microscopy (<xref ref-type="bibr" rid="B2">Balzarotti et al., 2017</xref>; <xref ref-type="bibr" rid="B5">Gwosch et al., 2020</xref>; <xref ref-type="bibr" rid="B16">Schmidt et al., 2021</xref>) has shown great potential for high-throughput single-particle tracking (SPT) in two and three dimensions of single fluorescently tagged molecules. Due to the nature of its implementation, especially concerning its commercial version, a MINFLUX microscope detects all fluorescent reporters in a pre-defined region of interest (ROI), producing an array of coordinates and corresponding time stamps. Thus, immobile markers have a higher chance of being tracked in MINFLUX multiple times compared to the freely moving particles, which diffuse in and out of the ROI. In the case of SPT, it is preferable to use reporters that may be tracked for extended periods of time, such as metallic core quantum dots or, especially, photostable fluorescent dyes. Together with the core concept of MINFLUX of producing localizations with comparatively few photons, the presence of immobile particle artifacts may be accentuated.</p>
<p>Due to the single-digit nanometer resolution of MINFLUX microscopy, the size of markers is a non-negligible factor in MINFLUX data. In general, we observe that immobile particles appear as either circular or spherical isotropically distributed point clouds with their radius proportional to the size of the marker. In the following work, we will refer to these spherical artifacts as blobs. These blobs need to be removed from the dataset before any analysis of particle motion can take place, as they can drastically influence the analysis of specific cases of particle diffusion, such as those characterized by transient confinements or &#x201c;hopping&#x201d; behavior (<xref ref-type="bibr" rid="B9">Kusumi et al., 2005</xref>; <xref ref-type="bibr" rid="B6">Honigmann et al., 2013</xref>). Classically, sorting is done by hand or by means of conventional statistical diffusion analysis and outlier reduction. This, however, becomes significantly more costly in computation time and power when applied to large quantities of long trajectories produced by high-throughput techniques such as MINFLUX. Recent AI-enabled tools for classification and clustering of particle trajectories are able to deliver promising results but require heavy computational power, large databases for training, and are specific in their application (<xref ref-type="bibr" rid="B13">Mu&#xf1;oz-Gil et al., 2020</xref>).</p>
<p>We propose a lightweight solution to this issue that is based on point cloud geometry and is able to rapidly identify and sort out all blob-like particle trajectories in two- and three-dimensional SPT data. Though this application was designed with MINFLUX in mind, it can, in essence, be transferred and applied to any point cloud-based technique, given that one sub-population of data exhibits blob-like behavior.</p>
</sec>
<sec sec-type="methods" id="s2">
<title>2 Methodology</title>
<p>The following section will introduce and describe the methods used to remove blobs from point cloud datasets. First, we investigate deliberately created MINFLUX SPT blobs to understand their geometry. Following that, we describe how we simulated and diversified diffusion and blob datasets in two and three dimensions to attain more rigid test samples. We use untrained single-shot <italic>k-means clustering</italic> (<xref ref-type="bibr" rid="B4">David Arthur, 2007</xref>) based on five designed classical geometrical features, which are calculated for each set independently. Lastly, we give a brief explanation of why we chose them and how they are computed.</p>
<sec id="s2-1">
<title>2.1 MINFLUX blob artifacts</title>
<p>A single immobile bead sample was used to collect examples of blob-like structures using MINFLUX microscopy. This is a GATTA-Beads &#x201c;R&#x201d; sample (<italic>GATTAquant</italic> GmbH), purchased as a pre-mounted standard microscope slide from the producer. The beads have a nominal diameter of 23&#xa0;nm (verified by STED microscopy) and are filled with ATTO 647N dye.</p>
<p>The beads were imaged using a commercial MINFLUX setup (<italic>abberior</italic> GmbH), which is based on an iterative localization approach (<xref ref-type="bibr" rid="B16">Schmidt et al., 2021</xref>). The setup used for the measurements reported herein is comprised of a confocal and MINFLUX illumination/detection unit attached to an Olympus IX83 microscope body. The illumination and detection objective is a &#xd7;100 oil immersion objective lens (UPL SAPO100XO/1.4, Olympus). For our MINFLUX measurements, we used a 642&#xa0;nm excitation line, and the microscope detects the fluorescence with two avalanche photodiodes with detection ranges of 650&#x2013;685&#xa0;nm and 685&#x2013;760&#xa0;nm to estimate the localization of the fluorescence emitters in two dimensions (for the experiments contained herein). Photon detection takes place in a confocal microscopy fashion through a pinhole size corresponding to 1.0 AU. The localizations are derived from the sum of the photons from the two detectors. Hardware control is provided using a version of <italic>Imspector</italic> software (<italic>abberior</italic> GmbH) that supports MINFLUX detection.</p>
<p>The <italic>abberior</italic> MINFLUX localizes particles with an iterative scanning approach with a pre-defined sequence of iterations (<xref ref-type="bibr" rid="B5">Gwosch et al., 2020</xref>; <xref ref-type="bibr" rid="B16">Schmidt et al., 2021</xref>). In brief, the microscope initially detects a fluorescence signal in an area by scanning a donut-shaped beam in a certain number of positions in an orbit with a radius L in various positions in a small field of view. Subsequently, the microscope &#x201c;closes in&#x201d; on the fluorescence signal by reducing the radius L and refining the position estimation. Each successive iteration also requires a different number of photons to produce localizations and can have a different dwell time and excitation laser power, as reported in the parameter table. When the microscope reaches the last iteration of the sequence, the system is locked on the latest detected particle and continues localizing it with the same parameters as long as photons are detected or the particle is detected within the scanning orbit [through the center frequency ratio (CFR) metric (<xref ref-type="bibr" rid="B16">Schmidt et al., 2021</xref>)].</p>
<p>In the case of a diffusing fluorescent molecule, the position update serves as a direct tracking method, delivering particle trajectories out of the microscope without additional particle tracking steps, as is usually required for SPT with other microscopy techniques.</p>
<p>Essential parameters for the scanning sequences used, which are optimized for SPT, are listed in <xref ref-type="table" rid="T1">Tables 1</xref>&#x2013;<xref ref-type="table" rid="T3">3</xref> for immobile two- and three-dimensional as well as two-dimensional mobile tracking, respectively. We need to highlight that another variable that changes between pattern iterations is the excitation laser power. We express this by listing a laser power multiplier amongst the parameters, which refers to a reference excitation power of 1.78&#xa0;&#x3bc;W at the sample plane.</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>MINFLUX scanning parameters for 2D tracking of immobile particles.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">2D tracking</th>
<th align="left">First iteration</th>
<th align="left">Second iteration</th>
<th align="left">Third iteration</th>
<th align="left">Fourth iteration</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">L (nm)</td>
<td align="left">284</td>
<td align="left">302</td>
<td align="left">150</td>
<td align="left">100</td>
</tr>
<tr>
<td align="left">Pattern shape</td>
<td align="left">Hexagon</td>
<td align="left">Hexagon</td>
<td align="left">Hexagon</td>
<td align="left">Hexagon</td>
</tr>
<tr>
<td align="left">Collected photons (counts)</td>
<td align="left">40</td>
<td align="left">20</td>
<td align="left">20</td>
<td align="left">20</td>
</tr>
<tr>
<td align="left">Laser power multiplier (times)</td>
<td align="left">1</td>
<td align="left">1</td>
<td align="left">2</td>
<td align="left">3</td>
</tr>
<tr>
<td align="left">Pattern dwell time (&#xb5;s)</td>
<td align="left">100</td>
<td align="left">100</td>
<td align="left">100</td>
<td align="left">100</td>
</tr>
<tr>
<td align="left">Pattern repeat (times)</td>
<td align="left">1</td>
<td align="left">1</td>
<td align="left">1</td>
<td align="left">1</td>
</tr>
<tr>
<td align="left">Center frequency ratio (CFR)</td>
<td align="left">&#x2212;1.0</td>
<td align="left">&#x2212;1.0</td>
<td align="left">0.8</td>
<td align="left">&#x2212;1</td>
</tr>
<tr>
<td align="left">Background threshold (kHz)</td>
<td align="left">15</td>
<td align="left">30</td>
<td align="left">30</td>
<td align="left">50</td>
</tr>
</tbody>
</table>
</table-wrap>
<table-wrap id="T2" position="float">
<label>TABLE 2</label>
<caption>
<p>MINFLUX scanning parameters for 3D tracking of immobile particles.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">3D tracking</th>
<th align="left">First iteration</th>
<th align="left">Second iteration</th>
<th align="left">Third iteration</th>
<th align="left">Fourth iteration</th>
<th align="left">Fifth iteration</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">L (nm)</td>
<td align="left">285</td>
<td align="left">1,440</td>
<td align="left">285</td>
<td align="left">150</td>
<td align="left">100</td>
</tr>
<tr>
<td align="left">Pattern shape</td>
<td align="left">Hexagonal (lateral localization)</td>
<td align="left">Z line shape (axial localization)</td>
<td align="left">Octahedron</td>
<td align="left">Octahedron</td>
<td align="left">Octahedron</td>
</tr>
<tr>
<td align="left">Collected photons (counts)</td>
<td align="left">40</td>
<td align="left">300</td>
<td align="left">40</td>
<td align="left">30</td>
<td align="left">30</td>
</tr>
<tr>
<td align="left">Laser power multiplier (times)</td>
<td align="left">1</td>
<td align="left">1</td>
<td align="left">1</td>
<td align="left">2</td>
<td align="left">3</td>
</tr>
<tr>
<td align="left">Pattern dwell time (&#xb5;s)</td>
<td align="left">500</td>
<td align="left">2000</td>
<td align="left">200</td>
<td align="left">200</td>
<td align="left">200</td>
</tr>
<tr>
<td align="left">Pattern repeat (times)</td>
<td align="left">1</td>
<td align="left">1</td>
<td align="left">1</td>
<td align="left">1</td>
<td align="left">1</td>
</tr>
<tr>
<td align="left">Center frequency ratio (CFR)</td>
<td align="left">&#x2212;1</td>
<td align="left">&#x2212;1</td>
<td align="left">&#x2212;1</td>
<td align="left">0.9</td>
<td align="left">&#x2212;1</td>
</tr>
<tr>
<td align="left">Background threshold (kHz)</td>
<td align="left">30</td>
<td align="left">30</td>
<td align="left">35</td>
<td align="left">40</td>
<td align="left">60</td>
</tr>
</tbody>
</table>
</table-wrap>
<table-wrap id="T3" position="float">
<label>TABLE 3</label>
<caption>
<p>MINFLUX scanning parameters for 2D tracking of mobile particles.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">2D tracking</th>
<th align="left">First iteration</th>
<th align="left">Second iteration</th>
<th align="left">Third iteration</th>
<th align="left">Fourth iteration</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">L (nm)</td>
<td align="left">282</td>
<td align="left">300</td>
<td align="left">150</td>
<td align="left">100</td>
</tr>
<tr>
<td align="left">Pattern shape</td>
<td align="left">Hexagon</td>
<td align="left">Hexagon</td>
<td align="left">Hexagon</td>
<td align="left">Hexagon</td>
</tr>
<tr>
<td align="left">Collected photons (counts)</td>
<td align="left">40</td>
<td align="left">20</td>
<td align="left">20</td>
<td align="left">20</td>
</tr>
<tr>
<td align="left">Laser power multiplier (times)</td>
<td align="left">6.8</td>
<td align="left">6.8</td>
<td align="left">13.44</td>
<td align="left">19.86</td>
</tr>
<tr>
<td align="left">Pattern dwell time (&#xb5;s)</td>
<td align="left">100</td>
<td align="left">100</td>
<td align="left">100</td>
<td align="left">100</td>
</tr>
<tr>
<td align="left">Pattern repeat (times)</td>
<td align="left">1</td>
<td align="left">1</td>
<td align="left">1</td>
<td align="left">1</td>
</tr>
<tr>
<td align="left">Center frequency ratio (CFR)</td>
<td align="left">&#x2212;1.0</td>
<td align="left">&#x2212;1.0</td>
<td align="left">0.8</td>
<td align="left">&#x2212;1</td>
</tr>
<tr>
<td align="left">Background threshold (kHz)</td>
<td align="left">15</td>
<td align="left">30</td>
<td align="left">30</td>
<td align="left">50</td>
</tr>
</tbody>
</table>
</table-wrap>
<sec id="s2-1-1">
<title>2.1.1 Reconstructing blobs</title>
<p>Using MINFLUX tracking data of immobile fluorescent beads as a base model, we attempt to empirically recreate blobs <italic>in silico</italic> as the backbone for later simulations. Initially, we inspect the spread of localizations of an arbitrarily chosen MINFLUX blob using a two-dimensional histogram (<xref ref-type="fig" rid="F1">Figure 1A</xref> central panel). We further show the respective distributions along X and Y, which reveal a clear center-symmetrical Gaussian shape (<xref ref-type="fig" rid="F1">Figure 1A</xref> side panels).</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>Two-dimensional histogram of 239,041 localizations <bold>(A)</bold> of an immobile <italic>GATTAquant</italic> 23-nm bead, acquired using MINFLUX single-particle tracking over 36&#xa0;s. Three white circles are shown additionally for reference. The X-axis profile is shown as a 1D histogram in the upper panel, and the Y-axis profile is in the right-side panel. A common full red line in both graphs marks the cumulative histogram, i.e., the total count of localizations until the respective coordinate. The dotted red line highlights 50% of all localizations. The extracted standard deviations in <italic>X</italic> and <italic>Y</italic> directions for the entire dataset are shown in <bold>(B)</bold>. Crosses mark the mean value of X/Y standard deviation. A gray box highlights the dataset used in <bold>(A)</bold>.</p>
</caption>
<graphic xlink:href="fbinf-03-1268899-g001.tif"/>
</fig>
<p>To understand the expanse of the localization dispersion, we extract standard deviations along all axes for every dataset taken and show them grouped by sample in <xref ref-type="fig" rid="F1">Figure 1B</xref>. It is immediately apparent that they are close but not identical, indicating a slightly elliptical shape, which we have to consider during simulation.</p>
<p>We further note that approximately 50% of localizations are found within a radius of 6&#xa0;nm, while the overwhelming majority are located within the bounds of 12&#xa0;nm from the center. Following these observations, we generate synthetic blobs as multivariate two- or three-dimensional normal distributions (<xref ref-type="sec" rid="s10">Supplementary Figure S1</xref>).</p>
<p>While we can arbitrarily choose mean values to spawn the blobs across a sandbox, we need to give a covariance matrix which describes the distributions&#x2019; extent in space. As we observe the blobs to be concentric and isotropic, we will simplify the covariance matrix across all simulations to be diagonalized, meaning that all axes are independent of one another. The entries of said diagonal matrices are based on the standard deviations extracted from observation (<xref ref-type="sec" rid="s10">Supplementary Figure S2</xref>).</p>
</sec>
<sec id="s2-1-2">
<title>2.1.2 MINFLUX SPT data</title>
<p>After verifying the effectiveness of the proposed algorithm when applied to ground truth simulated data, we additionally acquired a set of MINFLUX SPT data to serve as our reference set when applying the artifact removal to real-world conditions.</p>
<p>To this end, we prepared giant unilamellar vesicles (GUVs) through electroformation using a solution of POPC:Chol 1:1 with DSPE-PEG20k-Biotin <inline-formula id="inf1">
<mml:math id="m1">
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mn>0.01</mml:mn>
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>l</mml:mi>
<mml:mo>%</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
</inline-formula> and DOPE Atto 488 <inline-formula id="inf2">
<mml:math id="m2">
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mn>0.01</mml:mn>
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>l</mml:mi>
<mml:mo>%</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
</inline-formula>, similar to the process of <xref ref-type="bibr" rid="B12">M&#xe9;l&#xe9;ard et al. (2009</xref>). We then plasma-cleaned a <inline-formula id="inf3">
<mml:math id="m3">
<mml:mrow>
<mml:mrow>
<mml:mn>25</mml:mn>
<mml:mo>&#x2212;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> coverslip to rupture the GUVs and create GUV patches. We used <inline-formula id="inf4">
<mml:math id="m4">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> of phosphate-buffered saline (PBS, <inline-formula id="inf5">
<mml:math id="m5">
<mml:mrow>
<mml:mn>137</mml:mn>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mi>M</mml:mi>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> NaCl, <inline-formula id="inf6">
<mml:math id="m6">
<mml:mrow>
<mml:mn>10</mml:mn>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mi>M</mml:mi>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> phosphate, and <inline-formula id="inf7">
<mml:math id="m7">
<mml:mrow>
<mml:mn>2.7</mml:mn>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mi>M</mml:mi>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> KCl) to keep the supported lipid bilayer (SLB) hydrated. Finally, we labeled the biotinylated lipid in the SLB with <inline-formula id="inf8">
<mml:math id="m8">
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:mrow>
<mml:mi>&#x3bc;</mml:mi>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> of Qdot 655 streptavidin conjugate (<inline-formula id="inf9">
<mml:math id="m9">
<mml:mrow>
<mml:mn>10</mml:mn>
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mi>M</mml:mi>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> concentration, Invitrogen by Thermo Fisher Scientific) to perform SPT.</p>
<p>Multiple two-dimensional datasets have been taken on the same sample at different spots to increase variability and statistical robustness. In the end, all sub-sets were pooled together, and any track or blob with fewer than 500 localizations was discarded to eradicate hardly analyzable trace fragments. Thus, both the artifact and free set range from &#x223c;500 to &#x223c;5,000 localizations.</p>
<p>Subsequently, all trajectories have been hand-labeled to create a reference dataset to benchmark the method. The set includes 107 blobs and 528 mobile tracks, which indicates on average the presence of approximately 20% artifacts per dataset.</p>
</sec>
</sec>
<sec id="s2-2">
<title>2.2 Simulation</title>
<sec id="s2-2-1">
<title>2.2.1 Simulated ground truth datasets</title>
<p>As a ground truth dataset to validate our blob identification algorithm, we generated simulated datasets of blobs and diffusing particles. To represent particle diffusion, we simulated 250 traces of a variable number of localizations (between 400 and 600), with a time interval between localizations of <inline-formula id="inf10">
<mml:math id="m10">
<mml:mrow>
<mml:mo>&#x394;</mml:mo>
<mml:mi>t</mml:mi>
<mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>500</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3bc;</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>. To introduce a degree of variability in the dataset, the diffusion coefficient of each simulated trajectory was randomly drawn from a uniform distribution of diffusion coefficients in the range of [<inline-formula id="inf11">
<mml:math id="m11">
<mml:mrow>
<mml:mn>0.1</mml:mn>
<mml:mi>&#x3bc;</mml:mi>
<mml:msup>
<mml:mi>m</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
<mml:mo>/</mml:mo>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf12">
<mml:math id="m12">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mi>&#x3bc;</mml:mi>
<mml:msup>
<mml:mi>m</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
<mml:mo>/</mml:mo>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>].</p>
<p>Each particle started at a position randomly chosen within a sandbox (<inline-formula id="inf13">
<mml:math id="m13">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mrow>
<mml:mi>&#x3bc;</mml:mi>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> &#xd7; <inline-formula id="inf14">
<mml:math id="m14">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mrow>
<mml:mi>&#x3bc;</mml:mi>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>). We updated each localization (<inline-formula id="inf15">
<mml:math id="m15">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>) stepwise and simultaneously for all particles following <inline-formula id="inf16">
<mml:math id="m16">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>&#x3c6;</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf17">
<mml:math id="m17">
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>&#x3c6;</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> with <inline-formula id="inf18">
<mml:math id="m18">
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mo>&#x22c5;</mml:mo>
</mml:mrow>
<mml:msqrt>
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:mrow>
<mml:mi>D</mml:mi>
<mml:mo>&#x22c5;</mml:mo>
</mml:mrow>
<mml:mo>&#x394;</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msqrt>
<mml:mrow>
<mml:mfenced open="|" close="" separators="|">
<mml:mrow>
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mo>&#x2208;</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:mn>0,1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf19">
<mml:math id="m19">
<mml:mrow>
<mml:mrow>
<mml:mi>&#x3c6;</mml:mi>
<mml:mo>&#x2208;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:mn>0,2</mml:mn>
<mml:mi>&#x3c0;</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>. In doing so, we ensured that the simulation stays as close as possible to thermodynamic movement while maintaining idealized simplicity.</p>
<p>Blobs are created by drawing 400 to 600 localizations from a set of 250 two-dimensional multivariate normal distributions, where the covariance matrices were diagonalized, and the respective entries are again drawn from 1D normal distributions based on the mean and standard deviation of variances extracted from the MINFLUX tracking data of immobilized fluorescent beads. We did so to ensure that our simulation represents the variability of experimental data. Artifacts are randomly spawned in an area smaller than the area initially explored by synthetic free particles to avoid edge effects.</p>
<p>Ground truth datasets in three dimensions are generated similarly, with the added degree of freedom of the third dimension.</p>
</sec>
</sec>
<sec id="s2-3">
<title>2.3 Geometrical features</title>
<p>From our observations and knowledge of the origins of blobs, we can infer clear features that can be systematically detected in all blobs, highlighting their similarities. The better we describe them, the more similar descriptors they will produce, leading to a closer and higher density distribution in the feature space, aiding us when clustering.</p>
<sec id="s2-3-1">
<title>2.3.1 Maximum distance</title>
<p>A straightforward metric to distinguish between blobs originating from immobile particles and freely diffusing particles is to calculate the maximum Euclidean distance between any two points within the dataset. It will be similar for all blobs in the sample; however, it varies and is potentially larger for free markers. We can define the maximum distance as<disp-formula id="e1">
<mml:math id="m20">
<mml:mrow>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi mathvariant="italic">max</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>max</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mrow>
<mml:mfenced open="{" close="}" separators="|">
<mml:mrow>
<mml:mfenced open="|" close="|" separators="|">
<mml:mrow>
<mml:mover accent="true">
<mml:mi>a</mml:mi>
<mml:mo>&#x2192;</mml:mo>
</mml:mover>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mover accent="true">
<mml:mi>x</mml:mi>
<mml:mo>&#x2192;</mml:mo>
</mml:mover>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2200;</mml:mo>
<mml:mover accent="true">
<mml:mi>a</mml:mi>
<mml:mo>&#x2192;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mo>&#x2208;</mml:mo>
<mml:mi>P</mml:mi>
</mml:mrow>
<mml:mo>,</mml:mo>
<mml:mo>&#x2200;</mml:mo>
<mml:msub>
<mml:mover accent="true">
<mml:mi>x</mml:mi>
<mml:mo>&#x2192;</mml:mo>
</mml:mover>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mo>&#x2208;</mml:mo>
<mml:mi>P</mml:mi>
<mml:mo>\</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="{" close="}" separators="|">
<mml:mrow>
<mml:mover accent="true">
<mml:mi>a</mml:mi>
<mml:mo>&#x2192;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(1)</label>
</disp-formula>where <inline-formula id="inf20">
<mml:math id="m21">
<mml:mrow>
<mml:mi>P</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the set of points in each trajectory.</p>
</sec>
<sec id="s2-3-2">
<title>2.3.2 Convex hull volume and area</title>
<p>We can assume the area or volume explored by any particle to be described by the area or volume of either a 2D polygon or 3D polyhedron with boundaries defined by the convex closure of the track, i.e., the smallest set that contains it as a subset of the Euclidean space. While blobs will result in regular spherical shapes with comparable area and volume throughout the dataset, free particles produce elongated or otherwise irregular shapes with varying parameters. We compute the convex hull using the <italic>Qhull</italic> algorithm (<xref ref-type="bibr" rid="B3">Bradford Barber et al., 1996</xref>), as implemented in the <italic>SciPy</italic> (<xref ref-type="bibr" rid="B20">Virtanen et al., 2020</xref>) package of Python, which we also use to calculate both the polygon area and polyhedron volume.</p>
</sec>
<sec id="s2-3-3">
<title>2.3.3 Ellipticity</title>
<p>For non-blobs, the convex closure polygon appears as a stretched ellipse due to the free movement of the particle if at least one direction is generally preferred, which is usually the case. Compared to that, a particle would need to constantly move in a concentric spiral or otherwise convex isometric around its starting position to produce a sphere or circle, which is possible but of low probability. We compare the convex area to the area of the ellipse calculated using the maximum Euclidean distance (1) and the mean Euclidean distance to the point cloud&#x2019;s center of mass.<disp-formula id="e2">
<mml:math id="m22">
<mml:mrow>
<mml:mrow>
<mml:mi>E</mml:mi>
<mml:mi>L</mml:mi>
<mml:mi>L</mml:mi>
</mml:mrow>
<mml:msub>
<mml:mi>I</mml:mi>
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:mi>D</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mi>A</mml:mi>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>v</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>A</mml:mi>
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfrac>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(2)</label>
</disp-formula>
<disp-formula id="equ1">
<mml:math id="m23">
<mml:mrow>
<mml:mrow>
<mml:mi>E</mml:mi>
<mml:mi>L</mml:mi>
<mml:mi>L</mml:mi>
</mml:mrow>
<mml:msub>
<mml:mi>I</mml:mi>
<mml:mrow>
<mml:mn>3</mml:mn>
<mml:mi>D</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mi>V</mml:mi>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>v</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>V</mml:mi>
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfrac>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>with<disp-formula id="e3">
<mml:math id="m24">
<mml:mrow>
<mml:msub>
<mml:mi>A</mml:mi>
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mi>&#x3c0;</mml:mi>
<mml:mo>&#x22c5;</mml:mo>
</mml:mrow>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi mathvariant="italic">max</mml:mi>
</mml:msub>
<mml:mo>&#x22c5;</mml:mo>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>&#x3c0;</mml:mi>
<mml:mo>&#x22c5;</mml:mo>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi mathvariant="italic">max</mml:mi>
</mml:msub>
<mml:mo>&#x22c5;</mml:mo>
<mml:mover accent="true">
<mml:mrow>
<mml:mfenced open="|" close="|" separators="|">
<mml:mrow>
<mml:mover accent="true">
<mml:mi>a</mml:mi>
<mml:mo>&#x2192;</mml:mo>
</mml:mover>
<mml:mo>&#x2212;</mml:mo>
<mml:mover accent="true">
<mml:mi>&#x3bc;</mml:mi>
<mml:mo>&#x2192;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mfenced open="|" close="" separators="|">
<mml:mrow>
<mml:mrow>
<mml:mi>&#x3bc;</mml:mi>
<mml:mo>&#x3d;</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mover accent="true">
<mml:mi>P</mml:mi>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
<mml:mo>.</mml:mo>
</mml:mrow>
</mml:math>
<label>(3)</label>
</disp-formula>
</p>
<p>The same is applied for comparing the volume of the convex polyhedron to the volume of an ellipsoid using the following expression:<disp-formula id="e4">
<mml:math id="m25">
<mml:mrow>
<mml:msub>
<mml:mi>V</mml:mi>
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>4</mml:mn>
<mml:mi>&#x3c0;</mml:mi>
</mml:mrow>
<mml:mn>3</mml:mn>
</mml:mfrac>
<mml:mo>&#x22c5;</mml:mo>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi mathvariant="italic">max</mml:mi>
</mml:msub>
<mml:mo>&#x22c5;</mml:mo>
<mml:msubsup>
<mml:mi>d</mml:mi>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msubsup>
<mml:mo>.</mml:mo>
</mml:mrow>
</mml:math>
<label>(4)</label>
</disp-formula>
</p>
</sec>
<sec id="s2-3-4">
<title>2.3.4 Center sphericality</title>
<p>Due to the isotropic distribution of localizations around the center of mass expected in blobs, the convex polygon resembles a circle, and the polyhedron resembles a sphere. Due to the bell-curve-shaped density profile along each axis, we expect and observe a significant difference between the convex closure and the area or volume described by a circle or sphere with the radius equal to the mean distance of each point to the center of mass. We effectively compare the area or volume of the highest density to the territory explored in total. In parallel to the ellipticity (2), we define the center sphericality as follows:<disp-formula id="e5">
<mml:math id="m26">
<mml:mrow>
<mml:mrow>
<mml:mi>S</mml:mi>
<mml:mi>P</mml:mi>
<mml:mi>H</mml:mi>
</mml:mrow>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:mi>D</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mi>A</mml:mi>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>v</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>A</mml:mi>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>e</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfrac>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(5)</label>
</disp-formula>
<disp-formula id="equ2">
<mml:math id="m27">
<mml:mrow>
<mml:mrow>
<mml:mi>S</mml:mi>
<mml:mi>P</mml:mi>
<mml:mi>H</mml:mi>
</mml:mrow>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mrow>
<mml:mn>3</mml:mn>
<mml:mi>D</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mi>V</mml:mi>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>v</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>V</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>h</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfrac>
<mml:mo>.</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
</p>
</sec>
<sec id="s2-3-5">
<title>2.3.5 Convex density of points</title>
<p>Based on the convex polygon area and convex polyhedron volume, respectively, we calculate the density of localizations in the total area and volume explored, highlighting fast and freely moving particles with a low density and separating them from densely packed blob tracks.<disp-formula id="e6">
<mml:math id="m28">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3c1;</mml:mi>
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:mi>D</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mo>&#x23;</mml:mo>
<mml:mi>P</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>A</mml:mi>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>v</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfrac>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(6)</label>
</disp-formula>
<disp-formula id="equ3">
<mml:math id="m29">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3c1;</mml:mi>
<mml:mrow>
<mml:mn>3</mml:mn>
<mml:mi>D</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mo>&#x23;</mml:mo>
<mml:mi>P</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>V</mml:mi>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>v</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfrac>
<mml:mo>.</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
</p>
</sec>
</sec>
<sec id="s2-4">
<title>2.4 Evaluation metrics</title>
<sec id="s2-4-1">
<title>2.4.1 F1-score</title>
<p>The F1-score is a widely used metric in statistical analysis of binary classification to evaluate accuracy. It is the harmonic mean of precision and recall (<xref ref-type="bibr" rid="B18">Taha and Hanbury, 2015</xref>) and thus represents both simultaneously. It is calculated as follows:<disp-formula id="e7">
<mml:math id="m30">
<mml:mrow>
<mml:mrow>
<mml:mi mathvariant="normal">F</mml:mi>
<mml:mn>1</mml:mn>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mo>&#x22c5;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mtext>precision</mml:mtext>
<mml:mo>&#x22c5;</mml:mo>
<mml:mtext>recall</mml:mtext>
</mml:mrow>
<mml:mrow>
<mml:mtext>precision</mml:mtext>
<mml:mo>&#x2b;</mml:mo>
<mml:mtext>recall</mml:mtext>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:mrow>
<mml:mo>&#x22c5;</mml:mo>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:mrow>
<mml:mo>&#x22c5;</mml:mo>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:mrow>
</mml:mfrac>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(7)</label>
</disp-formula>where <inline-formula id="inf21">
<mml:math id="m31">
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the number of true positives, <inline-formula id="inf22">
<mml:math id="m32">
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the number of false positives, and <inline-formula id="inf23">
<mml:math id="m33">
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the number of false negatives of the separation. F1-score values are in the range of [0,1], where larger values mean higher accuracy.</p>
</sec>
<sec id="s2-4-2">
<title>2.4.2 Silhouette score</title>
<p>Performing cluster evaluation on the model itself, the silhouette score <inline-formula id="inf24">
<mml:math id="m34">
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> (<xref ref-type="bibr" rid="B15">Rousseeuw, 1987</xref>) compares the mean distance between any selected sample and all other points within the cluster <inline-formula id="inf25">
<mml:math id="m35">
<mml:mrow>
<mml:msub>
<mml:mi>D</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and the mean distance of any selected sample of one class to the next nearest cluster <inline-formula id="inf26">
<mml:math id="m36">
<mml:mrow>
<mml:msub>
<mml:mi>D</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> to quantify the sharpness of separation. It is calculated as follows:<disp-formula id="e8">
<mml:math id="m37">
<mml:mrow>
<mml:mi>S</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mi>D</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>D</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:mi>max</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>D</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>D</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfrac>
<mml:mo>.</mml:mo>
</mml:mrow>
</mml:math>
<label>(8)</label>
</disp-formula>
</p>
<p>The silhouette score yields values in the range of [&#x2212;1,1], where larger values indicate clean separation of clusters.</p>
</sec>
<sec id="s2-4-3">
<title>2.4.3 Adjusted Rand index</title>
<p>The similarity of label assignment can be quantified using the Rand index (<xref ref-type="bibr" rid="B7">Hubert and Arabie, 1985</xref>), a symmetrical measure between the number of pairs that are both in the same ground truth and clustered set <inline-formula id="inf27">
<mml:math id="m38">
<mml:mrow>
<mml:mi>A</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> as well as pairs that are in different ground truth and clustered sets <inline-formula id="inf28">
<mml:math id="m39">
<mml:mrow>
<mml:mi>B</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>. To set a common baseline for reliable interpretation, the Rand index is adjusted (<xref ref-type="bibr" rid="B17">Steinley, 2004</xref>) so that it yields 0 for random labeling and 1 for consistent labeling. The adjusted Rand index <inline-formula id="inf29">
<mml:math id="m40">
<mml:mtext>ARI</mml:mtext>
</mml:math>
</inline-formula> is calculated as follows:<disp-formula id="e9">
<mml:math id="m41">
<mml:mrow>
<mml:mtext>ARI</mml:mtext>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mtext>RI</mml:mtext>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>E</mml:mi>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:mtext>RI</mml:mtext>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mi>max</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mtext>RI</mml:mtext>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>E</mml:mi>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:mtext>RI</mml:mtext>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfrac>
<mml:mo>.</mml:mo>
</mml:mrow>
</mml:math>
<label>(9)</label>
</disp-formula>
</p>
<p>The Rand index <inline-formula id="inf30">
<mml:math id="m42">
<mml:mtext>RI</mml:mtext>
</mml:math>
</inline-formula> is defined as follows:<disp-formula id="e10">
<mml:math id="m43">
<mml:mrow>
<mml:mtext>RI</mml:mtext>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>B</mml:mi>
</mml:mrow>
<mml:msubsup>
<mml:mi>C</mml:mi>
<mml:mn>2</mml:mn>
<mml:msub>
<mml:mi>n</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>m</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msub>
</mml:msubsup>
</mml:mfrac>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(10)</label>
</disp-formula>
</p>
<p>for the ground truth class assignment <inline-formula id="inf31">
<mml:math id="m44">
<mml:mrow>
<mml:mi>C</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
</sec>
<sec id="s2-4-4">
<title>2.4.4 V-measure</title>
<p>Given the ground truth, the V-measure is the harmonic mean of clustering <italic>homogeneity</italic> and <italic>completeness</italic> (<xref ref-type="bibr" rid="B1">Andrew Rosenberg J. V-measure, 2007</xref>), representing both in one combined metric. It is calculated as follows:<disp-formula id="e11">
<mml:math id="m45">
<mml:mrow>
<mml:mi>V</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2b;</mml:mo>
</mml:mrow>
<mml:mi>&#x3b2;</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x22c5;</mml:mo>
<mml:mtext>homogeneity</mml:mtext>
<mml:mo>&#x22c5;</mml:mo>
<mml:mtext>completeness</mml:mtext>
</mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mi>&#x3b2;</mml:mi>
<mml:mo>&#x22c5;</mml:mo>
</mml:mrow>
<mml:mtext>homogeneity</mml:mtext>
<mml:mo>&#x2b;</mml:mo>
<mml:mtext>completeness</mml:mtext>
</mml:mrow>
</mml:mfrac>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(11)</label>
</disp-formula>where <inline-formula id="inf32">
<mml:math id="m46">
<mml:mrow>
<mml:mi>&#x3b2;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is a weight parameter set to 1 in this work.</p>
<p>The values lie in the range of [0,1], where 1 is obtained when each cluster contains only one species (homogeneity) and all candidates of a species are assigned to the same cluster (completeness).</p>
</sec>
<sec id="s2-4-5">
<title>2.4.5 Feature correlation</title>
<p>We evaluate the correlation between two feature axes by means of Pearson&#x2019;s correlation coefficient (PCC) (<xref ref-type="bibr" rid="B22">Weisstein, 2023</xref>) as the ratio between the axes&#x2019; covariance and the product of their standard deviations. This yields a measure for the linear relationship between said features on a scale of [&#x2212;1,1], where the extrema indicate ideal anti-correlation/correlation, respectively. PCC is calculated as follows:<disp-formula id="equ4">
<mml:math id="m47">
<mml:mrow>
<mml:msub>
<mml:mtext>PCC</mml:mtext>
<mml:mrow>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3c3;</mml:mi>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:msub>
<mml:msub>
<mml:mi>&#x3c3;</mml:mi>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:msub>
</mml:mrow>
</mml:mfrac>
<mml:mo>.</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
</p>
</sec>
</sec>
<sec id="s2-5">
<title>2.5 Automatic annotation</title>
<sec id="s2-5-1">
<title>2.5.1 Blob score</title>
<p>When applying Blob-B-Gone to unknown datasets without prior training, we lack control over the assignment of blobs to clusters during the process. Though visual confirmation by plotting is a possibility, it is hardly scalable and prone to bias. Thus, we introduce the Blob score <inline-formula id="inf33">
<mml:math id="m48">
<mml:mrow>
<mml:mi>B</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, which is the mean ratio between <italic>center sphericality</italic> and <italic>maximum distance</italic>, computed for both clusters to determine the set that is more likely to contain artifactual blobs:<disp-formula id="e12">
<mml:math id="m49">
<mml:mrow>
<mml:msub>
<mml:mi>B</mml:mi>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>r</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mover accent="true">
<mml:mrow>
<mml:mrow>
<mml:mi>S</mml:mi>
<mml:mi>P</mml:mi>
<mml:mi>H</mml:mi>
<mml:mi>E</mml:mi>
</mml:mrow>
<mml:mo>/</mml:mo>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi mathvariant="italic">max</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
<mml:mo>.</mml:mo>
</mml:mrow>
</mml:math>
<label>(12)</label>
</disp-formula>
</p>
<p>We expect small distances and large center sphericality for artifacts, resulting in larger <inline-formula id="inf34">
<mml:math id="m50">
<mml:mrow>
<mml:mi>B</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>-scores, and <italic>vice versa</italic> for the free tracks. Hence, using this metric, we annotate both separated subsets fully automatically to facilitate further processing.</p>
<p>We deliberately do not use the Blob score as a means of clustering since it is highly specific and, thus, has a higher bias toward a limited type of data, which results in diminished reliability when presented with inputs deviating from that.</p>
</sec>
</sec>
</sec>
<sec sec-type="results" id="s3">
<title>3 Results</title>
<sec id="s3-1">
<title>3.1 Simulated data</title>
<p>For each trace in the simulated datasets, we calculate a five-dimensional descriptor in the feature space using the features described in the Methodology section. Due to the considerable difference in the value across all features, the descriptors span an inhomogeneous space. To correct for that, we normalize and standardize all axes individually to match a zero-mean unit-variance distribution, thus removing feature bias and improving the rigidity of clustering. In order to evaluate the performance of the algorithm, we generate ground truth datasets by combining simulated blobs and freely diffusing particle trajectories, both in two and three dimensions. The chosen geometrical features are calculated for each of these point clouds, and we subsequently initialize k-means clustering with two suspected underlying populations. We compute the separation using the <italic>scikit-learn</italic> (<xref ref-type="bibr" rid="B14">Pedregosa et al., 2011</xref>) (Python) implementation of the <italic>greedy k-means&#x2b;&#x2b;</italic> (<xref ref-type="bibr" rid="B4">David Arthur, 2007</xref>) algorithm in a single-shot manner, without the need for prior training.</p>
<p>To quantify the performance of the algorithm, we calculate the silhouette score for goodness of clustering, the adjusted Rand index to evaluate the labeling, the V-measure to check cluster formation, and the F1-score in addition to the confusion matrix.</p>
<p>The results obtained when applied to the two-dimensional simulation dataset are shown in <xref ref-type="fig" rid="F2">Figure 2</xref>. Out of 500 traces (250 free and 250 blobs), 499 are assigned the correct label and only one diffusing track was inaccurately flagged as a blob, leading to an F1-score of 0.998 (2D). The incorrectly labeled track, highlighted in red (<xref ref-type="fig" rid="F2">Figure 2C</xref>), shows that this trajectory has a high concentric density of localizations and a more regular, almost circular, convex outline. Therefore, this object closely resembles a blob trajectory in terms of center sphericality, localization density, and ellipticity.</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>Application of Blob-B-Gone to the simulated two-dimensional dataset leads to the separation listed in the confusion matrix <bold>(A)</bold>. <bold>(B)</bold> Principal component analysis of the standardized feature space reducing the dimensionality from 5D to 2D. The gray box includes metrics (silhouette score [-1,1], adjusted Rand index [-1,1], V-measure [0,1], and F1-score [0,1]) computed for the 2D single-shot k-means clustering. Each cluster mean is marked as a center of mass in black, and the decision boundary between the assigned clusters is visualized in magenta and calculated as the normal of the segment connecting the cluster means in the middle. The black arrow connects an incorrectly labeled trace to a respective time-scale colored (dark-&#x3e; bright) scatterplot. The black outline marks its convex hull. For comparison, all traces in each cluster are shown in <bold>(C,D)</bold> in various shades of the color corresponding to their predicted label. The color-scales are used to aid discerning individual tracks. On the left side, another black arrow highlights the incorrectly flagged blob.</p>
</caption>
<graphic xlink:href="fbinf-03-1268899-g002.tif"/>
</fig>
<p>For clearer insights, we performed principal component analysis (<xref ref-type="bibr" rid="B8">Jolliffe and Cadima, 2016</xref>) (additional PCA plots are found in <xref ref-type="sec" rid="s10">Supplementary Figure S3</xref>) reducing the feature space to two dimensions (<xref ref-type="fig" rid="F2">Figure 2B</xref>), to better visualize the agglomeration of descriptors into clusters. The blob cluster (blue) appears densely packed compared to the free (red) particles, indicating high similarity among its members. This was expected as we deliberately chose the features to highlight these geometric correlations.</p>
<p>We note that the cluster of free particles (red), though still visibly grouped, is spread far more than the blob cluster, indicating that the set of features chosen produced a higher variance of descriptors. This is caused by a higher variety in the distribution used to produce data, which is randomized free diffusion. Nonetheless, both populations appear clearly separated, implying that the features chosen still describe the data efficiently.</p>
<p>The track incorrectly flagged as an artifact is located on the far outer rim of the blob cluster, close to the diffusing one, as marked by the decision boundary in magenta (<xref ref-type="fig" rid="F2">Figure 2B</xref>). We conclude that this instance of mislabeling is a direct consequence of using single-shot <italic>k-means</italic> and could be avoided by adjusting the boundary or switching to a <italic>k-nearest-neighbor</italic> algorithm. However, this would require training prior to application. The advantage of using k-means lies in the fact that it is easily scalable to a large number of data points and is capable of performing single-shot clustering without annotated data, which is beneficial in terms of resources and general applicability.</p>
<p>We apply the same procedure to the three-dimensional dataset. In this case, the clustering algorithm achieves a perfect separation between the blobs and freely diffusing particles, producing an exceptional F1-score of 1.0 and a diagonal confusion matrix (<xref ref-type="fig" rid="F3">Figure 3A</xref>), indicating that all data points have been assigned the correct label. In principal component analysis (<xref ref-type="fig" rid="F3">Figure 3B</xref>), we further notice an even tighter clustering and clearer separation compared to the previous case (<xref ref-type="fig" rid="F2">Figure 2B</xref>), indicating that the additional non-trivial dimension causes the feature ensemble to be even more representative of the particle trajectories.</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>Applying Blob-B-Gone to the simulated three-dimensional dataset leads to the separation listed in the confusion matrix <bold>(A)</bold>. <bold>(B)</bold> Principal component analysis of the standardized feature space reducing the dimensionality from 5D to 2D. The gray box includes metrics (silhouette score [-1,1], adjusted Rand index [-1,1], V-measure [0,1], and F1-score [0,1]) computed for the 3D single-shot k-means clustering. Each cluster mean is marked as a center of mass in black, and the decision boundary between the assigned clusters is visualized in magenta and calculated as the normal of the segment connecting the cluster means in the middle. For comparison, all traces in each cluster are shown in <bold>(C,D)</bold> in various shades of the color corresponding to their predicted label. The color-scales are used to aid discerning individual tracks.</p>
</caption>
<graphic xlink:href="fbinf-03-1268899-g003.tif"/>
</fig>
<p>Finally, we again apply Blob-B-Gone to both datasets ignoring the ground truth to evaluate the automatic annotation using the <inline-formula id="inf35">
<mml:math id="m51">
<mml:mrow>
<mml:mi>B</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>-score introduced previously. The metric delivers a clear <inline-formula id="inf36">
<mml:math id="m52">
<mml:mrow>
<mml:mn>37</mml:mn>
<mml:mo>:</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> ratio between blobs and free particles when applied to the clusters produced when presenting the 2D data, allowing us to unambiguously distinguish and annotate the results. Consistent with the even sharper separation in the feature space encountered in the 3D case, we obtain a <inline-formula id="inf37">
<mml:math id="m53">
<mml:mrow>
<mml:mn>44</mml:mn>
<mml:mo>:</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> (blob: free) <inline-formula id="inf38">
<mml:math id="m54">
<mml:mrow>
<mml:mi>B</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>-score ratio.</p>
</sec>
<sec id="s3-2">
<title>3.2 MINFLUX SPT data</title>
<p>After successfully applying the proposed method to simulated data in two and three dimensions, we investigate its performance on <italic>in vitro</italic> MINFLUX SPT data.</p>
<p>On average, we found approximately 20% blob artifacts per set, which renders our base dataset imbalanced. To ensure symmetric feature generation and comparability between the performances of both classes, we first equalize the population counts in the dataset by randomly picking traces from each set. However, given that during a real scenario, Blob-B-Gone will be applied to an imbalanced dataset, we also assess this case.</p>
<p>When facing MINFLUX data with the same procedure as presented for the simulated data, the automatic annotation again yields a clear <inline-formula id="inf39">
<mml:math id="m55">
<mml:mrow>
<mml:mn>33</mml:mn>
<mml:mo>:</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> (blob: free) <inline-formula id="inf40">
<mml:math id="m56">
<mml:mrow>
<mml:mi>B</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>-score ratio for either set. In both cases, the same artifact has been mislabeled as a free trace, leading to almost diagonalized confusion matrices (<xref ref-type="fig" rid="F4">Figures 4A, C</xref>) and F1-scores of 0.995 (balanced) and 0.994 (imbalanced). Comparatively lower silhouette scores of <inline-formula id="inf41">
<mml:math id="m57">
<mml:mrow>
<mml:mn>0.577</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> (balanced) and <inline-formula id="inf42">
<mml:math id="m58">
<mml:mrow>
<mml:mn>0.661</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> (imbalanced) imply a narrower cluster separation, which we highlight in the respective PCAs (<xref ref-type="fig" rid="F4">Figures 4B, D</xref>).</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>Comparison between the performance of Blob-B-Gone applied to a balanced dataset <bold>(A,B)</bold> as opposed to an imbalanced dataset <bold>(C,D)</bold>. To construct the balanced set, traces were randomly drawn from the original imbalanced hand-labeled MINFLUX SPT. The separation achieved by the algorithm is demonstrated by the confusion matrices in <bold>(A,C)</bold> and underlined by the principal component analysis of the standardized feature space reducing the dimensionality from 5D to 2D in <bold>(B,D)</bold>. The gray box includes metrics (silhouette score [-1,1], adjusted Rand index [-1,1], V-measure [0,1], and F1-score [0,1]) computed for the 2D single-shot k-means clustering. Each cluster mean is marked as a center of mass in black, and the decision boundary between the assigned clusters is visualized in magenta and calculated as the normal of the segment connecting the cluster means in the middle. The black arrows highlight the incorrectly labeled traces to respective time-scale colored (dark-&#x3e;bright) scatterplots. The black outline marks their convex hull.</p>
</caption>
<graphic xlink:href="fbinf-03-1268899-g004.tif"/>
</fig>
<p>Even though the clusters are more spread out due to a higher variability in geometric structure in the experimental dataset, we still observe a clear separation between the two for the balanced set. In the case of the imbalanced dataset, both clusters reach closer to the separation border.</p>
<p>In total, we notice two mislabeled trajectories. One only appears in the imbalanced dataset and is likely caused by the cluster proximity. The other is a trajectory common to the balanced and imbalanced cases. We mark these outliers with black arrows and show their outline in the PCA plots of <xref ref-type="fig" rid="F4">Figure 4</xref>.</p>
<p>The full-set-exclusive data point has a typical blob appearance, though the localizations do not appear as dense in the center, which probably causes it to be located on the outer rim of the blob cluster. It would, thus, be reasonable to assume that it originates from the dominant influence of the free trajectories.</p>
<p>The outlier found commonly in both sets stretches a rather long distance and convex area due to the &#x201c;hooked&#x201d; tail shape. The data point exhibits a significant shift along the first principal component compared to other blobs in both plots. The significant contrast, present in both balancing scenarios, could imply an inaccurately assigned ground truth label. However, the scatterplot strongly suggests that this is an artifact.</p>
<p>The shift described previously originates from an asymmetric contribution of the <italic>maximum distance</italic> and <italic>convex hull area</italic> feature values when compared to the <italic>sphericality</italic>, <italic>ellipticity</italic>, and <italic>convex hull density</italic> along the first principal component, which is expected considering what the features were designed to represent and highlight. We can observe this directly within the first PCA eigenvectors (<xref ref-type="fig" rid="F5">Figure 5</xref>) for either case.</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>Principal component analysis eigenvector values for the first two components listed together with their explained variability ratio (EVR) for Blob-B-Gone applied to the balanced <bold>(A)</bold> and imbalanced <bold>(B)</bold> datasets of MINFLUX SPT.</p>
</caption>
<graphic xlink:href="fbinf-03-1268899-g005.tif"/>
</fig>
<p>Apart from the asymmetry of the first two components within the primary eigenvectors, the principal component (PC) weights for both cases investigated are remarkably similar, which is reassuring, since it underlines a certain degree of rigidity during feature space construction. In terms of absolute contribution per feature, we notice a balanced distribution among the primary PC, while it seems that the <italic>convex hull area</italic> contributes more significantly to the second PC. Approximately 75% of information is contained within the first principal component, as shown by the explained variability ratio (EVR) values in <xref ref-type="fig" rid="F5">Figure 5</xref>. We thus expect to find close similarity to the main correlations between the feature axes. To verify this, we constructed cross-correlation matrices to highlight dependencies between feature axes using the PCC implementation of the <italic>pandas</italic> Python module (<xref ref-type="bibr" rid="B11">McKinney, 2010</xref>; <xref ref-type="bibr" rid="B19">The pandas development team, 2023</xref>). In addition, we introduce correlation clustering and show the results as a cluster map using Python&#x2019;s <italic>seaborn</italic> implementation (<xref ref-type="bibr" rid="B21">Waskom, 2021</xref>) (<xref ref-type="fig" rid="F6">Figure 6</xref>). To ensure comparability between feature axes, we restrict ourselves to the balanced case.</p>
<fig id="F6" position="float">
<label>FIGURE 6</label>
<caption>
<p>Symmetric correlation matrices of the geometric feature space color-coded by their correlation coefficient [anti-correlation (&#x2212;1), correlation (1)]. A dendrogram highlights correlation clusters, i.e., over-arching generally similar behavior among feature axes. The blob feature space <bold>(A)</bold> is compared to the freely moving particle feature space <bold>(B)</bold> to underline the descriptive performance of the features presented.</p>
</caption>
<graphic xlink:href="fbinf-03-1268899-g006.tif"/>
</fig>
<p>The added dendrogram highlights clustered feature behavior and significance accordingly. Along the tree hierarchy, we find direct correspondence between the grouped correlation and the PCA weight distributions (<xref ref-type="fig" rid="F5">Figure 5</xref>). Here, we find again the asymmetric behavior given the high correlation between the <italic>maximum distance</italic> and <italic>convex hull area</italic> features, which in turn are anti-correlated with the <italic>convex hull density</italic>, <italic>sphericality</italic>, and <italic>ellipticity</italic>, as highlighted by the uppermost clade. As the observations match the intent underlying the initial feature design, we can reasonably conclude that we successfully highlighted the geometric systematic behavior. Additional correlation heatmaps for simulation and the imbalanced set can be found in <xref ref-type="sec" rid="s10">Supplementary Figures S4, S5</xref>.</p>
</sec>
</sec>
<sec sec-type="discussion" id="s4">
<title>4 Discussion</title>
<p>In this work, we have presented Blob-B-Gone, a simple computational tool to isolate and distinguish between trajectories of freely diffusing particles and immobile (or highly constrained) particles, as detected in MINFLUX microscopy. Other artifacts, e.g., particle/stage drift, known <italic>a priori</italic>, should instead be addressed using similarly specialized methods prior to Blob-B-Gone to ensure accurate results. Despite the development taking place on the MINFLUX particle tracking dataset, the method can be applied to any single-particle tracking capable microscopy method. The algorithm itself merely requires two spatial coordinates per sample event, e.g., X and Y, to operate, which can originate from any particle tracking procedure.</p>
<p>The advantage of the approach presented herein lies in its sole reliance on the geometric properties of the point cloud of the single-particle localizations rather than other descriptors of more complex origin, such as diffusion coefficients. Moreover, we demonstrated how efficient clustering of single-particle trajectories, in certain scenarios, may be achieved without machine learning and large datasets of annotated data for training in a more straightforward and accessible way. The framework was originally developed to fully automate the classification of immobile tracers (i.e., blobs) and trajectories of diffusing markers in high-throughput single-particle tracking measurements as a big data problem. To this end, we have assessed its performance for idealized two- and three-dimensional simulated traces as well as hand-annotated <italic>in vitro</italic> MINFLUX SPT data on a sample routinely prepared in our laboratory.</p>
<p>Though this method was designed to extract blobs from rather homogeneous systems, we speculate that this approach could feasibly produce similar results in more complex scenarios, provided that more clusters are anticipated in the calculations to account for different diffusing behaviors. This additionally requires that the system expresses more complex movements in numbers significant enough to construct a balanced dataset.</p>
<p>Nonetheless, our method, as demonstrated by the application to the aforementioned datasets, has proven to be highly effective, with F1-scores close to 1.0 for simulation and MINFLUX data (<xref ref-type="table" rid="T4">Table 4</xref>). For simulated data, the designated descriptors enable a clean separation in the feature space (<xref ref-type="fig" rid="F2">Figures 2B</xref>, <xref ref-type="fig" rid="F3">3B</xref>), with silhouette scores of 0.706 (2D) and 0.732 (3D). Though the cluster split is not as vast in the MINFLUX SPT sets (<xref ref-type="fig" rid="F4">Figures 4B, D</xref>), causing smaller silhouette scores of 0.577 (balanced) and 0.661 (imbalanced), we retrieved adjusted Rand indices and V-measures close to 1.0 across all sets (<xref ref-type="table" rid="T4">Table 4</xref>). This indicates significant reliability in highlighting similarities and differences between all trace populations with a homogeneous label spread.</p>
<table-wrap id="T4" position="float">
<label>TABLE 4</label>
<caption>
<p>Evaluation metrics across all datasets considered in this work.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Dataset</th>
<th align="left">F1-score</th>
<th align="left">Silhouette score</th>
<th align="left">Adjusted Rand index</th>
<th align="left">V-measure</th>
<th align="left">
<inline-formula id="inf43">
<mml:math id="m59">
<mml:mrow>
<mml:mi>B</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>-score ratio</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">2D simulated </td>
<td align="left">0.998</td>
<td align="left">0.706</td>
<td align="left">0.992</td>
<td align="left">0.981</td>
<td align="left">37:1</td>
</tr>
<tr>
<td align="left">3D simulated</td>
<td align="left">1.000</td>
<td align="left">0.732</td>
<td align="left">1.000</td>
<td align="left">1.000</td>
<td align="left">44:1</td>
</tr>
<tr>
<td align="left">Balanced</td>
<td align="left">0.995</td>
<td align="left">0.577</td>
<td align="left">0.981</td>
<td align="left">0.962</td>
<td align="left">33:1</td>
</tr>
<tr>
<td align="left">Imbalanced</td>
<td align="left">0.994</td>
<td align="left">0.661</td>
<td align="left">0.984</td>
<td align="left">0.960</td>
<td align="left">33:1</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>As an exemplary case, we investigated the primary and secondary PCA eigenvectors (<xref ref-type="fig" rid="F5">Figure 5</xref>) for the balanced SPT data. These portrayed an almost uniform contribution between all features toward the final clustering, which implies that all of them successfully grasped an individual aspect of the traces presented. Additionally, we observed an expected center-asymmetry when comparing the weights of features highlighting a stretched outline to those that describe spherically condensed point spreads. This matches the correlation pattern found between individual feature axes (<xref ref-type="fig" rid="F6">Figure 6</xref>). Using feature cross-correlation matrices, we successfully identified that this behavior originates from the initial design of the clustering. This demonstrates that purely geometric descriptors of the point clouds obtained from single-particle tracking applications could provide a very practical way to classify diffusion modes.</p>
<p>When facing more complex structures and artifacts, it is likely that the performance of the tool as mentioned here will deteriorate due to a higher and more imbalanced variability in the trajectory datasets. This can originate from a host of different causes, such as particle&#x2013;environment interaction, which will, in turn, produce a substantial overlap of clusters in the feature space we defined. Nevertheless, the performance may be restored, for example, by introducing more initial clusters, thus sensitizing the separation algorithm. <italic>A priori</italic> knowledge of the system, expected diffusion behavior, and, consequently, trajectory shapes, could be utilized to apply weights to features according to system-specific criteria, helping in reducing cluster overlap. Therefore, an extension of the proposed method, with labeled data, supervised training, and k-nearest-neighbor classification, could be adapted to perform a more rigid classification of any specific system. This would come at the cost of higher resources, computational time, and annotated datasets.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s5">
<title>Data availability statement</title>
<p>The datasets presented in this study can be found in online repositories. The names of the repository/repositories and accession number(s) can be found at: <ext-link ext-link-type="uri" xlink:href="https://github.com/Eggeling-Lab-Microscope-Software/blob-B-gone/tree/main/Example_Data">https://github.com/Eggeling-Lab-Microscope-Software/blob-B-gone/tree/main/Example_Data</ext-link>.</p>
</sec>
<sec id="s6">
<title>Author contributions</title>
<p>BV: conceptualization, data curation, formal analysis, investigation, methodology, project administration, software, validation, visualization, writing&#x2013;original draft, and writing&#x2013;review and editing. FR: investigation, resources, supervision, and writing&#x2013;review and editing. CE: funding acquisition, supervision, and writing&#x2013;review and editing.</p>
</sec>
<sec id="s7">
<title>Funding</title>
<p>The author(s) declare that financial support was received for the research, authorship, and/or publication of this article. The authors gratefully acknowledge financial support by the Deutsche Forschungsgemeinschaft (DFG, German Research Foundation; Germany&#x2019;s Excellence Strategy&#x2013;EXC 2051&#x2014;Project-ID 390713860; project number 316213987&#x2014;SFB 1278; GRK M-M-M: GRK 2723/1&#x2013;2023&#x2014;ID 44711651; and instrument funding MINFLUX Jena INST 275_405_1), the State of Thuringia (TMWWDG), and the Innovation Program by the German BMWi (ZIM; project 16KN070934/Lab-on-a-chip FCS-Easy). Furthermore, this work was supported by the BMBF, funding program LIVE2QMIC (FKZ: 13N15956), and Photonics Research Germany (FKZ: 13N15713/13N15717) and integrated into the Leibniz Center for Photonics in Infection Research (LPI). The LPI initiated by Leibniz-IPHT, Leibniz-HKI, UKJ, and FSU Jena was part of the BMBF National Roadmap for Research Infrastructures. FR was supported by the LPI grant. We acknowledge support by the German Research Foundation Projekt-Nr. 512648189 and the Open Access Publication Fund of the Thueringer Universitaets- und Landesbibliothek Jena.</p>
</sec>
<ack>
<p>The authors would like to express their utmost gratitude to Giovanni De Angelis for preparing the GUV sample based on which data were collected using MINFLUX SPT for this article.</p>
</ack>
<sec sec-type="COI-statement" id="s8">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s9">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors, and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec id="s10">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fbinf.2023.1268899/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fbinf.2023.1268899/full&#x23;supplementary-material</ext-link>
</p>
<supplementary-material xlink:href="DataSheet1.PDF" id="SM1" mimetype="application/PDF" xmlns:xlink="http://www.w3.org/1999/xlink"/>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Rosenberg</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Hirschberg</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2007</year>). &#x201c;<article-title>V-measure: a conditional entropy-based external cluster evaluation measure</article-title>,&#x201d; in <source>Proceedings of the 2007 joint conference on empirical methods in natural language processing and computational natural language learning (EMNLP-CoNLL)</source> (<publisher-loc>Prague, Czech Republic</publisher-loc>: <publisher-name>Association for Computational Linguistics</publisher-name>), <fpage>410</fpage>&#x2013;<lpage>420</lpage>.</citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Balzarotti</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Eilers</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Gwosch</surname>
<given-names>K. C.</given-names>
</name>
<name>
<surname>Gynn&#xe5;</surname>
<given-names>A. H.</given-names>
</name>
<name>
<surname>Westphal</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Stefani</surname>
<given-names>F. D.</given-names>
</name>
<etal/>
</person-group> (<year>2017</year>). <article-title>Nanometer resolution imaging and tracking of fluorescent molecules with minimal photon fluxes</article-title>. <source>Science</source> <volume>355</volume> (<issue>6325</issue>), <fpage>606</fpage>&#x2013;<lpage>612</lpage>. <pub-id pub-id-type="doi">10.1126/science.aak9913</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bradford Barber</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Dobkin</surname>
<given-names>D. P.</given-names>
</name>
<name>
<surname>Huhdanpaa</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>1996</year>). <article-title>The quickhull algorithm for convex hulls</article-title>. <source>ACM Trans. Math. Softw.</source> <volume>22</volume> (<issue>4</issue>), <fpage>469</fpage>&#x2013;<lpage>483</lpage>. <pub-id pub-id-type="doi">10.1145/235815.235821</pub-id>
</citation>
</ref>
<ref id="B4">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Arthur</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Vassilvitskii</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2007</year>). &#x201c;<article-title>K-means&#x2b;&#x2b;: the advantages of careful seeding</article-title>,&#x201d; in <source>Proceedings of the eighteenth annual ACM-SIAM symposium on Discrete algorithms</source> (<publisher-loc>New Orleans, LA</publisher-loc>: <publisher-name>SIAM</publisher-name>), <fpage>1027</fpage>&#x2013;<lpage>1035</lpage>.</citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gwosch</surname>
<given-names>K. C.</given-names>
</name>
<name>
<surname>Pape</surname>
<given-names>J. K.</given-names>
</name>
<name>
<surname>Balzarotti</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Hoess</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Ellenberg</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Ries</surname>
<given-names>J.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>MINFLUX nanoscopy delivers 3D multicolor nanometer resolution in cells</article-title>. <source>Nat. Methods</source> <volume>17</volume> (<issue>2</issue>), <fpage>217</fpage>&#x2013;<lpage>224</lpage>. <pub-id pub-id-type="doi">10.1038/s41592-019-0688-0</pub-id>
</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Honigmann</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Mueller</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Hell</surname>
<given-names>S. W.</given-names>
</name>
<name>
<surname>Eggeling</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>STED microscopy detects and quantifies liquid phase separation in lipid membranes using a new far-red emitting fluorescent phosphoglycerolipid analogue</article-title>. <source>Faraday Discuss.</source> <volume>161</volume>, <fpage>77</fpage>&#x2013;<lpage>89</lpage>. <pub-id pub-id-type="doi">10.1039/C2FD20107K</pub-id>
</citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hubert</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Arabie</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>1985</year>). <article-title>Comparing partitions</article-title>. <source>J. Classif.</source> <volume>2</volume> (<issue>1</issue>), <fpage>193</fpage>&#x2013;<lpage>218</lpage>. <pub-id pub-id-type="doi">10.1007/BF01908075</pub-id>
</citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jolliffe</surname>
<given-names>I. T.</given-names>
</name>
<name>
<surname>Cadima</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Principal component analysis: a review and recent developments</article-title>. <source>Philos. Trans. A Math. Phys. Eng. Sci.</source> <volume>374</volume> (<issue>2065</issue>), <fpage>20150202</fpage>. <pub-id pub-id-type="doi">10.1098/rsta.2015.0202</pub-id>
</citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kusumi</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Nakada</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Ritchie</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Murase</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Suzuki</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Murakoshi</surname>
<given-names>H.</given-names>
</name>
<etal/>
</person-group> (<year>2005</year>). <article-title>Paradigm shift of the plasma membrane concept from the two-dimensional continuum fluid to the partitioned fluid: high-speed single-molecule tracking of membrane molecules</article-title>. <source>Annu. Rev. Biophys. Biomol. Struct.</source> <volume>34</volume>, <fpage>351</fpage>&#x2013;<lpage>378</lpage>. <pub-id pub-id-type="doi">10.1146/annurev.biophys.34.040204.144637</pub-id>
</citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Manzo</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Garcia-Parajo</surname>
<given-names>M. F.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>A review of progress in single particle tracking: from methods to biophysical insights</article-title>. <source>Rep. Prog. Phys.</source> <volume>78</volume> (<issue>12</issue>), <fpage>124601</fpage>. <pub-id pub-id-type="doi">10.1088/0034-4885/78/12/124601</pub-id>
</citation>
</ref>
<ref id="B11">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>McKinney</surname>
<given-names>W.</given-names>
</name>
</person-group> (<year>2010</year>). &#x201c;<article-title>Data structures for statistical computing in python</article-title>,&#x201d; in <source>Proceedings of the 9th Python in Science Conference 2010</source> <volume>455 (1)</volume>, <fpage>51</fpage>&#x2013;<lpage>56</lpage>. <pub-id pub-id-type="doi">10.25080/Majora-92bf1922-00a</pub-id>
</citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>M&#xe9;l&#xe9;ard</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Bagatolli</surname>
<given-names>L. A.</given-names>
</name>
<name>
<surname>Pott</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>Giant unilamellar vesicle electroformation from lipid mixtures to native membranes under physiological conditions</article-title>. <source>Methods Enzym.</source> <volume>465</volume>, <fpage>161</fpage>&#x2013;<lpage>176</lpage>. <pub-id pub-id-type="doi">10.1016/S0076-6879(09)65009-6</pub-id>
</citation>
</ref>
<ref id="B13">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Mu&#xf1;oz-Gil</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Volpe</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Garc&#xed;a-March</surname>
<given-names>M. A.</given-names>
</name>
<name>
<surname>Metzler</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Lewenstein</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Manzo</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2020</year>). &#x201c;<article-title>The anomalous diffusion challenge: single trajectory characterisation as a competition</article-title>,&#x201d; in <source>The anomalous diffusion challenge: single trajectory characterisation as a competition</source>. Editor <person-group person-group-type="editor">
<name>
<surname>Volpe</surname>
<given-names>F.</given-names>
</name>
</person-group> <publisher-loc>USA</publisher-loc>, (<publisher-name>SPIE/International Society for Optical Engineering</publisher-name>), <volume>44</volume>.</citation>
</ref>
<ref id="B14">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Pedregosa</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Varoquaux</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Gramfort</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Michel</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Thirion</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Grisel</surname>
<given-names>O.</given-names>
</name>
<etal/>
</person-group> (<year>2011</year>). <source>Scikit-learn: machine learning in Python</source>.</citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rousseeuw</surname>
<given-names>P. J.</given-names>
</name>
</person-group> (<year>1987</year>). <article-title>Silhouettes: a graphical aid to the interpretation and validation of cluster analysis</article-title>. <source>J. Comput. Appl. Math.</source> <volume>20</volume>, <fpage>53</fpage>&#x2013;<lpage>65</lpage>. <pub-id pub-id-type="doi">10.1016/0377-0427(87)90125-7</pub-id>
</citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Schmidt</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Weihs</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Wurm</surname>
<given-names>C. A.</given-names>
</name>
<name>
<surname>Jansen</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Rehman</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Sahl</surname>
<given-names>S. J.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>MINFLUX nanometer-scale 3D imaging and microsecond-range tracking on a common fluorescence microscope</article-title>. <source>Nat. Commun.</source> <volume>12</volume> (<issue>1</issue>), <fpage>1478</fpage>. <pub-id pub-id-type="doi">10.1038/s41467-021-21652-z</pub-id>
</citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Steinley</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2004</year>). <article-title>Properties of the Hubert-Arabie adjusted Rand Index</article-title>. <source>Psychol. Methods</source> <volume>9</volume> (<issue>3</issue>), <fpage>386</fpage>&#x2013;<lpage>396</lpage>. <pub-id pub-id-type="doi">10.1037/1082-989X.9.3.386</pub-id>
</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Taha</surname>
<given-names>A. A.</given-names>
</name>
<name>
<surname>Hanbury</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Metrics for evaluating 3D medical image segmentation: analysis, selection, and tool</article-title>. <source>BMC Med. Imaging</source> <volume>15</volume>, <fpage>29</fpage>. <pub-id pub-id-type="doi">10.1186/s12880-015-0068-x</pub-id>
</citation>
</ref>
<ref id="B19">
<citation citation-type="book">
<collab>The pandas development team</collab> (<year>2023</year>). <source>pandas-dev/pandas: Pandas</source>. <publisher-name>Zenodo</publisher-name>.</citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Virtanen</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Gommers</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Oliphant</surname>
<given-names>T. E.</given-names>
</name>
<name>
<surname>Haberland</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Reddy</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Cournapeau</surname>
<given-names>D.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>SciPy 1.0: fundamental algorithms for scientific computing in Python</article-title>. <source>Nat. Methods</source> <volume>17</volume> (<issue>3</issue>), <fpage>261</fpage>&#x2013;<lpage>272</lpage>. <pub-id pub-id-type="doi">10.1038/s41592-019-0686-2</pub-id>
</citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Waskom</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>seaborn: statistical data visualization</article-title>. <source>J. Open Source Softw.</source> <volume>6</volume> (<issue>60</issue>), <fpage>3021</fpage>. <pub-id pub-id-type="doi">10.21105/joss.03021</pub-id>
</citation>
</ref>
<ref id="B22">
<citation citation-type="web">
<person-group person-group-type="author">
<name>
<surname>Weisstein</surname>
<given-names>E. W.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Statistical correlation. <italic>From MathWorld</italic>&#x2014;A Wolfram Web Resource</article-title>. <comment>Available at: <ext-link ext-link-type="uri" xlink:href="https://mathworld.wolfram.com/StatisticalCorrelation.html">https://mathworld.wolfram.com/StatisticalCorrelation.html</ext-link>.</comment>
</citation>
</ref>
</ref-list>
</back>
</article>