<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Archiving and Interchange DTD v2.3 20070202//EN" "archivearticle.dtd">
<article article-type="methods-article" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Digit. Health</journal-id>
<journal-title>Frontiers in Digital Health</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Digit. Health</abbrev-journal-title>
<issn pub-type="epub">2673-253X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fdgth.2025.1610228</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Digital Health</subject>
<subj-group>
<subject>Methods</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Secure latent Dirichlet allocation</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes"><name><surname>Veugen</surname><given-names>Thijs</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="corresp" rid="cor1">&#x002A;</xref>
<xref ref-type="author-notes" rid="fn001"><sup>&#x2020;</sup></xref><uri xlink:href="https://loop.frontiersin.org/people/2784832/overview"/><role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/><role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/></contrib>
<contrib contrib-type="author"><name><surname>Dunning</surname><given-names>Vincent</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="author-notes" rid="fn001"><sup>&#x2020;</sup></xref><uri xlink:href="https://loop.frontiersin.org/people/3075848/overview" /><role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/><role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/></contrib>
<contrib contrib-type="author"><name><surname>Marcus</surname><given-names>Michiel</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="author-notes" rid="fn001"><sup>&#x2020;</sup></xref><uri xlink:href="https://loop.frontiersin.org/people/3135722/overview"/><role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/><role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/></contrib>
<contrib contrib-type="author"><name><surname>Kamphorst</surname><given-names>Bart</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="author-notes" rid="fn001"><sup>&#x2020;</sup></xref><uri xlink:href="https://loop.frontiersin.org/people/3135355/overview"/><role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/><role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/></contrib>
</contrib-group>
<aff id="aff1"><label><sup>1</sup></label><institution>Unit ICT, Strategy and Policy, TNO</institution>, <addr-line>The Hague</addr-line>, <country>Netherlands</country></aff>
<aff id="aff2"><label><sup>2</sup></label><institution>Department of Semantics, Cybersecurity and Services, University of Twente</institution>, <addr-line>Enschede</addr-line>, <country>Netherlands</country></aff>
<author-notes>
<fn fn-type="edited-by"><p><bold>Edited by:</bold> Stefano Dalmiani, Monasterio Foundation, Italy</p></fn>
<fn fn-type="edited-by"><p><bold>Reviewed by:</bold> Maria Pisani, Toscana Gabriele Monasterio Foundation, Italy; Damodaran, Vel Tech Rangarajan Dr. Sagunthala R&#x0026;D Institute of Science and Technology, India</p></fn>
<corresp id="cor1"><label>&#x002A;</label><bold>Correspondence:</bold> Thijs Veugen <email>thijs.veugen@tno.nl</email></corresp>
<fn fn-type="other" id="fn001"><label><sup>&#x2020;</sup></label><p>ORCID Thijs Veugen <ext-link ext-link-type="uri" xlink:href="http://orcid.org/0000-0002-9898-4698">orcid.org/0000-0002-9898-4698</ext-link> Vincent Dunning <ext-link ext-link-type="uri" xlink:href="http://orcid.org/0009-0004-1148-3017">orcid.org/0009-0004-1148-3017</ext-link> Michiel Marcus <ext-link ext-link-type="uri" xlink:href="http://orcid.org/0000-0003-0936-2289">orcid.org/0000-0003-0936-2289</ext-link> Bart Kamphorst <ext-link ext-link-type="uri" xlink:href="http://orcid.org/0000-0002-9490-5841">orcid.org/0000-0002-9490-5841</ext-link></p></fn>
</author-notes>
<pub-date pub-type="epub"><day>24</day><month>07</month><year>2025</year></pub-date>
<pub-date pub-type="collection"><year>2025</year></pub-date>
<volume>7</volume><elocation-id>1610228</elocation-id>
<history>
<date date-type="received"><day>11</day><month>04</month><year>2025</year></date>
<date date-type="accepted"><day>03</day><month>07</month><year>2025</year></date>
</history>
<permissions>
<copyright-statement>&#x00A9; 2025 Veugen, Dunning, Marcus and Kamphorst.</copyright-statement>
<copyright-year>2025</copyright-year><copyright-holder>Veugen, Dunning, Marcus and Kamphorst</copyright-holder><license license-type="open-access" xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p></license>
</permissions>
<abstract>
<p>Topic modelling refers to a popular set of techniques used to discover hidden topics that occur in a collection of documents. These topics can, for example, be used to categorize documents or label text for further processing. One popular topic modelling technique is Latent Dirichlet Allocation (LDA). In topic modelling scenarios, the documents are often assumed to be in one, centralized dataset. However, sometimes documents are held by different parties, and contain privacy- or commercially-sensitive information that cannot be shared. We present a novel, decentralized approach to train an LDA model securely without having to share any information about the content of the documents. We preserve the privacy of the individual parties using a combination of privacy-enhancing technologies. Next to the secure LDA protocol, we introduce two new cryptographic building blocks that are of independent interest: a way to efficiently convert between secret-shared and homomorphically encrypted data, and a method to efficiently draw a random number from a finite set with secret weights. We show that our decentralized, privacy-preserving LDA solution achieves an accuracy similar to that of an (insecure) centralized approach. With 1024-bit Paillier keys, a topic model with 5 topics and 3000 words can be trained in around 16&#x2009;h. Furthermore, we show that the solution scales linearly in the total number of words and the number of topics.</p>
</abstract>
<kwd-group>
<kwd>latent Dirichlet allocation</kwd>
<kwd>secure multi-party computation</kwd>
<kwd>Shamir secret sharing</kwd>
<kwd>Paillier crypto system</kwd>
<kwd>topic modelling</kwd>
</kwd-group><counts>
<fig-count count="4"/>
<table-count count="2"/><equation-count count="347"/><ref-count count="20"/><page-count count="12"/><word-count count="0"/></counts><custom-meta-wrap><custom-meta><meta-name>section-at-acceptance</meta-name><meta-value>Connected Health</meta-value></custom-meta></custom-meta-wrap>
</article-meta>
</front>
<body><sec id="s1" sec-type="intro"><label>1</label><title>Introduction</title>
<p>Topic modelling is a set of techniques that can discover abstract topics over a large set of textual documents. This is useful when there is a lot of textual data that needs to be analyzed and manual analysis is infeasible. Topic modelling can help to categorize and filter the data or to find related documents. Research until now has focused on centralized datasets, where the training data is available in one database. It is possible that certain private databases contain valuable textual data for a topic model that data holders are unwilling to share. There are two main reasons why data can be too sensitive to share: it is either commercially sensitive, or it contains personal information that is privacy sensitive.</p>
<p>An example of the latter motivation occurs in the medical domain, where information on patients is generated by doctors in various different hospitals or other medical institutions. Combining the textual data from these different entities is valuable for two reasons: firstly, they often contain different types of information, which makes the input to the topic model more diverse and the resulting topic model richer. Secondly, topic models generally need a large amount of input, so combining inputs to train one larger topic model would result in a better topic model. The topic model can for example be used to categorize the textual data to enrich the structured patient data with new information and predict inpatient violence (<xref ref-type="bibr" rid="B1">1</xref>), detect virus outbreaks at an early stage (<xref ref-type="bibr" rid="B2">2</xref>), or get more insight into symptoms of certain diseases.</p>
<p>Privacy-Enhancing Technologies (PETs) provide a solution that retains the advantages of big data analytics of textual data and ensures privacy (or protects other kinds of sensitivity) of the analyzed documents. In the context of the GDPR, PETs contribute to data minimization&#x2014;and therefore to proportionality&#x2014;and to data control. In our work, we specifically focus on a PET called <italic>Secure Multi-Party Computation</italic> (MPC). In a nutshell, MPC allows computations to be performed on the data of multiple parties while keeping the inputs secret and only revealing the outcome.</p>
<p>Our work proposes an algorithm that enables topic modelling on distributed textual documents in a privacy-preserving way, using two MPC techniques called homomorphic encryption and secret sharing. This opens the door to new business cases that require topic models over textual personal data distributed over different entities, such as the ones previously mentioned.</p>
<sec id="s1a"><label>1.1</label><title>Latent Dirichlet allocation</title>
<p>We focus on an existing algorithm called Latent Dirichlet Allocation to train a topic model for a set of documents. Intuitively, a topic model categorizes documents into different topics, where each document is assigned a combination of one or more topics. Furthermore, this gives insights into what words are often associated with these topics. Latent Dirichlet Allocation (LDA) is one of many topic modelling techniques. Among the most common topic modelling techniques, LDA is the most consistent performer over several comparison metrics, making it the most suitable algorithm for most applications (<xref ref-type="bibr" rid="B3">3</xref>). In particular, we consider LDA and use a technique called Gibbs sampling to train the model. Gibbs sampling is an iterative method to estimate latent distributions of a dataset based on observations from that dataset.</p>
<p>This means that we iterate over all the words in all the documents and, for each word, observe which topic it most likely belongs to. With this topic, we then update the parameters of the topic model. This is done until the parameters converge to a stable representation of the topic model. There are also other methods to train latent parameters, but Gibbs sampling was chosen because it often yields relatively simple algorithms for approximate inference in high-dimensional models such as LDA (<xref ref-type="bibr" rid="B4">4</xref>, Figure 8).</p>
</sec>
<sec id="s1b"><label>1.2</label><title>Related work</title>
<p>Some research has already been done on privacy-preserving Latent Dirichlet Allocation. We can distinguish two lines of research: work that enables privacy-preserving LDA on centralized textual data, such that the final model does not leak information about the inputs (<xref ref-type="bibr" rid="B5">5</xref>), and work that enables LDA on distributed textual data, such that the information sent throughout the protocol does not leak information about the inputs (<xref ref-type="bibr" rid="B6">6</xref>&#x2013;<xref ref-type="bibr" rid="B8">8</xref>).</p>
<p>Our work falls into the latter category and therefore distinguishes itself from the work in the former category by enabling LDA on decentralized data instead of centralized data. We present several new secure protocols to perform each step of the LDA algorithm in a privacy-preserving way. We now provide more explanation of the other works in the latter category. A comparison between our work and related work can be found in <xref ref-type="table" rid="T1">Table&#x00A0;1</xref>.</p>
<table-wrap id="T1" position="float"><label>Table 1</label>
<caption><p>Comparison with related work.</p></caption>
<table frame="hsides" rules="groups">
<colgroup>
<col align="left"/>
<col align="left"/>
<col align="left"/>
<col align="left"/>
</colgroup>
<thead>
<tr>
<th valign="top" align="left">Paper</th>
<th valign="top" align="center">Accuracy</th>
<th valign="top" align="center">Speed</th>
<th valign="top" align="center">Security</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">YN10 (<xref ref-type="bibr" rid="B8">8</xref>)</td>
<td valign="top" align="left">Medium</td>
<td valign="top" align="left">Low</td>
<td valign="top" align="left">Medium: leaks probability distributions of topics</td>
</tr>
<tr>
<td valign="top" align="left">WTS20 (<xref ref-type="bibr" rid="B7">7</xref>)</td>
<td valign="top" align="left">Low</td>
<td valign="top" align="left">Medium</td>
<td valign="top" align="left">Medium: leaks statistics about all information</td>
</tr>
<tr>
<td valign="top" align="left">CD16 (<xref ref-type="bibr" rid="B6">6</xref>)</td>
<td valign="top" align="left">High</td>
<td valign="top" align="left">High</td>
<td valign="top" align="left">Low: leaks the complete document-topic matrix</td>
</tr>
<tr>
<td valign="top" align="left">Our work</td>
<td valign="top" align="left">High</td>
<td valign="top" align="left">Low</td>
<td valign="top" align="left">High: leaks just the total word count</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>The first work on privacy-preserving LDA on distributed data was published in 2010 by Yang and Nakagawa (<xref ref-type="bibr" rid="B8">8</xref>). Similar to us, they use homomorphic encryption. They use a custom protocol to draw the topics, which reveals the distributions to all parties. Additionally, they use a slightly altered version of the LDA algorithm, as do we. Whereas they argue the validity of their alteration with a notion of convergence based on the number of changes the algorithm makes, we use a more robust analysis using the perplexity score, showing that our alteration retains the quality and convergence rate of regular LDA.</p>
<p>Wang, Tong and Shi (<xref ref-type="bibr" rid="B7">7</xref>) propose a privacy-preserving LDA solution using federated learning and differential privacy. Their solution makes it possible to do local sampling, as the intermediate values are perturbed using differential privacy techniques. As their experiments show, this comes at a quality cost, as the perplexity score is higher for their solution than for regular LDA. Instead, we use homomorphic encryption to keep all information hidden, including intermediate values.</p>
<p>Colin and Dupuy (<xref ref-type="bibr" rid="B6">6</xref>) propose a solution to decentralized LDA with varying network topologies. They claim that their solution attains privacy of the textual documents, but no privacy arguments are given. In each iteration, two nodes, each holding a number of documents, exchange (and average) their local statistics. This is similar to sharing the matrix <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM1"><mml:msubsup><mml:mi>n</mml:mi><mml:mi>m</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>k</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup></mml:math></inline-formula>, which we avoid in our solution for privacy reasons.</p>
</sec>
<sec id="s1c"><label>1.3</label><title>Our contributions</title>
<p>We present a novel solution for decentralized topic modelling in a privacy-preserving manner using latent Dirichlet allocation. This is the first solution that does not leak anything about the content of the documents while at the same time maintaining the accuracy of non-private versions of LDA. This way, we bridge the gap between accuracy and security in distributed LDA training by presenting a solution that is both highly accurate and secure. Furthermore, we present two generic, cryptographic building blocks of independent interest:
<list list-type="simple">
<list-item><label>&#x2013;</label>
<p>Securely drawing a random number from a finite set without revealing the drawing probabilities, as described in <xref ref-type="sec" rid="s3d">Sections 3.4</xref>, <xref ref-type="sec" rid="s3e">3.5</xref>.</p></list-item>
<list-item><label>&#x2013;</label>
<p>A generic solution to efficiently convert (multiple) additively homomorphic encrypted values to secret sharings, as described in <xref ref-type="sec" rid="s3f">Sections 3.6</xref>, <xref ref-type="sec" rid="s4b">4.2</xref>.</p></list-item>
</list></p>
</sec>
<sec id="s1d"><label>1.4</label><title>Problem setting</title>
<p>In this work, we consider the scenario where the documents are not stored in a single database, but are distributed among multiple parties that want to train a joint topic model, but do not wish to simply share these documents with each other. Concretely, our goal is to mimic the existing LDA algorithm in a privacy-preserving manner while maintaining the <italic>same</italic> accuracy as the non-private version of the algorithm.</p>
<p>Suppose we have <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM2"><mml:mi>M</mml:mi></mml:math></inline-formula> documents, document <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM3"><mml:mi>m</mml:mi></mml:math></inline-formula>, <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM4"><mml:mn>1</mml:mn><mml:mo>&#x2264;</mml:mo><mml:mi>m</mml:mi><mml:mo>&#x2264;</mml:mo><mml:mi>M</mml:mi></mml:math></inline-formula>, containing <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM5"><mml:msub><mml:mi>N</mml:mi><mml:mi>m</mml:mi></mml:msub></mml:math></inline-formula> words. We consider the setting where we have multiple parties, each having one or more (sensitive) documents. Let <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM6"><mml:mi>K</mml:mi></mml:math></inline-formula> be the number of topics, and <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM7"><mml:mi>V</mml:mi></mml:math></inline-formula> the number of terms<xref ref-type="fn" rid="FN0001"><sup>1</sup></xref> in our vocabulary. 
Let <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM8"><mml:mi>&#x03B1;</mml:mi><mml:mo>=</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>&#x03B1;</mml:mi><mml:mn>1</mml:mn></mml:msub><mml:mo>,</mml:mo><mml:mspace width="thickmathspace" /><mml:mo>&#x2026;</mml:mo><mml:mo>,</mml:mo><mml:mspace width="thickmathspace" /><mml:msub><mml:mi>&#x03B1;</mml:mi><mml:mi>K</mml:mi></mml:msub><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula> be the Dirichlet hyperparameters for the topics in the topics-document distribution, and <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM9"><mml:mi>&#x03B2;</mml:mi><mml:mo>=</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>&#x03B2;</mml:mi><mml:mn>1</mml:mn></mml:msub><mml:mo>,</mml:mo><mml:mspace width="thickmathspace" /><mml:mo>&#x2026;</mml:mo><mml:mo>,</mml:mo><mml:mspace width="thickmathspace" /><mml:msub><mml:mi>&#x03B2;</mml:mi><mml:mi>V</mml:mi></mml:msub><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula> the Dirichlet hyperparameters for the terms in the terms-topic distribution. All these parameters are public.</p>
<p>During the distributed algorithm, we need to manage the secret matrix elements <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM10"><mml:msubsup><mml:mi>n</mml:mi><mml:mi>m</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>k</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup></mml:math></inline-formula>, representing the number of words in document <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM11"><mml:mi>m</mml:mi></mml:math></inline-formula> that have topic <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM12"><mml:mi>k</mml:mi></mml:math></inline-formula>, and <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM13"><mml:msubsup><mml:mi>n</mml:mi><mml:mi>k</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>t</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup></mml:math></inline-formula>, representing the number of words with term <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM14"><mml:mi>t</mml:mi></mml:math></inline-formula> that have topic <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM15"><mml:mi>k</mml:mi></mml:math></inline-formula>. 
Note that <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM16"><mml:mo fence="false" stretchy="false">{</mml:mo><mml:msubsup><mml:mi>n</mml:mi><mml:mi>m</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>k</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:msub><mml:mo fence="false" stretchy="false">}</mml:mo><mml:mrow><mml:mi>m</mml:mi><mml:mo>&#x2208;</mml:mo><mml:mo fence="false" stretchy="false">{</mml:mo><mml:mn>1</mml:mn><mml:mo>&#x2026;</mml:mo><mml:mi>M</mml:mi><mml:mo fence="false" stretchy="false">}</mml:mo><mml:mo>,</mml:mo><mml:mi>k</mml:mi><mml:mo>&#x2208;</mml:mo><mml:mo fence="false" stretchy="false">{</mml:mo><mml:mn>1</mml:mn><mml:mo>&#x2026;</mml:mo><mml:mi>K</mml:mi><mml:mo fence="false" stretchy="false">}</mml:mo></mml:mrow></mml:msub></mml:math></inline-formula> is a matrix, which will be referred to as the <italic>document-topic</italic> matrix. Furthermore, <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM17"><mml:mo fence="false" stretchy="false">{</mml:mo><mml:msubsup><mml:mi>n</mml:mi><mml:mi>k</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>t</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:msub><mml:mo fence="false" stretchy="false">}</mml:mo><mml:mrow><mml:mi>k</mml:mi><mml:mo>&#x2208;</mml:mo><mml:mo fence="false" stretchy="false">{</mml:mo><mml:mn>1</mml:mn><mml:mo>&#x2026;</mml:mo><mml:mi>K</mml:mi><mml:mo fence="false" stretchy="false">}</mml:mo><mml:mo>,</mml:mo><mml:mi>t</mml:mi><mml:mo>&#x2208;</mml:mo><mml:mo fence="false" stretchy="false">{</mml:mo><mml:mn>1</mml:mn><mml:mo>&#x2026;</mml:mo><mml:mi>V</mml:mi><mml:mo fence="false" stretchy="false">}</mml:mo></mml:mrow></mml:msub></mml:math></inline-formula> will be referred to as the <italic>topic-term</italic> matrix. 
The document-topic matrix can be split into <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM18"><mml:mi>M</mml:mi></mml:math></inline-formula> vectors, such that each party can manage and store only the vectors corresponding to its own documents. For the second matrix we need a different solution to avoid sharing sensitive data, see <xref ref-type="sec" rid="app3c">Section 3</xref>.</p>
<p>The purpose of the algorithm is to train the latent variable <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM19"><mml:msub><mml:mi>z</mml:mi><mml:mrow><mml:mi>m</mml:mi><mml:mo>,</mml:mo><mml:mi>n</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula>, denoting the topic of the <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM20"><mml:msup><mml:mi>n</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mi>h</mml:mi></mml:mrow></mml:msup></mml:math></inline-formula> word of document <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM21"><mml:mi>m</mml:mi></mml:math></inline-formula>. In each iteration, for each document, and for each word within that document, a new topic is sampled for that word from a dynamic multinomial distribution. Given the word with index <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM22"><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:mi>m</mml:mi><mml:mo>,</mml:mo><mml:mspace width="thickmathspace" /><mml:mi>n</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula> and term <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM23"><mml:mi>t</mml:mi></mml:math></inline-formula>, this distribution is proportional to:<disp-formula id="disp-formula1"><label>(1)</label><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="DM1"><mml:mo movablelimits="true" form="prefix">Pr</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>z</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mi>k</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>&#x221D;</mml:mo><mml:mrow><mml:mfrac><mml:mrow><mml:msubsup><mml:mi>n</mml:mi><mml:mrow><mml:mi>k</mml:mi><mml:mo>,</mml:mo><mml:mi mathvariant="normal">&#x00AC;</mml:mi><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>t</mml:mi><mml:mo 
stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo>+</mml:mo><mml:msub><mml:mi>&#x03B2;</mml:mi><mml:mi>t</mml:mi></mml:msub></mml:mrow><mml:mrow><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>&#x03C4;</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>V</mml:mi></mml:munderover><mml:msubsup><mml:mi>n</mml:mi><mml:mrow><mml:mi>k</mml:mi><mml:mo>,</mml:mo><mml:mi mathvariant="normal">&#x00AC;</mml:mi><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>&#x03C4;</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo>+</mml:mo><mml:msub><mml:mi>&#x03B2;</mml:mi><mml:mi>&#x03C4;</mml:mi></mml:msub></mml:mrow></mml:mfrac></mml:mrow><mml:mo>&#x22C5;</mml:mo><mml:mrow><mml:mfrac><mml:mrow><mml:msubsup><mml:mi>n</mml:mi><mml:mrow><mml:mi>m</mml:mi><mml:mo>,</mml:mo><mml:mi mathvariant="normal">&#x00AC;</mml:mi><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>k</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo>+</mml:mo><mml:msub><mml:mi>&#x03B1;</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:mrow><mml:mrow><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>&#x03BA;</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>K</mml:mi></mml:munderover><mml:msubsup><mml:mi>n</mml:mi><mml:mrow><mml:mi>m</mml:mi><mml:mo>,</mml:mo><mml:mi mathvariant="normal">&#x00AC;</mml:mi><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>&#x03BA;</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo>+</mml:mo><mml:msub><mml:mi>&#x03B1;</mml:mi><mml:mi>&#x03BA;</mml:mi></mml:msub></mml:mrow></mml:mfrac></mml:mrow><mml:mo>,</mml:mo><mml:mspace width="thickmathspace" /></mml:math></disp-formula>where <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM24"><mml:msubsup><mml:mi>n</mml:mi><mml:mrow><mml:mi>k</mml:mi><mml:mo>,</mml:mo><mml:mi 
mathvariant="normal">&#x00AC;</mml:mi><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>t</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup></mml:math></inline-formula> indicates the count <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM25"><mml:msubsup><mml:mi>n</mml:mi><mml:mrow><mml:mi>k</mml:mi></mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>t</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup></mml:math></inline-formula>, excluding the current word with index <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM26"><mml:mi>i</mml:mi></mml:math></inline-formula>, and similarly <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM27"><mml:msubsup><mml:mi>n</mml:mi><mml:mrow><mml:mi>m</mml:mi><mml:mo>,</mml:mo><mml:mi mathvariant="normal">&#x00AC;</mml:mi><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>k</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup></mml:math></inline-formula> (<xref ref-type="bibr" rid="B4">4</xref>). The first ratio can be roughly interpreted as the empirical probability that a word (not the current word) with topic <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM28"><mml:mi>k</mml:mi></mml:math></inline-formula> has term <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM29"><mml:mi>t</mml:mi></mml:math></inline-formula>. The second ratio can be roughly interpreted as the empirical weight of topic <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM30"><mml:mi>k</mml:mi></mml:math></inline-formula> in document <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM31"><mml:mi>m</mml:mi></mml:math></inline-formula>. 
The hyperparameters <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM32"><mml:mi>&#x03B1;</mml:mi></mml:math></inline-formula> and <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM33"><mml:mi>&#x03B2;</mml:mi></mml:math></inline-formula> are often called pseudo-counts (from prior belief) and are added to the observed counts in both ratios.</p>
</sec>
</sec>
<sec id="s2"><label>2</label><title>Preliminaries</title>
<p>Our work leverages cryptographic techniques to ensure secrecy of the documents&#x2019; contents, while still enabling us to learn from them. There are different technologies that can be applied to enable privacy-preserving computations. In this work we use additively homomorphic encryption (AHE) (<xref ref-type="bibr" rid="B9">9</xref>, <xref ref-type="bibr" rid="B10">10</xref>) and secret-sharing (<xref ref-type="bibr" rid="B11">11</xref>, <xref ref-type="bibr" rid="B12">12</xref>). In their basic form, both techniques represent the messages they encrypt as integers, which is also the convention we follow in this work. The key difference is that operations on AHE ciphertexts can be computed by a single party knowing the required information, while with secret sharing all operations need to be performed by all the parties holding the secrets. Parties can perform the linear operations on the shares individually, but for more complex operations such as multiplication and division, interaction is required between the parties. Nevertheless, for non-linear operations, secret sharing often yields more efficient solutions than AHE.</p>
<sec id="s2a"><label>2.1</label><title>Additively homomorphic encryption</title>
<p>We denote the encryption of a message or plaintext <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM34"><mml:mi>m</mml:mi></mml:math></inline-formula> by <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM35"><mml:mo stretchy="false">[</mml:mo><mml:mi>m</mml:mi><mml:mo stretchy="false">]</mml:mo></mml:math></inline-formula>. We use the Paillier encryption scheme (<xref ref-type="bibr" rid="B9">9</xref>), which gives us the operations <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM36"><mml:mo>&#x2295;</mml:mo></mml:math></inline-formula> and <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM37"><mml:mo>&#x2297;</mml:mo></mml:math></inline-formula> such that:<disp-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="UDM1"><mml:mo stretchy="false">[</mml:mo><mml:mi>x</mml:mi><mml:mo stretchy="false">]</mml:mo><mml:mo>&#x2295;</mml:mo><mml:mo stretchy="false">[</mml:mo><mml:mi>y</mml:mi><mml:mo stretchy="false">]</mml:mo><mml:mo>=</mml:mo><mml:mo stretchy="false">[</mml:mo><mml:mi>x</mml:mi><mml:mo>+</mml:mo><mml:mi>y</mml:mi><mml:mo stretchy="false">]</mml:mo><mml:mtext>&#xA0;and&#xA0;</mml:mtext><mml:mi>c</mml:mi><mml:mo>&#x2297;</mml:mo><mml:mo stretchy="false">[</mml:mo><mml:mi>x</mml:mi><mml:mo stretchy="false">]</mml:mo><mml:mo>=</mml:mo><mml:mo stretchy="false">[</mml:mo><mml:mi>c</mml:mi><mml:mo>&#x22C5;</mml:mo><mml:mi>x</mml:mi><mml:mo stretchy="false">]</mml:mo><mml:mo>,</mml:mo><mml:mspace width="thickmathspace" /></mml:math></disp-formula>for any public constant <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM38"><mml:mi>c</mml:mi></mml:math></inline-formula>, and secret messages <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM39"><mml:mi>x</mml:mi></mml:math></inline-formula> and <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" 
id="IM40"><mml:mi>y</mml:mi></mml:math></inline-formula>. That is, given encryptions <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM41"><mml:mo stretchy="false">[</mml:mo><mml:mi>x</mml:mi><mml:mo stretchy="false">]</mml:mo></mml:math></inline-formula> and <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM42"><mml:mo stretchy="false">[</mml:mo><mml:mi>y</mml:mi><mml:mo stretchy="false">]</mml:mo></mml:math></inline-formula> of <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM43"><mml:mi>x</mml:mi></mml:math></inline-formula> and <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM44"><mml:mi>y</mml:mi></mml:math></inline-formula>, we can obtain an encryption <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM45"><mml:mo stretchy="false">[</mml:mo><mml:mi>x</mml:mi><mml:mo>+</mml:mo><mml:mi>y</mml:mi><mml:mo stretchy="false">]</mml:mo></mml:math></inline-formula> of the sum <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM46"><mml:mi>x</mml:mi><mml:mo>+</mml:mo><mml:mi>y</mml:mi></mml:math></inline-formula> without decrypting the ciphertexts. The resulting ciphertext can be decrypted to yield the result, or be input for further encrypted operations.</p>
</sec>
<sec id="s2b"><label>2.2</label><title>Secret sharing</title>
<p>Secret Sharing has similar properties but works in a fundamentally different, key-less way. Suppose we have a secret <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM47"><mml:mi>s</mml:mi></mml:math></inline-formula> and wish to use this in a computation with a set of parties <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM48"><mml:msub><mml:mi>P</mml:mi><mml:mn>1</mml:mn></mml:msub><mml:mo>,</mml:mo><mml:mspace width="thickmathspace" /><mml:mo>&#x2026;</mml:mo><mml:mo>,</mml:mo><mml:mspace width="thickmathspace" /><mml:msub><mml:mi>P</mml:mi><mml:mi>n</mml:mi></mml:msub></mml:math></inline-formula>. The party holding the secret <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM49"><mml:mi>s</mml:mi></mml:math></inline-formula> can split this secret up into a number of shares <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM50"><mml:msub><mml:mi>s</mml:mi><mml:mn>1</mml:mn></mml:msub><mml:mo>,</mml:mo><mml:mspace width="thickmathspace" /><mml:mo>&#x2026;</mml:mo><mml:mo>,</mml:mo><mml:mspace width="thickmathspace" /><mml:msub><mml:mi>s</mml:mi><mml:mi>n</mml:mi></mml:msub></mml:math></inline-formula> and send each <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM51"><mml:msub><mml:mi>s</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:math></inline-formula> to party <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM52"><mml:msub><mml:mi>P</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:math></inline-formula>. 
We denote the sharing of <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM53"><mml:mi>s</mml:mi></mml:math></inline-formula> by <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM54"><mml:mo fence="false" stretchy="false">&#x27E8;</mml:mo><mml:mi>s</mml:mi><mml:mo fence="false" stretchy="false">&#x27E9;</mml:mo><mml:mo>=</mml:mo><mml:msub><mml:mi>s</mml:mi><mml:mn>1</mml:mn></mml:msub><mml:mo>,</mml:mo><mml:mspace width="thickmathspace" /><mml:mo>&#x2026;</mml:mo><mml:mo>,</mml:mo><mml:mspace width="thickmathspace" /><mml:msub><mml:mi>s</mml:mi><mml:mi>n</mml:mi></mml:msub></mml:math></inline-formula>.</p>
<p>Each party <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM55"><mml:msub><mml:mi>P</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:math></inline-formula> can then compute operations for a public constant <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM56"><mml:mi>c</mml:mi></mml:math></inline-formula> and secret sharings <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM57"><mml:mo fence="false" stretchy="false">&#x27E8;</mml:mo><mml:mi>x</mml:mi><mml:mo fence="false" stretchy="false">&#x27E9;</mml:mo><mml:mo>=</mml:mo><mml:msub><mml:mi>x</mml:mi><mml:mn>1</mml:mn></mml:msub><mml:mo>,</mml:mo><mml:mspace width="thickmathspace" /><mml:mo>&#x2026;</mml:mo><mml:mo>,</mml:mo><mml:mspace width="thickmathspace" /><mml:msub><mml:mi>x</mml:mi><mml:mi>n</mml:mi></mml:msub><mml:mtext>&#xA0;and&#xA0;</mml:mtext><mml:mo fence="false" stretchy="false">&#x27E8;</mml:mo><mml:mi>y</mml:mi><mml:mo fence="false" stretchy="false">&#x27E9;</mml:mo><mml:mo>=</mml:mo><mml:msub><mml:mi>y</mml:mi><mml:mn>1</mml:mn></mml:msub><mml:mo>,</mml:mo><mml:mspace width="thickmathspace" /><mml:mo>&#x2026;</mml:mo><mml:mo>,</mml:mo><mml:mspace width="thickmathspace" /><mml:msub><mml:mi>y</mml:mi><mml:mi>n</mml:mi></mml:msub></mml:math></inline-formula> for secrets <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM58"><mml:mi>x</mml:mi></mml:math></inline-formula> and <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM59"><mml:mi>y</mml:mi></mml:math></inline-formula> such that:<disp-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="UDM2"><mml:mo fence="false" stretchy="false">&#x27E8;</mml:mo><mml:mi>x</mml:mi><mml:mo fence="false" stretchy="false">&#x27E9;</mml:mo><mml:mo>&#x229E;</mml:mo><mml:mo fence="false" stretchy="false">&#x27E8;</mml:mo><mml:mi>y</mml:mi><mml:mo fence="false" 
stretchy="false">&#x27E9;</mml:mo><mml:mo>=</mml:mo><mml:mo fence="false" stretchy="false">&#x27E8;</mml:mo><mml:mi>x</mml:mi><mml:mo>+</mml:mo><mml:mi>y</mml:mi><mml:mo fence="false" stretchy="false">&#x27E9;</mml:mo><mml:mo>,</mml:mo><mml:mspace width="thickmathspace" /><mml:mi>c</mml:mi><mml:mo>&#x22C5;</mml:mo><mml:mo fence="false" stretchy="false">&#x27E8;</mml:mo><mml:mi>x</mml:mi><mml:mo fence="false" stretchy="false">&#x27E9;</mml:mo><mml:mo>=</mml:mo><mml:mo fence="false" stretchy="false">&#x27E8;</mml:mo><mml:mi>c</mml:mi><mml:mo>&#x22C5;</mml:mo><mml:mi>x</mml:mi><mml:mo fence="false" stretchy="false">&#x27E9;</mml:mo><mml:mspace width="thinmathspace"/><mml:mtext>and</mml:mtext><mml:mspace width="thinmathspace"/><mml:mo fence="false" stretchy="false">&#x27E8;</mml:mo><mml:mi>x</mml:mi><mml:mo fence="false" stretchy="false">&#x27E9;</mml:mo><mml:mo>&#x22A0;</mml:mo><mml:mo fence="false" stretchy="false">&#x27E8;</mml:mo><mml:mi>y</mml:mi><mml:mo fence="false" stretchy="false">&#x27E9;</mml:mo><mml:mo>=</mml:mo><mml:mo fence="false" stretchy="false">&#x27E8;</mml:mo><mml:mi>x</mml:mi><mml:mo>&#x22C5;</mml:mo><mml:mi>y</mml:mi><mml:mo fence="false" stretchy="false">&#x27E9;</mml:mo><mml:mo>.</mml:mo></mml:math></disp-formula>In this work, we use the Shamir secret sharing scheme (<xref ref-type="bibr" rid="B12">12</xref>), which is a <italic>linear</italic> secret sharing scheme. This means we can compute the linear additions and multiplications with a public constant without interaction between the parties. Multiplication of two secrets is additionally possible with communication between the parties.</p>
</sec>
</sec>
<sec id="s3"><label>3</label><title>Secure distributed LDA</title>
<p>In this section, we present the building blocks and algorithms required for securely performing the distributed LDA algorithm. To this end, we start in <xref ref-type="sec" rid="s3a">Section 3.1</xref> with the required security assumptions. After that, in <xref ref-type="sec" rid="s3b">Section 3.2</xref> we explain our solution for securely keeping track of the document-topic and topic-term matrices. Next we describe the main algorithm for securely performing Gibbs sampling in <xref ref-type="sec" rid="s3c">Section 3.3</xref>. Finally, in <xref ref-type="sec" rid="s3d">Sections 3.4</xref>&#x2013;<xref ref-type="sec" rid="s3f">3.6</xref> we respectively introduce separate building blocks for securely drawing a new topic from secret weights, computing encrypted integer weights and converting Paillier ciphertexts into Shamir secret sharings.</p>
<sec id="s3a"><label>3.1</label><title>Security model</title>
<p>For both techniques, we assume the semi-honest setting, where each entity tries to learn as much information about the other entities&#x2019; data as possible, but does follow the steps of the protocol. For most use cases, this security model will suffice, as it is likely that honest participation will be agreed upon within a contractual agreement between the entities. Furthermore, since LDA already has some inherent privacy properties (<xref ref-type="bibr" rid="B5">5</xref>), it is unlikely that during execution a dishonest entity can retrieve a significant amount of information about other entities&#x2019; documents. However, we acknowledge this security model might not be appropriate for large-scale deployments with many potentially dishonest entities.</p>
</sec>
<sec id="s3b"><label>3.2</label><title>Tracking the matrices</title>
<p>As highlighted in <xref ref-type="sec" rid="s1d">Section 1.4</xref>, LDA essentially manages and updates two matrices: a document-topic matrix and a topic-term matrix. The document-topic matrix keeps track of the topic distribution of each document and consists of elements <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM60"><mml:msubsup><mml:mi>n</mml:mi><mml:mi>m</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>k</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup></mml:math></inline-formula>, representing the portion of document <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM61"><mml:mi>m</mml:mi></mml:math></inline-formula> belonging to topic <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM62"><mml:mi>k</mml:mi></mml:math></inline-formula>. The topic-term matrix keeps track of the topic distribution of each term in the vocabulary and consists of elements <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM63"><mml:msubsup><mml:mi>n</mml:mi><mml:mi>k</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>t</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup></mml:math></inline-formula>, representing the portion of term <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM64"><mml:mi>t</mml:mi></mml:math></inline-formula> belonging to topic <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM65"><mml:mi>k</mml:mi></mml:math></inline-formula> over all documents.</p>
<p>However, these matrices are precisely the sensitive information that would completely leak the content of a party&#x2019;s documents if simply given away. Therefore, we need to find a secure way to store these matrices without (significantly) decreasing the accuracy of the algorithm.</p>
<p>A crucial observation is that during the LDA algorithm, the matrix elements <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM66"><mml:msubsup><mml:mi>n</mml:mi><mml:mi>m</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>k</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup></mml:math></inline-formula> of the document-topic matrix are only needed by the party actually holding document <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM67"><mml:mi>m</mml:mi></mml:math></inline-formula>. Therefore, it is not needed to maintain a complete, joint matrix of all the documents, but it suffices to let each party locally maintain a part of that matrix corresponding to only its own documents.</p>
<p>On the other hand, the topic-term matrix depends on the distribution over all the documents and should therefore be available to all the parties in an oblivious way. Maintaining this matrix comes down to adding to, and subtracting from, the elements in the matrix, which suggests the use of additively homomorphic encryption for this. To prevent individual parties from decrypting and learning the entries, we furthermore need <italic>threshold</italic> decryption (<xref ref-type="bibr" rid="B10">10</xref>). This ensures that a decryption can only be done if all the parties participate. Note that if we were to do this with secret sharing, each party would need to keep track of the entire matrix, which would introduce a lot of computational overhead.</p>
</sec>
<sec id="s3c"><label>3.3</label><title>Performing the algorithm</title>
<p>A formal description of our Secure LDA solution for securely computing the topic-term matrix <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM68"><mml:msubsup><mml:mi>n</mml:mi><mml:mi>k</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>t</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup></mml:math></inline-formula> and the document-topic matrix <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM69"><mml:msubsup><mml:mi>n</mml:mi><mml:mi>m</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>k</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup></mml:math></inline-formula> can be found in <xref ref-type="table" rid="A1">Algorithm 1</xref>. In <xref ref-type="fig" rid="F1">Figure&#x00A0;1</xref>, we present an intuitive overview of how our algorithm works. Roughly speaking, our Secure LDA solution consists of three phases: <italic>initialisation</italic> (blue), <italic>sampling</italic> (green) and <italic>updating</italic> (orange). Finally, the results are decrypted in a joint decryption phase (red).</p>
<fig id="F1" position="float"><label>Figure 1</label>
<caption><p>Intuitive sketch of our algorithm.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fdgth-07-1610228-g001.tif"><alt-text content-type="machine-generated">Flowchart showing a process divided into three main stages: &#x201C;Initialization,&#x201D; &#x201C;Repeat fixed number of iterations,&#x201D; and &#x201C;Output.&#x201D; Initialization involves sampling random topics, encrypting and initializing matrices. The iteration stage includes resetting local changes, sampling new topics, updating changes, and updating encryptions. Finally, decryption occurs to output the matrices.</alt-text>
</graphic>
</fig>
<p>In the initialisation phase, the goal is to initialise the two matrices with a random distribution that will be refined. To this end, all the parties sample random topics for each word in each document, and use these to fill in an initial (local) view on the document-topic matrix and the topic-term matrix. Next, the parties need to build a global view of the complete topic-term matrix. To achieve this, the parties encrypt all the elements in their local topic-term matrix and combine these by sending the encrypted elements to each other and aggregating them into a global matrix by adding the (encrypted) matrices of all the parties element-wise.</p>
<p>After the initialisation, for a fixed number of iterations, the parties perform a <italic>sampling</italic> and an <italic>updating</italic> phase. During the sampling phase, the parties use the (secret) matrices as they are at the start of the iteration, to compute, for each word in each document, a probability distribution over the topics. The secure sampling procedure ensures that the distributions remain hidden from the parties and is outlined in <xref ref-type="sec" rid="s3d">Sections 3.4</xref> and <xref ref-type="sec" rid="s3e">3.5</xref>. For each party, the secure sampling procedure yields a new topic for each word in each document. A party uses this information to update its local version of the encrypted topic-term matrix and its local document-topic matrix.</p>
<p>The distribution that is drawn from is proportional to <xref ref-type="disp-formula" rid="disp-formula1">Equation 1</xref>. Note that these distributions are in an encrypted form and the actual probabilities can thus not be seen by the parties. First, we compute the encrypted weights for all the topics using the procedure presented in <xref ref-type="sec" rid="s3e">Section 3.5</xref>. After that, we can perform a secure draw from the encrypted weights using our novel algorithm to draw from a secret probability distribution as presented in <xref ref-type="sec" rid="s3d">Section 3.4</xref>. This way, the parties obtain for each word in each document a newly sampled topic. During this sampling, the parties locally keep track of the matrix updates, which means that they decrease their local counters corresponding to the matrix elements of the old word topic by one, and increase the counters for the new topic by one.</p>
<p>The second part of each iteration then consists of each party updating its local document-topic matrix and the parties together updating the global topic-term matrix using the locally tracked changes. To this end, each party encrypts their local changes to the topic-term matrix and sends this to all the other parties. Then the parties can simply add these encrypted counters to their encrypted topic-term matrix to get the new, consistent, topic-term matrix. The document-topic matrix can be updated locally by each party without any communication.</p>
<p>We observe that the LDA algorithm requires linear computations, except for the computation of the probability <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM70"><mml:mo movablelimits="true" form="prefix">Pr</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>z</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mi>k</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula> and the secure draw that uses these probabilities in the sampling step. Therefore, we perform most of the operations for tracking the topic-term matrix using AHE, and introduce a novel mechanism to switch between AHE and secret sharing in <xref ref-type="sec" rid="s3f">Sections 3.6</xref>, <xref ref-type="sec" rid="s4b">4.2</xref> to obtain the best performance. Concretely, we use AHE for the linear operations and only switch to (Shamir) secret sharings for securely drawing the new topics.</p>
<p>Typically, convergence of an LDA algorithm is checked by monitoring the changes in the model parameters, or monitoring how well the model fits a separate set of documents. In the encrypted domain, this can be quite costly to check after each iteration. Therefore, we simply iterate a sufficiently large, fixed number of times.</p>
</sec>
<sec id="s3d"><label>3.4</label><title>Random draw with secret probabilities</title>
<p>An important building block of secure LDA is a method of drawing a new topic <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM71"><mml:mrow><mml:mover><mml:mi>k</mml:mi><mml:mo stretchy="false">&#x007E;</mml:mo></mml:mover></mml:mrow><mml:mo>&#x2208;</mml:mo><mml:mo fence="false" stretchy="false">{</mml:mo><mml:mn>1</mml:mn><mml:mo>,</mml:mo><mml:mspace width="thickmathspace" /><mml:mo>&#x2026;</mml:mo><mml:mo>,</mml:mo><mml:mspace width="thickmathspace" /><mml:mi>K</mml:mi><mml:mo fence="false" stretchy="false">}</mml:mo></mml:math></inline-formula>, given secret weights <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM72"><mml:msub><mml:mi>w</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mo>&#x2208;</mml:mo><mml:mrow><mml:mrow><mml:mi mathvariant="double-struck">N</mml:mi></mml:mrow></mml:mrow></mml:math></inline-formula>, such that<disp-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="UDM3"><mml:mo movablelimits="true" form="prefix">Pr</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mover><mml:mi>k</mml:mi><mml:mo stretchy="false">&#x007E;</mml:mo></mml:mover></mml:mrow><mml:mo>=</mml:mo><mml:mi>k</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mrow><mml:mfrac><mml:msub><mml:mi>w</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mrow><mml:munder><mml:mo>&#x2211;</mml:mo><mml:mi>i</mml:mi></mml:munder><mml:mrow><mml:msub><mml:mi>w</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:mrow></mml:mfrac></mml:mrow><mml:mo>,</mml:mo><mml:mspace width="thickmathspace" /><mml:mspace width="1em" /><mml:mn>1</mml:mn><mml:mo>&#x2264;</mml:mo><mml:mi>k</mml:mi><mml:mo>&#x2264;</mml:mo><mml:mi>K</mml:mi><mml:mo>.</mml:mo></mml:math></disp-formula>The new, randomly chosen topic will be revealed to party <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM73"><mml:mi>p</mml:mi></mml:math></inline-formula>, the holder of the current document. 
The intuition behind our solution is to compute <italic>cumulative</italic> weights <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM74"><mml:msub><mml:mi>S</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:math></inline-formula>, <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM75"><mml:mi>k</mml:mi><mml:mo>&#x2208;</mml:mo><mml:mo fence="false" stretchy="false">{</mml:mo><mml:mn>1</mml:mn><mml:mo>,</mml:mo><mml:mspace width="thickmathspace" /><mml:mo>&#x2026;</mml:mo><mml:mo>,</mml:mo><mml:mspace width="thickmathspace" /><mml:mi>K</mml:mi><mml:mo fence="false" stretchy="false">}</mml:mo></mml:math></inline-formula> such that <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM76"><mml:msub><mml:mi>S</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>k</mml:mi></mml:munderover><mml:msub><mml:mi>w</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:math></inline-formula>. For notational convenience, we define an &#x201C;extra&#x201D; weight <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM77"><mml:msub><mml:mi>S</mml:mi><mml:mn>0</mml:mn></mml:msub><mml:mo>=</mml:mo><mml:mn>0</mml:mn></mml:math></inline-formula>. 
Next, the parties sample a random value <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM78"><mml:mi>r</mml:mi></mml:math></inline-formula> in the range <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM79"><mml:mo fence="false" stretchy="false">{</mml:mo><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:mspace width="thickmathspace" /><mml:mo>&#x2026;</mml:mo><mml:mo>,</mml:mo><mml:mspace width="thickmathspace" /><mml:msub><mml:mi>S</mml:mi><mml:mi>K</mml:mi></mml:msub><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn><mml:mo fence="false" stretchy="false">}</mml:mo></mml:math></inline-formula> and find between which two cumulative weights this value <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM80"><mml:mi>r</mml:mi></mml:math></inline-formula> lies, which then corresponds to the sampled topic. Since <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM81"><mml:mi>r</mml:mi></mml:math></inline-formula> is sampled uniformly at random in the total range, the probability of <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM82"><mml:mi>r</mml:mi></mml:math></inline-formula> precisely ending up between cumulative weights <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM83"><mml:msub><mml:mi>S</mml:mi><mml:mrow><mml:mi>k</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub></mml:math></inline-formula> and <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM84"><mml:msub><mml:mi>S</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:math></inline-formula> is exactly <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM85"><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>S</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>S</mml:mi><mml:mrow><mml:mi>k</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo 
stretchy="false">)</mml:mo><mml:mrow><mml:mo>/</mml:mo></mml:mrow><mml:msub><mml:mi>S</mml:mi><mml:mi>K</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mi>w</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mrow><mml:mo>/</mml:mo></mml:mrow><mml:munder><mml:mo>&#x2211;</mml:mo><mml:mi>i</mml:mi></mml:munder><mml:msub><mml:mi>w</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:math></inline-formula>. This can be implemented with only <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM86"><mml:msub><mml:mi>log</mml:mi><mml:mn>2</mml:mn></mml:msub><mml:mo>&#x2061;</mml:mo><mml:mi>K</mml:mi></mml:math></inline-formula> secure comparisons between <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM87"><mml:mi>r</mml:mi></mml:math></inline-formula> and thresholds <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM88"><mml:mi>t</mml:mi><mml:mo>=</mml:mo><mml:msub><mml:mi>S</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:math></inline-formula> (with varying <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM89"><mml:mi>k</mml:mi></mml:math></inline-formula>) by traversing a binary tree from the root to the leaf representing the new topic. Note that our solution assumes that the weights are integers. In <xref ref-type="sec" rid="s3e">Section 3.5</xref>, we explain how we securely transform fractional weights into integer weights.</p>
<p>Formally, the parties do the following for every word <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM90"><mml:mi>w</mml:mi></mml:math></inline-formula> in each document:
<list list-type="simple">
<list-item><label>1.</label>
<p>The parties generate a secret random number <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM91"><mml:mo fence="false" stretchy="false">&#x27E8;</mml:mo><mml:mi>r</mml:mi><mml:mo fence="false" stretchy="false">&#x27E9;</mml:mo></mml:math></inline-formula>, <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM92"><mml:mi>r</mml:mi><mml:mo>&#x2208;</mml:mo><mml:mo fence="false" stretchy="false">{</mml:mo><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:mspace width="thickmathspace" /><mml:mo>&#x2026;</mml:mo><mml:mo>,</mml:mo><mml:mspace width="thickmathspace" /><mml:msub><mml:mi>S</mml:mi><mml:mi>K</mml:mi></mml:msub><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn><mml:mo fence="false" stretchy="false">}</mml:mo></mml:math></inline-formula>:
<list list-type="simple">
<list-item><label>(a)</label>
<p>They generate a secret random number <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM93"><mml:mo fence="false" stretchy="false">&#x27E8;</mml:mo><mml:mi>R</mml:mi><mml:mo fence="false" stretchy="false">&#x27E9;</mml:mo></mml:math></inline-formula>, <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM94"><mml:mi>R</mml:mi><mml:mo>&#x2208;</mml:mo><mml:mo fence="false" stretchy="false">{</mml:mo><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:mspace width="thickmathspace" /><mml:mo>&#x2026;</mml:mo><mml:mo>,</mml:mo><mml:mspace width="thickmathspace" /><mml:msup><mml:mn>2</mml:mn><mml:mrow><mml:mi>&#x2113;</mml:mi></mml:mrow></mml:msup><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn><mml:mo fence="false" stretchy="false">}</mml:mo></mml:math></inline-formula> for sufficiently large <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM95"><mml:mi>&#x2113;</mml:mi></mml:math></inline-formula>.</p></list-item>
<list-item><label>(b)</label>
<p>They securely multiply <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM96"><mml:mo fence="false" stretchy="false">&#x27E8;</mml:mo><mml:mi>R</mml:mi><mml:mo fence="false" stretchy="false">&#x27E9;</mml:mo></mml:math></inline-formula> with <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM97"><mml:mo fence="false" stretchy="false">&#x27E8;</mml:mo><mml:msub><mml:mi>S</mml:mi><mml:mi>K</mml:mi></mml:msub><mml:mo fence="false" stretchy="false">&#x27E9;</mml:mo></mml:math></inline-formula>, and compute the secure truncation <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM98"><mml:mo fence="false" stretchy="false">&#x27E8;</mml:mo><mml:mi>r</mml:mi><mml:mo fence="false" stretchy="false">&#x27E9;</mml:mo></mml:math></inline-formula>, where <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM99"><mml:mi>r</mml:mi><mml:mo>=</mml:mo><mml:mo fence="false" stretchy="false">&#x230A;</mml:mo><mml:mrow><mml:mfrac><mml:mrow><mml:mi>R</mml:mi><mml:mo>&#x22C5;</mml:mo><mml:msub><mml:mi>S</mml:mi><mml:mi>K</mml:mi></mml:msub></mml:mrow><mml:msup><mml:mn>2</mml:mn><mml:mi>&#x2113;</mml:mi></mml:msup></mml:mfrac></mml:mrow><mml:mo fence="false" stretchy="false">&#x230B;</mml:mo></mml:math></inline-formula>.</p></list-item>
</list></p></list-item>
<list-item><label>2.</label>
<p>They find <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM100"><mml:mo fence="false" stretchy="false">&#x27E8;</mml:mo><mml:mrow><mml:mover><mml:mi>k</mml:mi><mml:mo stretchy="false">&#x007E;</mml:mo></mml:mover></mml:mrow><mml:mo fence="false" stretchy="false">&#x27E9;</mml:mo></mml:math></inline-formula>, such that <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM101"><mml:msub><mml:mi>S</mml:mi><mml:mrow><mml:mrow><mml:mover><mml:mi>k</mml:mi><mml:mo stretchy="false">&#x007E;</mml:mo></mml:mover></mml:mrow><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>&#x2264;</mml:mo><mml:mi>r</mml:mi><mml:mo>&#x003C;</mml:mo><mml:msub><mml:mi>S</mml:mi><mml:mrow><mml:mrow><mml:mover><mml:mi>k</mml:mi><mml:mo stretchy="false">&#x007E;</mml:mo></mml:mover></mml:mrow></mml:mrow></mml:msub></mml:math></inline-formula>, by repeating <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM102"><mml:msub><mml:mi>log</mml:mi><mml:mn>2</mml:mn></mml:msub><mml:mo>&#x2061;</mml:mo><mml:mi>K</mml:mi></mml:math></inline-formula> times:
<list list-type="simple">
<list-item><label>(a)</label>
<p>Party <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM103"><mml:mi>p</mml:mi></mml:math></inline-formula> determines the next secret threshold <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM104"><mml:mo fence="false" stretchy="false">&#x27E8;</mml:mo><mml:mi>t</mml:mi><mml:mo fence="false" stretchy="false">&#x27E9;</mml:mo></mml:math></inline-formula> (see below).</p></list-item>
<list-item><label>(b)</label>
<p>The parties compute the secure comparison <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM105"><mml:mo fence="false" stretchy="false">&#x27E8;</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:mi>r</mml:mi><mml:mo>&#x003C;</mml:mo><mml:mi>t</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo fence="false" stretchy="false">&#x27E9;</mml:mo></mml:math></inline-formula>, and reveal the outcome to <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM106"><mml:mi>p</mml:mi></mml:math></inline-formula>.</p></list-item>
</list></p></list-item>
</list>To see that indeed a uniformly random variable <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM107"><mml:mi>r</mml:mi></mml:math></inline-formula> is generated, we count the number of <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM108"><mml:mi>R</mml:mi></mml:math></inline-formula> that lead to <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM109"><mml:mi>r</mml:mi><mml:mo>=</mml:mo><mml:mi>x</mml:mi></mml:math></inline-formula>, for <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM110"><mml:mn>0</mml:mn><mml:mo>&#x2264;</mml:mo><mml:mi>x</mml:mi><mml:mo>&#x003C;</mml:mo><mml:msub><mml:mi>S</mml:mi><mml:mi>K</mml:mi></mml:msub></mml:math></inline-formula>. We need <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM111"><mml:mi>x</mml:mi><mml:mo>&#x2264;</mml:mo><mml:mrow><mml:mfrac><mml:mrow><mml:mi>R</mml:mi><mml:mo>&#x22C5;</mml:mo><mml:msub><mml:mi>S</mml:mi><mml:mi>K</mml:mi></mml:msub></mml:mrow><mml:msup><mml:mn>2</mml:mn><mml:mi>&#x2113;</mml:mi></mml:msup></mml:mfrac></mml:mrow><mml:mo>&#x003C;</mml:mo><mml:mi>x</mml:mi><mml:mo>+</mml:mo><mml:mn>1</mml:mn></mml:math></inline-formula>, i.e. 
<inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM112"><mml:mrow><mml:mfrac><mml:mrow><mml:msup><mml:mn>2</mml:mn><mml:mi>&#x2113;</mml:mi></mml:msup><mml:mo>&#x22C5;</mml:mo><mml:mi>x</mml:mi></mml:mrow><mml:msub><mml:mi>S</mml:mi><mml:mi>K</mml:mi></mml:msub></mml:mfrac></mml:mrow><mml:mo>&#x2264;</mml:mo><mml:mi>R</mml:mi><mml:mo>&#x003C;</mml:mo><mml:mrow><mml:mfrac><mml:mrow><mml:msup><mml:mn>2</mml:mn><mml:mi>&#x2113;</mml:mi></mml:msup><mml:mo>&#x22C5;</mml:mo><mml:mi>x</mml:mi></mml:mrow><mml:msub><mml:mi>S</mml:mi><mml:mi>K</mml:mi></mml:msub></mml:mfrac></mml:mrow><mml:mo>+</mml:mo><mml:mrow><mml:mfrac><mml:msup><mml:mn>2</mml:mn><mml:mi>&#x2113;</mml:mi></mml:msup><mml:msub><mml:mi>S</mml:mi><mml:mi>K</mml:mi></mml:msub></mml:mfrac></mml:mrow></mml:math></inline-formula>. The number of <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM113"><mml:mi>R</mml:mi></mml:math></inline-formula> that satisfy this is <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM114"><mml:mo fence="false" stretchy="false">&#x230A;</mml:mo><mml:mrow><mml:mfrac><mml:msup><mml:mn>2</mml:mn><mml:mi>&#x2113;</mml:mi></mml:msup><mml:msub><mml:mi>S</mml:mi><mml:mi>K</mml:mi></mml:msub></mml:mfrac></mml:mrow><mml:mo fence="false" stretchy="false">&#x230B;</mml:mo></mml:math></inline-formula>, or <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM115"><mml:mo fence="false" stretchy="false">&#x230A;</mml:mo><mml:mrow><mml:mfrac><mml:msup><mml:mn>2</mml:mn><mml:mi>&#x2113;</mml:mi></mml:msup><mml:msub><mml:mi>S</mml:mi><mml:mi>K</mml:mi></mml:msub></mml:mfrac></mml:mrow><mml:mo fence="false" stretchy="false">&#x230B;</mml:mo><mml:mo>+</mml:mo><mml:mn>1</mml:mn></mml:math></inline-formula>. 
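As <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML"><mml:mi>R</mml:mi></mml:math></inline-formula> is uniformly random, the probability of each outcome <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML"><mml:mi>x</mml:mi></mml:math></inline-formula> deviates from <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML"><mml:mn>1</mml:mn><mml:mrow><mml:mo>/</mml:mo></mml:mrow><mml:msub><mml:mi>S</mml:mi><mml:mi>K</mml:mi></mml:msub></mml:math></inline-formula> by at most <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML"><mml:msup><mml:mn>2</mml:mn><mml:mrow><mml:mo>&#x2212;</mml:mo><mml:mi>&#x2113;</mml:mi></mml:mrow></mml:msup></mml:math></inline-formula>, so the statistical distance between <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML"><mml:mi>r</mml:mi></mml:math></inline-formula> and a uniformly random variable is at most <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML"><mml:msub><mml:mi>S</mml:mi><mml:mi>K</mml:mi></mml:msub><mml:mo>&#x22C5;</mml:mo><mml:msup><mml:mn>2</mml:mn><mml:mrow><mml:mo>&#x2212;</mml:mo><mml:mi>&#x2113;</mml:mi></mml:mrow></mml:msup></mml:math></inline-formula>. Therefore, we need <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM116"><mml:mi>&#x2113;</mml:mi><mml:mo>&#x2265;</mml:mo><mml:msub><mml:mi>log</mml:mi><mml:mn>2</mml:mn></mml:msub><mml:mo>&#x2061;</mml:mo><mml:msub><mml:mi>S</mml:mi><mml:mi>K</mml:mi></mml:msub><mml:mo>+</mml:mo><mml:mi>&#x03BA;</mml:mi></mml:math></inline-formula>, where <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM117"><mml:mi>&#x03BA;</mml:mi></mml:math></inline-formula> is the statistical security parameter, to ensure that <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM118"><mml:mi>r</mml:mi></mml:math></inline-formula> is statistically indistinguishable from a uniformly random variable.</p>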
<p>The first threshold choice will be <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM119"><mml:mi>t</mml:mi><mml:mo>=</mml:mo><mml:msub><mml:mi>S</mml:mi><mml:mrow><mml:mi>K</mml:mi><mml:mo>&#x00F7;</mml:mo><mml:mn>2</mml:mn></mml:mrow></mml:msub></mml:math></inline-formula>, and each iteration adapts the threshold following the binary search principle. This means that if <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM120"><mml:mi>r</mml:mi><mml:mo>&#x003C;</mml:mo><mml:mi>t</mml:mi></mml:math></inline-formula>, we go to the left and otherwise to the right. As the numbers <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM121"><mml:mo fence="false" stretchy="false">&#x27E8;</mml:mo><mml:msub><mml:mi>w</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo fence="false" stretchy="false">&#x27E9;</mml:mo></mml:math></inline-formula> are secret-shared, party <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM122"><mml:mi>p</mml:mi></mml:math></inline-formula> needs to generate a secret-shared binary indicator vector <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM123"><mml:mo fence="false" stretchy="false">&#x27E8;</mml:mo><mml:msub><mml:mi>&#x03B4;</mml:mi><mml:mn>1</mml:mn></mml:msub><mml:mo fence="false" stretchy="false">&#x27E9;</mml:mo><mml:mo>&#x2026;</mml:mo><mml:mo fence="false" stretchy="false">&#x27E8;</mml:mo><mml:msub><mml:mi>&#x03B4;</mml:mi><mml:mi>K</mml:mi></mml:msub><mml:mo fence="false" stretchy="false">&#x27E9;</mml:mo></mml:math></inline-formula>, such that the threshold can be computed by <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM124"><mml:mo fence="false" stretchy="false">&#x27E8;</mml:mo><mml:mi>t</mml:mi><mml:mo fence="false" stretchy="false">&#x27E9;</mml:mo><mml:mo>=</mml:mo><mml:munder><mml:mo>&#x2211;</mml:mo><mml:mi>i</mml:mi></mml:munder><mml:mo fence="false" 
stretchy="false">&#x27E8;</mml:mo><mml:msub><mml:mi>&#x03B4;</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo fence="false" stretchy="false">&#x27E9;</mml:mo><mml:mo>&#x22C5;</mml:mo><mml:mo fence="false" stretchy="false">&#x27E8;</mml:mo><mml:msub><mml:mi>w</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo fence="false" stretchy="false">&#x27E9;</mml:mo></mml:math></inline-formula>. Party <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM125"><mml:mi>p</mml:mi></mml:math></inline-formula> is the only party that can determine the binary indicator vector, because it is the only party that is allowed to learn <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM126"><mml:mrow><mml:mover><mml:mi>k</mml:mi><mml:mo stretchy="false">&#x007E;</mml:mo></mml:mover></mml:mrow></mml:math></inline-formula>.</p>
</sec>
<sec id="s3e"><label>3.5</label><title>Computing the integer weights</title>
<p>A key element of <xref ref-type="table" rid="A1">Algorithm 1</xref> is the secure, random sampling of new topics for all of the words. As explained in <xref ref-type="sec" rid="s3c">Section 3.3</xref>, this is done in two steps: computing the integer weights and performing the secure draw. This subsection will introduce the steps required to compute the integer weights for <xref ref-type="disp-formula" rid="disp-formula1">Equation 1</xref> given the matrices.</p>
<table-wrap id="A1" position="float"><label>Algorithm 1</label>
<caption><p>Protocol for performing the distributed LDA algorithm.</p></caption>
<table frame="hsides" rules="groups">
<colgroup>
<col align="left"/>
</colgroup>
<tbody>
<tr>
<td valign="top" align="left">
<list list-type="simple">
<list-item><label>1.</label>
<p>Initialisation:
<list list-type="simple">
<list-item><label>(a)</label>
<p>Each party <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM127"><mml:mi>p</mml:mi></mml:math></inline-formula> samples a random topic for each word of all its documents.</p></list-item>
<list-item><label>(b)</label>
<p>Each party <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM128"><mml:mi>p</mml:mi></mml:math></inline-formula> sets the local counters <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM129"><mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:msubsup><mml:mi>n</mml:mi><mml:mi>k</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>t</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo>)</mml:mo></mml:mrow><mml:mi>p</mml:mi></mml:msub></mml:math></inline-formula> and <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM130"><mml:msubsup><mml:mi>n</mml:mi><mml:mi>m</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>k</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup></mml:math></inline-formula>, for each of its documents <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM131"><mml:mi>m</mml:mi></mml:math></inline-formula>.</p></list-item>
<list-item><label>(c)</label>
<p>The parties encrypt <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM132"><mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:msubsup><mml:mi>n</mml:mi><mml:mi>k</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>t</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo>)</mml:mo></mml:mrow><mml:mi>p</mml:mi></mml:msub></mml:math></inline-formula>, and securely aggregate them to <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM133"><mml:mrow><mml:mo>[</mml:mo><mml:msubsup><mml:mi>n</mml:mi><mml:mi>k</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>t</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo>]</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mrow><mml:mo>[</mml:mo><mml:munder><mml:mo>&#x2211;</mml:mo><mml:mi>p</mml:mi></mml:munder><mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:msubsup><mml:mi>n</mml:mi><mml:mi>k</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>t</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo>)</mml:mo></mml:mrow><mml:mi>p</mml:mi></mml:msub><mml:mo>]</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:munder><mml:mo>&#x220F;</mml:mo><mml:mi>p</mml:mi></mml:munder><mml:mrow><mml:mo>[</mml:mo><mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:msubsup><mml:mi>n</mml:mi><mml:mi>k</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>t</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo>)</mml:mo></mml:mrow><mml:mi>p</mml:mi></mml:msub><mml:mo>]</mml:mo></mml:mrow></mml:math></inline-formula>.</p></list-item>
</list></p></list-item>
<list-item><label>2.</label>
<p>Iterate a fixed number of times:
<list list-type="simple">
<list-item><label>(a)</label>
<p>For each party <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM134"><mml:mi>p</mml:mi></mml:math></inline-formula> do
<list list-type="simple">
<list-item><label>i.</label>
<p>Party <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM135"><mml:mi>p</mml:mi></mml:math></inline-formula> obtains the matrix elements <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM136"><mml:mrow><mml:mo>[</mml:mo><mml:msubsup><mml:mi>n</mml:mi><mml:mi>k</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>t</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo>]</mml:mo></mml:mrow></mml:math></inline-formula>, and sets all local counters <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM137"><mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:msubsup><mml:mi mathvariant="normal">&#x0394;</mml:mi><mml:mi>k</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>t</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo>)</mml:mo></mml:mrow><mml:mi>p</mml:mi></mml:msub><mml:mo stretchy="false">&#x2190;</mml:mo><mml:mn>0</mml:mn></mml:math></inline-formula>.</p></list-item>
<list-item><label>ii.</label>
<p>Simultaneously choose a new topic for each word <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM138"><mml:mi>n</mml:mi></mml:math></inline-formula> of each document <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM139"><mml:mi>m</mml:mi></mml:math></inline-formula> of party <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM140"><mml:mi>p</mml:mi></mml:math></inline-formula>:
<list list-type="simple">
<list-item><label>A.</label>
<p>Set index <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM141"><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:mi>m</mml:mi><mml:mo>,</mml:mo><mml:mspace width="thickmathspace" /><mml:mi>n</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula>. Let <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM142"><mml:mrow><mml:mover><mml:mi>t</mml:mi><mml:mo stretchy="false">&#x005E;</mml:mo></mml:mover></mml:mrow></mml:math></inline-formula> be the term of word <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM143"><mml:mi>i</mml:mi></mml:math></inline-formula>, and let <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM144"><mml:mrow><mml:mover><mml:mi>k</mml:mi><mml:mo stretchy="false">&#x005E;</mml:mo></mml:mover></mml:mrow></mml:math></inline-formula> be the current topic of word <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM145"><mml:mi>i</mml:mi></mml:math></inline-formula>. 
Party <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM146"><mml:mi>p</mml:mi></mml:math></inline-formula> adjusts the local counters <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM147"><mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:msubsup><mml:mi mathvariant="normal">&#x0394;</mml:mi><mml:mrow><mml:mrow><mml:mover><mml:mi>k</mml:mi><mml:mo stretchy="false">&#x005E;</mml:mo></mml:mover></mml:mrow></mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mover><mml:mi>t</mml:mi><mml:mo stretchy="false">&#x005E;</mml:mo></mml:mover></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo>)</mml:mo></mml:mrow><mml:mi>p</mml:mi></mml:msub><mml:mo stretchy="false">&#x2190;</mml:mo><mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:msubsup><mml:mi mathvariant="normal">&#x0394;</mml:mi><mml:mrow><mml:mrow><mml:mover><mml:mi>k</mml:mi><mml:mo stretchy="false">&#x005E;</mml:mo></mml:mover></mml:mrow></mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mover><mml:mi>t</mml:mi><mml:mo stretchy="false">&#x005E;</mml:mo></mml:mover></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo>)</mml:mo></mml:mrow><mml:mi>p</mml:mi></mml:msub><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:math></inline-formula>, <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM148"><mml:msubsup><mml:mi>n</mml:mi><mml:mi>m</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mover><mml:mi>k</mml:mi><mml:mo stretchy="false">&#x005E;</mml:mo></mml:mover></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo stretchy="false">&#x2190;</mml:mo><mml:msubsup><mml:mi>n</mml:mi><mml:mi>m</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mover><mml:mi>k</mml:mi><mml:mo stretchy="false">&#x005E;</mml:mo></mml:mover></mml:mrow><mml:mo 
stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:math></inline-formula>.</p></list-item>
<list-item><label>B.</label>
<p>The parties securely sample a new topic <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM149"><mml:mrow><mml:mover><mml:mi>k</mml:mi><mml:mo stretchy="false">&#x007E;</mml:mo></mml:mover></mml:mrow></mml:math></inline-formula> for word <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM150"><mml:mi>i</mml:mi></mml:math></inline-formula> with matrices <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM151"><mml:mrow><mml:mo>[</mml:mo><mml:msubsup><mml:mi>n</mml:mi><mml:mi>k</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>t</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo>+</mml:mo><mml:msubsup><mml:mi mathvariant="normal">&#x0394;</mml:mi><mml:mi>k</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>t</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo>]</mml:mo></mml:mrow></mml:math></inline-formula> and <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM152"><mml:msubsup><mml:mi>n</mml:mi><mml:mi>m</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>k</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup></mml:math></inline-formula> (see <xref ref-type="sec" rid="s3e">Section 3.5</xref>), and reveal it to party <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM153"><mml:mi>p</mml:mi></mml:math></inline-formula>.</p></list-item>
<list-item><label>C.</label>
<p>Party <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM154"><mml:mi>p</mml:mi></mml:math></inline-formula> adjusts the local counters: <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM155"><mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:msubsup><mml:mi mathvariant="normal">&#x0394;</mml:mi><mml:mrow><mml:mrow><mml:mover><mml:mi>k</mml:mi><mml:mo stretchy="false">&#x007E;</mml:mo></mml:mover></mml:mrow></mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mover><mml:mi>t</mml:mi><mml:mo stretchy="false">&#x005E;</mml:mo></mml:mover></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo>)</mml:mo></mml:mrow><mml:mi>p</mml:mi></mml:msub><mml:mo stretchy="false">&#x2190;</mml:mo><mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:msubsup><mml:mi mathvariant="normal">&#x0394;</mml:mi><mml:mrow><mml:mrow><mml:mover><mml:mi>k</mml:mi><mml:mo stretchy="false">&#x007E;</mml:mo></mml:mover></mml:mrow></mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mover><mml:mi>t</mml:mi><mml:mo stretchy="false">&#x005E;</mml:mo></mml:mover></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo>)</mml:mo></mml:mrow><mml:mi>p</mml:mi></mml:msub><mml:mo>+</mml:mo><mml:mn>1</mml:mn></mml:math></inline-formula>, <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM156"><mml:msubsup><mml:mi>n</mml:mi><mml:mi>m</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mover><mml:mi>k</mml:mi><mml:mo stretchy="false">&#x007E;</mml:mo></mml:mover></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo stretchy="false">&#x2190;</mml:mo><mml:msubsup><mml:mi>n</mml:mi><mml:mi>m</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mover><mml:mi>k</mml:mi><mml:mo stretchy="false">&#x007E;</mml:mo></mml:mover></mml:mrow><mml:mo 
stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo>+</mml:mo><mml:mn>1</mml:mn></mml:math></inline-formula>.</p></list-item>
</list></p></list-item>
<list-item><label>iii.</label>
<p>Party <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM157"><mml:mi>p</mml:mi></mml:math></inline-formula> encrypts the local counters <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM158"><mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:msubsup><mml:mi mathvariant="normal">&#x0394;</mml:mi><mml:mi>k</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>t</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo>)</mml:mo></mml:mrow><mml:mi>p</mml:mi></mml:msub></mml:math></inline-formula>, <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM159"><mml:mn>1</mml:mn><mml:mo>&#x2264;</mml:mo><mml:mi>k</mml:mi><mml:mo>&#x2264;</mml:mo><mml:mi>K</mml:mi></mml:math></inline-formula>, <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM160"><mml:mn>1</mml:mn><mml:mo>&#x2264;</mml:mo><mml:mi>t</mml:mi><mml:mo>&#x2264;</mml:mo><mml:mi>V</mml:mi></mml:math></inline-formula>, and communicates them.</p></list-item>
</list></p></list-item>
<list-item><label>(b)</label>
<p>The parties update the matrix elements <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM161"><mml:mrow><mml:mo>[</mml:mo><mml:msubsup><mml:mi>n</mml:mi><mml:mi>k</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>t</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo>]</mml:mo></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM162"><mml:mn>1</mml:mn><mml:mo>&#x2264;</mml:mo><mml:mi>k</mml:mi><mml:mo>&#x2264;</mml:mo><mml:mi>K</mml:mi></mml:math></inline-formula>, <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM163"><mml:mn>1</mml:mn><mml:mo>&#x2264;</mml:mo><mml:mi>t</mml:mi><mml:mo>&#x2264;</mml:mo><mml:mi>V</mml:mi></mml:math></inline-formula>, with local counts to <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM164"><mml:mrow><mml:mo>[</mml:mo><mml:msubsup><mml:mi>n</mml:mi><mml:mi>k</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>t</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo>]</mml:mo></mml:mrow><mml:mo>&#x22C5;</mml:mo><mml:munder><mml:mo>&#x220F;</mml:mo><mml:mi>p</mml:mi></mml:munder><mml:mrow><mml:mo>[</mml:mo><mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:msubsup><mml:mi mathvariant="normal">&#x0394;</mml:mi><mml:mi>k</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>t</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo>)</mml:mo></mml:mrow><mml:mi>p</mml:mi></mml:msub><mml:mo>]</mml:mo></mml:mrow></mml:math></inline-formula>.</p></list-item>
</list></p></list-item>
<list-item><label>3.</label>
<p>The parties jointly decrypt the topic-term matrix <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM165"><mml:mrow><mml:mo>[</mml:mo><mml:msubsup><mml:mi>n</mml:mi><mml:mi>k</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>t</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo>]</mml:mo></mml:mrow></mml:math></inline-formula> to obtain <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM166"><mml:msubsup><mml:mi>n</mml:mi><mml:mi>k</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>t</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup></mml:math></inline-formula>.</p></list-item>
<list-item><label>4.</label>
<p>The parties output <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM167"><mml:msubsup><mml:mi>n</mml:mi><mml:mi>k</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>t</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup></mml:math></inline-formula> and <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM168"><mml:msubsup><mml:mi>n</mml:mi><mml:mi>m</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>k</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup></mml:math></inline-formula>.</p></list-item>
</list></td>
</tr>
</tbody>
</table>
</table-wrap>
<p>We assume we are given matrices <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM169"><mml:mo stretchy="false">[</mml:mo><mml:msubsup><mml:mi>n</mml:mi><mml:mrow><mml:mi>k</mml:mi><mml:mo>,</mml:mo><mml:mi mathvariant="normal">&#x00AC;</mml:mi><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>t</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo stretchy="false">]</mml:mo><mml:mo>=</mml:mo><mml:mo stretchy="false">[</mml:mo><mml:msubsup><mml:mi>n</mml:mi><mml:mi>k</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>t</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo>+</mml:mo><mml:msubsup><mml:mi mathvariant="normal">&#x0394;</mml:mi><mml:mi>k</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>t</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo stretchy="false">]</mml:mo></mml:math></inline-formula> and <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM170"><mml:msubsup><mml:mi>n</mml:mi><mml:mrow><mml:mi>m</mml:mi><mml:mo>,</mml:mo><mml:mi mathvariant="normal">&#x00AC;</mml:mi><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>k</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup></mml:math></inline-formula>, the first one encrypted and the second one privately known to party <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM171"><mml:mi>p</mml:mi></mml:math></inline-formula>, the holder of document <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM172"><mml:mi>m</mml:mi></mml:math></inline-formula>. We omit the index <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM173"><mml:mi mathvariant="normal">&#x00AC;</mml:mi><mml:mi>i</mml:mi></mml:math></inline-formula> for convenience.</p>
<p>To sample a new topic, first the weights have to be computed that determine the probabilities according to <xref ref-type="disp-formula" rid="disp-formula1">Equation 1</xref>, which we denote as <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM174"><mml:mi>P</mml:mi><mml:mi>r</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>z</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mo>&#x221D;</mml:mo><mml:mo stretchy="false">[</mml:mo><mml:msubsup><mml:mi>w</mml:mi><mml:mi>k</mml:mi><mml:mi>n</mml:mi></mml:msubsup><mml:mo stretchy="false">]</mml:mo><mml:mrow><mml:mo>/</mml:mo></mml:mrow><mml:mo stretchy="false">[</mml:mo><mml:msubsup><mml:mi>w</mml:mi><mml:mi>k</mml:mi><mml:mi>d</mml:mi></mml:msubsup><mml:mo stretchy="false">]</mml:mo></mml:math></inline-formula> for simplicity. The weights consist of numerators<disp-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="UDM4"><mml:mo stretchy="false">[</mml:mo><mml:msubsup><mml:mi>w</mml:mi><mml:mi>k</mml:mi><mml:mi>n</mml:mi></mml:msubsup><mml:mo stretchy="false">]</mml:mo><mml:mo>=</mml:mo><mml:mrow><mml:mo>[</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:msubsup><mml:mi>n</mml:mi><mml:mi>k</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mover><mml:mi>t</mml:mi><mml:mo stretchy="false">&#x005E;</mml:mo></mml:mover></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo>+</mml:mo><mml:msub><mml:mi>&#x03B2;</mml:mi><mml:mrow><mml:mrow><mml:mover><mml:mi>t</mml:mi><mml:mo stretchy="false">&#x005E;</mml:mo></mml:mover></mml:mrow></mml:mrow></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mo>&#x22C5;</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:msubsup><mml:mi>n</mml:mi><mml:mi>m</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>k</mml:mi><mml:mo 
stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo>+</mml:mo><mml:msub><mml:mi>&#x03B1;</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mo>]</mml:mo></mml:mrow><mml:mo>,</mml:mo><mml:mspace width="thickmathspace" /></mml:math></disp-formula>and denominators<disp-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="UDM5"><mml:mo stretchy="false">[</mml:mo><mml:msubsup><mml:mi>w</mml:mi><mml:mi>k</mml:mi><mml:mi>d</mml:mi></mml:msubsup><mml:mo stretchy="false">]</mml:mo><mml:mo>=</mml:mo><mml:mrow><mml:mo>[</mml:mo><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>&#x03C4;</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>V</mml:mi></mml:munderover><mml:mo stretchy="false">(</mml:mo><mml:msubsup><mml:mi>n</mml:mi><mml:mi>k</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>&#x03C4;</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo>+</mml:mo><mml:msub><mml:mi>&#x03B2;</mml:mi><mml:mi>&#x03C4;</mml:mi></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mo>&#x22C5;</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>&#x03BA;</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>K</mml:mi></mml:munderover><mml:msubsup><mml:mi>n</mml:mi><mml:mi>m</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>&#x03BA;</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo>+</mml:mo><mml:msub><mml:mi>&#x03B1;</mml:mi><mml:mrow><mml:mi>&#x03BA;</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mo>]</mml:mo></mml:mrow><mml:mo>.</mml:mo></mml:math></disp-formula>The encrypted numerators and denominators can easily be computed by party <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM175"><mml:mi>p</mml:mi></mml:math></inline-formula> due to the additively homomorphic property of our encryption scheme.</p>
<p>The only problem is that the hyperparameters <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM176"><mml:mi>&#x03B1;</mml:mi></mml:math></inline-formula> and <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM177"><mml:mi>&#x03B2;</mml:mi></mml:math></inline-formula> are not integers, while the secret sharing scheme requires the plaintexts to be integers. For this work, we chose <italic>symmetric</italic> priors, meaning <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM178"><mml:msub><mml:mi>&#x03B1;</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mi>&#x03B1;</mml:mi></mml:math></inline-formula>, <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM179"><mml:mn>1</mml:mn><mml:mo>&#x2264;</mml:mo><mml:mi>i</mml:mi><mml:mo>&#x2264;</mml:mo><mml:mi>K</mml:mi></mml:math></inline-formula>, and <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM180"><mml:msub><mml:mi>&#x03B2;</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mi>&#x03B2;</mml:mi></mml:math></inline-formula>, <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM181"><mml:mn>1</mml:mn><mml:mo>&#x2264;</mml:mo><mml:mi>i</mml:mi><mml:mo>&#x2264;</mml:mo><mml:mi>V</mml:mi></mml:math></inline-formula> (see <xref ref-type="sec" rid="s5c">Section 5.3</xref>). 
We then approximate them by fractions <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM182"><mml:mi>&#x03B1;</mml:mi><mml:mo>=</mml:mo><mml:mrow><mml:mfrac><mml:msup><mml:mi>&#x03B1;</mml:mi><mml:mi>n</mml:mi></mml:msup><mml:msup><mml:mi>&#x03B1;</mml:mi><mml:mi>d</mml:mi></mml:msup></mml:mfrac></mml:mrow></mml:math></inline-formula> and <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM183"><mml:mi>&#x03B2;</mml:mi><mml:mo>=</mml:mo><mml:mrow><mml:mfrac><mml:msup><mml:mi>&#x03B2;</mml:mi><mml:mi>n</mml:mi></mml:msup><mml:msup><mml:mi>&#x03B2;</mml:mi><mml:mi>d</mml:mi></mml:msup></mml:mfrac></mml:mrow></mml:math></inline-formula>, where <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM184"><mml:msup><mml:mi>&#x03B1;</mml:mi><mml:mi>n</mml:mi></mml:msup></mml:math></inline-formula>, <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM185"><mml:msup><mml:mi>&#x03B1;</mml:mi><mml:mi>d</mml:mi></mml:msup></mml:math></inline-formula>, <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM186"><mml:msup><mml:mi>&#x03B2;</mml:mi><mml:mi>n</mml:mi></mml:msup></mml:math></inline-formula> and <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM187"><mml:msup><mml:mi>&#x03B2;</mml:mi><mml:mi>d</mml:mi></mml:msup></mml:math></inline-formula> are integers. 
Then the numerators <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM188"><mml:msubsup><mml:mi>w</mml:mi><mml:mi>k</mml:mi><mml:mi>n</mml:mi></mml:msubsup></mml:math></inline-formula> and denominators <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM189"><mml:msubsup><mml:mi>w</mml:mi><mml:mi>k</mml:mi><mml:mi>d</mml:mi></mml:msubsup></mml:math></inline-formula> are converted to integers <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM190"><mml:msubsup><mml:mrow><mml:mover><mml:mi>w</mml:mi><mml:mo stretchy="false">&#x007E;</mml:mo></mml:mover></mml:mrow><mml:mi>k</mml:mi><mml:mi>n</mml:mi></mml:msubsup></mml:math></inline-formula> and <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM191"><mml:msubsup><mml:mrow><mml:mover><mml:mi>w</mml:mi><mml:mo stretchy="false">&#x007E;</mml:mo></mml:mover></mml:mrow><mml:mi>k</mml:mi><mml:mi>d</mml:mi></mml:msubsup></mml:math></inline-formula> by multiplying both with <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM192"><mml:msup><mml:mi>&#x03B1;</mml:mi><mml:mi>d</mml:mi></mml:msup><mml:msup><mml:mi>&#x03B2;</mml:mi><mml:mi>d</mml:mi></mml:msup></mml:math></inline-formula>.</p>
<p>Eventually, we want to obtain integer weights for the secure draw (see <xref ref-type="sec" rid="s3d">Section 3.4</xref>). To avoid costly secure integer divisions <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM193"><mml:mrow><mml:mfrac><mml:msubsup><mml:mrow><mml:mover><mml:mi>w</mml:mi><mml:mo stretchy="false">&#x007E;</mml:mo></mml:mover></mml:mrow><mml:mi>k</mml:mi><mml:mi>n</mml:mi></mml:msubsup><mml:msubsup><mml:mrow><mml:mover><mml:mi>w</mml:mi><mml:mo stretchy="false">&#x007E;</mml:mo></mml:mover></mml:mrow><mml:mi>k</mml:mi><mml:mi>d</mml:mi></mml:msubsup></mml:mfrac></mml:mrow></mml:math></inline-formula>, we multiply these fractions with <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM194"><mml:mi>W</mml:mi><mml:mo>=</mml:mo><mml:munder><mml:mo>&#x220F;</mml:mo><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:munder><mml:msubsup><mml:mrow><mml:mover><mml:mi>w</mml:mi><mml:mo stretchy="false">&#x007E;</mml:mo></mml:mover></mml:mrow><mml:mi>k</mml:mi><mml:mi>d</mml:mi></mml:msubsup></mml:math></inline-formula> to obtain <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM195"><mml:msub><mml:mrow><mml:mover><mml:mi>w</mml:mi><mml:mo stretchy="false">&#x007E;</mml:mo></mml:mover></mml:mrow><mml:mi>k</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:msubsup><mml:mrow><mml:mover><mml:mi>w</mml:mi><mml:mo stretchy="false">&#x007E;</mml:mo></mml:mover></mml:mrow><mml:mi>k</mml:mi><mml:mi>n</mml:mi></mml:msubsup><mml:mo>&#x22C5;</mml:mo><mml:munder><mml:mo>&#x220F;</mml:mo><mml:mrow><mml:mi>&#x03BA;</mml:mi><mml:mo>&#x2260;</mml:mo><mml:mi>k</mml:mi></mml:mrow></mml:munder><mml:msubsup><mml:mrow><mml:mover><mml:mi>w</mml:mi><mml:mo stretchy="false">&#x007E;</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>&#x03BA;</mml:mi></mml:mrow><mml:mi>d</mml:mi></mml:msubsup></mml:math></inline-formula> as follows:
<list list-type="simple">
<list-item><label>1.</label>
<p>Party <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM196"><mml:mi>p</mml:mi></mml:math></inline-formula> computes the encryptions <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM197"><mml:mo stretchy="false">[</mml:mo><mml:msubsup><mml:mrow><mml:mover><mml:mi>w</mml:mi><mml:mo stretchy="false">&#x007E;</mml:mo></mml:mover></mml:mrow><mml:mi>k</mml:mi><mml:mi>n</mml:mi></mml:msubsup><mml:mo stretchy="false">]</mml:mo><mml:mo>=</mml:mo><mml:mo stretchy="false">[</mml:mo><mml:msubsup><mml:mi>w</mml:mi><mml:mi>k</mml:mi><mml:mi>n</mml:mi></mml:msubsup><mml:mo>&#x22C5;</mml:mo><mml:msup><mml:mi>&#x03B1;</mml:mi><mml:mi>d</mml:mi></mml:msup><mml:msup><mml:mi>&#x03B2;</mml:mi><mml:mi>d</mml:mi></mml:msup><mml:mo stretchy="false">]</mml:mo><mml:mo>=</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:mo stretchy="false">[</mml:mo><mml:msubsup><mml:mi>n</mml:mi><mml:mi>k</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mover><mml:mi>t</mml:mi><mml:mo stretchy="false">&#x005E;</mml:mo></mml:mover></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:msup><mml:mo stretchy="false">]</mml:mo><mml:mrow><mml:msup><mml:mi>&#x03B2;</mml:mi><mml:mi>d</mml:mi></mml:msup></mml:mrow></mml:msup><mml:mo>&#x22C5;</mml:mo><mml:mo stretchy="false">[</mml:mo><mml:msup><mml:mi>&#x03B2;</mml:mi><mml:mi>n</mml:mi></mml:msup><mml:mo stretchy="false">]</mml:mo><mml:msup><mml:mo stretchy="false">)</mml:mo><mml:mrow><mml:msup><mml:mi>&#x03B1;</mml:mi><mml:mi>d</mml:mi></mml:msup><mml:mo>&#x22C5;</mml:mo><mml:msubsup><mml:mi>n</mml:mi><mml:mi>m</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>k</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo>+</mml:mo><mml:msup><mml:mi>&#x03B1;</mml:mi><mml:mi>n</mml:mi></mml:msup></mml:mrow></mml:msup></mml:math></inline-formula> and <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM198"><mml:mo 
stretchy="false">[</mml:mo><mml:msubsup><mml:mrow><mml:mover><mml:mi>w</mml:mi><mml:mo stretchy="false">&#x007E;</mml:mo></mml:mover></mml:mrow><mml:mi>k</mml:mi><mml:mi>d</mml:mi></mml:msubsup><mml:mo stretchy="false">]</mml:mo><mml:mo>=</mml:mo><mml:mo stretchy="false">[</mml:mo><mml:msubsup><mml:mi>w</mml:mi><mml:mi>k</mml:mi><mml:mi>d</mml:mi></mml:msubsup><mml:mo>&#x22C5;</mml:mo><mml:msup><mml:mi>&#x03B1;</mml:mi><mml:mi>d</mml:mi></mml:msup><mml:msup><mml:mi>&#x03B2;</mml:mi><mml:mi>d</mml:mi></mml:msup><mml:mo stretchy="false">]</mml:mo><mml:mo>=</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:mo stretchy="false">[</mml:mo><mml:msup><mml:mi>&#x03B2;</mml:mi><mml:mi>n</mml:mi></mml:msup><mml:mo stretchy="false">]</mml:mo><mml:mo>&#x22C5;</mml:mo><mml:mo stretchy="false">[</mml:mo><mml:mi>V</mml:mi><mml:mo stretchy="false">]</mml:mo><mml:mo>&#x22C5;</mml:mo><mml:munderover><mml:mo>&#x220F;</mml:mo><mml:mrow><mml:mi>&#x03C4;</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>V</mml:mi></mml:mrow></mml:munderover><mml:mo stretchy="false">[</mml:mo><mml:msubsup><mml:mi>n</mml:mi><mml:mi>k</mml:mi><mml:mrow><mml:mi>&#x03C4;</mml:mi></mml:mrow></mml:msubsup><mml:msup><mml:mo stretchy="false">]</mml:mo><mml:mrow><mml:msup><mml:mi>&#x03B2;</mml:mi><mml:mi>d</mml:mi></mml:msup></mml:mrow></mml:msup><mml:msup><mml:mo stretchy="false">)</mml:mo><mml:mrow><mml:msup><mml:mi>&#x03B1;</mml:mi><mml:mi>n</mml:mi></mml:msup><mml:mo>&#x22C5;</mml:mo><mml:mi>V</mml:mi><mml:mo>+</mml:mo><mml:msup><mml:mi>&#x03B1;</mml:mi><mml:mi>d</mml:mi></mml:msup><mml:mo>&#x22C5;</mml:mo><mml:munder><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>&#x03BA;</mml:mi></mml:mrow></mml:munder><mml:msubsup><mml:mi>n</mml:mi><mml:mi>m</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>&#x03BA;</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup></mml:mrow></mml:msup></mml:math></inline-formula>, which are converted to 
secret sharings (see <xref ref-type="sec" rid="s3f">Section 3.6</xref>) for efficiency reasons.</p></list-item>
<list-item><label>2.</label>
<p>With one fan-in multiplication (<xref ref-type="bibr" rid="B13">13</xref>) the parties compute <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM199"><mml:mo fence="false" stretchy="false">&#x27E8;</mml:mo><mml:mi>W</mml:mi><mml:mo fence="false" stretchy="false">&#x27E9;</mml:mo><mml:mo>=</mml:mo><mml:munderover><mml:mo>&#x220F;</mml:mo><mml:mrow><mml:mi>k</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>K</mml:mi></mml:munderover><mml:mo fence="false" stretchy="false">&#x27E8;</mml:mo><mml:msubsup><mml:mrow><mml:mover><mml:mi>w</mml:mi><mml:mo stretchy="false">&#x007E;</mml:mo></mml:mover></mml:mrow><mml:mi>k</mml:mi><mml:mi>d</mml:mi></mml:msubsup><mml:mo fence="false" stretchy="false">&#x27E9;</mml:mo></mml:math></inline-formula>.</p></list-item>
<list-item><label>3.</label>
<p>For each <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM200"><mml:msubsup><mml:mrow><mml:mover><mml:mi>w</mml:mi><mml:mo stretchy="false">&#x007E;</mml:mo></mml:mover></mml:mrow><mml:mi>k</mml:mi><mml:mi>d</mml:mi></mml:msubsup></mml:math></inline-formula>, <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM201"><mml:mn>1</mml:mn><mml:mo>&#x2264;</mml:mo><mml:mi>k</mml:mi><mml:mo>&#x2264;</mml:mo><mml:mi>K</mml:mi></mml:math></inline-formula>, they jointly compute the multiplicative inverse <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM202"><mml:mo fence="false" stretchy="false">&#x27E8;</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:msubsup><mml:mrow><mml:mover><mml:mi>w</mml:mi><mml:mo stretchy="false">&#x007E;</mml:mo></mml:mover></mml:mrow><mml:mi>k</mml:mi><mml:mi>d</mml:mi></mml:msubsup><mml:msup><mml:mo stretchy="false">)</mml:mo><mml:mrow><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msup><mml:mo fence="false" stretchy="false">&#x27E9;</mml:mo></mml:math></inline-formula> (<xref ref-type="bibr" rid="B14">14</xref>, Prot.4.11).</p></list-item>
<list-item><label>4.</label>
<p>The parties compute <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM203"><mml:mo fence="false" stretchy="false">&#x27E8;</mml:mo><mml:msub><mml:mrow><mml:mover><mml:mi>w</mml:mi><mml:mo stretchy="false">&#x007E;</mml:mo></mml:mover></mml:mrow><mml:mi>k</mml:mi></mml:msub><mml:mo fence="false" stretchy="false">&#x27E9;</mml:mo><mml:mo>=</mml:mo><mml:mo fence="false" stretchy="false">&#x27E8;</mml:mo><mml:msubsup><mml:mrow><mml:mover><mml:mi>w</mml:mi><mml:mo stretchy="false">&#x007E;</mml:mo></mml:mover></mml:mrow><mml:mi>k</mml:mi><mml:mi>n</mml:mi></mml:msubsup><mml:mo fence="false" stretchy="false">&#x27E9;</mml:mo><mml:mo>&#x22C5;</mml:mo><mml:mo fence="false" stretchy="false">&#x27E8;</mml:mo><mml:mi>W</mml:mi><mml:mo fence="false" stretchy="false">&#x27E9;</mml:mo><mml:mo>&#x22C5;</mml:mo><mml:mo fence="false" stretchy="false">&#x27E8;</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:msubsup><mml:mrow><mml:mover><mml:mi>w</mml:mi><mml:mo stretchy="false">&#x007E;</mml:mo></mml:mover></mml:mrow><mml:mi>k</mml:mi><mml:mi>d</mml:mi></mml:msubsup><mml:msup><mml:mo stretchy="false">)</mml:mo><mml:mrow><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msup><mml:mo fence="false" stretchy="false">&#x27E9;</mml:mo></mml:math></inline-formula>, <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM204"><mml:mn>1</mml:mn><mml:mo>&#x2264;</mml:mo><mml:mi>k</mml:mi><mml:mo>&#x2264;</mml:mo><mml:mi>K</mml:mi></mml:math></inline-formula>.</p></list-item>
</list></p>
</sec>
<sec id="s3f"><label>3.6</label><title>Converting encryptions to secret-sharings</title>
<p>During the execution of <xref ref-type="table" rid="A1">Algorithm 1</xref>, we need to transform the encrypted weights <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM205"><mml:mo stretchy="false">[</mml:mo><mml:mi>w</mml:mi><mml:mo stretchy="false">]</mml:mo></mml:math></inline-formula> to Shamir secret sharings <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM206"><mml:mo fence="false" stretchy="false">&#x27E8;</mml:mo><mml:mi>w</mml:mi><mml:mo fence="false" stretchy="false">&#x27E9;</mml:mo></mml:math></inline-formula> to randomly draw new topics more efficiently. Suppose we have precomputed pairs <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM207"><mml:mo stretchy="false">(</mml:mo><mml:mo stretchy="false">[</mml:mo><mml:mi>R</mml:mi><mml:mo stretchy="false">]</mml:mo><mml:mo>,</mml:mo><mml:mspace width="thickmathspace" /><mml:mo fence="false" stretchy="false">&#x27E8;</mml:mo><mml:mi>r</mml:mi><mml:mo fence="false" stretchy="false">&#x27E9;</mml:mo><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula>, such that <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM208"><mml:mi>R</mml:mi></mml:math></inline-formula> contains <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM209"><mml:mi>&#x03C3;</mml:mi></mml:math></inline-formula> more bits than <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM210"><mml:mi>w</mml:mi></mml:math></inline-formula>, and <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM211"><mml:mi>r</mml:mi><mml:mo>=</mml:mo><mml:mi>R</mml:mi><mml:mo lspace="thickmathspace" rspace="thickmathspace">mod</mml:mo><mml:mi>N</mml:mi></mml:math></inline-formula>, where <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM212"><mml:mi>N</mml:mi></mml:math></inline-formula>, <inline-formula><mml:math 
xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM213"><mml:mi>N</mml:mi><mml:mo>&#x003E;</mml:mo><mml:mi>w</mml:mi></mml:math></inline-formula>, is the modulus of the Shamir secret sharing scheme. Then a conversion from <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM214"><mml:mo stretchy="false">[</mml:mo><mml:mi>w</mml:mi><mml:mo stretchy="false">]</mml:mo></mml:math></inline-formula> to <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM215"><mml:mo fence="false" stretchy="false">&#x27E8;</mml:mo><mml:mi>w</mml:mi><mml:mo fence="false" stretchy="false">&#x27E9;</mml:mo></mml:math></inline-formula> is relatively straightforward:
<list list-type="simple">
<list-item><label>1.</label>
<p>Compute <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM216"><mml:mo stretchy="false">[</mml:mo><mml:mi>w</mml:mi><mml:mo>+</mml:mo><mml:mi>R</mml:mi><mml:mo stretchy="false">]</mml:mo><mml:mo>=</mml:mo><mml:mo stretchy="false">[</mml:mo><mml:mi>w</mml:mi><mml:mo stretchy="false">]</mml:mo><mml:mo>&#x22C5;</mml:mo><mml:mo stretchy="false">[</mml:mo><mml:mi>R</mml:mi><mml:mo stretchy="false">]</mml:mo></mml:math></inline-formula>, and (jointly) decrypt it.</p></list-item>
<list-item><label>2.</label>
<p>Jointly compute <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM217"><mml:mo fence="false" stretchy="false">&#x27E8;</mml:mo><mml:mi>w</mml:mi><mml:mo fence="false" stretchy="false">&#x27E9;</mml:mo><mml:mo>=</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:mi>w</mml:mi><mml:mo>+</mml:mo><mml:mi>R</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo lspace="thickmathspace" rspace="thickmathspace">mod</mml:mo><mml:mi>N</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mo fence="false" stretchy="false">&#x27E8;</mml:mo><mml:mi>r</mml:mi><mml:mo fence="false" stretchy="false">&#x27E9;</mml:mo></mml:math></inline-formula>.</p></list-item>
</list>Note that <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM218"><mml:mi>R</mml:mi></mml:math></inline-formula> is different from the <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM219"><mml:mi>R</mml:mi></mml:math></inline-formula> used earlier in <xref ref-type="sec" rid="s3d">Section 3.4</xref>. The pairs could be precomputed as follows:
<list list-type="simple">
<list-item><label>1.</label>
<p>Each party <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM220"><mml:mi>i</mml:mi></mml:math></inline-formula> generates a random number <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM221"><mml:msub><mml:mi>R</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:math></inline-formula> that has <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM222"><mml:mi>&#x03C3;</mml:mi></mml:math></inline-formula> more bits than <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM223"><mml:mi>w</mml:mi></mml:math></inline-formula>, and encrypts it.</p></list-item>
<list-item><label>2.</label>
<p>Each party <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM224"><mml:mi>i</mml:mi></mml:math></inline-formula> computes <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM225"><mml:msub><mml:mi>r</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mi>R</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo lspace="thickmathspace" rspace="thickmathspace">mod</mml:mo><mml:mi>N</mml:mi></mml:math></inline-formula>, and generates a secret sharing <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM226"><mml:mo fence="false" stretchy="false">&#x27E8;</mml:mo><mml:msub><mml:mi>r</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo fence="false" stretchy="false">&#x27E9;</mml:mo></mml:math></inline-formula> for it.</p></list-item>
<list-item><label>3.</label>
<p>Each party <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM227"><mml:mi>i</mml:mi></mml:math></inline-formula> sends each other party a share of <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM228"><mml:mo fence="false" stretchy="false">&#x27E8;</mml:mo><mml:msub><mml:mi>r</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo fence="false" stretchy="false">&#x27E9;</mml:mo></mml:math></inline-formula>, together with <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM229"><mml:mo stretchy="false">[</mml:mo><mml:msub><mml:mi>R</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo stretchy="false">]</mml:mo></mml:math></inline-formula>.</p></list-item>
<list-item><label>4.</label>
<p>The parties compute <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM230"><mml:mo stretchy="false">[</mml:mo><mml:mi>R</mml:mi><mml:mo stretchy="false">]</mml:mo><mml:mo>=</mml:mo><mml:mo stretchy="false">[</mml:mo><mml:mo>&#x2211;</mml:mo><mml:msub><mml:mi>R</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo stretchy="false">]</mml:mo><mml:mo>=</mml:mo><mml:munder><mml:mo>&#x220F;</mml:mo><mml:mi>i</mml:mi></mml:munder><mml:mo stretchy="false">[</mml:mo><mml:msub><mml:mi>R</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo stretchy="false">]</mml:mo></mml:math></inline-formula>, and <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM231"><mml:mo fence="false" stretchy="false">&#x27E8;</mml:mo><mml:mi>r</mml:mi><mml:mo fence="false" stretchy="false">&#x27E9;</mml:mo><mml:mo>=</mml:mo><mml:munder><mml:mo>&#x2211;</mml:mo><mml:mi>i</mml:mi></mml:munder><mml:mo fence="false" stretchy="false">&#x27E8;</mml:mo><mml:msub><mml:mi>r</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo fence="false" stretchy="false">&#x27E9;</mml:mo></mml:math></inline-formula>.</p></list-item>
</list>We have <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM232"><mml:mi>r</mml:mi><mml:mo>=</mml:mo><mml:mi>R</mml:mi><mml:mo lspace="thickmathspace" rspace="thickmathspace">mod</mml:mo><mml:mi>N</mml:mi></mml:math></inline-formula>, because <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM233"><mml:mi>r</mml:mi><mml:mo>=</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:munder><mml:mo>&#x2211;</mml:mo><mml:mi>i</mml:mi></mml:munder><mml:msub><mml:mi>r</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mo lspace="thickmathspace" rspace="thickmathspace">mod</mml:mo><mml:mi>N</mml:mi><mml:mo>=</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:munder><mml:mo>&#x2211;</mml:mo><mml:mi>i</mml:mi></mml:munder><mml:msub><mml:mi>R</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mo lspace="thickmathspace" rspace="thickmathspace">mod</mml:mo><mml:mi>N</mml:mi></mml:math></inline-formula>, and <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM234"><mml:mi>R</mml:mi><mml:mo>=</mml:mo><mml:munder><mml:mo>&#x2211;</mml:mo><mml:mi>i</mml:mi></mml:munder><mml:msub><mml:mi>R</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:math></inline-formula>. It is not necessary that all parties generate a random number; it is sufficient that at least <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM235"><mml:mi>t</mml:mi><mml:mo>+</mml:mo><mml:mn>1</mml:mn></mml:math></inline-formula> parties do.</p>
</sec>
</sec>
<sec id="s4"><label>4</label><title>Optimisations</title>
<p>During the development of the protocol, we came up with several optimisations to improve the performance. The optimisations that we implemented are described below. Additional optimisations that were not implemented due to time constraints can be found in <xref ref-type="app" rid="app3">Appendix A</xref>.</p>
<sec id="s4a"><label>4.1</label><title>Parallelisation of secure samplings</title>
<p>We combine the sampling of all new topics of one party [step 2(a)iiB], such that we can parallelise each step of the binary search (see <xref ref-type="sec" rid="s3d">Section 3.4</xref>), and drastically reduce the number of communication rounds. This means that the probabilities from <xref ref-type="disp-formula" rid="disp-formula1">Equation 1</xref> are not recomputed after each single topic sampling, but only once all words of all documents of a certain party have been assigned a new topic within one iteration. This version, which we will refer to as <italic>batched</italic> LDA, enables us to execute all secure comparisons at the same level of the binary tree (see <xref ref-type="sec" rid="s3d">Section 3.4</xref>) in parallel, and to significantly reduce the total number of communication rounds. The disadvantage is that the drawing probabilities are not constantly adjusted, which might lead to accuracy loss (see <xref ref-type="sec" rid="s5d1">Section 5.4.1</xref>).</p>
</sec>
<sec id="s4b"><label>4.2</label><title>Multiple conversions</title>
<p>We have multiple conversions that can be efficiently combined into one protocol. Suppose we have weights <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM236"><mml:msub><mml:mi>w</mml:mi><mml:mn>1</mml:mn></mml:msub><mml:mo>,</mml:mo><mml:mspace width="thickmathspace" /><mml:mo>&#x2026;</mml:mo><mml:mo>,</mml:mo><mml:mspace width="thickmathspace" /><mml:msub><mml:mi>w</mml:mi><mml:mi>&#x03C9;</mml:mi></mml:msub></mml:math></inline-formula>, and corresponding pairs <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM237"><mml:mo stretchy="false">(</mml:mo><mml:mo stretchy="false">[</mml:mo><mml:msub><mml:mi>R</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo stretchy="false">]</mml:mo><mml:mo>,</mml:mo><mml:mspace width="thickmathspace" /><mml:mo fence="false" stretchy="false">&#x27E8;</mml:mo><mml:msub><mml:mi>r</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo fence="false" stretchy="false">&#x27E9;</mml:mo><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula>, <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM238"><mml:mn>1</mml:mn><mml:mo>&#x2264;</mml:mo><mml:mi>i</mml:mi><mml:mo>&#x2264;</mml:mo><mml:mi>&#x03C9;</mml:mi></mml:math></inline-formula>, such that <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM239"><mml:mi>&#x03C9;</mml:mi><mml:mo>&#x22C5;</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:mi>&#x03C3;</mml:mi><mml:mo>+</mml:mo><mml:mo fence="false" stretchy="false">&#x2308;</mml:mo><mml:msub><mml:mi>log</mml:mi><mml:mn>2</mml:mn></mml:msub><mml:mo>&#x2061;</mml:mo><mml:mi>w</mml:mi><mml:mo fence="false" stretchy="false">&#x2309;</mml:mo><mml:mo>+</mml:mo><mml:mo fence="false" stretchy="false">&#x2308;</mml:mo><mml:msub><mml:mi>log</mml:mi><mml:mn>2</mml:mn></mml:msub><mml:mo>&#x2061;</mml:mo><mml:mi>n</mml:mi><mml:mo fence="false" stretchy="false">&#x2309;</mml:mo><mml:mo>+</mml:mo><mml:mn>1</mml:mn><mml:mo 
stretchy="false">)</mml:mo><mml:mo>&#x003C;</mml:mo><mml:mo fence="false" stretchy="false">&#x2308;</mml:mo><mml:msub><mml:mi>log</mml:mi><mml:mn>2</mml:mn></mml:msub><mml:mo>&#x2061;</mml:mo><mml:mi>N</mml:mi><mml:mo fence="false" stretchy="false">&#x2309;</mml:mo></mml:math></inline-formula>, where <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM240"><mml:mo fence="false" stretchy="false">&#x2308;</mml:mo><mml:msub><mml:mi>log</mml:mi><mml:mn>2</mml:mn></mml:msub><mml:mo>&#x2061;</mml:mo><mml:mi>w</mml:mi><mml:mo fence="false" stretchy="false">&#x2309;</mml:mo></mml:math></inline-formula> is an upper bound on the bit size of the weights, <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM241"><mml:mo fence="false" stretchy="false">&#x2308;</mml:mo><mml:msub><mml:mi>log</mml:mi><mml:mn>2</mml:mn></mml:msub><mml:mo>&#x2061;</mml:mo><mml:mi>n</mml:mi><mml:mo fence="false" stretchy="false">&#x2309;</mml:mo></mml:math></inline-formula> is the bit size of the number of parties <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM242"><mml:mi>n</mml:mi></mml:math></inline-formula>, and <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM243"><mml:mo fence="false" stretchy="false">&#x2308;</mml:mo><mml:msub><mml:mi>log</mml:mi><mml:mn>2</mml:mn></mml:msub><mml:mo>&#x2061;</mml:mo><mml:mi>N</mml:mi><mml:mo fence="false" stretchy="false">&#x2309;</mml:mo></mml:math></inline-formula> is the bit size of the encryption modulus. Then the <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM244"><mml:mi>&#x03C9;</mml:mi></mml:math></inline-formula> conversions can be combined as follows.
<list list-type="simple">
<list-item><label>1.</label>
<p><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM245"><mml:mo stretchy="false">[</mml:mo><mml:mi>C</mml:mi><mml:mo stretchy="false">]</mml:mo><mml:mo>=</mml:mo><mml:mo stretchy="false">[</mml:mo><mml:msub><mml:mi>w</mml:mi><mml:mi>&#x03C9;</mml:mi></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mi>R</mml:mi><mml:mi>&#x03C9;</mml:mi></mml:msub><mml:mo stretchy="false">]</mml:mo><mml:mo>=</mml:mo><mml:mo stretchy="false">[</mml:mo><mml:msub><mml:mi>w</mml:mi><mml:mi>&#x03C9;</mml:mi></mml:msub><mml:mo stretchy="false">]</mml:mo><mml:mo>&#x22C5;</mml:mo><mml:mo stretchy="false">[</mml:mo><mml:msub><mml:mi>R</mml:mi><mml:mi>&#x03C9;</mml:mi></mml:msub><mml:mo stretchy="false">]</mml:mo></mml:math></inline-formula></p></list-item>
<list-item><label>2.</label>
<p>For <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM246"><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mi>&#x03C9;</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:math></inline-formula> to <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM247"><mml:mn>1</mml:mn></mml:math></inline-formula> do <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM248"><mml:mo stretchy="false">[</mml:mo><mml:mi>C</mml:mi><mml:mo stretchy="false">]</mml:mo><mml:mo>=</mml:mo><mml:mo stretchy="false">[</mml:mo><mml:mi>C</mml:mi><mml:msup><mml:mo stretchy="false">]</mml:mo><mml:mrow><mml:msup><mml:mn>2</mml:mn><mml:mrow><mml:mi>&#x03C3;</mml:mi><mml:mo>+</mml:mo><mml:mo fence="false" stretchy="false">&#x2308;</mml:mo><mml:msub><mml:mi>log</mml:mi><mml:mn>2</mml:mn></mml:msub><mml:mo>&#x2061;</mml:mo><mml:mi>w</mml:mi><mml:mo fence="false" stretchy="false">&#x2309;</mml:mo><mml:mo>+</mml:mo><mml:mo fence="false" stretchy="false">&#x2308;</mml:mo><mml:msub><mml:mi>log</mml:mi><mml:mn>2</mml:mn></mml:msub><mml:mo>&#x2061;</mml:mo><mml:mi>n</mml:mi><mml:mo fence="false" stretchy="false">&#x2309;</mml:mo><mml:mo>+</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msup></mml:mrow></mml:msup><mml:mo>&#x22C5;</mml:mo><mml:mo stretchy="false">[</mml:mo><mml:msub><mml:mi>w</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo stretchy="false">]</mml:mo><mml:mo>&#x22C5;</mml:mo><mml:mo stretchy="false">[</mml:mo><mml:msub><mml:mi>R</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo stretchy="false">]</mml:mo></mml:math></inline-formula></p>
<p><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM249"><mml:mrow><mml:mo>{</mml:mo><mml:mi>C</mml:mi><mml:mo>=</mml:mo><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>&#x03C9;</mml:mi></mml:munderover><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>w</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mi>R</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mo>&#x22C5;</mml:mo><mml:msup><mml:mn>2</mml:mn><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>i</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn><mml:mo stretchy="false">)</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:mi>&#x03C3;</mml:mi><mml:mo>+</mml:mo><mml:mo fence="false" stretchy="false">&#x2308;</mml:mo><mml:msub><mml:mi>log</mml:mi><mml:mn>2</mml:mn></mml:msub><mml:mo>&#x2061;</mml:mo><mml:mi>w</mml:mi><mml:mo fence="false" stretchy="false">&#x2309;</mml:mo><mml:mo>+</mml:mo><mml:mo fence="false" stretchy="false">&#x2308;</mml:mo><mml:msub><mml:mi>log</mml:mi><mml:mn>2</mml:mn></mml:msub><mml:mo>&#x2061;</mml:mo><mml:mi>n</mml:mi><mml:mo fence="false" stretchy="false">&#x2309;</mml:mo><mml:mo>+</mml:mo><mml:mn>1</mml:mn><mml:mo stretchy="false">)</mml:mo><mml:mspace width="negativethinmathspace" /></mml:mrow></mml:msup><mml:mo>}</mml:mo></mml:mrow></mml:math></inline-formula></p></list-item>
<list-item><label>3.</label>
<p>The parties jointly decrypt <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM250"><mml:mi>C</mml:mi></mml:math></inline-formula> and split it into <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM251"><mml:msub><mml:mi>C</mml:mi><mml:mn>1</mml:mn></mml:msub><mml:mo>,</mml:mo><mml:mspace width="thickmathspace" /><mml:mo>&#x2026;</mml:mo><mml:mo>,</mml:mo><mml:mspace width="thickmathspace" /><mml:msub><mml:mi>C</mml:mi><mml:mi>&#x03C9;</mml:mi></mml:msub></mml:math></inline-formula>, each component consisting of <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM252"><mml:mi>&#x03C3;</mml:mi><mml:mo>+</mml:mo><mml:mo fence="false" stretchy="false">&#x2308;</mml:mo><mml:msub><mml:mi>log</mml:mi><mml:mn>2</mml:mn></mml:msub><mml:mo>&#x2061;</mml:mo><mml:mi>w</mml:mi><mml:mo fence="false" stretchy="false">&#x2309;</mml:mo><mml:mo>+</mml:mo><mml:mo fence="false" stretchy="false">&#x2308;</mml:mo><mml:msub><mml:mi>log</mml:mi><mml:mn>2</mml:mn></mml:msub><mml:mo>&#x2061;</mml:mo><mml:mi>n</mml:mi><mml:mo fence="false" stretchy="false">&#x2309;</mml:mo><mml:mo>+</mml:mo><mml:mn>1</mml:mn></mml:math></inline-formula> bits. <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM253"><mml:mrow><mml:mo>{</mml:mo><mml:msub><mml:mi>C</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mi>w</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mi>R</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>}</mml:mo></mml:mrow></mml:math></inline-formula></p></list-item>
<list-item><label>4.</label>
<p>For each <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM254"><mml:mi>i</mml:mi></mml:math></inline-formula>, <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM255"><mml:mn>1</mml:mn><mml:mo>&#x2264;</mml:mo><mml:mi>i</mml:mi><mml:mo>&#x2264;</mml:mo><mml:mi>&#x03C9;</mml:mi></mml:math></inline-formula>, the parties compute <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM256"><mml:mo fence="false" stretchy="false">&#x27E8;</mml:mo><mml:msub><mml:mi>w</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo fence="false" stretchy="false">&#x27E9;</mml:mo><mml:mo>=</mml:mo><mml:msub><mml:mi>C</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo lspace="thickmathspace" rspace="thickmathspace">mod</mml:mo><mml:mi>N</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mo fence="false" stretchy="false">&#x27E8;</mml:mo><mml:msub><mml:mi>r</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo fence="false" stretchy="false">&#x27E9;</mml:mo></mml:math></inline-formula>.</p></list-item>
</list>This reduces the number of decryptions by a factor of <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM257"><mml:mi>&#x03C9;</mml:mi></mml:math></inline-formula>, at the cost of some extra multiplications that, combined, are comparable to the effort of one decryption. To further reduce the number of secure additions, each party could pack <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM258"><mml:mi>&#x03C9;</mml:mi></mml:math></inline-formula> random numbers before encrypting them when precomputing <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM259"><mml:mo stretchy="false">(</mml:mo><mml:mo stretchy="false">[</mml:mo><mml:mi>R</mml:mi><mml:mo stretchy="false">]</mml:mo><mml:mo>,</mml:mo><mml:mspace width="thickmathspace" /><mml:mo fence="false" stretchy="false">&#x27E8;</mml:mo><mml:mi>r</mml:mi><mml:mo fence="false" stretchy="false">&#x27E9;</mml:mo><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula> pairs (see <xref ref-type="sec" rid="s3f">Section 3.6</xref>), which also reduces the communication effort.</p>
</sec>
</sec>
<sec id="s5"><label>5</label><title>Evaluation</title>
<sec id="s5a"><label>5.1</label><title>Security</title>
<p>Because topic sampling is performed in a secure, but joint way, the parties learn the total number of words in all documents of a single party. However, nobody learns the sampling probabilities, and only the document holder learns the new topics (of the words in their documents). Our solution is secure in the semi-honest model, i.e., parties are expected to exactly follow the protocol steps, but are allowed to compute with any data that is received during execution in an attempt to gain additional insights into other parties&#x2019; data.</p>
<p>As we use standard building blocks, such as secure comparison and random number generation, of the MPyC platform, which is known to be secure in the semi-honest model, our computations with secret-sharings are secure too. Similarly, Paillier is known to be semantically secure, and since we use threshold decryption, encrypted information will never fall into the wrong hands.</p>
<p>Therefore, we only need to investigate the conversions from encryptions to secret-sharings, as described in <xref ref-type="sec" rid="s3f">Section 3.6</xref>. Because the numbers <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM260"><mml:mi>R</mml:mi></mml:math></inline-formula> contain <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM261"><mml:mi>&#x03C3;</mml:mi></mml:math></inline-formula> more bits than the weights, where <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM262"><mml:mi>&#x03C3;</mml:mi></mml:math></inline-formula> is the statistical security parameter, we know that the sum <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM263"><mml:mi>w</mml:mi><mml:mo>+</mml:mo><mml:mi>R</mml:mi></mml:math></inline-formula> is statistically indistinguishable from a large random number, and can be safely revealed. Furthermore, as each party <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM264"><mml:mi>i</mml:mi></mml:math></inline-formula> generates its own <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM265"><mml:msub><mml:mi>R</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:math></inline-formula> and <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM266"><mml:msub><mml:mi>r</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:math></inline-formula>, the sums <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM267"><mml:munder><mml:mo>&#x2211;</mml:mo><mml:mi>i</mml:mi></mml:munder><mml:msub><mml:mi>R</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:math></inline-formula> and <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM268"><mml:munder><mml:mo>&#x2211;</mml:mo><mml:mi>i</mml:mi></mml:munder><mml:msub><mml:mi>r</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:math></inline-formula> can be considered as secret random 
numbers.</p>
</sec>
<sec id="s5b"><label>5.2</label><title>Implementation</title>
<p>We have implemented our secure LDA approach in Python 3.8. For the homomorphic encryption functionalities, we have used the Paillier implementation available in the TNO MPC Lab (<xref ref-type="bibr" rid="B15">15</xref>). This implementation is based on the distributed Paillier solution presented in (<xref ref-type="bibr" rid="B10">10</xref>). For the functionalities based on secret sharing, we have used the MPyC framework (<xref ref-type="bibr" rid="B16">16</xref>). This framework implements a number of functionalities based on Shamir secret sharing. We performed all of our experiments with three parties, but stress that our implementation also works for more parties.</p>
</sec>
<sec id="s5c"><label>5.3</label><title>Experimental setup</title>
<p>For our experiments, we used the Amazon reviews dataset presented by Ni, Li and McAuley (<xref ref-type="bibr" rid="B17">17</xref>). In total, this dataset consists of over 200 million reviews. However, we only used the first 150 entries. Furthermore, we split these 150 entries into three separate datasets of 50 documents for the three different parties. In total, this results in a vocabulary length of <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM269"><mml:mi>V</mml:mi><mml:mo>=</mml:mo><mml:mn>1492</mml:mn></mml:math></inline-formula> terms and a total number of 2,965 words in the distributed corpus. For the experiments, we used 5, 10, 20, 30, 40 and 50 documents per party. As the number of words is not the same for every document, we compared the number of words over all documents for the actual experiments, which is 166, 406, 873, 1,549, 2,197 and 2,965 respectively. Furthermore, we chose the symmetric priors <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM270"><mml:mi>&#x03B1;</mml:mi><mml:mo>=</mml:mo><mml:mi>&#x03B2;</mml:mi><mml:mo>=</mml:mo><mml:mrow><mml:mfrac><mml:mn>1</mml:mn><mml:mi>K</mml:mi></mml:mfrac></mml:mrow></mml:math></inline-formula>. This corresponds to the default parameter choices in the scikit-learn implementation of LDA.</p>
<p>All experiments have been run on a single server running an Intel Broadwell CPU at 2.1&#x2009;GHz with 4 cores and 32&#x2009;GB RAM. The parties communicated via (local) HTTPS connections.</p>
</sec>
<sec id="s5d"><label>5.4</label><title>Performance</title>
<p>We evaluate the performance of our solution in terms of accuracy and runtime.</p>
<sec id="s5d1"><label>5.4.1</label><title>Accuracy</title>
<p>In order to evaluate the accuracy of our secure LDA solution, we compare its results to the results obtained when performing a regular LDA implementation without any encryption or secret sharing. We compare both using the <italic>perplexity</italic> metric. This metric is standard in language modelling and is defined as <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM271"><mml:munder><mml:mo>&#x220F;</mml:mo><mml:mi>m</mml:mi></mml:munder><mml:msubsup><mml:mi>p</mml:mi><mml:mi>m</mml:mi><mml:mrow><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn><mml:mrow><mml:mo>/</mml:mo></mml:mrow><mml:mi>N</mml:mi></mml:mrow></mml:msubsup></mml:math></inline-formula>. Here, <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM272"><mml:mi>N</mml:mi><mml:mo>=</mml:mo><mml:munder><mml:mo>&#x2211;</mml:mo><mml:mi>m</mml:mi></mml:munder><mml:msub><mml:mi>N</mml:mi><mml:mi>m</mml:mi></mml:msub></mml:math></inline-formula> is the total number of words, and <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM273"><mml:msub><mml:mi>p</mml:mi><mml:mi>m</mml:mi></mml:msub></mml:math></inline-formula> is the predictive likelihood of all words in document <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM274"><mml:mi>m</mml:mi></mml:math></inline-formula> (<xref ref-type="bibr" rid="B4">4</xref>). Perplexity is an objective metric that essentially computes the inverse of the geometric mean of the per-word likelihood in a set of observed documents. Lower perplexity scores imply a model that describes the dataset better. We have implemented and compared three versions of LDA:
<list list-type="simple">
<list-item><label>&#x2013;</label>
<p><bold>Standard LDA:</bold> this is a standard implementation of LDA without the use of encryption and updating the matrices after each word topic generation.</p></list-item>
<list-item><label>&#x2013;</label>
<p><bold>Batching LDA:</bold> this version also does not use encryption, but implements a <italic>batched</italic> version of LDA, updating the matrices only once at the end of each pass through the entire corpus.</p></list-item>
<list-item><label>&#x2013;</label>
<p><bold>Secure LDA:</bold> this is the solution presented in this work. It implements a privacy-preserving batched version of the LDA algorithm.</p></list-item>
</list>By comparing the standard- and batching versions of LDA, we can measure the impact of the adaptation we made to the algorithm. By then comparing the batching- and the secure variants, we can furthermore measure the accuracy of our privacy-preserving solution.</p>
<p>We let all three variants run for 100 iterations with two topics and 50 documents per party, which results in a total of 2,965 words distributed over the parties. The results of this experiment can be found in <xref ref-type="fig" rid="F2">Figure&#x00A0;2</xref>. We ran all versions five times and present the average results.</p>
<fig id="F2" position="float"><label>Figure 2</label>
<caption><p>Perplexity traces of three LDA variants.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fdgth-07-1610228-g002.tif"><alt-text content-type="machine-generated">Line graph showing perplexity versus iteration for three models: Standard LDA, Batching LDA, and Secure LDA. All models decrease perplexity over 100 iterations, with Standard LDA showing the steepest improvement, followed by Batching LDA and Secure LDA.</alt-text>
</graphic>
</fig>
<p>As can be seen, the standard version of LDA converges faster than the batching- and secure variants. Furthermore, we see that by updating the weights after every word, the standard version generates a slightly better model. However, the differences do not seem to be significant. Finally, we observe that the secure variant shows behaviour similar to the batched plaintext variant, which strongly suggests that the use of encryption and secret sharing does not reduce the accuracy of the algorithm.</p>
</sec>
<sec id="s5d2"><label>5.4.2</label><title>Runtime</title>
<p>To see the influence of the input size and the desired complexity of the model to train, we ran benchmarks varying both the total number of words in all the documents, and the number of topics to model. We separately measured the runtime of the pre-processing step for the ciphertext conversions and performing one iteration of the secure LDA algorithm. For all benchmarks, we used a 1024-bit Paillier key<xref ref-type="fn" rid="FN0002"><sup>2</sup></xref> for the homomorphic encryptions and a 64-bit field size for the Shamir secret shares. All parameter combinations have been tested five times and averaged.</p>
<p>First, we present the results for a varying number of topics for the preprocessing phase and the iteration phase in <xref ref-type="fig" rid="F3">Figures&#x00A0;3a,b</xref> respectively. As can be seen, the amount of work for the preprocessing phase is linear in both the number <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM275"><mml:mi>N</mml:mi></mml:math></inline-formula> of words and the number <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM276"><mml:mi>K</mml:mi></mml:math></inline-formula> of topics, which is as expected as the number of tuples required per iteration is <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM277"><mml:mi>N</mml:mi><mml:mo>&#x22C5;</mml:mo><mml:mi>K</mml:mi><mml:mo>&#x22C5;</mml:mo><mml:mn>2</mml:mn></mml:math></inline-formula>. For the iterations, the general trend for an increasing number of topics is also linear with slightly steeper increases from 2 to 3, 4 to 5 and 8 to 9. This is explained by the fact that for the secure drawing, the number of intervals is extended by dummies to reach a power of two (either <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM278"><mml:msup><mml:mn>2</mml:mn><mml:mn>1</mml:mn></mml:msup></mml:math></inline-formula>, <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM279"><mml:msup><mml:mn>2</mml:mn><mml:mn>2</mml:mn></mml:msup></mml:math></inline-formula>, <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM280"><mml:msup><mml:mn>2</mml:mn><mml:mn>3</mml:mn></mml:msup></mml:math></inline-formula>, or <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM281"><mml:msup><mml:mn>2</mml:mn><mml:mn>4</mml:mn></mml:msup></mml:math></inline-formula> in these experiments), which incurs an extra step in the binary search (see <xref ref-type="sec" rid="app3">Appendix A.3</xref> to avoid this). 
Other than that, the amount of work scales linearly in the number of topics.</p>
<p>Second, to see the influence of the input size, we also plotted the runtimes in <xref ref-type="fig" rid="F4">Figure 4</xref> against an increasing number of words over all parties. As expected, the preprocessing phase again shows a linear increase in the number of words. However, the runtime of one iteration seems to grow slightly faster than linear, which might seem surprising at first, as the algorithm description does not suggest a superlinear increase as the number of words grows. This behaviour is explained by the way we batch conversions in <xref ref-type="sec" rid="s4b">Section 4.2</xref>. Namely, a fixed number of weights can be converted at once, depending on the size of the Paillier modulus. As long as the number of conversions that need to be performed fits in the same number of decryptions, the runtime of an iteration grows linearly. However, if more decryptions are required in this step, the increase in runtime grows faster.</p>
<fig id="F3" position="float"><label>Figure 3</label>
<caption><p>Benchmark of secure LDA in the number of topics. <bold>(a)</bold> Preprocessing. <bold>(b)</bold> Iteration.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fdgth-07-1610228-g003.tif"><alt-text content-type="machine-generated">Two line charts comparing runtime in seconds for preprocessing and iteration against the number of topics. Each chart includes lines for datasets with 166, 406, 873, 1549, 2197, and 2965 words. Both charts show increasing runtime with more topics, with larger datasets taking longer.</alt-text>
</graphic>
</fig>
<fig id="F4" position="float"><label>Figure 4</label>
<caption><p>Benchmark results of secure LDA in the number of words. <bold>(a)</bold> Preprocessing. <bold>(b)</bold> Iteration.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fdgth-07-1610228-g004.tif"><alt-text content-type="machine-generated">Two line graphs compare runtimes for preprocessing and iteration based on the number of words for two to nine topics. Runtimes increase with more topics and words, with separate scales for preprocessing (up to 160 seconds) and iteration (up to 800 seconds). Each line represents a different topic count.</alt-text>
</graphic>
</fig>
</sec>
</sec>
<sec id="s5e"><label>5.5</label><title>Comparison to prior work</title>
<p>As explained in <xref ref-type="sec" rid="s1b">Section 1.2</xref>, there are three works that also consider decentralized, privacy-preserving LDA. In <xref ref-type="table" rid="T1">Table&#x00A0;1</xref>, we highlight the most important differences between our work and these related works. Due to the lack of comparable runtime measurements in these works it is hard to compare our work in that regard. Instead, we turn to a conceptual comparison.</p>
<p>In terms of accuracy, it is unclear how the altered algorithm of (<xref ref-type="bibr" rid="B8">8</xref>) impacts the accuracy exactly since they do not provide metrics such as perplexity. We do know that their convergence notion influences the resulting model accuracy to some extent. Furthermore, they leak the probability distributions for the topics in every round, which is a privacy risk as this reveals information about other parties&#x2019; data. Our solution keeps <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM282"><mml:mo movablelimits="true" form="prefix">Pr</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>z</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mi>k</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula> secret throughout the entire protocol. Furthermore, they do not provide a security argument for their solution, which we do.</p>
<p>Due to the use of differential privacy, (<xref ref-type="bibr" rid="B7">7</xref>) is not able to match the accuracy of non-private LDA like we are able to do using MPC. Furthermore, this is a weaker security guarantee and might still leak some (statistical) information about the data of the other parties. This solution is, however, faster than our solution.</p>
<p>Finally, in (<xref ref-type="bibr" rid="B6">6</xref>) an approach is used where statistical information about the documents of the parties is shared in every round. This way, they are able to learn models with high accuracy and obtain a high performance at the cost of very low security guarantees as this essentially comes down to sharing your document-topic matrix.</p>
<p>All in all, our solution is very secure and accurate, at the cost of a lower performance. However, our solution scales linearly in both the number of words and the number of topics, which makes it scalable in practice.</p>
</sec>
</sec>
<sec id="s6" sec-type="conclusions"><label>6</label><title>Conclusions</title>
<p>In this work, we have presented and evaluated a fundamentally new approach to securely perform an LDA algorithm on a set of documents distributed amongst several, untrusting parties. Compared to earlier solutions, our solution provides stronger secrecy as we keep almost <italic>everything</italic> secret, including the topic weights. The only thing leaked in our solution is the total number of words over all documents of a party. Furthermore, we minimize the risk of leakage as the data is protected using cryptographic assumptions instead of statistical techniques like differential privacy, which might accidentally still leak some information. Furthermore, we show that the accuracy of our approach is similar to non-secure variants of the LDA algorithm.</p>
<p>Finally, we show that our solution scales nearly linearly in the number of topics and the number of words. All in all, this makes it an attractive solution in practice, even for larger datasets.</p>
</sec>
</body>
<back>
<sec id="s7" sec-type="data-availability"><title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/Supplementary Material, further inquiries can be directed to the corresponding authors.</p>
</sec>
<sec id="s9" sec-type="author-contributions"><title>Author contributions</title>
<p>TV: Writing &#x2013; review &#x0026; editing, Writing &#x2013; original draft. VD: Writing &#x2013; original draft, Writing &#x2013; review &#x0026; editing. MM: Writing &#x2013; review &#x0026; editing, Writing &#x2013; original draft. BK: Writing &#x2013; review &#x0026; editing, Writing &#x2013; original draft.</p>
</sec>
<sec id="s10" sec-type="funding-information"><title>Funding</title>
<p>The author(s) declare that financial support was received for the research and/or publication of this article. The research was performed within TNO&#x2019;s Appl.AI program.</p>
</sec>
<sec id="s11" sec-type="COI-statement"><title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="s12" sec-type="ai-statement"><title>Generative AI statement</title>
<p>The author(s) declare that no Generative AI was used in the creation of this manuscript.</p>
</sec>
<fn-group>
<fn id="FN0001"><p><sup>1</sup>Term refers to the element of a vocabulary, and word refers to the element of a document. A term has a particular meaning and can be instantiated by several words.</p></fn>
<fn id="FN0002"><p><sup>2</sup>From a security perspective, a 2048-bit key would have been preferable, but our primary goal was to investigate input scalability.</p></fn>
</fn-group>
<sec id="s13" sec-type="disclaimer"><title>Publisher&#x0027;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list><title>References</title>
<ref id="B1"><label>1.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rijcken</surname><given-names>E</given-names></name><name><surname>Kaymak</surname><given-names>U</given-names></name><name><surname>Scheepers</surname><given-names>F</given-names></name><name><surname>Mosteiro</surname><given-names>P</given-names></name><name><surname>Zervanou</surname><given-names>K</given-names></name><name><surname>Spruit</surname><given-names>M</given-names></name></person-group>. <article-title>Topic modeling for interpretable text classification from EHRs</article-title>. <source>Front Big Data</source>. (<year>2022</year>) <volume>5</volume>:<fpage>846930</fpage>. <pub-id pub-id-type="doi">10.3389/fdata.2022.846930</pub-id><pub-id pub-id-type="pmid">35600326</pub-id></citation></ref>
<ref id="B2"><label>2.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Noble</surname><given-names>PJM</given-names></name><name><surname>Appleton</surname><given-names>C</given-names></name><name><surname>Radford</surname><given-names>AD</given-names></name><name><surname>Nenadic</surname><given-names>G</given-names></name></person-group>. <article-title>Using topic modelling for unsupervised annotation of electronic health records to identify an outbreak of disease in UK dogs</article-title>. <source>PLoS One</source>. (<year>2021</year>) <volume>16</volume>(<issue>12</issue>):<fpage>e0260402</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pone.0260402</pub-id><pub-id pub-id-type="pmid">34882714</pub-id></citation></ref>
<ref id="B3"><label>3.</label><citation citation-type="other"><person-group person-group-type="author"><name><surname>Harrando</surname><given-names>I</given-names></name><name><surname>Lisena</surname><given-names>P</given-names></name><name><surname>Troncy</surname><given-names>R</given-names></name></person-group>. <article-title>Apples to apples: a systematic evaluation of topic models</article-title>. In: <comment><italic>RANLP</italic>. INCOMA Ltd. (2021). p. 483&#x2013;93</comment>.</citation></ref>
<ref id="B4"><label>4.</label><citation citation-type="book"><person-group person-group-type="author"><name><surname>Heinrich</surname><given-names>G</given-names></name></person-group>. <source>Parameter estimation for text analysis</source> (Tech. Rep.). Citeseer (<year>2005</year>).</citation></ref>
<ref id="B5"><label>5.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhao</surname><given-names>F</given-names></name><name><surname>Ren</surname><given-names>X</given-names></name><name><surname>Yang</surname><given-names>S</given-names></name><name><surname>Han</surname><given-names>Q</given-names></name><name><surname>Zhao</surname><given-names>P</given-names></name><name><surname>Yang</surname><given-names>X</given-names></name></person-group>. <article-title>Latent dirichlet allocation model training with differential privacy</article-title>. <source>IEEE Trans Inf Forensics Secur</source>. (<year>2021</year>) <volume>16</volume>:<fpage>1290</fpage>&#x2013;<lpage>305</lpage>. <pub-id pub-id-type="doi">10.1109/TIFS.2020.3032021</pub-id></citation></ref>
<ref id="B6"><label>6.</label><citation citation-type="other"><person-group person-group-type="author"><name><surname>Colin</surname><given-names>I</given-names></name><name><surname>Dupuy</surname><given-names>C</given-names></name></person-group>. <article-title>Decentralized topic modelling with latent dirichlet allocation</article-title>. <comment><italic>CoRR</italic> [Preprint]. <italic>abs/1610.01417</italic> (2016)</comment>.</citation></ref>
<ref id="B7"><label>7.</label><citation citation-type="other"><person-group person-group-type="author"><name><surname>Wang</surname><given-names>Y</given-names></name><name><surname>Tong</surname><given-names>Y</given-names></name><name><surname>Shi</surname><given-names>D</given-names></name></person-group>. <article-title>Federated latent dirichlet allocation: a local differential privacy based framework</article-title>. <comment>In: <italic>AAAI</italic>. AAAI Press (2020). p. 6283&#x2013;90</comment>.</citation></ref>
<ref id="B8"><label>8.</label><citation citation-type="other"><person-group person-group-type="author"><name><surname>Yang</surname><given-names>B</given-names></name><name><surname>Nakagawa</surname><given-names>H</given-names></name></person-group>. <article-title>Computation of ratios of secure summations in multi-party privacy-preserving latent dirichlet allocation</article-title>. <comment>In: <italic>PAKDD (1)</italic>. Springer (2010). p. 189&#x2013;97. Lecture Notes in Computer Science; vol. 6118</comment>.</citation></ref>
<ref id="B9"><label>9.</label><citation citation-type="other"><person-group person-group-type="author"><name><surname>Paillier</surname><given-names>P</given-names></name></person-group>. <article-title>Public-key cryptosystems based on composite degree residuosity classes</article-title>. <comment>In: <italic>EUROCRYPT</italic>. Springer (1999). p. 223&#x2013;38. Lecture Notes in Computer Science; vol. 1592</comment>.</citation></ref>
<ref id="B10"><label>10.</label><citation citation-type="other"><person-group person-group-type="author"><name><surname>Veugen</surname><given-names>T</given-names></name><name><surname>Attema</surname><given-names>T</given-names></name><name><surname>Spini</surname><given-names>G</given-names></name></person-group>. <article-title>An implementation of the paillier crypto system with threshold decryption without a trusted dealer</article-title>. <comment><italic>IACR Cryptol. ePrint Arch.</italic> [Preprint]. (2019). p. 1136</comment>.</citation></ref>
<ref id="B11"><label>11.</label><citation citation-type="other"><person-group person-group-type="author"><name><surname>Chaum</surname><given-names>D</given-names></name><name><surname>Cr&#x00E9;peau</surname><given-names>C</given-names></name><name><surname>Damg&#x00E5;rd</surname><given-names>I</given-names></name></person-group>. <article-title>Multiparty unconditionally secure protocols (extended abstract)</article-title>. <comment>In: <italic>STOC</italic>. ACM (1988). p. 11&#x2013;9</comment>.</citation></ref>
<ref id="B12"><label>12.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Shamir</surname><given-names>A</given-names></name></person-group>. <article-title>How to share a secret</article-title>. <source>Commun ACM</source>. (<year>1979</year>) <volume>22</volume>(<issue>11</issue>):<fpage>612</fpage>&#x2013;<lpage>3</lpage>. <pub-id pub-id-type="doi">10.1145/359168.359176</pub-id></citation></ref>
<ref id="B13"><label>13.</label><citation citation-type="other"><person-group person-group-type="author"><name><surname>Bar-Ilan</surname><given-names>J</given-names></name><name><surname>Beaver</surname><given-names>D</given-names></name></person-group>. <article-title>Non-cryptographic fault-tolerant computing in constant number of rounds of interaction</article-title>. <comment>In: <italic>PODC</italic>. ACM (1989). p. 201&#x2013;9</comment>.</citation></ref>
<ref id="B14"><label>14.</label><citation citation-type="other"><person-group person-group-type="author"><name><surname>de Hoogh</surname><given-names>S</given-names></name><name><surname>van Tilborg</surname><given-names>H</given-names></name></person-group>. <article-title><italic>Design of large scale applications of secure multiparty computation: secure linear programming</italic></article-title> <comment>(Ph.D dissertation). Eindhoven, Netherlands: Department of Mathematics and Computer Science, Technische Universiteit Eindhoven (2012)</comment>.</citation></ref>
<ref id="B15"><label>15.</label><citation citation-type="other"><collab>TNO MPC Lab</collab>. <article-title>Paillier encryption scheme implementation</article-title>. <comment>Available online at:</comment> <ext-link ext-link-type="uri" xlink:href="https://github.com/TNO-MPC/encryption_schemes.paillier">https://github.com/TNO-MPC/encryption&#x005F;schemes.paillier</ext-link> <comment>(Accessed July 15, 2025)</comment>.</citation></ref>
<ref id="B16"><label>16.</label><citation citation-type="other"><person-group person-group-type="author"><name><surname>Schoenmakers</surname><given-names>B</given-names></name></person-group>. <article-title>Secure multiparty computation in python</article-title>. <comment>Available online at:</comment> <ext-link ext-link-type="uri" xlink:href="https://www.win.tue.nl/~berry/mpyc/">https://www.win.tue.nl/~berry/mpyc/</ext-link> <comment>(Accessed May 2018)</comment>.</citation></ref>
<ref id="B17"><label>17.</label><citation citation-type="other"><person-group person-group-type="author"><name><surname>Ni</surname><given-names>J</given-names></name><name><surname>Li</surname><given-names>J</given-names></name><name><surname>McAuley</surname><given-names>JJ</given-names></name></person-group>. <article-title>Justifying recommendations using distantly-labeled reviews and fine-grained aspects</article-title>. <comment>In: <italic>EMNLP/IJCNLP (1)</italic>. Association for Computational Linguistics (2019). p. 188&#x2013;97</comment>.</citation></ref>
<ref id="B18"><label>18.</label><citation citation-type="other"><person-group person-group-type="author"><name><surname>Damg&#x00E5;rd</surname><given-names>I</given-names></name><name><surname>Meldgaard</surname><given-names>S</given-names></name><name><surname>Nielsen</surname><given-names>JB</given-names></name></person-group>. <article-title>Perfectly secure oblivious RAM without random oracles</article-title>. <comment>In: <italic>TCC</italic>. Springer (2011). p. 144&#x2013;63. Lecture Notes in Computer Science; vol. 6597</comment>.</citation></ref>
<ref id="B19"><label>19.</label><citation citation-type="other"><person-group person-group-type="author"><name><surname>Ostrovsky</surname><given-names>R</given-names></name><name><surname>Shoup</surname><given-names>V</given-names></name></person-group>. <article-title>Private information storage (extended abstract)</article-title>. <comment>In: <italic>STOC</italic>. ACM (1997). p. 294&#x2013;303</comment>.</citation></ref></ref-list>
<app-group><app id="app3"><title>Appendix A. Optimizations</title>
<p>We describe a few optional optimisations that were not implemented due to time constraints.</p>
<sec id="app3a"><title>A.1 Use of oblivious RAM</title>
<p>Another promising solution for securely storing and accessing the topic-term matrix is by oblivious RAM (<xref ref-type="bibr" rid="B18">18</xref>, <xref ref-type="bibr" rid="B19">19</xref>). In the semi-honest model, a more efficient solution is to store the matrix entries somewhere, e.g. in the cloud, in a homomorphically encrypted way. Each party can query and modify entries, without the other parties noticing it.</p>
</sec>
<sec id="app3b"><title>A.2 Avoid indicator vectors</title>
<p>To avoid generating indicator vectors and computing a secure inner product for each new threshold, we could decide to postpone the conversions. Given the encrypted weights, party <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM283"><mml:mi>p</mml:mi></mml:math></inline-formula> can first add the proper weights to determine the next threshold. Only then is the encrypted threshold converted to a secret-sharing. This does not increase the number of conversions. The transforming of fractional to integer weights might become more intensive though.</p>
<p>Given our parallel approach of combining all drawings of one party, we could compute all weights as follows:
<list list-type="simple">
<list-item><label>1.</label>
<p><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM284"><mml:mo stretchy="false">[</mml:mo><mml:msub><mml:mi>N</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mo stretchy="false">]</mml:mo><mml:mo>=</mml:mo><mml:munderover><mml:mo>&#x220F;</mml:mo><mml:mrow><mml:mi>&#x03C4;</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>V</mml:mi></mml:munderover><mml:mo stretchy="false">[</mml:mo><mml:msubsup><mml:mi>n</mml:mi><mml:mi>k</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>&#x03C4;</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo>+</mml:mo><mml:msub><mml:mi>&#x03B2;</mml:mi><mml:mrow><mml:mi>&#x03C4;</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">]</mml:mo></mml:math></inline-formula></p></list-item>
<list-item><label>2.</label>
<p><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM285"><mml:mo stretchy="false">[</mml:mo><mml:mi>N</mml:mi><mml:mo stretchy="false">]</mml:mo><mml:mo>=</mml:mo><mml:mo stretchy="false">[</mml:mo><mml:munder><mml:mo>&#x220F;</mml:mo><mml:mi>k</mml:mi></mml:munder><mml:msub><mml:mi>N</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mo stretchy="false">]</mml:mo></mml:math></inline-formula> &#x007B;secure product&#x007D;</p></list-item>
<list-item><label>3.</label>
<p><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM286"><mml:mo stretchy="false">[</mml:mo><mml:msub><mml:mi>&#x03C9;</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mo stretchy="false">]</mml:mo><mml:mo>=</mml:mo><mml:mo stretchy="false">[</mml:mo><mml:mi>N</mml:mi><mml:mo>&#x22C5;</mml:mo><mml:msubsup><mml:mi>N</mml:mi><mml:mi>k</mml:mi><mml:mrow><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msubsup><mml:mo stretchy="false">]</mml:mo></mml:math></inline-formula> &#x007B;secure product and secure inverse&#x007D;</p></list-item>
</list>Given <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM287"><mml:mo stretchy="false">[</mml:mo><mml:msub><mml:mi>&#x03C9;</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mo stretchy="false">]</mml:mo></mml:math></inline-formula>, where <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM288"><mml:msub><mml:mi>&#x03C9;</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mo>&#x221D;</mml:mo><mml:mrow><mml:mfrac><mml:mn>1</mml:mn><mml:msub><mml:mi>N</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:mfrac></mml:mrow></mml:math></inline-formula>, the weights for each term <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM289"><mml:mi>t</mml:mi></mml:math></inline-formula> can be computed as <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM290"><mml:mo stretchy="false">[</mml:mo><mml:msubsup><mml:mi>w</mml:mi><mml:mi>k</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>t</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo stretchy="false">]</mml:mo><mml:mo>=</mml:mo><mml:mrow><mml:mo>[</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:msubsup><mml:mi>n</mml:mi><mml:mi>k</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>t</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo>+</mml:mo><mml:msub><mml:mi>&#x03B2;</mml:mi><mml:mi>t</mml:mi></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mo>&#x22C5;</mml:mo><mml:msub><mml:mi>&#x03C9;</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mo>]</mml:mo></mml:mrow></mml:math></inline-formula> with one secure product. 
Using the local matrix <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM291"><mml:msubsup><mml:mi>n</mml:mi><mml:mi>m</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>k</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup></mml:math></inline-formula>, these weights can be adjusted locally to document <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM292"><mml:mi>m</mml:mi></mml:math></inline-formula>, to cope with the factor <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM293"><mml:mrow><mml:mfrac><mml:mrow><mml:msubsup><mml:mi>n</mml:mi><mml:mi>m</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>k</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo>+</mml:mo><mml:msub><mml:mi>&#x03B1;</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:mrow><mml:mrow><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>&#x03BA;</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>K</mml:mi></mml:munderover><mml:mo stretchy="false">(</mml:mo><mml:msubsup><mml:mi>n</mml:mi><mml:mi>m</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>&#x03BA;</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo>+</mml:mo><mml:msub><mml:mi>&#x03B1;</mml:mi><mml:mrow><mml:mi>&#x03BA;</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mfrac></mml:mrow></mml:math></inline-formula>. 
This adjustment comes down to the exponentiation <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM294"><mml:mo stretchy="false">[</mml:mo><mml:msub><mml:mrow><mml:mover><mml:mi>w</mml:mi><mml:mo stretchy="false">&#x007E;</mml:mo></mml:mover></mml:mrow><mml:mi>k</mml:mi></mml:msub><mml:mo stretchy="false">]</mml:mo><mml:mo>=</mml:mo><mml:msup><mml:mrow><mml:mo>[</mml:mo><mml:msubsup><mml:mi>w</mml:mi><mml:mi>k</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>t</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo>]</mml:mo></mml:mrow><mml:mrow><mml:msubsup><mml:mi>v</mml:mi><mml:mi>m</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>k</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup></mml:mrow></mml:msup></mml:math></inline-formula>, where <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM295"><mml:msubsup><mml:mi>v</mml:mi><mml:mi>m</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>k</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:msubsup><mml:mi>n</mml:mi><mml:mi>m</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>k</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo>+</mml:mo><mml:msub><mml:mi>&#x03B1;</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mo>&#x22C5;</mml:mo><mml:msup><mml:mrow><mml:mo>(</mml:mo><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>&#x03BA;</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>K</mml:mi></mml:munderover><mml:mo stretchy="false">(</mml:mo><mml:msubsup><mml:mi>n</mml:mi><mml:mi>m</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>&#x03BA;</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo>+</mml:mo><mml:msub><mml:mi>&#x03B1;</mml:mi><mml:mrow><mml:mi>&#x03BA;</mml:mi></mml:mrow></mml:msub><mml:mo 
stretchy="false">)</mml:mo><mml:mo>)</mml:mo></mml:mrow><mml:mrow><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msup><mml:mo>&#x22C5;</mml:mo><mml:munderover><mml:mo>&#x220F;</mml:mo><mml:mrow><mml:mi>&#x03BC;</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>M</mml:mi></mml:munderover><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>&#x03BA;</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>K</mml:mi></mml:munderover><mml:mrow><mml:mo>(</mml:mo><mml:msubsup><mml:mi>n</mml:mi><mml:mrow><mml:mi>&#x03BC;</mml:mi></mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>&#x03BA;</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo>+</mml:mo><mml:msub><mml:mi>&#x03B1;</mml:mi><mml:mrow><mml:mi>&#x03BA;</mml:mi></mml:mrow></mml:msub><mml:mo>)</mml:mo></mml:mrow></mml:math></inline-formula>.</p>
<p>To generate a secret random number <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM296"><mml:mi>r</mml:mi></mml:math></inline-formula>, given term <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM297"><mml:mi>t</mml:mi></mml:math></inline-formula> and document <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM298"><mml:mi>m</mml:mi></mml:math></inline-formula>, the encryption <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM299"><mml:munder><mml:mo>&#x220F;</mml:mo><mml:mi>k</mml:mi></mml:munder><mml:mo stretchy="false">[</mml:mo><mml:msub><mml:mrow><mml:mover><mml:mi>w</mml:mi><mml:mo stretchy="false">&#x007E;</mml:mo></mml:mover></mml:mrow><mml:mi>k</mml:mi></mml:msub><mml:mo stretchy="false">]</mml:mo></mml:math></inline-formula> needs to be converted to a secret-sharing. During each iteration step of the binary search, the proper weights <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM300"><mml:mo stretchy="false">[</mml:mo><mml:msub><mml:mrow><mml:mover><mml:mi>w</mml:mi><mml:mo stretchy="false">&#x007E;</mml:mo></mml:mover></mml:mrow><mml:mi>k</mml:mi></mml:msub><mml:mo stretchy="false">]</mml:mo></mml:math></inline-formula> can be accumulated by party <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM301"><mml:mi>p</mml:mi></mml:math></inline-formula> to obtain the new threshold, which can then be converted to a secret-sharing for the secure comparison.</p>
</sec>
<sec id="app3c"><title>A.3 Number of topics not a power of two</title>
<p>If the number <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM302"><mml:mi>K</mml:mi></mml:math></inline-formula> of topics is a power of two, the binary search can be easily performed. If <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM303"><mml:msup><mml:mn>2</mml:mn><mml:mrow><mml:mi>&#x03BB;</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msup><mml:mo>&#x003C;</mml:mo><mml:mi>K</mml:mi><mml:mo>&#x003C;</mml:mo><mml:msup><mml:mn>2</mml:mn><mml:mrow><mml:mi>&#x03BB;</mml:mi></mml:mrow></mml:msup></mml:math></inline-formula>, then the number of iterations (<inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM304"><mml:mi>&#x03BB;</mml:mi></mml:math></inline-formula> or <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM305"><mml:mi>&#x03BB;</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:math></inline-formula>) of the binary search would disturb the uniform distribution of the randomly chosen topic. An easy way to fix this is to add <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM306"><mml:msup><mml:mn>2</mml:mn><mml:mi>&#x03BB;</mml:mi></mml:msup><mml:mo>&#x2212;</mml:mo><mml:mi>K</mml:mi></mml:math></inline-formula> dummy values, such that the number of iterations is always <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM307"><mml:mi>&#x03BB;</mml:mi></mml:math></inline-formula>. However, this takes more secure comparisons than strictly necessary. We describe a way to avoid these additional secure comparisons without leaking information.</p>
<p>
<list list-type="simple">
<list-item><label>1.</label>
<p>Party <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM308"><mml:mi>p</mml:mi></mml:math></inline-formula> randomly chooses <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM309"><mml:msup><mml:mn>2</mml:mn><mml:mi>&#x03BB;</mml:mi></mml:msup><mml:mo>&#x2212;</mml:mo><mml:mi>K</mml:mi></mml:math></inline-formula> different dummy indices <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM310"><mml:msub><mml:mi>d</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>&#x2208;</mml:mo><mml:mo fence="false" stretchy="false">{</mml:mo><mml:mn>1</mml:mn><mml:mo>,</mml:mo><mml:mspace width="thickmathspace" /><mml:mo>&#x2026;</mml:mo><mml:mo>,</mml:mo><mml:mspace width="thickmathspace" /><mml:msup><mml:mn>2</mml:mn><mml:mi>&#x03BB;</mml:mi></mml:msup><mml:mo fence="false" stretchy="false">}</mml:mo></mml:math></inline-formula>, <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM311"><mml:mn>1</mml:mn><mml:mo>&#x2264;</mml:mo><mml:mi>i</mml:mi><mml:mo>&#x2264;</mml:mo><mml:msup><mml:mn>2</mml:mn><mml:mi>&#x03BB;</mml:mi></mml:msup><mml:mo>&#x2212;</mml:mo><mml:mi>K</mml:mi></mml:math></inline-formula>, such that <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM312"><mml:msub><mml:mi>d</mml:mi><mml:mn>1</mml:mn></mml:msub><mml:mo>&#x003C;</mml:mo><mml:msub><mml:mi>d</mml:mi><mml:mn>2</mml:mn></mml:msub><mml:mo>&#x003C;</mml:mo><mml:mo>&#x2026;</mml:mo><mml:mo>&#x003C;</mml:mo><mml:msub><mml:mi>d</mml:mi><mml:mrow><mml:msup><mml:mn>2</mml:mn><mml:mi>&#x03BB;</mml:mi></mml:msup><mml:mo>&#x2212;</mml:mo><mml:mi>K</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula>. See the discussion below on how this should be done.</p></list-item>
<list-item><label>2.</label>
<p><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM313"><mml:mi>k</mml:mi><mml:mo stretchy="false">&#x2190;</mml:mo><mml:mn>1</mml:mn></mml:math></inline-formula>; <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM314"><mml:mi>u</mml:mi><mml:mo stretchy="false">&#x2190;</mml:mo><mml:mn>1</mml:mn></mml:math></inline-formula> &#x007B;Initialise counters&#x007D;</p></list-item>
<list-item><label>3.</label>
<p>For <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM315"><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:math></inline-formula> to <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM316"><mml:msup><mml:mn>2</mml:mn><mml:mi>&#x03BB;</mml:mi></mml:msup></mml:math></inline-formula> do: &#x007B;Compute new weights <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM317"><mml:msub><mml:mi>v</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:math></inline-formula>&#x007D;</p>
<p>If <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM318"><mml:msub><mml:mi>d</mml:mi><mml:mi>u</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mi>i</mml:mi></mml:math></inline-formula> then <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM319"><mml:msub><mml:mi>v</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mn>0</mml:mn></mml:math></inline-formula>; <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM320"><mml:mi>u</mml:mi><mml:mo stretchy="false">&#x2190;</mml:mo><mml:mi>u</mml:mi><mml:mo>+</mml:mo><mml:mn>1</mml:mn></mml:math></inline-formula> else <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM321"><mml:msub><mml:mi>v</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mi>w</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:math></inline-formula>; <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM322"><mml:mi>k</mml:mi><mml:mo stretchy="false">&#x2190;</mml:mo><mml:mi>k</mml:mi><mml:mo>+</mml:mo><mml:mn>1</mml:mn></mml:math></inline-formula></p></list-item>
<list-item><label>4.</label>
<p>The parties perform a binary search with weights <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM323"><mml:msub><mml:mi>v</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:math></inline-formula>, <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM324"><mml:mn>1</mml:mn><mml:mo>&#x2264;</mml:mo><mml:mi>i</mml:mi><mml:mo>&#x2264;</mml:mo><mml:msup><mml:mn>2</mml:mn><mml:mi>&#x03BB;</mml:mi></mml:msup></mml:math></inline-formula>:
<list list-type="simple">
<list-item><label>&#x2013;</label>
<p>If there is only one non-dummy index remaining, party <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM325"><mml:mi>p</mml:mi></mml:math></inline-formula> ends the binary search.</p></list-item>
<list-item><label>&#x2013;</label>
<p>In each iteration, party <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM326"><mml:mi>p</mml:mi></mml:math></inline-formula> constructs an indicator vector of length <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM327"><mml:mi>K</mml:mi></mml:math></inline-formula> (ignoring the dummy weights).</p></list-item>
</list></p></list-item>
</list></p>
<p>We need <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM328"><mml:mi>K</mml:mi></mml:math></inline-formula> to be even to avoid information leakage. E.g., for <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM329"><mml:mi>K</mml:mi><mml:mo>=</mml:mo><mml:mn>3</mml:mn></mml:math></inline-formula> the index <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM330"><mml:mn>2</mml:mn></mml:math></inline-formula> will never be selected after one iteration, irrespective of the chosen dummy index. This means that party <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM331"><mml:mi>p</mml:mi></mml:math></inline-formula> first has to choose, in case <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM332"><mml:mi>K</mml:mi></mml:math></inline-formula> is odd, one special dummy that should not lead to skipping iterations (in Step 4). The question remains how the <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM333"><mml:msup><mml:mn>2</mml:mn><mml:mi>&#x03BB;</mml:mi></mml:msup><mml:mo>&#x2212;</mml:mo><mml:mi>K</mml:mi></mml:math></inline-formula> random dummy indices (in Step 1) should be chosen, assuming <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM334"><mml:mi>K</mml:mi></mml:math></inline-formula> is even.</p>
<p>We order the <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM335"><mml:mi>K</mml:mi></mml:math></inline-formula> indices in <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM336"><mml:mi>K</mml:mi><mml:mrow><mml:mo>/</mml:mo></mml:mrow><mml:mn>2</mml:mn></mml:math></inline-formula> pairs of consecutive numbers. We choose <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM337"><mml:mo stretchy="false">(</mml:mo><mml:msup><mml:mn>2</mml:mn><mml:mi>&#x03BB;</mml:mi></mml:msup><mml:mo>&#x2212;</mml:mo><mml:mi>K</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mrow><mml:mo>/</mml:mo></mml:mrow><mml:mn>2</mml:mn></mml:math></inline-formula> random positions out of these <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM338"><mml:mi>K</mml:mi><mml:mrow><mml:mo>/</mml:mo></mml:mrow><mml:mn>2</mml:mn></mml:math></inline-formula> pairs. We add two dummies to each chosen pair, just before each element of the pair. In this way, each of the <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM339"><mml:mi>K</mml:mi></mml:math></inline-formula> indices will have an identical probability of being chosen after <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM340"><mml:mi>&#x03BB;</mml:mi></mml:math></inline-formula> (no dummies in the pair) or <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM341"><mml:mi>&#x03BB;</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:math></inline-formula> (dummies in the pair) rounds.</p>
</sec></app>
</app-group>
</back>
</article>