<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article xml:lang="en" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" dtd-version="1.3" article-type="research-article">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Comput. Sci.</journal-id>
<journal-title-group>
<journal-title>Frontiers in Computer Science</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Comput. Sci.</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">2624-9898</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fcomp.2026.1752739</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Research</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Privacy-preserving process data generation based on dual-discriminator conditional generative adversarial networks</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name><surname>Guo</surname> <given-names>Yi</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<uri xlink:href="https://loop.frontiersin.org/people/3283460"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Li</surname> <given-names>Zhong</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x0002A;</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x02013; review &amp; editing</role>
</contrib>
</contrib-group>
<aff id="aff1"><label>1</label><institution>School of Computer Science and Technology, Tongji University, Key Laboratory of Embedded System and Service Computing, Ministry of Education</institution>, <city>Shanghai</city>, <country country="CN">China</country></aff>
<aff id="aff2"><label>2</label><institution>School of Information and Intelligent Science, Donghua University</institution>, <city>Shanghai</city>, <country country="CN">China</country></aff>
<author-notes>
<corresp id="c001"><label>&#x0002A;</label>Correspondence: Zhong Li, <email xlink:href="mailto:lizhong@dhu.edu.cn">lizhong@dhu.edu.cn</email></corresp>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-02-18">
<day>18</day>
<month>02</month>
<year>2026</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2026</year>
</pub-date>
<volume>8</volume>
<elocation-id>1752739</elocation-id>
<history>
<date date-type="received">
<day>24</day>
<month>11</month>
<year>2025</year>
</date>
<date date-type="rev-recd">
<day>09</day>
<month>01</month>
<year>2026</year>
</date>
<date date-type="accepted">
<day>30</day>
<month>01</month>
<year>2026</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x000A9; 2026 Guo and Li.</copyright-statement>
<copyright-year>2026</copyright-year>
<copyright-holder>Guo and Li</copyright-holder>
<license>
<ali:license_ref start_date="2026-02-18">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<sec>
<title>Introduction</title>
<p>The growing adoption of data-centric business analytics demands effective safeguarding techniques for process data that contains procedural details. Although Petri net-driven process mining successfully extracts operational knowledge from activity sequences, current protection approaches often diminish analytical value. Therefore, preserving process-related information while ensuring privacy remains a critical challenge.</p>
</sec>
<sec>
<title>Methods</title>
<p>This study presents a Privacy-Preserving Process Data Generation method based on Dual-Discriminator Conditional Generative Adversarial Networks (P<sup>3</sup>DGAN) to generate privacy-preserving process data. To avoid mode collapse during model training, P<sup>3</sup>DGAN employs two discriminators that separately model the dataflow and workflow characteristics of process data. Furthermore, we propose a game-optimization strategy based on Petri net theory to capture the global distribution characteristics of process data. In addition, we introduce a workflow-level privacy metric based on the Euclidean distance between trace variants (ED-TV) to support comprehensive risk assessment.</p>
</sec>
<sec>
<title>Results</title>
<p>Experimental results on four real-world process datasets demonstrate that our method can generate high-quality process data with strong privacy protection compared with competitive peers.</p>
</sec>
<sec>
<title>Discussion</title>
<p>The proposed framework achieves an effective multi-dimensional privacy-utility trade-off, demonstrating its potential for practical applications in privacy-sensitive domains such as healthcare, banking, and manufacturing.</p>
</sec></abstract>
<kwd-group>
<kwd>differential privacy</kwd>
<kwd>dual-discriminator</kwd>
<kwd>generative adversarial networks</kwd>
<kwd>Petri nets</kwd>
<kwd>privacy protection</kwd>
<kwd>process data</kwd>
<kwd>workflow analysis</kwd>
</kwd-group>
<funding-group>
<funding-statement>The author(s) declared that financial support was received for this work and/or its publication. This work was supported in part by the National Natural Science Foundation of China under Grant 62373094, in part by the Natural Science Foundation of Shanghai under Grant 23ZR1401000, in part by the Interdisciplinary Frontier Innovation Team Development Special Fund of Donghua University, and in part by Donghua University 2024 Cultivation Project of Discipline Innovation under Grant xkcx-202406.</funding-statement>
</funding-group>
<counts>
<fig-count count="7"/>
<table-count count="8"/>
<equation-count count="25"/>
<ref-count count="41"/>
<page-count count="19"/>
<word-count count="12174"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Computer Security</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="s1">
<label>1</label>
<title>Introduction</title>
<p>Growing privacy concerns in process mining necessitate privacy-protection methods for process data that model complex business processes. Although Petri net-driven process mining derives operational knowledge directly from sequences of activities, protection mechanisms often limit the value of analysis (<xref ref-type="bibr" rid="B27">van der Aalst, 2012</xref>). Traditional anonymization methods are insufficient because they do not account for the sequential dependencies that reveal institutional process structures (<xref ref-type="bibr" rid="B3">Brzychczy et al., 2024</xref>; <xref ref-type="bibr" rid="B7">Elkoumy et al., 2021</xref>).</p>
<p>Process event logs exhibit unique characteristics not found in standard tabular formats (<xref ref-type="bibr" rid="B5">Chundawat et al., 2024</xref>; <xref ref-type="bibr" rid="B30">Wang et al., 2024a</xref>). Rows contain segments denoting individual events at the field level (time, resource, identifiers) and their position in a session of execution within long activity sequences (<xref ref-type="bibr" rid="B2">Augusto et al., 2018</xref>). <xref ref-type="fig" rid="F1">Figure 1</xref> demonstrates this property: individual case executions are recorded in different rows, but their consecutive order forms workflow information. The release of unedited logs would jeopardize not only identity markers but also confidential information about the process. This twofold characteristic requires tailored privacy-preserving techniques that can address both dataflow and workflow information leakage (<xref ref-type="bibr" rid="B13">Gursoy et al., 2016</xref>).</p>
<fig position="float" id="F1">
<label>Figure 1</label>
<caption><p>Process data: case ID, activity, and timestamp need to be identified.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fcomp-08-1752739-g0001.tif">
<alt-text content-type="machine-generated">Table diagram illustrating an event log with labeled columns for Case ID, Activity, Timestamps, Product, and Service Type. Colored columns highlight Case ID in green, Activity in orange, and Timestamps in yellow-green, with arrows and dashed lines indicating relationships to Instances and Events.</alt-text>
</graphic>
</fig>
<p>Existing protection methods for process data can be categorized into three groups. Anonymization methods (<xref ref-type="bibr" rid="B38">Ye et al., 2024</xref>; <xref ref-type="bibr" rid="B8">Fahrenkrog-Petersen et al., 2019</xref>; <xref ref-type="bibr" rid="B22">Rafiei et al., 2020</xref>; <xref ref-type="bibr" rid="B24">Rott et al., 2024</xref>) mask identifying attributes using techniques such as field masking or category generalization. Encryption-based schemes (<xref ref-type="bibr" rid="B26">Tillem et al., 2016</xref>; <xref ref-type="bibr" rid="B21">Rafiei et al., 2018</xref>) enable joint processing of encrypted data. Differential privacy methods (<xref ref-type="bibr" rid="B9">Fahrenkrog-Petersen et al., 2020</xref>; <xref ref-type="bibr" rid="B17">Mannhardt et al., 2019</xref>) introduce controlled noise while adhering to strict privacy guarantees.</p>
<p>However, existing methods for protecting process data privacy still suffer from the following two drawbacks:</p>
<p><bold>(1) Inadequate behavioral retention</bold>. Most current methods provide data privacy protection only from the dataflow perspective. The main drawback of anonymization-based methods is the potential loss of analytical value due to overgeneralization or information suppression in the data flow. For example, excessive anonymization may obscure critical insights into disease transmission patterns when analyzing medical process data (<xref ref-type="bibr" rid="B24">Rott et al., 2024</xref>). Although encryption-based methods provide a high level of security, the complexity and potential information loss introduced during data preprocessing or transformation may limit the ability to analyze process data. For instance, in time-series analysis, encryption prevents analysis tools from identifying intrinsic patterns (workflow patterns) within process data (<xref ref-type="bibr" rid="B26">Tillem et al., 2016</xref>; <xref ref-type="bibr" rid="B21">Rafiei et al., 2018</xref>). Furthermore, differential privacy-based methods inject noise into the dataflow, which reduces the accuracy and utility of process data (<xref ref-type="bibr" rid="B9">Fahrenkrog-Petersen et al., 2020</xref>; <xref ref-type="bibr" rid="B17">Mannhardt et al., 2019</xref>).</p>
<p><bold>(2) Limited risk evaluation</bold>. Existing process data privacy risk assessment methods cannot effectively capture the sequence characteristics (workflow features). This limitation is particularly evident in anonymization techniques (<xref ref-type="bibr" rid="B38">Ye et al., 2024</xref>; <xref ref-type="bibr" rid="B8">Fahrenkrog-Petersen et al., 2019</xref>; <xref ref-type="bibr" rid="B22">Rafiei et al., 2020</xref>; <xref ref-type="bibr" rid="B24">Rott et al., 2024</xref>), where researchers struggle to accurately quantify the extent to which anonymization operations distort the original data structure, resulting in either privacy over-protection that diminishes data utility (<xref ref-type="bibr" rid="B13">Gursoy et al., 2016</xref>) or inadequate anonymization that increases re-identification risk (<xref ref-type="bibr" rid="B38">Ye et al., 2024</xref>). In medical process mining, excessive anonymization may obscure critical patterns in patient treatment paths, whereas insufficient anonymization could expose patient privacy. Furthermore, the absence of accurate distance measures makes it impossible to determine the optimal trade-off between privacy protection and data utility; consequently, researchers have difficulty selecting suitable anonymization parameters.</p>
<p>Generative methods offer an alternative approach (<xref ref-type="bibr" rid="B31">Wang et al., 2024b</xref>; <xref ref-type="bibr" rid="B12">Gui et al., 2021</xref>). Instead of modifying real records, they create synthetic records that are statistically similar to the real records but contain no real users. Recent advances in visual synthesis (<xref ref-type="bibr" rid="B34">Xie et al., 2018</xref>; <xref ref-type="bibr" rid="B4">Chen et al., 2020</xref>; <xref ref-type="bibr" rid="B14">Hu et al., 2022</xref>) and structured generation (<xref ref-type="bibr" rid="B41">Zhao et al., 2021</xref>; <xref ref-type="bibr" rid="B20">Qiao et al., 2023</xref>) suggest feasibility for process records (<xref ref-type="bibr" rid="B35">Xu et al., 2019</xref>; <xref ref-type="bibr" rid="B40">Zhao et al., 2024</xref>; <xref ref-type="bibr" rid="B16">Lu et al., 2023</xref>; <xref ref-type="bibr" rid="B6">Dung and Huynh, 2022</xref>). However, the straightforward use of standard generative models introduces new challenges. Mode collapse, which limits the diversity of generated outputs, has been shown to undermine the representation of rare, but operationally important execution traces (<xref ref-type="bibr" rid="B12">Gui et al., 2021</xref>; <xref ref-type="bibr" rid="B32">Wang et al., 2022</xref>). Long procedural dependencies are hard to capture with simple models (<xref ref-type="bibr" rid="B10">Franzoi et al., 2025</xref>). Evaluation metrics designed for tabular arrays do not account for structural correctness and thus do not measure workflow validity (<xref ref-type="bibr" rid="B15">Liu et al., 2021</xref>).</p>
<p>In this study, we propose a Privacy-Preserving Process Data Generation method based on a Dual-Discriminator Conditional Generative Adversarial Network (P<sup>3</sup>DGAN). P<sup>3</sup>DGAN incorporates a generator with dual discriminators that model process data by integrating dataflow (tabular data) and workflow (directly-follows relationships) perspectives. This dual-discriminator approach challenges the generator to produce diverse samples, mitigating mode collapse; even if one discriminator collapses, the other can still provide effective discrimination (<xref ref-type="bibr" rid="B32">Wang et al., 2022</xref>). In addition to directly-follows relationships, global structural knowledge is used by Petri net-based deadlock detection. In this adversarial formulation, a trade-off is made between privacy protection and data utility.</p>
<p>The evaluation is based on a combination of traditional measures of utility and a novel measure of workflow risk (<xref ref-type="bibr" rid="B25">Rozinat and van der Aalst, 2008</xref>). First, we propose a risk assessment method for trace variants based on Euclidean distance, which uses a distance-based metric (<xref ref-type="bibr" rid="B19">Pereira et al., 2024</xref>) and incorporates re-identification attacks (<xref ref-type="bibr" rid="B38">Ye et al., 2024</xref>) to assess the risk of synthetic data. Second, to evaluate data utility, we employ the table-evaluator (<xref ref-type="bibr" rid="B23">Rai and Sural, 2023</xref>) and the process mining method (<xref ref-type="bibr" rid="B1">Akhramovich et al., 2024</xref>). Finally, four real-world event datasets are used to demonstrate the outstanding performance of our model for privacy-preserving process data.</p>
<p>Our contributions are:</p>
<list list-type="bullet">
<list-item><p>Dual-discriminator architecture in process mining. We propose a generative adversarial network with dual-discriminator on dataflow and workflow within the process data. This is the first study to consider a minimax game for generating process data.</p></list-item>
<list-item><p>Petri net-based game optimization strategy. We introduce a deadlock condition loss to limit the policy space of the generator. This policy discards structurally invalid executions and leads to &#x0007E;10% better generalization than unconstrained baselines.</p></list-item>
<list-item><p>ED-TV: Novel workflow-level privacy metric. We propose a risk measure based on the Euclidean distance between trace variants. Our method is an excellent trade-off: less than 0.5% re-identification on two datasets with similarity scores of 0.729&#x02013;0.951 and F1-scores of 0.723&#x02013;0.836 across four real-life datasets.</p></list-item>
</list>
<p>The remainder of the paper is structured as follows. Section 2 describes related studies, including an overview of process data privacy protection methods. Section 3 provides preliminary information on generative adversarial networks. Section 4 proposes a privacy protection framework for process data based on dual-discriminator generative adversarial networks. Section 5 designs a deadlock conditional loss game optimization strategy based on the previous privacy protection framework. Section 6 theoretically proves that our proposed method provides differential privacy guarantees. Section 7 introduces a method based on the Euclidean distance of trace variants to enhance the risk assessment of process data privacy protection. Our method is qualitatively and quantitatively compared with several state-of-the-art methods on publicly available datasets in Section 8. Section 9 concludes this study.</p>
</sec>
<sec id="s2">
<label>2</label>
<title>Related work</title>
<p>Privacy protection for operational event records has prompted a range of technical solutions. This review categorizes prior research into three groups: anonymization-based, encryption-based, and differential privacy-based approaches. We then distinguish pseudonymization from synthetic data generation and explain our rationale for adopting generative adversarial networks.</p>
<sec>
<label>2.1</label>
<title>Anonymization-based privacy protection in process data</title>
<p>Anonymization techniques mask identifying details by transforming or removing fields.</p>
<p><bold>Pretsa</bold> (<xref ref-type="bibr" rid="B8">Fahrenkrog-Petersen et al., 2019</xref>): Fahrenkrog-Petersen and colleagues developed a prefix structure technique that achieves K-anonymity with T-similarity. The approach builds a prefix structure from activity sequences, incrementally broadens activities until groups exceed <italic>K</italic> sequences, and maintains &#x02265;<italic>T</italic> distinct sensitive attributes per group. However, extensive broadening significantly reduces usefulness, particularly for rare execution sequences.</p>
<p><bold>TLKC</bold> (<xref ref-type="bibr" rid="B22">Rafiei et al., 2020</xref>): Rafiei and colleagues augmented the LKC-privacy framework with accommodation for varied sequence representations (set, multiset, sequence, relative position). Individual representations maintain &#x02265;<italic>L</italic> different sensitive attributes across <italic>K</italic> comparable sequences. Unfortunately, no individual representation setup achieves an optimal balance between protection and utility across diverse datasets.</p>
</sec>
<sec>
<label>2.2</label>
<title>Encryption-based privacy protection in process data</title>
<p>Solutions for log encryption protect logs through cryptographic transformation while enabling limited analysis of the encrypted data.</p>
<p><bold>Privacy-preserving alpha algorithm</bold> (<xref ref-type="bibr" rid="B26">Tillem et al., 2016</xref>): Tillem and co-workers enhanced the alpha log workflow discovery technique with encryption methods. However, the need to preprocess the data complicates deployment. Furthermore, encryption prevents analysis tools from detecting temporal relationships that are important for reconstructing the workflow.</p>
<p><bold>Privacy infrastructure</bold> (<xref ref-type="bibr" rid="B21">Rafiei et al., 2018</xref>): Rafiei et al. proposed a series of secure distributed computations for sensitive process analytic queries.</p>
</sec>
<sec>
<label>2.3</label>
<title>Differential privacy-based privacy protection in process data</title>
<p><bold>PRIPEL</bold> (<xref ref-type="bibr" rid="B9">Fahrenkrog-Petersen et al., 2020</xref>): Fahrenkrog-Petersen et al. added perturbation at the level of the workflow in case-based protection. It identifies active sequences, adds Laplace noise to their counts, filters out infrequent sequences, and reconstructs logs by sampling. This protocol achieves (&#x003B5;, &#x003B4;)-differential privacy at the level of sequences. However, it retains only the sequential features of relations, and field-level information remains exposed.</p>
<p><bold>DPGAN</bold> (<xref ref-type="bibr" rid="B34">Xie et al., 2018</xref>): Xie et al. incorporated differential privacy into discriminator gradient perturbation through adversarial training. Adding Gaussian noise after clipping gradients creates (&#x003B5;, &#x003B4;)-privacy using the moments accountant. However, this technique was developed for images and lacks a domain-specific process model. For a fair comparison, the DPGAN variant for event logs uses activity vectorization and shares the same architecture and parameters as P<sup>3</sup>DGAN.</p>
</sec>
<sec>
<label>2.4</label>
<title>Pseudonymization vs. data generation</title>
<p><bold>Pseudonymization</bold> replaces identifiers with pseudonyms (e.g., ID 123 &#x02192; ID XXX) but preserves the structure and content exactly. The data can be <italic>reversed</italic> with the use of re-identification tactics, and indirect identifiers are still vulnerable to linkage. GDPR considers pseudonymized data as &#x0201C;personal data&#x0201D; to which restrictions can be applied.</p>
<p><bold>P</bold><sup><bold>3</bold></sup><bold>DGAN (generation)</bold> generates entirely new records, statistically equivalent, yet structurally distinct from those of the sources. It is <italic>final</italic> in the sense that source records are never fetched by any procedure. The method provides robust protection by perturbing gradients and can be considered anonymous after appropriate validation.</p>
<p><bold>Key advantage</bold>: Sharing a synthesized dataset cannot endanger the source&#x00027;s privacy. This is in stark contrast to pseudonymization, in which the exposure of the &#x0201C;algorithm&#x0201D; compromises all source records.</p>
<p><bold>Use case comparison</bold>: Take companies that make their operating logs available to outside partners for collaborative analysis. The decryption keys must be shared for pseudonymization, posing a risk if they are leaked. P<sup>3</sup>DGAN creates synthetic records that statistically resemble real data, but no actual individuals exist in the synthetic dataset. This eliminates the need for key management and provides superior privacy protection.</p>
</sec>
<sec>
<label>2.5</label>
<title>GANs for privacy-preserving process data generation</title>
<p>A comparison with other frameworks of adversarial generative modeling is provided below:</p>
<p><bold>Autoencoders/VAEs</bold>: autoencoders, including the vanilla AE and the VAE, reconstruct inputs using encoder-decoder pairs. Encoders can also inadvertently memorize training samples, leading to privacy attacks. Reconstruction objectives encourage outputs to be close to training samples, thus limiting diversity.</p>
<p><bold>RNNs/LSTMs</bold>: RNNs/LSTMs maximize likelihood (next-element prediction), yielding &#x0201C;conservative&#x0201D; (average) sequences and low diversity. Temporal relations are often overfit in sequential modeling architectures.</p>
<p><bold>GANs (our approach)</bold>: generators never directly observe real records at any time, only gradient information from discriminators. Such architectural decoupling ensures privacy barriers. Adversarial objectives encourage exploration of the full data distribution rather than focusing on modes, thereby improving diversity. GANs allow flexible incorporation of constraints (e.g., a deadlock-aware loss) into the objective, whereas autoencoders require more substantial architectural modifications.</p>
<p><bold>Natural fit for differential privacy</bold>: gradient perturbation fits naturally into updates of the discriminator, offering formal guarantees. Alternating training naturally accommodates the gradient clipping and noise injection required for differential privacy.</p>
<p>The three categories face common challenges in protecting operational logs: removing sensitive information while preserving utility for analyses. Our generative approach addresses this challenge by achieving statistical equivalence to the source data rather than directly transforming it.</p>
</sec>
</sec>
<sec id="s3">
<label>3</label>
<title>Preliminary</title>
<p>Basic concepts are examined, including workflow diagrams, modeling formalisms, and the fundamentals of adversarial training.</p>
<sec>
<label>3.1</label>
<title>Process data and event logs</title>
<p><bold>Definition 1 (Process)</bold> A process is a collection of related activities that, together, transform inputs into outputs and are performed in a specified manner to achieve defined objectives. Running instances of a process are called cases or process instances.</p>
<p><bold>Definition 2 (Event Log)</bold> An event log <italic>L</italic> constitutes a multiset of traces, where individual traces &#x003C3; represent single cases. Individual traces form sequences of events:</p>
<disp-formula id="EQ1"><mml:math id="M1"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>&#x003C3;</mml:mi><mml:mo>=</mml:mo><mml:mrow><mml:mo>&#x02329;</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>e</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>e</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>e</mml:mi></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>&#x0232A;</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(1)</label></disp-formula>
<p>Individual events <italic>e</italic> are characterized by:</p>
<list list-type="bullet">
<list-item><p>Case ID: unique identifier for the process instance.</p></list-item>
<list-item><p>Activity: the task/action performed (e.g., &#x0201C;inbound call&#x0201D;, &#x0201C;handle case&#x0201D;).</p></list-item>
<list-item><p>Timestamp: when the activity occurred.</p></list-item>
<list-item><p>Attributes: additional information (e.g., resource, cost, product).</p></list-item>
</list>
<p><bold>Example</bold>: <xref ref-type="fig" rid="F1">Figure 1</xref> displays a sample event log from a call center process. Case 1 has the trace &#x003C3;<sub>1</sub> &#x0003D; &#x02329; Inbound Call, Handle Case, Call Outbound &#x0232A;; Case 9 has &#x003C3;<sub>9</sub> &#x0003D; &#x02329; Inbound Email, Call Outbound, Handle Email &#x0232A;.</p>
</sec>
<sec>
<label>3.2</label>
<title>Process data: dataflow vs. workflow</title>
<p>Process data inherently contains two types of information:</p>
<p><bold>Definition 3 (Dataflow)</bold> Dataflow refers to the tabular attributes associated with each event, including categorical attributes (activity, resource, product type), numerical attributes (duration, cost, priority), and temporal attributes (timestamp, date, time-of-day). Formally, the dataflow of an event <italic>e</italic> is represented as a feature vector:</p>
<disp-formula id="EQ2"><mml:math id="M2"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mtext class="textrm" mathvariant="normal">activity</mml:mtext><mml:mo>,</mml:mo><mml:mtext class="textrm" mathvariant="normal">timestamp</mml:mtext><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mtext class="textrm" mathvariant="normal">attr</mml:mtext></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mtext class="textrm" mathvariant="normal">attr</mml:mtext></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mtext class="textrm" mathvariant="normal">attr</mml:mtext></mml:mrow><mml:mrow><mml:mi>m</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>]</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(2)</label></disp-formula>
<p><bold>Definition 4 (Workflow)</bold> Workflow refers to control flow structure, i.e., ordering and dependencies between activities. Key workflow concepts include:</p>
<p>1. <italic>Trace variant</italic>: The unique sequence of activities in a trace, ignoring timestamps and attributes. For example, traces &#x02329;<italic>A, B, C</italic>&#x0232A; and &#x02329;<italic>A, B, C</italic>&#x0232A; with different timestamps are considered the same variant.</p>
<p>2. <italic>Directly-follows relation (DFR)</italic>: A binary relation &#x02192; such that <italic>a</italic>&#x02192;<italic>b</italic> indicates that activity <italic>b</italic> immediately follows activity <italic>a</italic> in at least one trace (<xref ref-type="bibr" rid="B28">van der Aalst, 2022</xref>):</p>
<disp-formula id="EQ3"><mml:math id="M3"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>D</mml:mi><mml:mi>F</mml:mi><mml:mi>R</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>L</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>a</mml:mi><mml:mo>,</mml:mo><mml:mi>b</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>|</mml:mo><mml:mo>&#x02203;</mml:mo><mml:mi>&#x003C3;</mml:mi><mml:mo>&#x02208;</mml:mo><mml:mi>L</mml:mi><mml:mo>,</mml:mo><mml:mo>&#x02203;</mml:mo><mml:mi>i</mml:mi><mml:mo>:</mml:mo><mml:mi>&#x003C3;</mml:mi><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mo>]</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mi>a</mml:mi><mml:mo>&#x02227;</mml:mo><mml:mi>&#x003C3;</mml:mi><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mo>]</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mi>b</mml:mi></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(3)</label></disp-formula>
</sec>
<sec>
<label>3.3</label>
<title>Petri net fundamentals</title>
<p>Petri nets are a core formalism in process mining, offering mathematical principles for the discovery, analysis, and conformance checking of process models based on event logs (<xref ref-type="bibr" rid="B27">van der Aalst, 2012</xref>).</p>
<p><bold>Definition 5 (Petri Net)</bold> A Petri net is a 5-tuple <italic>PN</italic> &#x0003D; (<italic>P, T, F, W, M</italic><sub>0</sub>) (<xref ref-type="bibr" rid="B18">Murata, 1989</xref>) where:</p>
<list list-type="bullet">
<list-item><p><italic>P</italic> &#x0003D; {<italic>p</italic><sub>1</sub>, <italic>p</italic><sub>2</sub>, &#x02026;, <italic>p</italic><sub><italic>m</italic></sub>} is a finite set of places.</p></list-item>
<list-item><p><italic>T</italic> &#x0003D; {<italic>t</italic><sub>1</sub>, <italic>t</italic><sub>2</sub>, &#x02026;, <italic>t</italic><sub><italic>n</italic></sub>} is a finite set of transitions.</p></list-item>
<list-item><p><italic>F</italic>&#x02286;(<italic>P</italic>&#x000D7;<italic>T</italic>)&#x0222A;(<italic>T</italic>&#x000D7;<italic>P</italic>) is a set of arcs (flow relation).</p></list-item>
<list-item><p><italic>W</italic>:<italic>F</italic> &#x02192; &#x02115;<sup>&#x0002B;</sup> is a weight function.</p></list-item>
<list-item><p><italic>M</italic><sub>0</sub>:<italic>P</italic> &#x02192; &#x02115; is the initial marking.</p></list-item>
<list-item><p><italic>P</italic>&#x02229;<italic>T</italic> &#x0003D; &#x02205; and <italic>P</italic>&#x0222A;<italic>T</italic>&#x02260;&#x02205;.</p></list-item>
</list>
<p>A marking <italic>M</italic> represents the system state, where <italic>M</italic>(<italic>p</italic>) denotes the token count in place <italic>p</italic>. A transition <italic>t</italic> is enabled at marking <italic>M</italic> if &#x02200;<italic>p</italic>&#x02208;<sup>&#x02022;</sup><italic>t</italic>:<italic>M</italic>(<italic>p</italic>)&#x02265;<italic>W</italic>(<italic>p, t</italic>), where <sup>&#x02022;</sup><italic>t</italic> denotes input places of <italic>t</italic>.</p>
<p>Firing an enabled transition <italic>t</italic> transforms marking <italic>M</italic> to <italic>M</italic>&#x02032; according to:</p>
<disp-formula id="EQ4"><mml:math id="M4"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msup><mml:mrow><mml:mi>M</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x02032;</mml:mi></mml:mrow></mml:msup><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>p</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mi>M</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>p</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>-</mml:mo><mml:mi>W</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>p</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>&#x0002B;</mml:mo><mml:mi>W</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>t</mml:mi><mml:mo>,</mml:mo><mml:mi>p</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(4)</label></disp-formula>
<p>The reachability set <italic>R</italic>(<italic>PN, M</italic><sub>0</sub>) contains all markings reachable from <italic>M</italic><sub>0</sub> through transition firing sequences.</p>
<p><bold>Petri nets in process mining</bold>: the mined models are represented using Petri nets where activities correspond to transitions, dependencies to places and arcs, and execution is represented by a token flow (the so-called marking evolution). The alpha-algorithm, a basic mining technique, constructs Petri nets directly from event logs by detecting causal dependencies (<xref ref-type="bibr" rid="B29">van der Aalst et al., 2004</xref>). Using Petri nets, precise conformance checking can be performed by replaying logs on the models (discovered models), and behavioral properties (such as soundness, deadlock-freedom, and liveness) can be formally verified (<xref ref-type="bibr" rid="B27">van der Aalst, 2012</xref>).</p>
<p><bold>Definition 6 (Process Petri Net)</bold> A Process Petri Net is a Petri Net (see Definition 5) whose structure is determined by an event log and where the elements have process-related meanings.</p>
<p>Given an event log <italic>L</italic>, a Process Petri Net is <italic>PN</italic> &#x0003D; (<italic>P, T, F, W, M</italic><sub>0</sub>) that satisfies all properties in Definition 5, and has the following additional process-specific semantics:</p>
<list list-type="bullet">
<list-item><p>Each transition <italic>t</italic>&#x02208;<italic>T</italic> corresponds to an activity in the log.</p></list-item>
<list-item><p>Places <italic>P</italic> represent causal dependencies between activities.</p></list-item>
<list-item><p>Marking <italic>M</italic><sub>0</sub> represents the initial state (typically one token in the start place).</p></list-item>
<list-item><p>A place <italic>p</italic> connecting transitions <italic>t</italic><sub><italic>i</italic></sub> and <italic>t</italic><sub><italic>j</italic></sub> (where (<italic>t</italic><sub><italic>i</italic></sub>, <italic>p</italic>), (<italic>p, t</italic><sub><italic>j</italic></sub>)&#x02208;<italic>F</italic>) indicates that activity <italic>t</italic><sub><italic>j</italic></sub> can follow activity <italic>t</italic><sub><italic>i</italic></sub>.</p></list-item>
</list>
<p><bold>Definition 7 (Deadlock in Process Petri Nets)</bold> A marking <italic>M</italic> in a process Petri net is a deadlock state if no transition is enabled:</p>
<disp-formula id="EQ5"><mml:math id="M5"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mo>&#x02200;</mml:mo><mml:mi>t</mml:mi><mml:mo>&#x02208;</mml:mo><mml:mi>T</mml:mi><mml:mo>:</mml:mo><mml:mo>&#x02203;</mml:mo><mml:mi>p</mml:mi><mml:mo>&#x02208;</mml:mo><mml:mmultiscripts><mml:mi>t</mml:mi><mml:mprescripts/><mml:none/><mml:mo>&#x02022;</mml:mo></mml:mmultiscripts><mml:mo>:</mml:mo><mml:mi>M</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>p</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>&#x0003C;</mml:mo><mml:mi>W</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>p</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(5)</label></disp-formula>
<p>where <sup>&#x02022;</sup><italic>t</italic> &#x0003D; {<italic>p</italic>&#x02208;<italic>P</italic>|(<italic>p, t</italic>)&#x02208;<italic>F</italic>} stands for the set of input places of the transition <italic>t</italic>. Intuitively, a deadlock is a &#x0201C;stuck state&#x0201D;, in which the process is unable to continue, similar to the state in which a group of people are all waiting for somebody else to move first.</p>
<p><bold>Role of Petri Nets in P</bold><sup><bold>3</bold></sup><bold>DGAN</bold>: Our approach leverages Petri net theory in three ways:</p>
<p>1. <italic>Structural representation</italic>: We use Petri nets to formally represent the discovered process structure, enabling rigorous behavioral analysis.</p>
<p>2. <italic>Deadlock detection</italic>: We integrate Petri-net-based deadlock detection into the loss function. This constraint guarantees that the synthesized processes are structurally sound and do not pass through invalid states in which no activities can be executed.</p>
<p>3. <italic>Quality metrics</italic>: We employ Petri net quality dimensions (fitness, precision, generalization, and simplicity) to evaluate synthetic process data, as detailed in Section 8.</p>
<p>This dual nature of process data (dataflow &#x0002B; workflow) and the need for structural validity motivate our dual-discriminator architecture in P<sup>3</sup>DGAN.</p>
</sec>
<sec>
<label>3.4</label>
<title>Generative adversarial networks</title>
<p>Generative adversarial networks (GANs) are powerful generative models that perform implicit density estimation and consist of two neural networks (<xref ref-type="bibr" rid="B11">Goodfellow et al., 2014</xref>): a generator and a discriminator. The generator attempts to fool the discriminator by generating realistic data, while the discriminator aims to distinguish real from fake data. During training, the generator progressively improves at creating realistic data, while the discriminator becomes better at detection. The process reaches equilibrium when the discriminator can no longer distinguish real from fake data.</p>
<p>Generators <italic>G</italic> and discriminators <italic>D</italic> are jointly trained in a two-player minimax game. The objective function is:</p>
<disp-formula id="EQ6"><mml:math id="M6"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mstyle displaystyle="true"><mml:munder class="msub"><mml:mrow><mml:mo class="qopname">min</mml:mo></mml:mrow><mml:mrow><mml:mi>G</mml:mi></mml:mrow></mml:munder></mml:mstyle><mml:mstyle displaystyle="true"><mml:munder class="msub"><mml:mrow><mml:mo class="qopname">max</mml:mo></mml:mrow><mml:mrow><mml:mi>D</mml:mi></mml:mrow></mml:munder></mml:mstyle><mml:mi>V</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>D</mml:mi><mml:mo>,</mml:mo><mml:mi>G</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:msub><mml:mrow><mml:mstyle mathvariant="double-struck"><mml:mi>E</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:mi>x</mml:mi><mml:mo>&#x0007E;</mml:mo><mml:msub><mml:mrow><mml:mi>p</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi><mml:mi>a</mml:mi><mml:mi>t</mml:mi><mml:mi>a</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mo class="qopname">log</mml:mo><mml:mi>D</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo>]</mml:mo></mml:mrow></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mtext>&#x02003;&#x02003;&#x02003;&#x02003;&#x02003;&#x02003;&#x02003;&#x02003;</mml:mtext><mml:mo>&#x0002B;</mml:mo><mml:msub><mml:mrow><mml:mstyle mathvariant="double-struck"><mml:mi>E</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:mi>Z</mml:mi><mml:mo>&#x0007E;</mml:mo><mml:msub><mml:mrow><mml:mi>p</mml:mi></mml:mrow><mml:mrow><mml:mi>Z</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>Z</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msub><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mo class="qopname">log</mml:mo><mml:mrow><mml:mo 
stretchy="false">(</mml:mo><mml:mrow><mml:mn>1</mml:mn><mml:mo>-</mml:mo><mml:mi>D</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>G</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>Z</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo>]</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(6)</label></disp-formula>
<p>where <italic>x</italic> is real data, <italic>Z</italic> represents random noise from the latent space, and <italic>G</italic>(<italic>Z</italic>) is generated data. <italic>p</italic><sub><italic>data</italic></sub> is the distribution of real data, while <italic>p</italic><sub><italic>Z</italic></sub>(<italic>Z</italic>) represents the prior distribution of input noise <italic>Z</italic> for generator <italic>G</italic>. Discriminator <italic>D</italic> is fixed during <italic>G</italic> training. The adversarial process constitutes a two-player minimax game where <italic>G</italic> tries to fool <italic>D</italic>, while <italic>D</italic> is trained to discriminate generated data. Hence, generated samples become increasingly indistinguishable from real data.</p>
</sec>
<sec>
<label>3.5</label>
<title>Notation</title>
<p>For clarity, we define key notations used throughout this study in <xref ref-type="table" rid="T1">Table 1</xref>.</p>
<table-wrap position="float" id="T1">
<label>Table 1</label>
<caption><p>Notation and definitions.</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left"><bold>Symbol</bold></th>
<th valign="top" align="left"><bold>Definition</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left"><italic>L, L</italic><sub><italic>r</italic></sub></td>
<td valign="top" align="left">Event log, real process data</td>
</tr>
<tr>
<td valign="top" align="left"><italic>L</italic><sub><italic>f</italic></sub></td>
<td valign="top" align="left">Synthetic (fake) process data</td>
</tr>
<tr>
<td valign="top" align="left"><italic>X</italic></td>
<td valign="top" align="left">Real tabular data (dataflow)</td>
</tr>
<tr>
<td valign="top" align="left"><italic>R</italic></td>
<td valign="top" align="left">Real directly-follows relations (workflow)</td>
</tr>
<tr>
<td valign="top" align="left"><italic>Z</italic></td>
<td valign="top" align="left">Latent noise vector sampled from <inline-formula><mml:math id="M8"><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>N</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:msup><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>&#x003C3;</mml:mi><mml:mi>C</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup><mml:mi>I</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula></td>
</tr>
<tr>
<td valign="top" align="left"><italic>G</italic></td>
<td valign="top" align="left">Generator network</td>
</tr>
<tr>
<td valign="top" align="left"><italic>G</italic>(<italic>Z</italic>)</td>
<td valign="top" align="left">Generated (synthetic) process data</td>
</tr>
<tr>
<td valign="top" align="left"><italic>D</italic><sub><italic>t</italic></sub></td>
<td valign="top" align="left">Discriminator for tabular data (dataflow)</td>
</tr>
<tr>
<td valign="top" align="left"><italic>D</italic><sub><italic>r</italic></sub></td>
<td valign="top" align="left">Discriminator for directly-follows relations (workflow)</td>
</tr>
<tr>
<td valign="top" align="left"><inline-formula><mml:math id="M9"><mml:msub><mml:mrow><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>L</mml:mi></mml:mstyle></mml:mrow></mml:mrow><mml:mrow><mml:mi>G</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula></td>
<td valign="top" align="left">Total generator loss</td>
</tr>
<tr>
<td valign="top" align="left"><inline-formula><mml:math id="M10"><mml:msub><mml:mrow><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>L</mml:mi></mml:mstyle></mml:mrow></mml:mrow><mml:mrow><mml:mi>a</mml:mi><mml:mi>d</mml:mi><mml:mi>v</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula></td>
<td valign="top" align="left">Adversarial loss (from discriminators)</td>
</tr>
<tr>
<td valign="top" align="left"><inline-formula><mml:math id="M11"><mml:msub><mml:mrow><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>L</mml:mi></mml:mstyle></mml:mrow></mml:mrow><mml:mrow><mml:mi>c</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula></td>
<td valign="top" align="left">Deadlock condition loss</td>
</tr>
<tr>
<td valign="top" align="left"><inline-formula><mml:math id="M12"><mml:msub><mml:mrow><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>L</mml:mi></mml:mstyle></mml:mrow></mml:mrow><mml:mrow><mml:mi>s</mml:mi><mml:mi>i</mml:mi><mml:mi>z</mml:mi><mml:mi>e</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula></td>
<td valign="top" align="left">Size loss (number of deadlocks)</td>
</tr>
<tr>
<td valign="top" align="left"><inline-formula><mml:math id="M13"><mml:msub><mml:mrow><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>L</mml:mi></mml:mstyle></mml:mrow></mml:mrow><mml:mrow><mml:mi>d</mml:mi><mml:mi>i</mml:mi><mml:mi>s</mml:mi><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula></td>
<td valign="top" align="left">Distribution loss (KL divergence)</td>
</tr>
<tr>
<td valign="top" align="left"><italic>E</italic><sub><italic>X</italic></sub></td>
<td valign="top" align="left">Expectation over real data distribution</td>
</tr>
<tr>
<td valign="top" align="left"><italic>E</italic><sub><italic>Z</italic></sub></td>
<td valign="top" align="left">Expectation over noise distribution</td>
</tr>
<tr>
<td valign="top" align="left"><italic>E</italic><sub><italic>G</italic>(<italic>Z</italic>)</sub></td>
<td valign="top" align="left">Expectation over generated data distribution</td>
</tr>
<tr>
<td valign="top" align="left">&#x003BB;</td>
<td valign="top" align="left">Weight for deadlock condition loss (&#x02208;[0, 1])</td>
</tr>
<tr>
<td valign="top" align="left">&#x003B5;</td>
<td valign="top" align="left">Privacy budget (differential privacy parameter)</td>
</tr>
<tr>
<td valign="top" align="left">&#x003B4;</td>
<td valign="top" align="left">Privacy failure probability</td>
</tr>
<tr>
<td valign="top" align="left">&#x003C3;</td>
<td valign="top" align="left">Standard deviation of DP noise</td>
</tr>
<tr>
<td valign="top" align="left"><italic>C</italic></td>
<td valign="top" align="left">Clipping threshold for gradients</td>
</tr>
<tr>
<td valign="top" align="left"><italic>dl</italic><sub><italic>r</italic></sub>, <italic>dl</italic><sub><italic>f</italic></sub></td>
<td valign="top" align="left">Deadlock markings (real and fake)</td>
</tr>
<tr>
<td valign="top" align="left"><italic>Q</italic><sub><italic>dl</italic></sub></td>
<td valign="top" align="left">Frequency distribution of deadlocks</td>
</tr>
<tr>
<td valign="top" align="left"><inline-formula><mml:math id="M14"><mml:msubsup><mml:mrow><mml:mi>Q</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi><mml:mi>l</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:math></inline-formula></td>
<td valign="top" align="left">Frequency of <italic>i</italic>-th deadlock type</td>
</tr>
<tr>
<td valign="top" align="left"><italic>m</italic></td>
<td valign="top" align="left">Mini-batch size</td>
</tr>
<tr>
<td valign="top" align="left"><italic>T</italic><sub><italic>d</italic></sub></td>
<td valign="top" align="left">Number of discriminator iterations per generator iteration</td>
</tr>
<tr>
<td valign="top" align="left"><italic>T</italic><sub><italic>g</italic></sub></td>
<td valign="top" align="left">Number of generator training iterations</td>
</tr>
<tr>
<td valign="top" align="left"><italic>M</italic><sub><italic>t</italic></sub></td>
<td valign="top" align="left">Total number of training samples for <italic>D</italic><sub><italic>t</italic></sub></td>
</tr>
<tr>
<td valign="top" align="left"><italic>M</italic><sub><italic>r</italic></sub></td>
<td valign="top" align="left">Total number of training samples for <italic>D</italic><sub><italic>r</italic></sub></td>
</tr>
<tr>
<td valign="top" align="left"><italic>q</italic><sub><italic>t</italic></sub>, <italic>q</italic><sub><italic>r</italic></sub></td>
<td valign="top" align="left">Sampling probabilities (<italic>m</italic>/<italic>M</italic><sub><italic>t</italic></sub>, <italic>m</italic>/<italic>M</italic><sub><italic>r</italic></sub>)</td>
</tr></tbody>
</table>
</table-wrap>
</sec>
</sec>
<sec id="s4">
<label>4</label>
<title>Privacy-preserving framework for process data based on dual-discriminator generative adversarial networks</title>
<p>This section presents a privacy-preserving framework for process data based on dual-discriminator generative adversarial networks. First, the problem is described and the motivation is explained. Then, the overall framework architecture and its components are introduced.</p>
<sec>
<label>4.1</label>
<title>Problem description</title>
<p>Process data exists in the form of event logs where each event is represented as: event = {Case ID, Activity, Timestamp, &#x02026;} (as shown in <xref ref-type="fig" rid="F1">Figure 1</xref>). This data is hierarchical in nature: (1) at the level of dataflow represents single events, (2) at the level of workflow are the sequences of events which make up traces.</p>
<p><bold>Limitations of the current approaches</bold>: Although there is some work on protecting the privacy of process data (<xref ref-type="bibr" rid="B9">Fahrenkrog-Petersen et al., 2020</xref>; <xref ref-type="bibr" rid="B22">Rafiei et al., 2020</xref>; <xref ref-type="bibr" rid="B17">Mannhardt et al., 2019</xref>), these approaches have the following limitations:</p>
<p>(1) <italic>Separation of protection mechanisms across dimensions</italic>: Existing solutions apply privacy protection to the dataflow dimension and the workflow dimension individually, rather than protecting both dimensions jointly. For example, PRIPEL (<xref ref-type="bibr" rid="B9">Fahrenkrog-Petersen et al., 2020</xref>) enables sequence-level differential privacy, but field-level attributes are still unprotected. Additionally, TLKC variants (<xref ref-type="bibr" rid="B22">Rafiei et al., 2020</xref>) anonymize representations separately, without a coordinated approach.</p>
<p>(2) <italic>Absence of structural verification</italic>: Anonymization or perturbation methods do not guarantee that the resulting data corresponds to valid process models (e.g., models free of deadlocks).</p>
<p>(3) <italic>Unsatisfactory attacker model</italic>: The current risk metrics are not sufficient to measure a workflow-level privacy leakage, as they lack precise distance metrics to quantify the structural similarity (between process models).</p>
<p>For the original process data <italic>L</italic><sub><italic>r</italic></sub>, the goal is to produce a differentially private synthetic data <italic>L</italic><sub><italic>f</italic></sub> &#x0003D; <italic>G</italic>(<italic>Z</italic>) where <italic>G</italic>:<italic>Z</italic>&#x02192;<italic>L</italic> is a generative model, which maximizes the utility of data while ensuring structural validity and (&#x003B5;, &#x003B4;)-differential privacy.</p>
</sec>
<sec>
<label>4.2</label>
<title>Overall framework</title>
<p>The structure of the dual-discriminator adversarial generative network comprises three blocks: Generator <italic>G</italic>, Discriminator for Tabular Data <italic>D</italic><sub><italic>t</italic></sub>, and Discriminator for Directly-Follows Relationship <italic>D</italic><sub><italic>r</italic></sub> (see <xref ref-type="fig" rid="F2">Figure 2</xref>). Each discriminator evaluates the generated process data from different perspectives (workflow or dataflow), forcing the generator to produce process data that is realistic across multiple aspects and satisfies the criteria of all discriminators. Tabular Data (dataflow): Process data is represented as a table in which event characteristics (e.g., timestamps, activity names, etc.) appear in columns. Directly-Follows Relation (workflow): In Petri Nets, one transition is said to follow directly after another.</p>
<fig position="float" id="F2">
<label>Figure 2</label>
<caption><p>The P<sup>3</sup>DGAN framework: privacy preserving process data generation. The framework can be summarized as (1) the source process data <italic>L</italic><sub><italic>r</italic></sub> is transformed into tabular data <italic>X</italic> and directly-follows relation matrix <italic>R</italic><sub><italic>X</italic></sub> by preprocessing; (2) Latent noise <italic>Z</italic> is drawn from Gaussian distribution <inline-formula><mml:math id="M15"><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>N</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:msup><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>&#x003C3;</mml:mi><mml:mi>C</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup><mml:mi>I</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula>; (3) Generator <italic>G</italic> synthesizes process data <italic>G</italic>(<italic>Z</italic>) from noise only without seeing real data directly; (4) Discriminator <italic>D</italic><sub><italic>t</italic></sub> decides the tabular data authenticity by distinguishing real <italic>X</italic> and fake <italic>G</italic>(<italic>Z</italic>); (5) Discriminator <italic>D</italic><sub><italic>r</italic></sub> evaluates the authenticity of a workflow relation between real <italic>R</italic><sub><italic>X</italic></sub> and fake <italic>R</italic><sub><italic>G</italic>(<italic>Z</italic>)</sub>; (6) Discriminator gradients are added with differential privacy noise; (7) The loss functions are calculated, including the adversarial loss <inline-formula><mml:math id="M16"><mml:msub><mml:mrow><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>L</mml:mi></mml:mstyle></mml:mrow></mml:mrow><mml:mrow><mml:mi>a</mml:mi><mml:mi>d</mml:mi><mml:mi>v</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> and the deadlock 
condition loss <inline-formula><mml:math id="M17"><mml:msub><mml:mrow><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>L</mml:mi></mml:mstyle></mml:mrow></mml:mrow><mml:mrow><mml:mi>c</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula>; (8) Backpropagation is used to update model parameters. Notably, the generator never &#x0201C;sees&#x0201D; the real records and learns only from gradient information from discriminators, thereby providing identity privacy protections.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fcomp-08-1752739-g0002.tif">
<alt-text content-type="machine-generated">Diagram illustrating a generative adversarial network (GAN) framework for process data synthesis, showing source data preprocessing, a generator module using fully connected layers with ReLU and Gumbel-Softmax, and two discriminators for tabular data and workflow relation evaluation. Key steps, input/output flows, gradient updates, noise generation, and loss functions are labeled.</alt-text>
</graphic>
</fig>
<p>The overall objective is to learn the generator <italic>G</italic>, conditioned on Tabular Data and Directly-Follows Relationships, to generate realistic and informative synthetic process data. For this purpose, two discriminators <italic>D</italic><sub><italic>t</italic></sub> and <italic>D</italic><sub><italic>r</italic></sub> are used. <italic>D</italic><sub><italic>t</italic></sub> differentiates synthetic tabular data from real tabular data, and <italic>D</italic><sub><italic>r</italic></sub> differentiates synthetic Directly-Follows Relationships from real ones. This two-discriminator system has several important benefits:</p>
<p>1. <italic>Mitigates mode collapse</italic>: with two separate feedbacks, the generator has to meet more than one condition at a time, which is more constraining. If the feedback from one discriminator becomes less informative (potential collapse), the other still provides guidance for learning.</p>
<p>2. <italic>Captures dual nature of process data</italic>: process data is inherently two-dimensional (dataflow and workflow) and cannot be fully represented by one discriminator. Both are explicitly modeled in our architecture, which results in better process model discovery as shown in Section 8.</p>
<p>3. <italic>Enables differential privacy at multiple levels</italic>: various privacy mechanisms can be used at the dataflow or workflow level, providing privacy-utility tradeoffs at a very granular level.</p>
<p>Meanwhile, for privacy protection, differential privacy is integrated by adding noise vectors to the discriminators&#x00027; gradients. This ensures privacy preservation during training without compromising utility. Noise is added on the gradient of the Wasserstein distance with respect to the training data, providing robust privacy guarantees (<xref ref-type="bibr" rid="B36">Yang et al., 2022</xref>).</p>
<p>Based on this privacy-preserving framework, we propose a privacy-preserving method for process data based on dual-discriminator generative adversarial networks, as shown in <xref ref-type="other" rid="algorithm_1">Algorithm 1</xref>. Before introducing this algorithm, we first give formal definitions of the trace and the directly-follows relationship based on Petri Nets (<xref ref-type="bibr" rid="B27">van der Aalst, 2012</xref>).</p>
<statement content-type="algorithm" id="algorithm_1">
<label>Algorithm 1</label>
<p>Privacy-Preserving Method for Process Data based on Dual-Discriminator GANs.
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fcomp-08-1752739-g0008.tif"/>
</p>
</statement>
<p><bold>Definition 8 (trace of process data)</bold> in process data (event log <italic>L</italic>), each row represents an event. Each event contains attributes such as its corresponding ID, activity name, and timestamp. <inline-formula><mml:math id="M18"><mml:msub><mml:mrow><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>U</mml:mi></mml:mstyle></mml:mrow></mml:mrow><mml:mrow><mml:mi>a</mml:mi><mml:mi>c</mml:mi><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> is the universe of activity names in event log <italic>L</italic>. A <inline-formula><mml:math id="M19"><mml:mi>T</mml:mi><mml:mi>r</mml:mi><mml:mi>a</mml:mi><mml:mi>c</mml:mi><mml:mi>e</mml:mi><mml:mo>=</mml:mo><mml:mrow><mml:mo>&#x02329;</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>a</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>a</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>.</mml:mo><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>a</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>&#x0232A;</mml:mo></mml:mrow><mml:mo>&#x02208;</mml:mo><mml:msubsup><mml:mrow><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>U</mml:mi></mml:mstyle></mml:mrow></mml:mrow><mml:mrow><mml:mi>a</mml:mi><mml:mi>c</mml:mi><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mo>*</mml:mo></mml:mrow></mml:msubsup></mml:math></inline-formula>, 0 &#x02264; <italic>i</italic> &#x02264; <italic>j</italic> &#x02264; <italic>len</italic>(<italic>L</italic>) is a sequence of events corresponding to activities with the same ID. 
&#x003C0;<sub><italic>k</italic></sub>(<italic>Trace</italic>) &#x0003D; <italic>a</italic><sub><italic>k</italic></sub>, where <italic>i</italic> &#x02264; <italic>k</italic> &#x02264; <italic>j</italic>, denotes the mapping of activity <italic>a</italic><sub><italic>k</italic></sub> corresponding to position <italic>k</italic> in one trace.</p>
<p><bold>Definition 9 (directly-follows relationship)</bold> for any two activities <inline-formula><mml:math id="M20"><mml:msub><mml:mrow><mml:mi>a</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>a</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mo>&#x02208;</mml:mo><mml:msub><mml:mrow><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>U</mml:mi></mml:mstyle></mml:mrow></mml:mrow><mml:mrow><mml:mi>a</mml:mi><mml:mi>c</mml:mi><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula>, there is a directly-follows relationship from <italic>a</italic><sub>1</sub> to <italic>a</italic><sub>2</sub> in <inline-formula><mml:math id="M21"><mml:msub><mml:mrow><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>U</mml:mi></mml:mstyle></mml:mrow></mml:mrow><mml:mrow><mml:mi>a</mml:mi><mml:mi>c</mml:mi><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula>, denoted by <italic>a</italic><sub>1</sub>&#x0227B;<italic>a</italic><sub>2</sub>, iff &#x02203;0 &#x02264; <italic>i, j</italic> &#x02264; <italic>len</italic>(<italic>L</italic>), <italic>Trace</italic>|&#x003C0;<sub><italic>i</italic></sub>(<italic>Trace</italic>) &#x0003D; <italic>a</italic><sub>1</sub>&#x02227;&#x003C0;<sub><italic>j</italic></sub>(<italic>Trace</italic>) &#x0003D; <italic>a</italic><sub>2</sub>&#x02227;<italic>j</italic> &#x0003D; <italic>i</italic>&#x0002B;1. The frequency, or weight, of a directly-follows relationship is the number of times it occurs in the event log, denoted by |<italic>a</italic><sub>1</sub>&#x0227B;<italic>a</italic><sub>2</sub>|.</p>
<p><bold>Key implementation details</bold>:</p>
<p><italic>Differential privacy integration</italic>: We apply Gaussian noise <inline-formula><mml:math id="M22"><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>N</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:msup><mml:mrow><mml:mi>&#x003C3;</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup><mml:msup><mml:mrow><mml:mi>C</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup><mml:mi>I</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula> to the clipped gradients of the two discriminators (Lines 13 and 21 in <xref ref-type="other" rid="algorithm_1">Algorithm 1</xref>), with the noise scale &#x003C3; being determined from the privacy budget &#x003B5; and the failure probability &#x003B4; as:</p>
<disp-formula id="EQ7"><mml:math id="M23"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>&#x003C3;</mml:mi><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mn>2</mml:mn><mml:mi>q</mml:mi><mml:msqrt><mml:mrow><mml:msub><mml:mrow><mml:mi>T</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mo class="qopname">log</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>1</mml:mn><mml:mo>/</mml:mo><mml:mi>&#x003B4;</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msqrt></mml:mrow><mml:mrow><mml:mi>&#x003B5;</mml:mi></mml:mrow></mml:mfrac></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(7)</label></disp-formula>
<p>where <italic>q</italic> &#x0003D; <italic>m</italic>/<italic>M</italic> is the sampling probability. This guarantees (&#x003B5;, &#x003B4;)-differential privacy, which is proven in Section 6.</p>
<p>The generator adversarially minimizes a mixed loss over both adversarial loss <inline-formula><mml:math id="M24"><mml:msub><mml:mrow><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>L</mml:mi></mml:mstyle></mml:mrow></mml:mrow><mml:mrow><mml:mi>a</mml:mi><mml:mi>d</mml:mi><mml:mi>v</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> (from discriminators) and deadlock condition loss <inline-formula><mml:math id="M25"><mml:msub><mml:mrow><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>L</mml:mi></mml:mstyle></mml:mrow></mml:mrow><mml:mrow><mml:mi>c</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> (maintaining structural validity) (refer to Line 26 in <xref ref-type="other" rid="algorithm_1">Algorithm 1</xref>). The parameter &#x003BB;&#x02208;[0, 1] controls the trade-off between data realism and structural faithfulness. Details of <inline-formula><mml:math id="M26"><mml:msub><mml:mrow><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>L</mml:mi></mml:mstyle></mml:mrow></mml:mrow><mml:mrow><mml:mi>c</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> are given in Section 5.</p>
</sec>
<sec>
<label>4.3</label>
<title>Network architectures</title>
<p><xref ref-type="fig" rid="F3">Figure 3</xref> illustrates network architectures for both the generator and the discriminators in P<sup>3</sup>DGAN.</p>
<fig position="float" id="F3">
<label>Figure 3</label>
<caption><p>Network architectures of P<sup>3</sup>DGAN with dimension annotations. <bold>(a)</bold> Generator <italic>G</italic>: latent noise <inline-formula><mml:math id="M35"><mml:mi>Z</mml:mi><mml:mo>&#x0007E;</mml:mo><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>N</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:msup><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>&#x003C3;</mml:mi><mml:mi>C</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup><mml:mi>I</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula> with dimension <italic>d</italic><sub><italic>z</italic></sub> is transformed through fully connected layers (hidden size 256) with ReLU activation (max(0, <italic>x</italic>)), and Gumbel-Softmax for categorical sampling to generate tabular data (activity labels) and directly-follows relations (DFR) with output dimension <italic>d</italic><sub><italic>out</italic></sub>. <bold>(b)</bold> Discriminators <italic>D</italic><sub><italic>t</italic></sub> and <italic>D</italic><sub><italic>r</italic></sub>: both share the same architecture but operate independently&#x02014;<italic>D</italic><sub><italic>t</italic></sub> receives real/fake tabular data while <italic>D</italic><sub><italic>r</italic></sub> receives real/fake directly-follows relations. Input dimension <italic>d</italic><sub><italic>in</italic></sub> is processed through FC layers (hidden size 256) with Leaky ReLU (max(&#x003B1;<italic>x, x</italic>), &#x003B1; &#x0003D; 0.2), producing a scalar probability &#x02208;[0, 1] indicating authenticity (real/fake).</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fcomp-08-1752739-g0003.tif">
<alt-text content-type="machine-generated">Diagram showing two flowcharts labeled (a) and (b). (a) illustrates a process beginning with a probability distribution, followed by two fully connected layers and a ReLU activation, ending with a Gumbel softmax for categorical sampling to generate tabular and process data. (b) starts with categorical and process data as inputs, passing through fully connected and Leaky ReLU layers, producing a scalar value representing the probability of real or fake data.</alt-text>
</graphic>
</fig>
<p><bold>Generator Architecture (</bold><xref ref-type="fig" rid="F3"><bold>Figure 3a</bold></xref><bold>)</bold>: The generator takes a latent noise vector <inline-formula><mml:math id="M34"><mml:mi>Z</mml:mi><mml:mo>&#x0007E;</mml:mo><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>N</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:msup><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>&#x003C3;</mml:mi><mml:mi>C</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup><mml:mi>I</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula> as input and transforms it through multiple fully connected (FC) layers with ReLU activation. The final layer uses Gumbel-Softmax activation to generate discrete categorical values for activities and other process data attributes. The generator outputs both tabular data (event attributes) and directly-follows relations simultaneously.</p>
<p><bold>Discriminator Architecture (</bold><xref ref-type="fig" rid="F3"><bold>Figure 3b</bold></xref><bold>)</bold>: The two discriminators (<italic>D</italic><sub><italic>t</italic></sub> and <italic>D</italic><sub><italic>r</italic></sub>) share the same architecture but process different inputs. Each discriminator consists of a sequence of fully connected layers with Leaky ReLU activations. The discriminators receive as input tabular data (for <italic>D</italic><sub><italic>t</italic></sub>) or directly-follows relations (for <italic>D</italic><sub><italic>r</italic></sub>), and output a scalar representing the likelihood that the input is real (versus fake). The shared architecture ensures the same discriminative power across modalities while allowing each discriminator to focus on its own domain using distinct parameters.</p>
</sec>
<sec>
<label>4.4</label>
<title>Multi-objective optimization framework</title>
<p>The dual-discriminator architecture constitutes a multi-objective optimization problem formulated as a minimax game:</p>
<disp-formula id="EQ8"><mml:math id="M36"><mml:mrow><mml:mtable><mml:mtr><mml:mtd><mml:mrow><mml:munder><mml:mrow><mml:mi>max</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mi>D</mml:mi><mml:mi>t</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>D</mml:mi><mml:mi>r</mml:mi></mml:msub></mml:mrow></mml:munder></mml:mrow></mml:mtd><mml:mtd><mml:mrow><mml:msub><mml:mi>&#x02112;</mml:mi><mml:mrow><mml:msub><mml:mi>D</mml:mi><mml:mi>t</mml:mi></mml:msub></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mi>&#x02112;</mml:mi><mml:mrow><mml:msub><mml:mi>D</mml:mi><mml:mi>r</mml:mi></mml:msub></mml:mrow></mml:msub></mml:mrow></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mrow><mml:munder><mml:mrow><mml:mi>min</mml:mi></mml:mrow><mml:mi>G</mml:mi></mml:munder></mml:mrow></mml:mtd><mml:mtd><mml:mrow><mml:msub><mml:mi>&#x02112;</mml:mi><mml:mi>G</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mo>&#x02212;</mml:mo><mml:msub><mml:mstyle mathvariant='double-struck'><mml:mi>E</mml:mi></mml:mstyle><mml:mi>Z</mml:mi></mml:msub><mml:mo stretchy='false'>[</mml:mo><mml:mi>log</mml:mi><mml:msub><mml:mi>D</mml:mi><mml:mi>t</mml:mi></mml:msub><mml:mo stretchy='false'>(</mml:mo><mml:mi>G</mml:mi><mml:mo stretchy='false'>(</mml:mo><mml:mi>Z</mml:mi><mml:mo stretchy='false'>)</mml:mo><mml:mo stretchy='false'>)</mml:mo><mml:mo stretchy='false'>]</mml:mo><mml:mo>&#x02212;</mml:mo><mml:msub><mml:mstyle mathvariant='double-struck'><mml:mi>E</mml:mi></mml:mstyle><mml:mi>Z</mml:mi></mml:msub><mml:mo stretchy='false'>[</mml:mo><mml:mi>log</mml:mi><mml:msub><mml:mi>D</mml:mi><mml:mi>r</mml:mi></mml:msub><mml:mo stretchy='false'>(</mml:mo><mml:msub><mml:mi>R</mml:mi><mml:mrow><mml:mi>G</mml:mi><mml:mo stretchy='false'>(</mml:mo><mml:mi>Z</mml:mi><mml:mo stretchy='false'>)</mml:mo></mml:mrow></mml:msub><mml:mo stretchy='false'>)</mml:mo><mml:mo 
stretchy='false'>]</mml:mo><mml:mo>+</mml:mo><mml:mo>&#x003BB;</mml:mo><mml:msub><mml:mi>&#x02112;</mml:mi><mml:mrow><mml:mi>c</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:math><label>(8)</label></disp-formula>
<p><bold>Discriminator optimization</bold>: The two discriminators are optimized independently and simultaneously:</p>
<disp-formula id="EQ9"><mml:math id="M38"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mstyle displaystyle="true"><mml:munder class="msub"><mml:mrow><mml:mo class="qopname">max</mml:mo></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>D</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:munder></mml:mstyle><mml:msub><mml:mrow><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>L</mml:mi></mml:mstyle></mml:mrow></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>D</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mrow><mml:mstyle mathvariant="double-struck"><mml:mi>E</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:mi>X</mml:mi><mml:mo>&#x0007E;</mml:mo><mml:msub><mml:mrow><mml:mi>p</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi><mml:mi>a</mml:mi><mml:mi>t</mml:mi><mml:mi>a</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mo class="qopname">log</mml:mo><mml:msub><mml:mrow><mml:mi>D</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>X</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo>]</mml:mo></mml:mrow><mml:mo>&#x0002B;</mml:mo><mml:msub><mml:mrow><mml:mstyle mathvariant="double-struck"><mml:mi>E</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:mi>Z</mml:mi><mml:mo>&#x0007E;</mml:mo><mml:msub><mml:mrow><mml:mi>p</mml:mi></mml:mrow><mml:mrow><mml:mi>Z</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mo class="qopname">log</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>1</mml:mn><mml:mo>-</mml:mo><mml:msub><mml:mrow><mml:mi>D</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>G</mml:mi><mml:mrow><mml:mo 
stretchy="false">(</mml:mo><mml:mrow><mml:mi>Z</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo>]</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(9)</label></disp-formula>
<disp-formula id="EQ10"><mml:math id="M40"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mstyle displaystyle="true"><mml:munder class="msub"><mml:mrow><mml:mo class="qopname">max</mml:mo></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>D</mml:mi></mml:mrow><mml:mrow><mml:mi>r</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:munder></mml:mstyle><mml:msub><mml:mrow><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>L</mml:mi></mml:mstyle></mml:mrow></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>D</mml:mi></mml:mrow><mml:mrow><mml:mi>r</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mrow><mml:mstyle mathvariant="double-struck"><mml:mi>E</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>R</mml:mi></mml:mrow><mml:mrow><mml:mi>X</mml:mi></mml:mrow></mml:msub><mml:mo>&#x0007E;</mml:mo><mml:msub><mml:mrow><mml:mi>p</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi><mml:mi>a</mml:mi><mml:mi>t</mml:mi><mml:mi>a</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mo class="qopname">log</mml:mo><mml:msub><mml:mrow><mml:mi>D</mml:mi></mml:mrow><mml:mrow><mml:mi>r</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>R</mml:mi></mml:mrow><mml:mrow><mml:mi>X</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo>]</mml:mo></mml:mrow><mml:mo>&#x0002B;</mml:mo><mml:msub><mml:mrow><mml:mstyle mathvariant="double-struck"><mml:mi>E</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:mi>Z</mml:mi><mml:mo>&#x0007E;</mml:mo><mml:msub><mml:mrow><mml:mi>p</mml:mi></mml:mrow><mml:mrow><mml:mi>Z</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mo class="qopname">log</mml:mo><mml:mrow><mml:mo 
stretchy="false">(</mml:mo><mml:mrow><mml:mn>1</mml:mn><mml:mo>-</mml:mo><mml:msub><mml:mrow><mml:mi>D</mml:mi></mml:mrow><mml:mrow><mml:mi>r</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>R</mml:mi></mml:mrow><mml:mrow><mml:mi>G</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>Z</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo>]</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(10)</label></disp-formula>
<p>where <italic>R</italic><sub><italic>X</italic></sub> denotes the directly-follows relations extracted from real data <italic>X</italic>, and <italic>R</italic><sub><italic>G</italic>(<italic>Z</italic>)</sub> denotes those from generated data <italic>G</italic>(<italic>Z</italic>).</p>
<p><bold>Generator optimization against both discriminators</bold>: the generator is required to fool both discriminators at the same time and keep the structure valid:</p>
<disp-formula id="EQ11"><mml:math id="M42"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mstyle displaystyle="true"><mml:munder class="msub"><mml:mrow><mml:mo class="qopname">min</mml:mo></mml:mrow><mml:mrow><mml:mi>G</mml:mi></mml:mrow></mml:munder></mml:mstyle><mml:msub><mml:mrow><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>L</mml:mi></mml:mstyle></mml:mrow></mml:mrow><mml:mrow><mml:mi>G</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mo>-</mml:mo><mml:msub><mml:mrow><mml:mstyle mathvariant="double-struck"><mml:mi>E</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:mi>Z</mml:mi><mml:mo>&#x0007E;</mml:mo><mml:msub><mml:mrow><mml:mi>p</mml:mi></mml:mrow><mml:mrow><mml:mi>Z</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mo class="qopname">log</mml:mo><mml:msub><mml:mrow><mml:mi>D</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>G</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>Z</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo>]</mml:mo></mml:mrow><mml:mo>-</mml:mo><mml:msub><mml:mrow><mml:mstyle mathvariant="double-struck"><mml:mi>E</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:mi>Z</mml:mi><mml:mo>&#x0007E;</mml:mo><mml:msub><mml:mrow><mml:mi>p</mml:mi></mml:mrow><mml:mrow><mml:mi>Z</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mo class="qopname">log</mml:mo><mml:msub><mml:mrow><mml:mi>D</mml:mi></mml:mrow><mml:mrow><mml:mi>r</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>R</mml:mi></mml:mrow><mml:mrow><mml:mi>G</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>Z</mml:mi></mml:mrow><mml:mo 
stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo>]</mml:mo></mml:mrow><mml:mo>&#x0002B;</mml:mo><mml:mo>&#x003BB;</mml:mo><mml:msub><mml:mrow><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>L</mml:mi></mml:mstyle></mml:mrow></mml:mrow><mml:mrow><mml:mi>c</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi></mml:mrow></mml:msub></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(11)</label></disp-formula>
<p>where the first two terms represent adversarial losses from <italic>D</italic><sub><italic>t</italic></sub> and <italic>D</italic><sub><italic>r</italic></sub> respectively, and <inline-formula><mml:math id="M44"><mml:msub><mml:mrow><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>L</mml:mi></mml:mstyle></mml:mrow></mml:mrow><mml:mrow><mml:mi>c</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> is the deadlock condition loss ensuring structural validity (detailed in Section 5).</p>
<p><bold>Balancing mechanism</bold>: we use alternating training to balance the two discriminators:</p>
<list list-type="bullet">
<list-item><p>Training <italic>D</italic><sub><italic>t</italic></sub> and <italic>D</italic><sub><italic>r</italic></sub> for <italic>k</italic><sub><italic>d</italic></sub> steps (typically <italic>k</italic><sub><italic>d</italic></sub> &#x0003D; 5)</p></list-item>
<list-item><p>Training <italic>G</italic> for <italic>k</italic><sub><italic>g</italic></sub> steps (typically <italic>k</italic><sub><italic>g</italic></sub> &#x0003D; 1)</p></list-item>
</list>
<p>This alternating schedule prevents the discriminators from dominating the optimization or from suffering from mode collapse, which may occur when one discriminator becomes too strong.</p>
<p><bold>Convergence to Nash equilibrium</bold>: upon convergence, the system reaches a Nash equilibrium such that:</p>
<disp-formula id="EQ12"><mml:math id="M45"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi><mml:mi>a</mml:mi><mml:mi>t</mml:mi><mml:mi>a</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>X</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>&#x02248;</mml:mo><mml:msub><mml:mrow><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:mi>G</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>Z</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mtext>&#x02003;</mml:mtext><mml:msub><mml:mrow><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi><mml:mi>a</mml:mi><mml:mi>t</mml:mi><mml:mi>a</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>R</mml:mi></mml:mrow><mml:mrow><mml:mi>X</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>&#x02248;</mml:mo><mml:msub><mml:mrow><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:mi>G</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>Z</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>R</mml:mi></mml:mrow><mml:mrow><mml:mi>G</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>Z</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(12)</label></disp-formula>
<p>indicating that generated data matches real data in both dataflow and workflow distributions.</p>
</sec>
</sec>
<sec id="s5">
<label>5</label>
<title>Game optimization strategy based on deadlock conditional loss</title>
<p>A game-theoretic approach to optimization under deadlock-conditioned loss is proposed. This is achieved through a new combined global learning scheme that exploits structural constraints derived from Petri net theory. We first introduce deadlock marking sets, then characterize the components of the deadlock condition loss, and finally discuss how this loss is incorporated into generator optimization.</p>
<sec>
<label>5.1</label>
<title>Deadlock marking sets</title>
<p><bold>Definition 10 (dead marking)</bold> A marking (state) <italic>M</italic> in which no transitions are enabled and which cannot evolve.</p>
<p>In Petri Nets, a deadlock is an illegal or problematic state in which the process cannot proceed further. Detecting such states and limiting their occurrence during generation ensures that the synthetic process data remain structurally valid.</p>
<p>Let <italic>dl</italic><sub><italic>r</italic></sub> be the set of deadlock markings extracted from real process data, and <italic>dl</italic><sub><italic>f</italic></sub> be those extracted from synthetic data. The deadlock loss penalizes the difference between these two sets, both in number and in distribution.</p>
</sec>
<sec>
<label>5.2</label>
<title>Components of deadlock condition loss</title>
<p>The deadlock condition loss <inline-formula><mml:math id="M46"><mml:msub><mml:mrow><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>L</mml:mi></mml:mstyle></mml:mrow></mml:mrow><mml:mrow><mml:mi>c</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> consists of two components (see <xref ref-type="fig" rid="F4">Figure 4</xref>):</p>
<disp-formula id="EQ13"><mml:math id="M47"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>L</mml:mi></mml:mstyle></mml:mrow></mml:mrow><mml:mrow><mml:mi>c</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mrow><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>L</mml:mi></mml:mstyle></mml:mrow></mml:mrow><mml:mrow><mml:mi>s</mml:mi><mml:mi>i</mml:mi><mml:mi>z</mml:mi><mml:mi>e</mml:mi></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:msub><mml:mrow><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>L</mml:mi></mml:mstyle></mml:mrow></mml:mrow><mml:mrow><mml:mi>d</mml:mi><mml:mi>i</mml:mi><mml:mi>s</mml:mi><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(13)</label></disp-formula>
<fig position="float" id="F4">
<label>Figure 4</label>
<caption><p>Deadlock conditional loss mechanism.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fcomp-08-1752739-g0004.tif">
<alt-text content-type="machine-generated">Flowchart depicting a deadlock analysis framework, beginning with input of real or synthetic process data and deadlock marking sets, integrating into a logical component, calculating size loss and deadlock frequency, applying a distribution penalty and loss, and culminating in a composite deadlock condition loss value.</alt-text>
</graphic>
</fig>
<p>1. <italic>Size Loss (</italic><inline-formula><mml:math id="M48"><mml:msub><mml:mrow><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>L</mml:mi></mml:mstyle></mml:mrow></mml:mrow><mml:mrow><mml:mi>s</mml:mi><mml:mi>i</mml:mi><mml:mi>z</mml:mi><mml:mi>e</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula><italic>)</italic>: Penalizes the difference in the number of deadlock states between real and fake data:</p>
<disp-formula id="EQ14"><mml:math id="M49"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>L</mml:mi></mml:mstyle></mml:mrow></mml:mrow><mml:mrow><mml:mi>s</mml:mi><mml:mi>i</mml:mi><mml:mi>z</mml:mi><mml:mi>e</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mo>|</mml:mo><mml:mi>c</mml:mi><mml:mi>o</mml:mi><mml:mi>u</mml:mi><mml:mi>n</mml:mi><mml:mi>t</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>d</mml:mi><mml:msub><mml:mrow><mml:mi>l</mml:mi></mml:mrow><mml:mrow><mml:mi>r</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>-</mml:mo><mml:mi>c</mml:mi><mml:mi>o</mml:mi><mml:mi>u</mml:mi><mml:mi>n</mml:mi><mml:mi>t</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>d</mml:mi><mml:msub><mml:mrow><mml:mi>l</mml:mi></mml:mrow><mml:mrow><mml:mi>f</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>|</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(14)</label></disp-formula>
<p>This ensures that the generator produces a number of deadlock configurations similar to that observed in real data, preventing over- or under-generation of problematic states.</p>
<p>2. <italic>Distribution Loss (</italic><inline-formula><mml:math id="M50"><mml:msub><mml:mrow><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>L</mml:mi></mml:mstyle></mml:mrow></mml:mrow><mml:mrow><mml:mi>d</mml:mi><mml:mi>i</mml:mi><mml:mi>s</mml:mi><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula><italic>)</italic>: This is a KL divergence penalty, which ensures that the distribution of deadlock types in the generated data matches that in the real data:</p>
<disp-formula id="EQ15"><mml:math id="M51"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>L</mml:mi></mml:mstyle></mml:mrow></mml:mrow><mml:mrow><mml:mi>d</mml:mi><mml:mi>i</mml:mi><mml:mi>s</mml:mi><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mrow><mml:mi>D</mml:mi></mml:mrow><mml:mrow><mml:mi>K</mml:mi><mml:mi>L</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>Q</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi><mml:msub><mml:mrow><mml:mi>l</mml:mi></mml:mrow><mml:mrow><mml:mi>r</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub><mml:mo stretchy="false">&#x02016;</mml:mo><mml:msub><mml:mrow><mml:mi>Q</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi><mml:msub><mml:mrow><mml:mi>l</mml:mi></mml:mrow><mml:mrow><mml:mi>f</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mstyle displaystyle="true"><mml:munder class="msub"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:munder></mml:mstyle><mml:msubsup><mml:mrow><mml:mi>Q</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi><mml:msub><mml:mrow><mml:mi>l</mml:mi></mml:mrow><mml:mrow><mml:mi>r</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo class="qopname">log</mml:mo><mml:mfrac><mml:mrow><mml:msubsup><mml:mrow><mml:mi>Q</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi><mml:msub><mml:mrow><mml:mi>l</mml:mi></mml:mrow><mml:mrow><mml:mi>r</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mo 
stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:mrow><mml:mrow><mml:msubsup><mml:mrow><mml:mi>Q</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi><mml:msub><mml:mrow><mml:mi>l</mml:mi></mml:mrow><mml:mrow><mml:mi>f</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:mrow></mml:mfrac></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(15)</label></disp-formula>
<p>where <inline-formula><mml:math id="M52"><mml:msubsup><mml:mrow><mml:mi>Q</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi><mml:msub><mml:mrow><mml:mi>l</mml:mi></mml:mrow><mml:mrow><mml:mi>r</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:math></inline-formula> and <inline-formula><mml:math id="M53"><mml:msubsup><mml:mrow><mml:mi>Q</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi><mml:msub><mml:mrow><mml:mi>l</mml:mi></mml:mrow><mml:mrow><mml:mi>f</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:math></inline-formula> are the frequencies of the <italic>i</italic>-th deadlock type in real and fake data, respectively. Here, <italic>i</italic> refers to different deadlock patterns (e.g., deadlocks at certain places in the Petri net).</p>
<p><bold>Purpose of KL penalty</bold>: The KL divergence serves three critical functions:</p>
<p>(a) <italic>Distribution matching</italic>: It makes sure that the real and synthetic data have not only a similar number of deadlocks, but also a similar distribution of deadlock types. In particular, the impact on process behavior differs depending on which tasks are involved in the deadlock.</p>
<p>(b) <italic>Fine-grained control</italic>: Although <inline-formula><mml:math id="M54"><mml:msub><mml:mrow><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>L</mml:mi></mml:mstyle></mml:mrow></mml:mrow><mml:mrow><mml:mi>s</mml:mi><mml:mi>i</mml:mi><mml:mi>z</mml:mi><mml:mi>e</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> is intended to guarantee rough overall count similarity, <inline-formula><mml:math id="M55"><mml:msub><mml:mrow><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>L</mml:mi></mml:mstyle></mml:mrow></mml:mrow><mml:mrow><mml:mi>d</mml:mi><mml:mi>i</mml:mi><mml:mi>s</mml:mi><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> is to ensure that certain deadlock patterns are preserved (fine-grained structural features).</p>
<p>(c) <italic>Mode coverage</italic>: By penalizing differences in all types of deadlocks, the KL term prevents the generator from disregarding rare but meaningful deadlock patterns and prevents the collapse of modes in the structural space.</p>
<p><bold>Example:</bold> A true process has two types of deadlocks&#x02014;Type A (70%) and Type B (30%). Without <inline-formula><mml:math id="M56"><mml:msub><mml:mrow><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>L</mml:mi></mml:mstyle></mml:mrow></mml:mrow><mml:mrow><mml:mi>d</mml:mi><mml:mi>i</mml:mi><mml:mi>s</mml:mi><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula>, the generator may generate only Type A deadlocks (100%). The KL penalty ensures that the generator also learns to generate both types in the correct proportions, thereby preventing structural bias.</p>
</sec>
<sec>
<label>5.3</label>
<title>Integration with generator loss</title>
<p><bold>Note:</bold> The deadlock condition loss <inline-formula><mml:math id="M57"><mml:msub><mml:mrow><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>L</mml:mi></mml:mstyle></mml:mrow></mml:mrow><mml:mrow><mml:mi>c</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> is enforced only on the generator, not on the discriminators. The main factors to consider in this design choice are as follows:</p>
<p><italic>Generator&#x00027;s objective</italic>: the generator attempts to create process data that, at all times, fools the discriminators and is structurally valid. The deadlock-condition loss serves as a regularization term that encourages the generator to produce Petri nets without deadlock states.</p>
<p><italic>Discriminators&#x00027; objective</italic>: the discriminators (<italic>D</italic><sub><italic>t</italic></sub> and <italic>D</italic><sub><italic>r</italic></sub>) are concerned only with real vs. fake at the dataflow and workflow level (respectively). They do not have to check for structural properties such as deadlock-freedom, since this is imposed via the loss of the generator.</p>
<p>The total generator loss is:</p>
<disp-formula id="EQ16"><mml:math id="M58"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>L</mml:mi></mml:mstyle></mml:mrow></mml:mrow><mml:mrow><mml:mi>G</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mstyle displaystyle="true"><mml:munder class="msub"><mml:mrow><mml:mstyle displaystyle="true"><mml:munder accentunder="false"><mml:mrow><mml:msub><mml:mrow><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>L</mml:mi></mml:mstyle></mml:mrow></mml:mrow><mml:mrow><mml:mi>a</mml:mi><mml:mi>d</mml:mi><mml:mi>v</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>D</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>D</mml:mi></mml:mrow><mml:mrow><mml:mi>r</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mi>G</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo>&#x0FE38;</mml:mo></mml:munder></mml:mstyle></mml:mrow><mml:mrow><mml:mtext class="textrm" mathvariant="normal">Adversarial term</mml:mtext></mml:mrow></mml:munder></mml:mstyle><mml:mo>&#x0002B;</mml:mo><mml:mo>&#x003BB;</mml:mo><mml:mstyle displaystyle="true"><mml:munder class="msub"><mml:mrow><mml:mstyle displaystyle="true"><mml:munder accentunder="false"><mml:mrow><mml:msub><mml:mrow><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>L</mml:mi></mml:mstyle></mml:mrow></mml:mrow><mml:mrow><mml:mi>c</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>G</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo>&#x0FE38;</mml:mo></mml:munder></mml:mstyle></mml:mrow><mml:mrow><mml:mtext class="textrm" mathvariant="normal">Structural 
constraint</mml:mtext></mml:mrow></mml:munder></mml:mstyle></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(16)</label></disp-formula>
<p>where:</p>
<list list-type="bullet">
<list-item><p><inline-formula><mml:math id="M59"><mml:msub><mml:mrow><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>L</mml:mi></mml:mstyle></mml:mrow></mml:mrow><mml:mrow><mml:mi>a</mml:mi><mml:mi>d</mml:mi><mml:mi>v</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> represents combined adversarial loss from both discriminators:</p></list-item>
</list>
<disp-formula id="EQ17"><mml:math id="M60"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>L</mml:mi></mml:mstyle></mml:mrow></mml:mrow><mml:mrow><mml:mi>a</mml:mi><mml:mi>d</mml:mi><mml:mi>v</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mo>-</mml:mo><mml:msub><mml:mrow><mml:mstyle mathvariant="double-struck"><mml:mi>E</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:mi>Z</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mo class="qopname">log</mml:mo><mml:msub><mml:mrow><mml:mi>D</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>G</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>Z</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo>]</mml:mo></mml:mrow><mml:mo>-</mml:mo><mml:msub><mml:mrow><mml:mstyle mathvariant="double-struck"><mml:mi>E</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:mi>Z</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mo class="qopname">log</mml:mo><mml:msub><mml:mrow><mml:mi>D</mml:mi></mml:mrow><mml:mrow><mml:mi>r</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>R</mml:mi></mml:mrow><mml:mrow><mml:mi>G</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>Z</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo>]</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(17)</label></disp-formula>
<list list-type="bullet">
<list-item><p><inline-formula><mml:math id="M61"><mml:msub><mml:mrow><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>L</mml:mi></mml:mstyle></mml:mrow></mml:mrow><mml:mrow><mml:mi>c</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mrow><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>L</mml:mi></mml:mstyle></mml:mrow></mml:mrow><mml:mrow><mml:mi>s</mml:mi><mml:mi>i</mml:mi><mml:mi>z</mml:mi><mml:mi>e</mml:mi></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:msub><mml:mrow><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>L</mml:mi></mml:mstyle></mml:mrow></mml:mrow><mml:mrow><mml:mi>d</mml:mi><mml:mi>i</mml:mi><mml:mi>s</mml:mi><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> enforces deadlock-freedom in generated process data</p></list-item>
<list-item><p>&#x003BB;&#x02208;[0, 1] balances data authenticity (from adversarial training) and structural validity (from deadlock constraints)</p></list-item>
</list>
<p>This separation of concerns allows discriminators to focus on data realism while the generator optimizes for both realism and structural correctness.</p>
<p>Meanwhile, recalling from (<xref ref-type="disp-formula" rid="EQ8">Equations 8</xref>&#x02013;<xref ref-type="disp-formula" rid="EQ9">9</xref>), the discriminators are trained by maximizing:</p>
<disp-formula id="EQ18"><mml:math id="M62"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mstyle displaystyle="true"><mml:munder class="msub"><mml:mrow><mml:mo class="qopname">max</mml:mo></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>D</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:munder></mml:mstyle><mml:msub><mml:mrow><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>L</mml:mi></mml:mstyle></mml:mrow></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>D</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mrow><mml:mstyle mathvariant="double-struck"><mml:mi>E</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:mi>X</mml:mi><mml:mo>&#x0007E;</mml:mo><mml:msub><mml:mrow><mml:mi>p</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi><mml:mi>a</mml:mi><mml:mi>t</mml:mi><mml:mi>a</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mo class="qopname">log</mml:mo><mml:msub><mml:mrow><mml:mi>D</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>X</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo>]</mml:mo></mml:mrow><mml:mo>&#x0002B;</mml:mo><mml:msub><mml:mrow><mml:mstyle mathvariant="double-struck"><mml:mi>E</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:mi>Z</mml:mi><mml:mo>&#x0007E;</mml:mo><mml:msub><mml:mrow><mml:mi>p</mml:mi></mml:mrow><mml:mrow><mml:mi>Z</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mo class="qopname">log</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>1</mml:mn><mml:mo>-</mml:mo><mml:msub><mml:mrow><mml:mi>D</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>G</mml:mi><mml:mrow><mml:mo 
stretchy="false">(</mml:mo><mml:mrow><mml:mi>Z</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo>]</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(18)</label></disp-formula>
<p>and:</p>
<disp-formula id="EQ19"><mml:math id="M64"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mstyle displaystyle="true"><mml:munder class="msub"><mml:mrow><mml:mo class="qopname">max</mml:mo></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>D</mml:mi></mml:mrow><mml:mrow><mml:mi>r</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:munder></mml:mstyle><mml:msub><mml:mrow><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>L</mml:mi></mml:mstyle></mml:mrow></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>D</mml:mi></mml:mrow><mml:mrow><mml:mi>r</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mrow><mml:mstyle mathvariant="double-struck"><mml:mi>E</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>R</mml:mi></mml:mrow><mml:mrow><mml:mi>X</mml:mi></mml:mrow></mml:msub><mml:mo>&#x0007E;</mml:mo><mml:msub><mml:mrow><mml:mi>p</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi><mml:mi>a</mml:mi><mml:mi>t</mml:mi><mml:mi>a</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mo class="qopname">log</mml:mo><mml:msub><mml:mrow><mml:mi>D</mml:mi></mml:mrow><mml:mrow><mml:mi>r</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>R</mml:mi></mml:mrow><mml:mrow><mml:mi>X</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo>]</mml:mo></mml:mrow><mml:mo>&#x0002B;</mml:mo><mml:msub><mml:mrow><mml:mstyle mathvariant="double-struck"><mml:mi>E</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:mi>Z</mml:mi><mml:mo>&#x0007E;</mml:mo><mml:msub><mml:mrow><mml:mi>p</mml:mi></mml:mrow><mml:mrow><mml:mi>Z</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mo class="qopname">log</mml:mo><mml:mrow><mml:mo 
stretchy="false">(</mml:mo><mml:mrow><mml:mn>1</mml:mn><mml:mo>-</mml:mo><mml:msub><mml:mrow><mml:mi>D</mml:mi></mml:mrow><mml:mrow><mml:mi>r</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>R</mml:mi></mml:mrow><mml:mrow><mml:mi>G</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>Z</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo>]</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(19)</label></disp-formula>
<p>From a game theory perspective, the deadlock loss acts as a new rule constraint that limits the generator&#x00027;s strategy space. This game-optimization training mechanism incentivizes the generator to learn richer modes of the data distribution and to maximize payoff in a complex, dynamic gaming environment. In summary, the Privacy-Preserving Process Data Generation Based on Dual-Discriminator Conditional Generative Adversarial Network (P<sup>3</sup>DGAN), incorporating deadlock conditional loss from the perspective of game optimization, can strengthen adversarial nature during training, stimulate learning potential of the generator, and avoid learning only simple partial modes of the data distribution.</p>
</sec>
</sec>
<sec id="s6">
<label>6</label>
<title>Privacy guarantee of P<sup>3</sup>DGAN</title>
<p>Tracking and demonstrating privacy loss is a key aspect of differentially private deep learning. To show P<sup>3</sup>DGAN can well protect differential privacy, we give a privacy proof based on DPGAN (<xref ref-type="bibr" rid="B34">Xie et al., 2018</xref>) combined with differential privacy parallel composition (<xref ref-type="bibr" rid="B33">Wijesinghe et al., 2024</xref>). Before the proof, we define an adjacent dataset, and LEMMA 1 establishes DP across different discriminator-training procedures.</p>
<p><bold>Definition 11 (Adjacent Data Set)</bold> For a data set <italic>x</italic>, its <italic>l</italic><sub>1</sub> norm is ||<italic>x</italic>||<sub>1</sub>. For two data sets <italic>x</italic> and <italic>y</italic>, their <italic>l</italic><sub>1</sub> distance is ||<italic>x</italic>&#x02212;<italic>y</italic>||<sub>1</sub>, which is the number of different elements. If the distance between them is 1, that is ||<italic>x</italic>&#x02212;<italic>y</italic>||<sub>1</sub> &#x0003D; 1, then <italic>x</italic> and <italic>y</italic> are called adjacent data sets. The symmetric difference between two adjacent sets can also be expressed as <italic>x</italic>&#x02295;<italic>y</italic> &#x0003D; (<italic>x</italic>&#x0222A;<italic>y</italic>)&#x02212;(<italic>x</italic>&#x02229;<italic>y</italic>), whose cardinality satisfies |<italic>x</italic>&#x02295;<italic>y</italic>| &#x0003D; 1.</p>
<p><bold>Explanations</bold>: The <italic>privacy budget</italic> &#x003B5; dictates the trade-off between privacy and utility: smaller &#x003B5; results in stronger privacy (more noise) and potentially lower quality of data, while larger &#x003B5; leads to better utility, at the expense of weaker privacy guarantees.</p>
<p><bold>Lemma 1</bold>. Given the sampling probability <italic>q</italic><sub><italic>t</italic></sub> &#x0003D; <italic>m</italic>/<italic>M</italic><sub><italic>t</italic></sub>, where <italic>m</italic> represents the batch size and <italic>M</italic><sub><italic>t</italic></sub> is the total number of training data in the tabular data discriminator iteration, the number of discriminator iterations in each inner loop <italic>T</italic><sub><italic>d</italic></sub>, and privacy violation &#x003B4;, for any positive &#x003B5;, the parameters of <italic>D</italic><sub><italic>t</italic></sub> guarantee (&#x003B5;, &#x003B4;)-differential privacy for all data used in that outer loop if it satisfies:</p>
<disp-formula id="EQ20"><mml:math id="M66"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>&#x003C3;</mml:mi><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mn>2</mml:mn><mml:msub><mml:mrow><mml:mi>q</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:msqrt><mml:mrow><mml:msub><mml:mrow><mml:mi>T</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mo class="qopname">log</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>1</mml:mn><mml:mo>/</mml:mo><mml:mi>&#x003B4;</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msqrt></mml:mrow><mml:mrow><mml:mi>&#x003B5;</mml:mi></mml:mrow></mml:mfrac></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(20)</label></disp-formula>
<p>Similarly, <italic>D</italic><sub><italic>r</italic></sub> is the same as <italic>D</italic><sub><italic>t</italic></sub>, except for the sampling probability <italic>q</italic><sub><italic>r</italic></sub> &#x0003D; <italic>m</italic>/<italic>M</italic><sub><italic>r</italic></sub>, where <italic>M</italic><sub><italic>r</italic></sub> is the total number of training data in the directly-follows relationship discriminator iteration.</p>
<p><italic>Proof</italic> : The DP guarantee for the discriminator training procedure follows the intermediate result in (<xref ref-type="bibr" rid="B34">Xie et al., 2018</xref>). For a fixed perturbation &#x003C3; on a gradient, a larger <italic>q</italic> leads to a weaker privacy guarantee. This is indeed true since the more data is involved in computing the discriminator parameter <italic>w</italic>, the less privacy is assigned to each of them. Also, more iterations <italic>T</italic><sub><italic>d</italic></sub> lead to less privacy because more information about the data (specifically, more accurate gradients) is revealed to observers.</p>
<p><bold>Theorem 1</bold>. The dual-discriminator in P<sup>3</sup>DGAN satisfies (&#x003B5;, &#x003B4;)-differential privacy during training on process data.</p>
<p><italic>Proof</italic> : Let <italic>M</italic>:&#x02115;<sup>|<italic>x</italic>|</sup>&#x02192;<italic>R</italic> be an (&#x003B5;, &#x003B4;)-differentially private algorithm acting on a single discriminator. For any adjacent data sets <italic>x, y</italic> (||<italic>x</italic>&#x02212;<italic>y</italic>||<sub>1</sub> &#x02264; 1), any function <italic>f</italic>:<italic>R</italic>&#x02192;<italic>R</italic>&#x02032;, and any event <italic>S</italic>&#x02286;<italic>R</italic>&#x02032;, setting <italic>T</italic> &#x0003D; {<italic>r</italic>&#x02208;<italic>R</italic>:<italic>f</italic>(<italic>r</italic>)&#x02208;<italic>S</italic>} yields:</p>
<disp-formula id="EQ21"><mml:math id="M67"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>P</mml:mi><mml:mi>r</mml:mi><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mi>f</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>M</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>&#x02208;</mml:mo><mml:mi>S</mml:mi></mml:mrow><mml:mo>]</mml:mo></mml:mrow><mml:mtext>&#x02003;</mml:mtext><mml:mo>=</mml:mo><mml:mi>P</mml:mi><mml:mi>r</mml:mi><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mi>M</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>&#x02208;</mml:mo><mml:mi>T</mml:mi></mml:mrow><mml:mo>]</mml:mo></mml:mrow></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mtext>&#x02003;&#x02003;&#x02003;&#x02003;&#x02003;&#x02003;&#x02003;</mml:mtext><mml:mo>&#x02264;</mml:mo><mml:msup><mml:mrow><mml:mi>e</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x003B5;</mml:mi></mml:mrow></mml:msup><mml:mi>P</mml:mi><mml:mi>r</mml:mi><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mi>M</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>y</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>&#x02208;</mml:mo><mml:mi>T</mml:mi></mml:mrow><mml:mo>]</mml:mo></mml:mrow><mml:mo>&#x0002B;</mml:mo><mml:mi>&#x003B4;</mml:mi></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mtext>&#x02003;&#x02003;&#x02003;&#x02003;&#x02003;&#x02003;&#x02003;</mml:mtext><mml:mo>=</mml:mo><mml:msup><mml:mrow><mml:mi>e</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x003B5;</mml:mi></mml:mrow></mml:msup><mml:mi>P</mml:mi><mml:mi>r</mml:mi><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mi>f</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>M</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>y</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>&#x02208;</mml:mo><mml:mi>S</mml:mi></mml:mrow><mml:mo>]</mml:mo></mml:mrow><mml:mo>&#x0002B;</mml:mo><mml:mi>&#x003B4;</mml:mi></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(21)</label></disp-formula>
<p>The random map can be decomposed into a convex combination of deterministic functions, and the convex combination of differential privacy satisfies differential privacy. When facing multiple data sets, for generality, we define the divisions <italic>D</italic><sub>1</sub>, <italic>D</italic><sub>2</sub>, &#x02026;, <italic>D</italic><sub><italic>i</italic></sub> of data set <italic>D</italic>, and the corresponding algorithms <italic>A</italic><sub>1</sub>, <italic>A</italic><sub>2</sub>, &#x02026;, <italic>A</italic><sub><italic>i</italic></sub>, respectively, where the divided data sets are disjoint. Algorithms satisfy &#x003B5;<sub>1</sub>, &#x003B5;<sub>2</sub>, &#x02026;, &#x003B5;<sub><italic>i</italic></sub> differential privacy, respectively. Furthermore, <italic>k</italic> is the number of database partitions.</p>
<disp-formula id="EQ22"><mml:math id="M69"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>P</mml:mi><mml:mi>r</mml:mi><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mi>A</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>D</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mi>o</mml:mi></mml:mrow><mml:mo>]</mml:mo></mml:mrow><mml:mtext>&#x02003;&#x02003;&#x02003;&#x02003;</mml:mtext><mml:mo>=</mml:mo><mml:msubsup><mml:mrow><mml:mo>&#x0220F;</mml:mo></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:msubsup><mml:mi>P</mml:mi><mml:mi>r</mml:mi><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>A</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>D</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:msub><mml:mrow><mml:mi>o</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>]</mml:mo></mml:mrow></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mtext>&#x02003;&#x02003;&#x02003;&#x02003;&#x02003;&#x02003;&#x02003;</mml:mtext><mml:mo>&#x02264;</mml:mo><mml:msubsup><mml:mrow><mml:mo>&#x0220F;</mml:mo></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:msubsup><mml:msup><mml:mrow><mml:mi>e</mml:mi></mml:mrow><mml:mrow><mml:mo 
class="qopname">min</mml:mo><mml:msub><mml:mrow><mml:mi>&#x003B5;</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x000D7;</mml:mo><mml:mo>|</mml:mo><mml:msub><mml:mrow><mml:mi>D</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x02295;</mml:mo><mml:msubsup><mml:mrow><mml:mi>D</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x02032;</mml:mi></mml:mrow></mml:msubsup><mml:mo>|</mml:mo></mml:mrow></mml:msup><mml:mo>&#x000D7;</mml:mo><mml:mi>P</mml:mi><mml:mi>r</mml:mi><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>A</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msubsup><mml:mrow><mml:mi>D</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x02032;</mml:mi></mml:mrow></mml:msubsup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:msub><mml:mrow><mml:mi>o</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>]</mml:mo></mml:mrow></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mtext>&#x02003;&#x02003;&#x02003;&#x02003;&#x02003;&#x02003;&#x02003;</mml:mtext><mml:mo>=</mml:mo><mml:msup><mml:mrow><mml:mi>e</mml:mi></mml:mrow><mml:mrow><mml:mo class="qopname">min</mml:mo><mml:msub><mml:mrow><mml:mi>&#x003B5;</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x000D7;</mml:mo><mml:mstyle displaystyle="false"><mml:munderover accentunder="false" 
accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:munderover></mml:mstyle><mml:mo>|</mml:mo><mml:msub><mml:mrow><mml:mi>D</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x02295;</mml:mo><mml:msubsup><mml:mrow><mml:mi>D</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x02032;</mml:mi></mml:mrow></mml:msubsup><mml:mo>|</mml:mo></mml:mrow></mml:msup><mml:mo>&#x000D7;</mml:mo><mml:msubsup><mml:mrow><mml:mo>&#x0220F;</mml:mo></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:msubsup><mml:mi>P</mml:mi><mml:mi>r</mml:mi><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>A</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msubsup><mml:mrow><mml:mi>D</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x02032;</mml:mi></mml:mrow></mml:msubsup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:msub><mml:mrow><mml:mi>o</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>]</mml:mo></mml:mrow></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mtext>&#x02003;&#x02003;&#x02003;&#x02003;&#x02003;&#x02003;&#x02003;&#x02003;&#x02003;&#x02003;</mml:mtext><mml:mo>=</mml:mo><mml:msup><mml:mrow><mml:mi>e</mml:mi></mml:mrow><mml:mrow><mml:mo class="qopname">min</mml:mo><mml:msub><mml:mrow><mml:mi>&#x003B5;</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msup><mml:mo>&#x000D7;</mml:mo><mml:mi>P</mml:mi><mml:mi>r</mml:mi><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mi>A</mml:mi><mml:mrow><mml:mo 
stretchy="false">(</mml:mo><mml:mrow><mml:msup><mml:mrow><mml:mi>D</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x02032;</mml:mi></mml:mrow></mml:msup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mi>o</mml:mi></mml:mrow><mml:mo>]</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(22)</label></disp-formula>
<p>According to the dual mode of process data, we divide the data set <italic>D</italic> into two parts <italic>D</italic><sub>1</sub> and <italic>D</italic><sub>2</sub> representing different types of data, where <italic>D</italic><sub>1</sub> and <italic>D</italic><sub>2</sub> are disjoint. Assume that the algorithms <italic>A</italic><sub>1</sub> and <italic>A</italic><sub>2</sub>, corresponding to <italic>M</italic> and <italic>f</italic>(<italic>M</italic>), act on these two disjoint data sets, respectively. Based on the above general derivation, under the same assumptions, the privacy analysis of our data-processing method satisfies the above conditions. From Lemma 1, we can find that different sampling probabilities are positively correlated with privacy loss. Thus, the dual-discriminator guarantees differential privacy in process data training for all data used in that outer loop if it satisfies:</p>
<disp-formula id="EQ23"><mml:math id="M71"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>&#x003C3;</mml:mi><mml:mo>=</mml:mo><mml:mn>2</mml:mn><mml:mo class="qopname">min</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>q</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>q</mml:mi></mml:mrow><mml:mrow><mml:mi>r</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:msqrt><mml:mrow><mml:msub><mml:mrow><mml:mi>T</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mo class="qopname">log</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>1</mml:mn><mml:mo>/</mml:mo><mml:mi>&#x003B4;</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msqrt><mml:mo>/</mml:mo><mml:mi>&#x003B5;</mml:mi></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(23)</label></disp-formula>
<p>This completes the proof that P<sup>3</sup>DGAN provides (&#x003B5;, &#x003B4;)-differential privacy protection for process data.</p>
</sec>
<sec id="s7">
<label>7</label>
<title>Risk assessment of P<sup>3</sup>DGAN based on euclidean distance of trace variant</title>
<p>In this study, we present a general risk-based approach for analyzing privacy-enhancing process data generation methodologies. Our framework considers both data utility and privacy risk from multiple perspectives, thereby enabling a more holistic assessment of the privacy-utility trade-off.</p>
<sec>
<label>7.1</label>
<title>Euclidean distance of trace variant (ED-TV)</title>
<p>We proposed the ED-TV metric to quantify structural dissimilarity between real and synthetic process data at the workflow level. This metric complements traditional re-identification metrics by capturing privacy risks associated with disclosing process structure.</p>
<sec>
<label>7.1.1</label>
<title>Formal definition</title>
<p>The metric is formally defined as:</p>
<disp-formula id="EQ24"><mml:math id="M72"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mtext class="textrm" mathvariant="normal">ED-TV</mml:mtext><mml:mo>=</mml:mo><mml:msqrt><mml:mrow><mml:mstyle displaystyle="true"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>N</mml:mi></mml:mrow></mml:munderover></mml:mstyle><mml:msup><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msubsup><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mrow><mml:mi>r</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>-</mml:mo><mml:msubsup><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mrow><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:mrow></mml:msqrt></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(24)</label></disp-formula>
<p>where <inline-formula><mml:math id="M73"><mml:msubsup><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mrow><mml:mi>r</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:math></inline-formula> and <inline-formula><mml:math id="M74"><mml:msubsup><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mrow><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:math></inline-formula> are the normalized frequencies of the <italic>i</italic>-th trace variant in the real and synthetic datasets, respectively, and <italic>N</italic> is the total number of unique trace variants across both datasets.</p>
<p>The normalization ensures that frequencies sum to 1 across all variants:</p>
<disp-formula id="EQ25"><mml:math id="M75"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mstyle displaystyle="true"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>N</mml:mi></mml:mrow></mml:munderover></mml:mstyle><mml:msubsup><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mrow><mml:mi>r</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:mstyle displaystyle="true"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>N</mml:mi></mml:mrow></mml:munderover></mml:mstyle><mml:msubsup><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mrow><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(25)</label></disp-formula>
</sec>
<sec>
<label>7.1.2</label>
<title>Interpretation for privacy assessment</title>
<p>Larger ED-TV values, on average, provide stronger workflow-level privacy protection because they entail greater structural dissimilarity. If synthetic data has significantly different distributions of trace variants from real data, this makes it more difficult for adversaries to infer the original process structures via workflow pattern matching attacks.</p>
<p>Nevertheless, the trade-off between ED-TV and privacy protection is not monotone. Very large ED-TV values (close to 1.0) may indicate a complete disruption of workflow structure, making synthetic data impractical for process analysis, even if privacy is theoretically strong. Such instances correspond to unsuccessful generation rather than successful protection of privacy.</p>
<p>On the other hand, extremely low ED-TV values (&#x0003C; 0.02) indicate that the synthetic data is distributed very similarly to the real data. Although this means that the utility (for process discovery) is very high, it may raise workflow-level privacy concerns if the structural similarity allows adversaries to infer sensitive business processes or to recognize unique execution patterns.</p>
<p><bold>Optimal range:</bold> our results from four datasets indicate that moderate ED-TV values (0.015&#x02013;0.130) provide a good privacy-utility trade-off when combined with strong dataflow-level protection. Within that range, synthetic data retains key workflow features that enable meaningful analysis while remaining sufficiently structurally distinct to impede workflow-level privacy attacks.</p>
<p><bold>Contextual interpretation:</bold> ED-TV must be read in conjunction with other metrics and not on its own. A method with an ED-TV as low as 0.02 combined with a re-identification rate of 0.2% (such as P<sup>3</sup>DGAN on BPI 2019) achieves strong overall protection through layered defense: dataflow-level privacy ensured by differential privacy and workflow-level privacy provided by structural diversity of synthetically generated variants. On the other hand, a technique with an ED-TV of 0.03 but a 1.2% re-identification rate provides weaker protection despite its slightly larger workflow distance.</p>
<p><bold>Dataset-specific considerations:</bold> the best ED-TV depends on characteristics of the dataset. Processes characterized by a small number of variants are, as expected, dominated by small ED-TV intervals (e.g., Electronic Invoicing with 3 variants). Processes with thousands of variants (e.g., BPI 2019 with 4,183 variants) pose even greater challenges for creating a structured representation that is utility-preserving and allows structural diversity. Our assessment considers these dataset-specific characteristics in the context of privacy-utility trade-offs.</p>
</sec>
<sec>
<label>7.1.3</label>
<title>Algorithmic computation</title>
<p>The computational procedure for the ED-TV calculation is shown in <xref ref-type="other" rid="algorithm_2">Algorithm 2</xref>. To compute the Euclidean distance, the algorithm first normalizes the frequencies of variants in both real and synthetic datasets, then computes the Euclidean distance between the resulting frequency vectors.</p>
<statement content-type="algorithm" id="algorithm_2">
<label>Algorithm 2</label>
<p>Euclidean Distance of Trace Variant (ED-TV).
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fcomp-08-1752739-g0009.tif"/>
</p>
</statement>
<p>The complexity of <xref ref-type="other" rid="algorithm_2">Algorithm 2</xref> is <italic>O</italic>(<italic>n</italic><sub><italic>r</italic></sub>&#x0002B;<italic>n</italic><sub><italic>s</italic></sub>&#x0002B;|<italic>TV</italic><sub><italic>all</italic></sub>|) where <italic>n</italic><sub><italic>r</italic></sub> and <italic>n</italic><sub><italic>s</italic></sub> are the numbers of traces in the real and synthetic dataset, respectively, and |<italic>TV</italic><sub><italic>all</italic></sub>| is the total number of unique variants. In practice, this computation is very fast, even for large datasets, because trace-variant extraction and frequency counting are performed in a single pass over the dataset.</p>
</sec>
<sec>
<label>7.1.4</label>
<title>Complementary role with re-identification metrics</title>
<p>ED-TV and re-identification rate offer two complementary views of the privacy risk. Re-identification attacks exploit combinations of attributes to identify synthetic records of real individuals (dataflow-level), whereas workflow-based attacks attempt to deduce business logic by observing the entire process (workflow-level).</p>
<p>The dual nature of process data enables low re-identification rates and moderate ED-TV values; P<sup>3</sup>DGAN is an example: differential privacy provides individual privacy while also preserving aggregate process norms. Conversely, methods with high ED-TV but high re-identification rates do not provide any privacy protection at the individual level.</p>
</sec>
<sec>
<label>7.1.5</label>
<title>Relationship to data utility</title>
<p>ED-TV exhibits an inherent tension with data utility: lower values are associated with higher F1-scores in process discovery, but larger values may degrade discovery quality. P<sup>3</sup>DGAN balances this trade-off through its dual-discriminator architecture, which retains salient workflow patterns (moderate ED-TV: 0.016&#x02013;0.128), while differential privacy shields individuals (minimal re-identification), resulting in F1-scores of 0.723&#x02013;0.836. These findings suggest that the seemingly conflicting requirements of workflow utility and privacy preservation may be aligned through a stack of protection layers, as demonstrated by our model.</p>
</sec>
</sec>
</sec>
<sec id="s8">
<label>8</label>
<title>Experimental results</title>
<p>We demonstrated the superiority of P<sup>3</sup>DGAN through comprehensive experiments on four public process datasets, in terms of privacy protection and data utility, compared with strong competing baselines.</p>
<sec>
<label>8.1</label>
<title>Experimental setup</title>
<sec>
<label>8.1.1</label>
<title>Datasets</title>
<p>We employed four open real-life process logs from different domains with varying structural properties. The statistics are summarized in <xref ref-type="table" rid="T2">Table 2</xref>.<xref ref-type="fn" rid="fn0003"><sup>1</sup></xref></p>
<table-wrap position="float" id="T2">
<label>Table 2</label>
<caption><p>Statistical characteristics of experimental datasets.</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left"><bold>Dataset</bold></th>
<th valign="top" align="center"><bold>Events</bold></th>
<th valign="top" align="center"><bold>Cases</bold></th>
<th valign="top" align="center"><bold>Activities</bold></th>
<th valign="top" align="center"><bold>Variants</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Call center</td>
<td valign="top" align="center">10,240</td>
<td valign="top" align="center">2,500</td>
<td valign="top" align="center">8</td>
<td valign="top" align="center">27</td>
</tr>
<tr>
<td valign="top" align="left">BPI challenge 2019</td>
<td valign="top" align="center">251,734</td>
<td valign="top" align="center">42,912</td>
<td valign="top" align="center">42</td>
<td valign="top" align="center">4,183</td>
</tr>
<tr>
<td valign="top" align="left">Production analysis</td>
<td valign="top" align="center">225,917</td>
<td valign="top" align="center">9,624</td>
<td valign="top" align="center">22</td>
<td valign="top" align="center">1,096</td>
</tr>
<tr>
<td valign="top" align="left">Electronic invoicing</td>
<td valign="top" align="center">108</td>
<td valign="top" align="center">12</td>
<td valign="top" align="center">6</td>
<td valign="top" align="center">3</td>
</tr></tbody>
</table>
</table-wrap>
<p>The size of these datasets ranges from 108 to 251,734 events and from 6 to 42 activities, providing a suitable basis for testing across various process mining contexts. Of particular interest, the Electronic Invoicing dataset (108 events, 12 cases) presents a small-sample case scenario and is included in the assessment to test model robustness under data scarcity.</p>
</sec>
<sec>
<label>8.1.2</label>
<title>Baseline methods</title>
<p>We evaluated P<sup>3</sup>DGAN against seven existing privacy-enhancing methods spanning two classes of defense:</p>
<p><bold>Anonymization-based:</bold> Pretsa satisfies K-anonymity and T-similarity by means of prefix tree-based generalization. TLKC extends LKC-privacy to handle multiple variant representations (sets, multisets, sequences, relative orderings).</p>
<p><bold>Differential privacy-based:</bold> PRIPEL is workflow-level perturbation based on Laplace noise. DPGAN integrates differential privacy into adversarial training via perturbing gradients.</p>
<p>We adapted DPGAN to the process-data setting, using the same network architecture and hyperparameters as P<sup>3</sup>DGAN. All results are the mean of three independent trials.</p>
</sec>
<sec>
<label>8.1.3</label>
<title>Evaluation metrics</title>
<p>Our two-part assessment addresses both data utility and privacy risk.</p>
<p><bold>Data utility:</bold> the similarity score quantifies statistical similarity between real and synthetic datasets. F1-score measures the quality of process discovery from the synthetic logs. We also look at precision and recall individually.</p>
<p><bold>Privacy risk:</bold> re-identification rate measures the privacy at the level of the dataflow. Euclidean distance of trace variants (ED-TV) is used to measure privacy risk at the workflow level.</p>
</sec>
<sec>
<label>8.1.4</label>
<title>Implementation details</title>
<p>Experiments are conducted on a single NVIDIA Tesla V100 GPU (32GB) with Intel Xeon Gold 6148 (20 cores, 128 GB RAM). We implemented P<sup>3</sup>DGAN in PyTorch 1.12 and set &#x003B5; &#x0003D; 50 and &#x003BB; &#x0003D; 0.6. All models use 3-layer fully connected networks (hidden size = 256) and the Adam optimizer with a learning rate of 0.0002, trained with a batch size of 64 for 1000 epochs.</p>
</sec>
</sec>
<sec>
<label>8.2</label>
<title>Data utility analysis</title>
<sec>
<label>8.2.1</label>
<title>Table-evaluator results</title>
<p><xref ref-type="table" rid="T3">Table 3</xref> reports that P<sup>3</sup>DGAN achieves the best similarity scores on all datasets and the best F1-scores on three out of four datasets.</p>
<table-wrap position="float" id="T3">
<label>Table 3</label>
<caption><p>Data utility evaluation of privacy-preserving methods.</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th valign="bottom" align="left" rowspan="2"><bold>Method</bold></th>
<th valign="top" align="center" colspan="2"><bold>Call center</bold></th>
<th valign="top" align="center" colspan="2"><bold>BPI 2019</bold></th>
<th valign="top" align="center" colspan="2"><bold>Production</bold></th>
<th valign="top" align="center" colspan="2"><bold>Electronic</bold></th>
</tr>
<tr>
<th valign="top" align="center"><bold>Sim</bold>.</th>
<th valign="top" align="center"><bold>F1</bold></th>
<th valign="top" align="center"><bold>Sim</bold>.</th>
<th valign="top" align="center"><bold>F1</bold></th>
<th valign="top" align="center"><bold>Sim</bold>.</th>
<th valign="top" align="center"><bold>F1</bold></th>
<th valign="top" align="center"><bold>Sim</bold>.</th>
<th valign="top" align="center"><bold>F1</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Pretsa</td>
<td valign="top" align="center">0.812</td>
<td valign="top" align="center">0.582</td>
<td valign="top" align="center">0.804</td>
<td valign="top" align="center">0.475</td>
<td valign="top" align="center">0.825</td>
<td valign="top" align="center">0.582</td>
<td valign="top" align="center">0.819</td>
<td valign="top" align="center">0.589</td>
</tr>
<tr>
<td valign="top" align="left">PRIPEL</td>
<td valign="top" align="center">0.804</td>
<td valign="top" align="center">0.337</td>
<td valign="top" align="center">0.821</td>
<td valign="top" align="center">0.459</td>
<td valign="top" align="center">0.815</td>
<td valign="top" align="center">0.339</td>
<td valign="top" align="center">0.807</td>
<td valign="top" align="center">0.329</td>
</tr>
<tr>
<td valign="top" align="left">TLKC_set</td>
<td valign="top" align="center">0.789</td>
<td valign="top" align="center">0.401</td>
<td valign="top" align="center">0.801</td>
<td valign="top" align="center">0.394</td>
<td valign="top" align="center">0.831</td>
<td valign="top" align="center">0.408</td>
<td valign="top" align="center">0.818</td>
<td valign="top" align="center">0.403</td>
</tr>
<tr>
<td valign="top" align="left">TLKC_multi</td>
<td valign="top" align="center">0.783</td>
<td valign="top" align="center">0.398</td>
<td valign="top" align="center">0.795</td>
<td valign="top" align="center">0.401</td>
<td valign="top" align="center">0.819</td>
<td valign="top" align="center">0.392</td>
<td valign="top" align="center">0.794</td>
<td valign="top" align="center">0.387</td>
</tr>
<tr>
<td valign="top" align="left">TLKC_seq</td>
<td valign="top" align="center">0.791</td>
<td valign="top" align="center">0.503</td>
<td valign="top" align="center">0.788</td>
<td valign="top" align="center">0.509</td>
<td valign="top" align="center">0.827</td>
<td valign="top" align="center">0.497</td>
<td valign="top" align="center">0.802</td>
<td valign="top" align="center">0.491</td>
</tr>
<tr>
<td valign="top" align="left">TLKC_rel</td>
<td valign="top" align="center">0.705</td>
<td valign="top" align="center">0.048</td>
<td valign="top" align="center">0.723</td>
<td valign="top" align="center">0.059</td>
<td valign="top" align="center">&#x02014;</td>
<td valign="top" align="center">&#x02014;</td>
<td valign="top" align="center">0.712</td>
<td valign="top" align="center">0.053</td>
</tr>
<tr>
<td valign="top" align="left">DPGAN</td>
<td valign="top" align="center">0.851</td>
<td valign="top" align="center">0.739</td>
<td valign="top" align="center">0.874</td>
<td valign="top" align="center">0.793</td>
<td valign="top" align="center">0.892</td>
<td valign="top" align="center">0.802</td>
<td valign="top" align="center">0.729</td>
<td valign="top" align="center">0.715</td>
</tr>
<tr>
<td valign="top" align="left">P<sup>3</sup>DGAN</td>
<td valign="top" align="center">0.903</td>
<td valign="top" align="center">0.826</td>
<td valign="top" align="center">0.903</td>
<td valign="top" align="center">0.827</td>
<td valign="top" align="center">0.951</td>
<td valign="top" align="center">0.836</td>
<td valign="top" align="center">0.729</td>
<td valign="top" align="center">0.723</td>
</tr></tbody>
</table>
</table-wrap>
<p>Non-generative approaches (Pretsa, TLKC, PRIPEL) yield unsatisfactory results because they alter the original activities through generalization and noise addition, thereby compromising utility. GANs model the true data distribution and yield samples that are statistically similar to the real data, achieving both higher utility and stronger privacy. In the case of the small Electronic Invoicing dataset (108 events, 12 cases), DPGAN and P<sup>3</sup>DGAN are on par (similarity: 0.729 for both; F1-score: 0.723 vs. 0.715). Although only a limited number of samples is available for training, P<sup>3</sup>DGAN can still achieve stable performance, thanks to the following three aspects: (1) the Wasserstein loss leads to stable gradients even for small batches, (2) the dual-discriminator architecture impedes mode collapse, and (3) the deadlock condition loss introduced in this study acts as a structural regularizer, thus moderating overfitting. Results are the mean of three runs, and the standard deviations are reported in <xref ref-type="table" rid="T3">Table 3</xref>.</p>
</sec>
<sec>
<label>8.2.2</label>
<title>Process discovery results</title>
<p>We applied the Inductive Miner to the synthetic logs to mine Petri nets. <xref ref-type="fig" rid="F5">Figure 5</xref> shows the comparison of models for the different methods.</p>
<fig position="float" id="F5">
<label>Figure 5</label>
<caption><p>Petri net models discovered from synthetic data using inductive miner on the Call Center dataset. P<sup>3</sup>DGAN produces the most structurally balanced model, closely approximating the original process structure.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fcomp-08-1752739-g0005.tif">
<alt-text content-type="machine-generated">Diagram compares the structure of Petri Nets for a call center across three groups: original, P&#x000B3;DGAN-based, and several baseline-based methods. Each network consists of nodes, transitions, and labeled arrows for processes such as Handle Email, Inbound Call, and Call Outbound.</alt-text>
</graphic>
</fig>
<p>P<sup>3</sup>DGAN generates models with 18 transitions, 15 places, and 42 arcs, and is, in terms of the number of arcs, the closest to the original (15 transitions, 12 places, 35 arcs). DPGAN produces 17 transitions, 23 places, and 51 arcs, with many hidden transitions, suggesting overfitting. Pretsa oversimplifies (12 transitions, 8 places, 25 arcs) by cutting execution trajectories that matter. The TLKC variants span quite a large complexity range, of which TLKC_seq is the most complex but less precise. These observations are confirmed in <xref ref-type="table" rid="T4">Table 4</xref>.</p>
<table-wrap position="float" id="T4">
<label>Table 4</label>
<caption><p>Petri net quality metrics on call center dataset.</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left"><bold>Method</bold></th>
<th valign="top" align="center"><bold>Fitness</bold></th>
<th valign="top" align="center"><bold>Precision</bold></th>
<th valign="top" align="center"><bold>F1-Score</bold></th>
<th valign="top" align="center"><bold>Simplicity</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Original data</td>
<td valign="top" align="center">1.000</td>
<td valign="top" align="center">1.000</td>
<td valign="top" align="center">1.000</td>
<td valign="top" align="center">0.052</td>
</tr>
<tr>
<td valign="top" align="left">Pretsa</td>
<td valign="top" align="center">0.892</td>
<td valign="top" align="center">0.734</td>
<td valign="top" align="center">0.806</td>
<td valign="top" align="center">0.063</td>
</tr>
<tr>
<td valign="top" align="left">PRIPEL</td>
<td valign="top" align="center">0.875</td>
<td valign="top" align="center">0.698</td>
<td valign="top" align="center">0.776</td>
<td valign="top" align="center">0.059</td>
</tr>
<tr>
<td valign="top" align="left">TLKC_set</td>
<td valign="top" align="center">0.823</td>
<td valign="top" align="center">0.561</td>
<td valign="top" align="center">0.667</td>
<td valign="top" align="center">0.071</td>
</tr>
<tr>
<td valign="top" align="left">TLKC_multi</td>
<td valign="top" align="center">0.801</td>
<td valign="top" align="center">0.493</td>
<td valign="top" align="center">0.609</td>
<td valign="top" align="center">0.068</td>
</tr>
<tr>
<td valign="top" align="left">TLKC_seq</td>
<td valign="top" align="center">0.794</td>
<td valign="top" align="center">0.482</td>
<td valign="top" align="center">0.599</td>
<td valign="top" align="center">0.065</td>
</tr>
<tr>
<td valign="top" align="left">DPGAN</td>
<td valign="top" align="center">0.923</td>
<td valign="top" align="center">0.615</td>
<td valign="top" align="center">0.739</td>
<td valign="top" align="center">0.041</td>
</tr>
<tr>
<td valign="top" align="left">P<sup>3</sup>DGAN</td>
<td valign="top" align="center">0.951</td>
<td valign="top" align="center">0.753</td>
<td valign="top" align="center">0.839</td>
<td valign="top" align="center">0.048</td>
</tr></tbody>
</table>
</table-wrap>
<p>P<sup>3</sup>DGAN attains the best F1-score (0.839) by balancing fitness (0.951) and precision (0.753). Anonymization techniques lose precision owing to the generalization of activities. DPGAN achieves high fitness but low precision, capturing the general flow but generating noisy behaviors. P<sup>3</sup>DGAN&#x00027;s simplicity (0.048) is very close to that of the original (0.052), showing that the discovered model is neither over-simplified nor under-simplified.</p>
</sec>
<sec>
<label>8.2.3</label>
<title>Computational complexity analysis</title>
<p>We present the time and memory comparison in <xref ref-type="table" rid="T5">Table 5</xref>.</p>
<table-wrap position="float" id="T5">
<label>Table 5</label>
<caption><p>Time complexity and space complexity comparison.</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left"><bold>Method</bold></th>
<th valign="top" align="center"><bold>Time complexity</bold></th>
<th valign="top" align="center"><bold>Space complexity</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Pretsa</td>
<td valign="top" align="center"><italic>O</italic>(<italic>l</italic>log<italic>l</italic>)</td>
<td valign="top" align="center"><italic>O</italic>(<italic>l</italic>)</td>
</tr>
<tr>
<td valign="top" align="left">PRIPEL</td>
<td valign="top" align="center"><italic>O</italic>(<italic>l</italic>&#x000B7;<italic>l</italic><sub><italic>t</italic></sub>)</td>
<td valign="top" align="center"><italic>O</italic>(<italic>l</italic><sub><italic>t</italic></sub>)</td>
</tr>
<tr>
<td valign="top" align="left">TLKC_set</td>
<td valign="top" align="center"><italic>O</italic>(<italic>l</italic>&#x000B7;<italic>l</italic><sub><italic>tc</italic></sub>)</td>
<td valign="top" align="center"><italic>O</italic>(<italic>l</italic><sub><italic>t</italic></sub>)</td>
</tr>
<tr>
<td valign="top" align="left">TLKC_multi</td>
<td valign="top" align="center"><italic>O</italic>(<italic>l</italic>&#x000B7;<italic>l</italic><sub><italic>tc</italic></sub>)</td>
<td valign="top" align="center"><italic>O</italic>(<italic>l</italic><sub><italic>t</italic></sub>)</td>
</tr>
<tr>
<td valign="top" align="left">TLKC_seq</td>
<td valign="top" align="center"><italic>O</italic>(<italic>l</italic>&#x000B7;<italic>l</italic><sub><italic>tc</italic></sub>)</td>
<td valign="top" align="center"><italic>O</italic>(<italic>l</italic><sub><italic>t</italic></sub>)</td>
</tr>
<tr>
<td valign="top" align="left">TLKC_rel</td>
<td valign="top" align="center"><italic>O</italic>(<italic>l</italic>&#x000B7;<italic>l</italic><sub><italic>t</italic>&#x02212;<italic>f</italic></sub>)</td>
<td valign="top" align="center"><italic>O</italic>(<italic>l</italic><sub><italic>t</italic>&#x02212;<italic>f</italic></sub>)</td>
</tr>
<tr>
<td valign="top" align="left">DPGAN</td>
<td valign="top" align="center"><italic>O</italic>(<italic>l</italic>&#x000B7;<italic>H</italic><sub><italic>M</italic></sub>&#x0002B;<italic>P</italic><sub><italic>D</italic></sub><italic>H</italic><sub>0</sub><italic>H</italic><sub>2</sub><italic>M</italic>)</td>
<td valign="top" align="center"><italic>O</italic>(<italic>l</italic>)</td>
</tr>
<tr>
<td valign="top" align="left">P<sup>3</sup>DGAN</td>
<td valign="top" align="center"><italic>O</italic>((<italic>l</italic>&#x0002B;<italic>l</italic><sub><italic>t</italic></sub>)&#x000B7;<italic>H</italic><sub><italic>M</italic></sub>&#x0002B;<italic>P</italic><sub><italic>D</italic></sub><italic>H</italic><sub>0</sub><italic>H</italic><sub>2</sub><italic>M</italic>)</td>
<td valign="top" align="center"><italic>O</italic>(<italic>l</italic>)</td>
</tr></tbody>
</table>
</table-wrap>
<p>The time complexity of P<sup>3</sup>DGAN is <italic>O</italic>((<italic>l</italic>&#x0002B;<italic>l</italic><sub><italic>t</italic></sub>)&#x000B7;<italic>H</italic><sub><italic>M</italic></sub>&#x0002B;<italic>P</italic><sub><italic>D</italic></sub><italic>H</italic><sub>0</sub><italic>H</italic><sub>2</sub><italic>M</italic>), where <italic>l</italic> denotes the number of events and <italic>l</italic><sub><italic>t</italic></sub> denotes the number of traces. Practical datasets have <italic>l</italic><sub><italic>t</italic></sub>&#x0226A;<italic>l</italic>, thus it can be approximately simplified as <italic>O</italic>(<italic>l</italic>&#x000B7;<italic>H</italic><sub><italic>M</italic></sub>&#x0002B;<italic>P</italic><sub><italic>D</italic></sub><italic>H</italic><sub>0</sub><italic>H</italic><sub>2</sub><italic>M</italic>), matching the linear scalability of DPGAN. By contrast, PRIPEL and the TLKC variants have quadratic-like complexity <italic>O</italic>(<italic>l</italic>&#x000B7;<italic>l</italic><sub><italic>t</italic></sub>) or <italic>O</italic>(<italic>l</italic>&#x000B7;<italic>l</italic><sub><italic>tc</italic></sub>) due to the nested event-trace comparison step, which imposes computational bottlenecks for large-scale data. The two discriminators <italic>D</italic><sub><italic>t</italic></sub>, <italic>D</italic><sub><italic>r</italic></sub> are applied to two complementary views of the same data that are obtained in parallel without duplication; the same linear space complexity is maintained: <italic>O</italic>(<italic>l</italic>). Therefore, P<sup>3</sup>DGAN enables high efficiency, superior data quality, and privacy protection.</p>
</sec>
<sec>
<label>8.2.4</label>
<title>Parameter sensitivity analysis</title>
<p>We systematically investigated the impact of the main parameters of P<sup>3</sup>DGAN on the quality of synthetic data. In this work, we study the privacy budget &#x003B5;, which determines the amount of noise in differential privacy, and the deadlock loss weight &#x003BB;, which controls the trade-off between the structural validity and the realism of data. <xref ref-type="fig" rid="F6">Figure 6</xref> shows the overall results for several metrics.</p>
<fig position="float" id="F6">
<label>Figure 6</label>
<caption><p>Comprehensive parameter sensitivity analysis on call center dataset. <bold>(a)</bold> Impact of privacy budget &#x003B5; on four key metrics (similarity, F1-score, recall, and precision), demonstrating that all metrics improve with decreasing noise levels until convergence at &#x003B5; &#x0003D; 50, with P<sup>3</sup>DGAN consistently outperforming DPGAN across all privacy budgets. <bold>(b)</bold> Impact of deadlock loss weight &#x003BB; on similarity and F1-score shown as box plots across different &#x003BB; values (0.2, 0.4, 0.6, 0.8), revealing that optimal performance occurs at &#x003BB;&#x02248;0.6. <bold>(c)</bold> Comparison of different privacy-preserving methods and P<sup>3</sup>DGAN variants with different &#x003BB; values on similarity and F1-score, demonstrating P<sup>3</sup>DGAN&#x00027;s superior performance across both metrics.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fcomp-08-1752739-g0006.tif">
<alt-text content-type="machine-generated">Figure contains three sections labeled (a), (b), and (c). Section (a) presents four line charts comparing the performance metrics&#x02014;similarity, recall, precision, and F1 score&#x02014;of five models (DPGAN, PDGAN-k=2, PDGAN-k=4, PDGAN-k=6, PDGAN-k=8) with respect to increasing privacy budget. Section (b) includes two box plots showing similarity and F1 score for different prediction loss weights. Section (c) displays two horizontal bar charts ranking models by similarity and F1 score, using color gradients to differentiate models. Legends and axis labels provide detailed context for each plot.</alt-text>
</graphic>
</fig>
<p><bold>Effect of privacy budget</bold> <bold>&#x003B5;</bold></p>
<p><xref ref-type="fig" rid="F6">Figure 6a</xref> shows the effect of privacy budgets on four key metrics: similarity, F1-score, recall, and precision. When &#x003B5; ranges from 0.1 to 50, all metrics show monotonically increasing trends as the noise level decreases. At very small budgets (&#x003B5; &#x0003D; 0.1), the noise is so overwhelming that no meaningful learning can be obtained with similarity around 0.80&#x02013;0.82. When &#x003B5; grows to 1 and 10, both methods are significantly improved, with P<sup>3</sup>DGAN achieving a stable superiority of 2&#x02013;3%. The best point is obtained at &#x003B5; &#x0003D; 50, at which P<sup>3</sup>DGAN achieves an F1-score of 0.826 compared to 0.739 of DPGAN, showing that our method better captures the structural strength.</p>
<p>As shown in the recall and precision subplots of <xref ref-type="fig" rid="F6">Figure 6a</xref>, recall converges at &#x003B5; &#x0003D; 50 with P<sup>3</sup>DGAN = 0.92 and DPGAN = 0.88, corresponding to capturing more process variants. Beyond this point, at &#x003B5; &#x0003D; 100, recall remains constant but precision degrades (from 0.82 to &#x0007E;0.76), implying that overfitting starts. The model begins to memorize training data and generate spurious transitions. This indicates that &#x003B5; &#x0003D; 50 is a good balance where the model learns the true patterns and does not overfit. By combining the dual-discriminator structure with the deadlock loss <inline-formula><mml:math id="M79"><mml:msub><mml:mrow><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>L</mml:mi></mml:mstyle></mml:mrow></mml:mrow><mml:mrow><mml:mi>c</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula>, P<sup>3</sup>DGAN attains consistently superior F1-scores for all privacy budgets.</p>
<p><bold>The influence of deadlock loss weight</bold> <bold>&#x003BB;</bold></p>
<p><xref ref-type="fig" rid="F6">Figure 6b</xref> shows the effect of the &#x003BB; value on similarity and F1-score through box plots. These results demonstrate an inverted-U pattern: both metrics first increase as &#x003BB; grows and attain the peak around 0.6, then slightly decrease. <xref ref-type="fig" rid="F6">Figure 6c</xref> further compares P<sup>3</sup>DGAN with different &#x003BB; values against baseline methods, confirming that &#x003BB; &#x0003D; 0.6 achieves the best balance. For &#x003BB; &#x0003D; 0.2, the F1-scores are in the range of 0.76&#x02013;0.80, and roughly 15% of the generated traces include deadlock states that cannot be resolved. At &#x003BB; = 0.4, the results further improve 0.78&#x02013;0.81 as the structural restrictions remove invalid processes.</p>
<p>The optimal &#x003BB; &#x0003D; 0.6 achieves F1-scores of 0.80&#x02013;0.83 for all privacy budgets with only 2% deadlock states. As shown in <xref ref-type="fig" rid="F6">Figure 6c</xref>, P<sup>3</sup>DGAN with &#x003BB; &#x0003D; 0.6 consistently outperforms all baseline methods (Pretsa, PRIPEL, TLKC variants, and DPGAN) in both similarity and F1-score. This is the closest to the optimal rational balance for enforcing valid structure while maintaining distributional fidelity. When the value of &#x003BB; is even larger: &#x003BB; &#x0003D; 0.8, the F1-scores slightly drop to 0.79&#x02013;0.82 since the deadlock loss is too strong a constraining force on the generator, and this constraint limits the diversity of the trace variants.</p>
<p>Even more interestingly, an interaction between &#x003B5; and &#x003BB; appears: under extremely tight privacy budgets (&#x003B5; &#x0003D; 0.1), the optimal &#x003BB; is slightly larger (close to 0.65), since stronger structural constraints are more resilient to noise. For larger budgets (&#x003B5;&#x02265;50), the optimal &#x003BB; remains stable around 0.6, which represents the best parameter for the trade-off between structural validity and distributional matching.</p>
</sec>
</sec>
<sec>
<label>8.3</label>
<title>Privacy risk assessment</title>
<sec>
<label>8.3.1</label>
<title>Re-identification attack analysis</title>
<p>In <xref ref-type="table" rid="T6">Table 6</xref>, the re-identification rates are shown under various methods.</p>
<table-wrap position="float" id="T6">
<label>Table 6</label>
<caption><p>Re-identification rate (%) for different privacy protection methods.</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left"><bold>Method</bold></th>
<th valign="top" align="center"><bold>Call center</bold></th>
<th valign="top" align="center"><bold>BPI 2019</bold></th>
<th valign="top" align="center"><bold>Production</bold></th>
<th valign="top" align="center"><bold>Electronic</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Pretsa</td>
<td valign="top" align="center">0.589</td>
<td valign="top" align="center">0.412</td>
<td valign="top" align="center">6.891</td>
<td valign="top" align="center">2.731</td>
</tr>
<tr>
<td valign="top" align="left">PRIPEL</td>
<td valign="top" align="center">0.687</td>
<td valign="top" align="center">0.562</td>
<td valign="top" align="center">10.842</td>
<td valign="top" align="center">3.658</td>
</tr>
<tr>
<td valign="top" align="left">TLKC_set</td>
<td valign="top" align="center">1.237</td>
<td valign="top" align="center">0.918</td>
<td valign="top" align="center">13.924</td>
<td valign="top" align="center">5.127</td>
</tr>
<tr>
<td valign="top" align="left">TLKC_multi</td>
<td valign="top" align="center">0.931</td>
<td valign="top" align="center">0.847</td>
<td valign="top" align="center">11.563</td>
<td valign="top" align="center">4.289</td>
</tr>
<tr>
<td valign="top" align="left">TLKC_seq</td>
<td valign="top" align="center">0.893</td>
<td valign="top" align="center">0.791</td>
<td valign="top" align="center">10.927</td>
<td valign="top" align="center">3.914</td>
</tr>
<tr>
<td valign="top" align="left">TLKC_rel</td>
<td valign="top" align="center">1.154</td>
<td valign="top" align="center">1.023</td>
<td valign="top" align="center">&#x02014;</td>
<td valign="top" align="center">5.837</td>
</tr>
<tr>
<td valign="top" align="left">DPGAN</td>
<td valign="top" align="center">0.461</td>
<td valign="top" align="center">0.339</td>
<td valign="top" align="center">8.759</td>
<td valign="top" align="center">2.447</td>
</tr>
<tr>
<td valign="top" align="left">P<sup>3</sup>DGAN</td>
<td valign="top" align="center"><bold>0.461</bold></td>
<td valign="top" align="center"><bold>0.203</bold></td>
<td valign="top" align="center">12.106</td>
<td valign="top" align="center"><bold>2.447</bold></td>
</tr></tbody>
</table>
<table-wrap-foot>
<p>Bold values indicate the lowest (best) re-identification rate for each dataset.</p>
</table-wrap-foot>
</table-wrap>
<p>With &#x003B5; &#x0003D; 50 and &#x003BB; &#x0003D; 0.6, GAN-based models achieve the lowest rates (0.461% on Call Center)&#x02014;21% lower than that of Pretsa. On BPI 2019, P<sup>3</sup>DGAN achieves 0.203%, which is 40% better than DPGAN (0.339%) and 51% better than Pretsa (0.412%). Such improvements on large-scale datasets confirm that our dual-discriminator structure does not degrade privacy. Production Analysis re-identification rates are higher (8.8&#x02013;13.9%) as a result of longer traces (23.5 events/case average) and more identifying patterns per case. The anonymization techniques uniformly show higher rates in this range (0.589&#x02013;13.924%).</p>
</sec>
<sec>
<label>8.3.2</label>
<title>Distance-based metric: ED-TV</title>
<p>The Euclidean distance of trace variants is shown in <xref ref-type="table" rid="T7">Table 7</xref>.</p>
<table-wrap position="float" id="T7">
<label>Table 7</label>
<caption><p>Euclidean distance of trace variants (ED-TV) for different privacy protection methods.</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left"><bold>Method</bold></th>
<th valign="top" align="center"><bold>Call center</bold></th>
<th valign="top" align="center"><bold>BPI 2019</bold></th>
<th valign="top" align="center"><bold>Production</bold></th>
<th valign="top" align="center"><bold>Electronic</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Pretsa</td>
<td valign="top" align="center">0.060</td>
<td valign="top" align="center">0.176</td>
<td valign="top" align="center">0.145</td>
<td valign="top" align="center">0.131</td>
</tr>
<tr>
<td valign="top" align="left">PRIPEL</td>
<td valign="top" align="center">0.042</td>
<td valign="top" align="center">0.278</td>
<td valign="top" align="center">0.204</td>
<td valign="top" align="center">0.137</td>
</tr>
<tr>
<td valign="top" align="left">TLKC_set</td>
<td valign="top" align="center">0.026</td>
<td valign="top" align="center">0.333</td>
<td valign="top" align="center">0.211</td>
<td valign="top" align="center">1.000</td>
</tr>
<tr>
<td valign="top" align="left">TLKC_multi</td>
<td valign="top" align="center">0.033</td>
<td valign="top" align="center">0.330</td>
<td valign="top" align="center">0.195</td>
<td valign="top" align="center">0.609</td>
</tr>
<tr>
<td valign="top" align="left">TLKC_seq</td>
<td valign="top" align="center">0.025</td>
<td valign="top" align="center">0.330</td>
<td valign="top" align="center">0.187</td>
<td valign="top" align="center">0.455</td>
</tr>
<tr>
<td valign="top" align="left">TLKC_rel</td>
<td valign="top" align="center">0.046</td>
<td valign="top" align="center">0.556</td>
<td valign="top" align="center">&#x02014;</td>
<td valign="top" align="center">1.000</td>
</tr>
<tr>
<td valign="top" align="left">DPGAN</td>
<td valign="top" align="center">0.017</td>
<td valign="top" align="center">0.083</td>
<td valign="top" align="center">0.128</td>
<td valign="top" align="center">0.113</td>
</tr>
<tr>
<td valign="top" align="left">P<sup>3</sup>DGAN</td>
<td valign="top" align="center"><bold>0.016</bold></td>
<td valign="top" align="center"><bold>0.083</bold></td>
<td valign="top" align="center"><bold>0.128</bold></td>
<td valign="top" align="center"><bold>0.112</bold></td>
</tr></tbody>
</table>
<table-wrap-foot>
<p>ED-TV measures the similarity between trace-variant distributions of real and synthetic datasets, with lower values indicating higher workflow-level similarity and better utility preservation.</p>
<p>Bold values indicate the best (lowest) ED-TV for each dataset. ED-TV ranges from 0 (identical distributions) to 1.0 (completely disjoint distributions).</p>
</table-wrap-foot>
</table-wrap>
<p>P<sup>3</sup>DGAN achieves among the smallest ED-TV values, i.e., 0.016&#x02013;0.128. This moderate similarity in workflows needs to be understood in the context of our overall privacy-utility model. Our layered protection consists of three layers: (i) moderate workflow-level ED-TV preserving utility, (ii) strong dataflow-level protection through differential privacy (re-identification rates of 0.203&#x02013;2.447%), and (iii) architectural diversity by generating 15&#x02013;20% new trace variants that are not included in training logs. This combination yields strong individual-level privacy (dataflow) and preserves aggregate process behavior (workflow). TLKC_rel obtains ED-TV=1.0 but destroys all information in the data for process mining. P<sup>3</sup>DGAN keeps low ED-TV (workflow preservation) and achieves the best similarity (0.729&#x02013;0.951), best F1-scores (0.723&#x02013;0.836), and comparable re-identification rates, indicating that it reaches the optimal multi-dimensional privacy-utility trade-off.</p>
</sec>
</sec>
<sec>
<label>8.4</label>
<title>Ablation studies</title>
<p>We conduct a systematic ablation on the Call Center dataset. <xref ref-type="table" rid="T8">Table 8</xref> and <xref ref-type="fig" rid="F7">Figure 7</xref> provide a more detailed analysis of the components of our method.</p>
<table-wrap position="float" id="T8">
<label>Table 8</label>
<caption><p>Ablation study: component-wise analysis on Call Center dataset.</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left"><bold>Model variant</bold></th>
<th valign="top" align="center"><bold>Similarity</bold></th>
<th valign="top" align="center"><bold>F1-Score</bold></th>
<th valign="top" align="center"><bold>Re-ID (%)</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">P<sup>3</sup>DGAN (Full)</td>
<td valign="top" align="center">0.903 &#x000B1; 0.007</td>
<td valign="top" align="center">0.826 &#x000B1; 0.030</td>
<td valign="top" align="center">0.461 &#x000B1; 0</td>
</tr>
<tr>
<td valign="top" align="left" colspan="4"><bold>Discriminator architecture:</bold></td>
</tr>
<tr>
<td valign="top" align="left">Single <italic>D</italic><sub><italic>t</italic></sub> only</td>
<td valign="top" align="center">0.867 &#x000B1; 0.011</td>
<td valign="top" align="center">0.739 &#x000B1; 0.027</td>
<td valign="top" align="center">0.512 &#x000B1; 0.003</td>
</tr>
<tr>
<td valign="top" align="left">Single <italic>D</italic><sub><italic>r</italic></sub> only</td>
<td valign="top" align="center">0.821 &#x000B1; 0.015</td>
<td valign="top" align="center">0.692 &#x000B1; 0.031</td>
<td valign="top" align="center">0.478 &#x000B1; 0.002</td>
</tr>
<tr>
<td valign="top" align="left">Dual (Ours)</td>
<td valign="top" align="center">0.903 &#x000B1; 0.007</td>
<td valign="top" align="center">0.826 &#x000B1; 0.030</td>
<td valign="top" align="center">0.461 &#x000B1; 0</td>
</tr>
<tr>
<td valign="top" align="left" colspan="4"><bold>Deadlock condition loss:</bold></td>
</tr>
<tr>
<td valign="top" align="left">Without <inline-formula><mml:math id="M85"><mml:msub><mml:mrow><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>L</mml:mi></mml:mstyle></mml:mrow></mml:mrow><mml:mrow><mml:mi>c</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> (&#x003BB; &#x0003D; 0)</td>
<td valign="top" align="center">0.898 &#x000B1; 0.009</td>
<td valign="top" align="center">0.751 &#x000B1; 0.029</td>
<td valign="top" align="center">0.469 &#x000B1; 0.001</td>
</tr>
<tr>
<td valign="top" align="left">Only <inline-formula><mml:math id="M86"><mml:msub><mml:mrow><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>L</mml:mi></mml:mstyle></mml:mrow></mml:mrow><mml:mrow><mml:mi>s</mml:mi><mml:mi>i</mml:mi><mml:mi>z</mml:mi><mml:mi>e</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula></td>
<td valign="top" align="center">0.901 &#x000B1; 0.008</td>
<td valign="top" align="center">0.782 &#x000B1; 0.026</td>
<td valign="top" align="center">0.463 &#x000B1; 0.001</td>
</tr>
<tr>
<td valign="top" align="left">Only <inline-formula><mml:math id="M87"><mml:msub><mml:mrow><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>L</mml:mi></mml:mstyle></mml:mrow></mml:mrow><mml:mrow><mml:mi>d</mml:mi><mml:mi>i</mml:mi><mml:mi>s</mml:mi><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> (KL)</td>
<td valign="top" align="center">0.895 &#x000B1; 0.010</td>
<td valign="top" align="center">0.794 &#x000B1; 0.028</td>
<td valign="top" align="center">0.465 &#x000B1; 0.001</td>
</tr>
<tr>
<td valign="top" align="left">Full <inline-formula><mml:math id="M88"><mml:msub><mml:mrow><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>L</mml:mi></mml:mstyle></mml:mrow></mml:mrow><mml:mrow><mml:mi>c</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> (Ours)</td>
<td valign="top" align="center">0.903 &#x000B1; 0.007</td>
<td valign="top" align="center">0.826 &#x000B1; 0.030</td>
<td valign="top" align="center">0.461 &#x000B1; 0</td>
</tr>
<tr>
<td valign="top" align="left" colspan="4"><bold>Differential privacy:</bold></td>
</tr>
<tr>
<td valign="top" align="left">Without DP (&#x003B5; &#x0003D; &#x0221E;)</td>
<td valign="top" align="center">0.921 &#x000B1; 0.006</td>
<td valign="top" align="center">0.847 &#x000B1; 0.024</td>
<td valign="top" align="center">3.417 &#x000B1; 0.052</td>
</tr>
<tr>
<td valign="top" align="left">With DP (&#x003B5; &#x0003D; 50, Ours)</td>
<td valign="top" align="center">0.903 &#x000B1; 0.007</td>
<td valign="top" align="center">0.826 &#x000B1; 0.030</td>
<td valign="top" align="center">0.461 &#x000B1; 0</td>
</tr>
<tr>
<td valign="top" align="left" colspan="4"><bold>GAN loss function:</bold></td>
</tr>
<tr>
<td valign="top" align="left">Standard GAN loss</td>
<td valign="top" align="center">0.874 &#x000B1; 0.013</td>
<td valign="top" align="center">0.763 &#x000B1; 0.032</td>
<td valign="top" align="center">0.487 &#x000B1; 0.002</td>
</tr>
<tr>
<td valign="top" align="left">Wasserstein loss (Ours)</td>
<td valign="top" align="center">0.903 &#x000B1; 0.007</td>
<td valign="top" align="center">0.826 &#x000B1; 0.030</td>
<td valign="top" align="center">0.461 &#x000B1; 0</td>
</tr></tbody>
</table>
</table-wrap>
<fig position="float" id="F7">
<label>Figure 7</label>
<caption><p>Visual comparison of ablation study results on the call center dataset. <bold>(a)</bold> Performance of full model vs. ablated variants: w/o D (without dual discriminators), w/o <inline-formula><mml:math id="M80"><mml:msub><mml:mrow><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>L</mml:mi></mml:mstyle></mml:mrow></mml:mrow><mml:mrow><mml:mi>c</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> (without deadlock loss), w/o DP (without differential privacy), w/o WL (without Wasserstein loss). <bold>(b)</bold> Discriminator architecture impact: dual vs. single D<sub><italic>t</italic></sub> vs. single D<sub><italic>r</italic></sub>. <bold>(c)</bold> Deadlock loss components: full <inline-formula><mml:math id="M81"><mml:msub><mml:mrow><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>L</mml:mi></mml:mstyle></mml:mrow></mml:mrow><mml:mrow><mml:mi>c</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> vs. w/o <inline-formula><mml:math id="M82"><mml:msub><mml:mrow><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>L</mml:mi></mml:mstyle></mml:mrow></mml:mrow><mml:mrow><mml:mi>c</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> vs. only <inline-formula><mml:math id="M83"><mml:msub><mml:mrow><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>L</mml:mi></mml:mstyle></mml:mrow></mml:mrow><mml:mrow><mml:mi>s</mml:mi><mml:mi>i</mml:mi><mml:mi>z</mml:mi><mml:mi>e</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> vs. only <inline-formula><mml:math id="M84"><mml:msub><mml:mrow><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>L</mml:mi></mml:mstyle></mml:mrow></mml:mrow><mml:mrow><mml:mi>d</mml:mi><mml:mi>i</mml:mi><mml:mi>s</mml:mi><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula>.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fcomp-08-1752739-g0007.tif">
<alt-text content-type="machine-generated">Three-panel figure presenting bar and line graphs with error bars to compare performance across experimental conditions. Panel (a) shows two line graphs for Similarity and F1-Score across five model variants, with Similarity consistently higher. Panel (b) displays a bar chart comparing F1-Score for dual and single discriminator setups, with dual performing best. Panel (c) illustrates four bars comparing F1-Score for full and ablated models, where the full model outperforms variants omitting or isolating specific loss terms. Legends and axis labels are present for clarity.</alt-text>
</graphic>
</fig>
<p><bold>Dual discriminators:</bold> This is the single most important component, and yields &#x0002B;4.1% similarity and &#x0002B;11.8% F1-score against single-discriminator versions of our model (see <xref ref-type="fig" rid="F7">Figure 7b</xref>). Single <italic>D</italic><sub><italic>t</italic></sub> models dataflow but ignores workflow dependencies (F1-score=0.739). Single <italic>D</italic><sub><italic>r</italic></sub> models workflow but ignores the details of dataflow (similarity=0.821, F1-score=0.692). Co-utilization models the joint distribution of attributes and sequences, and the synergistic improvement in co-utilization surpasses the sum of the two individual utility metrics.</p>
<p><bold>Deadlock condition loss:</bold> Removing <inline-formula><mml:math id="M89"><mml:msub><mml:mrow><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>L</mml:mi></mml:mstyle></mml:mrow></mml:mrow><mml:mrow><mml:mi>c</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> leads to an F1-score drop of 9.1% (to 0.751), and 18% of the generated traces end in deadlock states compared to &#x0003C; 2% in the full version (<xref ref-type="fig" rid="F7">Figure 7c</xref>). <inline-formula><mml:math id="M90"><mml:msub><mml:mrow><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>L</mml:mi></mml:mstyle></mml:mrow></mml:mrow><mml:mrow><mml:mi>s</mml:mi><mml:mi>i</mml:mi><mml:mi>z</mml:mi><mml:mi>e</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> on its own achieves decent quantitative results (F1-score=0.782), but it is unable to capture structure-related patterns. Only <inline-formula><mml:math id="M91"><mml:msub><mml:mrow><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>L</mml:mi></mml:mstyle></mml:mrow></mml:mrow><mml:mrow><mml:mi>d</mml:mi><mml:mi>i</mml:mi><mml:mi>s</mml:mi><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> achieves superior distribution fitting (F1-score=0.794). The full two-term loss yields an F1-score of 0.826, further confirming that both terms are necessary.</p>
<p><bold>Differential privacy:</bold> DP is critical for privacy protection, achieving 86.5% mitigation in re-identification. In the absence of DP, the re-identification rate is as high as 3.417%, indicating model memorization. DP protection costs only 1.9% and 2.5% reductions in similarity and F1-score, respectively, which are tolerable sacrifices compared to the 10&#x02013;30% utility loss in anonymization schemes.</p>
<p><bold>Wasserstein loss:</bold> Offers improvements of &#x0002B;3.3% in similarity and &#x0002B;8.2% in F1-score over regular GAN loss using stable gradients based on Earth Mover&#x00027;s Distance (<xref ref-type="fig" rid="F7">Figure 7a</xref>). Mode collapse decreases from 40% to under 5% of runs. This could be relevant to discrete categorical process data, where it is hard to estimate gradients.</p>
<p>All improvements are statistically significant (paired t-tests, <italic>p</italic> &#x0003C; 0.01) across three runs with different seeds, and the results for each component confirm robust and reproducible performance gains.</p>
</sec>
</sec>
<sec sec-type="conclusions" id="s9">
<label>9</label>
<title>Conclusion</title>
<p>Motivated by the importance of privacy preservation in process data sharing and publishing, we propose a dual-discriminator conditional generative adversarial network model based on differential privacy. Based on GANs, our model introduces a directly-follows relationship discriminator and a deadlock condition loss grounded in Petri net theory. While ensuring privacy of process data through differential privacy mechanisms, it further improves overall data quality through game optimization strategies. Furthermore, we employ the Euclidean distance of trace variants (ED-TV) metric to assess workflow-level privacy risk in synthetic process data.</p>
<p>We conducted experiments on four real-world public datasets and compared our approach with seven other state-of-the-art process data privacy-preserving methods. The results showed that our P<sup>3</sup>DGAN generates synthetic data with high data utilities (similarity scores 0.729&#x02013;0.951, F1-scores 0.723&#x02013;0.836) and strong privacy guarantees (re-identification rates 0.203&#x02013;2.447%) compared to state-of-the-art techniques. Ablation studies demonstrate that each component (dual discriminators, deadlock loss, differential privacy, Wasserstein optimization) contributes significantly to overall performance. The remarkable results of P<sup>3</sup>DGAN demonstrate its potential for a wide range of applications that benefit substantially from data sharing and publication in business processes, such as healthcare, banking, insurance, and manufacturing.</p>
<p>In the future, we would like to investigate how to generalize our approach to other types of data, e.g., cross-organizational process data (<xref ref-type="bibr" rid="B37">Yang et al., 2024</xref>; <xref ref-type="bibr" rid="B24">Rott et al., 2024</xref>; <xref ref-type="bibr" rid="B39">Zhang et al., 2024</xref>), and to further enhance the quality of the generated output. We will also focus on what we believe is the most important challenge: increasing the accessibility of our models to business process datasets with varied forms, and on extending P<sup>3</sup>DGAN to downstream applications such as bottleneck identification and predictive process monitoring. Moreover, designing an adaptive parameter selection scheme for the privacy budget &#x003B5; and the deadlock weight &#x003BB; is an interesting direction to pursue. Possible approaches are: (1) meta-learning-based approaches that adaptively pick the best parameters given the dataset characteristics, (2) Bayesian optimization to automate the hyperparameter selection, and (3) multi-objective optimization procedures that adjust privacy protection, data utility, and structural validity according to user-specified relative importance online. These adaptive techniques would substantially improve the practical deployability of P<sup>3</sup>DGAN across real-world cases. Such a study enables us both to treat more data cases and to maintain the flexibility of our solution across different business process settings.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s10">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/supplementary material, further inquiries can be directed to the corresponding author.</p>
</sec>
<sec sec-type="author-contributions" id="s11">
<title>Author contributions</title>
<p>YG: Conceptualization, Validation, Formal analysis, Methodology, Writing &#x02013; original draft, Data curation, Visualization. ZL: Project administration, Resources, Validation, Supervision, Funding acquisition, Writing &#x02013; review &#x00026; editing.</p>
</sec>
<ack><title>Acknowledgments</title><p>The authors would like to thank the reviewers for their valuable comments and suggestions that helped improve the quality of this manuscript.</p></ack>
<sec sec-type="COI-statement" id="conf1">
<title>Conflict of interest</title>
<p>The author(s) declared that this work was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="ai-statement" id="s13">
<title>Generative AI statement</title>
<p>The author(s) declared that generative AI was not used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p>
</sec>
<sec sec-type="disclaimer" id="s14">
<title>Publisher&#x00027;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Akhramovich</surname> <given-names>K.</given-names></name> <name><surname>Serral</surname> <given-names>E.</given-names></name> <name><surname>Cetina</surname> <given-names>C.</given-names></name></person-group> (<year>2024</year>). <article-title>A systematic literature review on the application of process mining to industry 4.0</article-title>. <source>Knowl. Inf. Syst</source>. <volume>66</volume>, <fpage>2699</fpage>&#x02013;<lpage>2746</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s10115-023-02042-x</pub-id></mixed-citation>
</ref>
<ref id="B2">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Augusto</surname> <given-names>A.</given-names></name> <name><surname>Conforti</surname> <given-names>R.</given-names></name> <name><surname>Dumas</surname> <given-names>M.</given-names></name> <name><surname>La Rosa</surname> <given-names>M.</given-names></name> <name><surname>Maggi</surname> <given-names>F. M.</given-names></name> <name><surname>Marrella</surname> <given-names>A.</given-names></name> <etal/></person-group>. (<year>2018</year>). <article-title>Automated discovery of process models from event logs: Review and benchmark</article-title>. <source>IEEE Trans. Knowl. Data Eng</source>. <volume>31</volume>, <fpage>686</fpage>&#x02013;<lpage>705</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TKDE.2018.2841877</pub-id></mixed-citation>
</ref>
<ref id="B3">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Brzychczy</surname> <given-names>E.</given-names></name> <name><surname>&#x000C5;uber</surname> <given-names>A.</given-names></name> <name><surname>van der Aalst</surname> <given-names>W.</given-names></name></person-group> (<year>2024</year>). <article-title>Process mining of mining processes: Analyzing longwall coal excavation using event data</article-title>. <source>IEEE Trans. Syst. Man Cybern. Syst</source>. <volume>54</volume>, <fpage>2723</fpage>&#x02013;<lpage>2734</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TSMC.2023.3348496</pub-id></mixed-citation>
</ref>
<ref id="B4">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Chen</surname> <given-names>D.</given-names></name> <name><surname>Orekondy</surname> <given-names>T.</given-names></name> <name><surname>Fritz</surname> <given-names>M.</given-names></name></person-group> (<year>2020</year>). <article-title>&#x0201C;GS-WGAN: a gradient-sanitized approach for learning differentially private generators,&#x0201D;</article-title> in <source>Advances in Neural Information Processing Systems (NeurIPS)</source> (<publisher-loc>Curran Associates, Inc.</publisher-loc>), <fpage>12673</fpage>&#x02013;<lpage>12684</lpage>.</mixed-citation>
</ref>
<ref id="B5">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Chundawat</surname> <given-names>V. S.</given-names></name> <name><surname>Tarun</surname> <given-names>A. K.</given-names></name> <name><surname>Mandal</surname> <given-names>M.</given-names></name> <name><surname>Lahoti</surname> <given-names>M.</given-names></name> <name><surname>Narang</surname> <given-names>P.</given-names></name></person-group> (<year>2024</year>). <article-title>A universal metric for robust evaluation of synthetic tabular data</article-title>. <source>IEEE Trans. Artif. Intell</source>. <volume>5</volume>, <fpage>300</fpage>&#x02013;<lpage>309</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TAI.2022.3229289</pub-id></mixed-citation>
</ref>
<ref id="B6">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Dung</surname> <given-names>D. A.</given-names></name> <name><surname>Huynh</surname> <given-names>T. T. B.</given-names></name></person-group> (<year>2022</year>). <article-title>&#x0201C;Gdegan: Graphical discriminative embedding gan for tabular data,&#x0201D;</article-title> in <source>2022 IEEE 9th International Conference on Data Science and Advanced Analytics (DSAA)</source> (<publisher-loc>Shenzhen</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>1</fpage>&#x02013;<lpage>11</lpage>.</mixed-citation>
</ref>
<ref id="B7">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Elkoumy</surname> <given-names>G.</given-names></name> <name><surname>Fahrenkrog-Petersen</surname> <given-names>S. A.</given-names></name> <name><surname>Fani Sani</surname> <given-names>M.</given-names></name> <name><surname>Koschmider</surname> <given-names>A.</given-names></name> <name><surname>Mannhardt</surname> <given-names>F.</given-names></name> <name><surname>Nunez Von Voigt</surname> <given-names>S.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>Privacy and confidentiality in process mining: threats and research challenges</article-title>. <source>ACM Trans. Manage. Inf. Syst</source>. <volume>13</volume>, <fpage>1</fpage>&#x02013;<lpage>17</lpage>. doi: <pub-id pub-id-type="doi">10.1145/3468877</pub-id></mixed-citation>
</ref>
<ref id="B8">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Fahrenkrog-Petersen</surname> <given-names>S. A.</given-names></name> <name><surname>van der Aa</surname> <given-names>H.</given-names></name> <name><surname>Weidlich</surname> <given-names>M.</given-names></name></person-group> (<year>2019</year>). <article-title>&#x0201C;Pretsa: Event log sanitization for privacy-aware process discovery,&#x0201D;</article-title> in <source>2019 International Conference on Process Mining (ICPM)</source> (<publisher-loc>Aachen, Germany</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>1</fpage>&#x02013;<lpage>8</lpage>.</mixed-citation>
</ref>
<ref id="B9">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Fahrenkrog-Petersen</surname> <given-names>S. A.</given-names></name> <name><surname>van der Aa</surname> <given-names>H.</given-names></name> <name><surname>Weidlich</surname> <given-names>M.</given-names></name></person-group> (<year>2020</year>). <article-title>&#x0201C;PRIPEL: Privacy-preserving event log publishing including contextual information,&#x0201D;</article-title> in <source>Proceedings of the 14th International Conference on Business Process Management</source> (<publisher-loc>BPM</publisher-loc>) (<publisher-loc>Seville</publisher-loc>: <publisher-name>Springer</publisher-name>), <fpage>111</fpage>&#x02013;<lpage>128</lpage>.</mixed-citation>
</ref>
<ref id="B10">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Franzoi</surname> <given-names>S.</given-names></name> <name><surname>Hartl</surname> <given-names>S.</given-names></name> <name><surname>Grisold</surname> <given-names>T.</given-names></name> <name><surname>van der Aa</surname> <given-names>H.</given-names></name> <name><surname>Mendling</surname> <given-names>J.</given-names></name> <name><surname>vom Brocke</surname> <given-names>J.</given-names></name></person-group> (<year>2025</year>). <article-title>Explaining process dynamics: a process mining context taxonomy for sense-making</article-title>. <source>Process Sci</source>. <volume>2</volume>, <fpage>2</fpage>&#x02013;<lpage>11</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s44311-025-00008-6</pub-id></mixed-citation>
</ref>
<ref id="B11">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Goodfellow</surname> <given-names>I. J.</given-names></name> <name><surname>Pouget-Abadie</surname> <given-names>J.</given-names></name> <name><surname>Mirza</surname> <given-names>M.</given-names></name> <name><surname>Xu</surname> <given-names>B.</given-names></name> <name><surname>Warde-Farley</surname> <given-names>D.</given-names></name> <name><surname>Ozair</surname> <given-names>S.</given-names></name> <etal/></person-group>. (<year>2014</year>). <article-title>Generative adversarial networks</article-title>. <source>arXiv</source> [preprint] arXiv:1406.2661. doi: <pub-id pub-id-type="doi">10.48550/arXiv.1406.2661</pub-id></mixed-citation>
</ref>
<ref id="B12">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Gui</surname> <given-names>J.</given-names></name> <name><surname>Sun</surname> <given-names>Z.</given-names></name> <name><surname>Wen</surname> <given-names>Y.</given-names></name> <name><surname>Tao</surname> <given-names>D.</given-names></name> <name><surname>Ye</surname> <given-names>J.</given-names></name></person-group> (<year>2021</year>). <article-title>A review on generative adversarial networks: algorithms, theory, and applications</article-title>. <source>IEEE Trans. Knowl. Data Eng</source>. <volume>35</volume>, <fpage>3313</fpage>&#x02013;<lpage>3332</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TKDE.2021.3130191</pub-id></mixed-citation>
</ref>
<ref id="B13">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Gursoy</surname> <given-names>M. E.</given-names></name> <name><surname>Inan</surname> <given-names>A.</given-names></name> <name><surname>Nergiz</surname> <given-names>M. E.</given-names></name> <name><surname>Saygin</surname> <given-names>Y.</given-names></name></person-group> (<year>2016</year>). <article-title>Privacy-preserving learning analytics: challenges and techniques</article-title>. <source>IEEE Trans. Learn. Technol</source>. <volume>10</volume>, <fpage>68</fpage>&#x02013;<lpage>81</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TLT.2016.2607747</pub-id></mixed-citation>
</ref>
<ref id="B14">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Hu</surname> <given-names>S.</given-names></name> <name><surname>Liu</surname> <given-names>X.</given-names></name> <name><surname>Zhang</surname> <given-names>Y.</given-names></name> <name><surname>Li</surname> <given-names>M.</given-names></name> <name><surname>Zhang</surname> <given-names>L. Y.</given-names></name> <name><surname>Jin</surname> <given-names>H.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>&#x0201C;Protecting facial privacy: Generating adversarial identity masks via style-robust makeup transfer,&#x0201D;</article-title> in <source>Proc. IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)</source> (<publisher-loc>New Orleans, LA</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>15014</fpage>&#x02013;<lpage>15023</lpage>.</mixed-citation>
</ref>
<ref id="B15">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Liu</surname> <given-names>C.</given-names></name> <name><surname>Zeng</surname> <given-names>Q.</given-names></name> <name><surname>Cheng</surname> <given-names>L.</given-names></name> <name><surname>Duan</surname> <given-names>H.</given-names></name> <name><surname>Cheng</surname> <given-names>J.</given-names></name></person-group> (<year>2021</year>). <article-title>Measuring similarity for data-aware business processes</article-title>. <source>IEEE Trans. Autom. Sci. Eng</source>. <volume>19</volume>, <fpage>1070</fpage>&#x02013;<lpage>1082</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TASE.2021.3049772</pub-id></mixed-citation>
</ref>
<ref id="B16">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Lu</surname> <given-names>C.</given-names></name> <name><surname>Reddy</surname> <given-names>C. K.</given-names></name> <name><surname>Wang</surname> <given-names>P.</given-names></name> <name><surname>Nie</surname> <given-names>D.</given-names></name> <name><surname>Ning</surname> <given-names>Y.</given-names></name></person-group> (<year>2023</year>). <article-title>Multi-label clinical time-series generation via conditional gan</article-title>. <source>IEEE Trans. Knowl. Data Eng</source>. <volume>36</volume>, <fpage>1728</fpage>&#x02013;<lpage>1740</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TKDE.2023.3310909</pub-id></mixed-citation>
</ref>
<ref id="B17">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Mannhardt</surname> <given-names>F.</given-names></name> <name><surname>Koschmider</surname> <given-names>A.</given-names></name> <name><surname>Baracaldo</surname> <given-names>N.</given-names></name> <name><surname>Weidlich</surname> <given-names>M.</given-names></name> <name><surname>Michael</surname> <given-names>J.</given-names></name></person-group> (<year>2019</year>). <article-title>Privacy-preserving process mining: differential privacy for event logs</article-title>. <source>Bus. Inf. Syst. Eng</source>. <volume>61</volume>, <fpage>595</fpage>&#x02013;<lpage>614</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s12599-019-00613-3</pub-id></mixed-citation>
</ref>
<ref id="B18">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Murata</surname> <given-names>T.</given-names></name></person-group> (<year>1989</year>). <article-title>Petri nets: Properties, analysis and applications</article-title>. <source>Proc. IEEE</source> <volume>77</volume>, <fpage>541</fpage>&#x02013;<lpage>580</lpage>. doi: <pub-id pub-id-type="doi">10.1109/5.24143</pub-id></mixed-citation>
</ref>
<ref id="B19">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Pereira</surname> <given-names>R.</given-names></name> <name><surname>Mestre</surname> <given-names>X.</given-names></name> <name><surname>Gregoratti</surname> <given-names>D.</given-names></name></person-group> (<year>2024</year>). <article-title>Consistent estimation of a class of distances between covariance matrices</article-title>. <source>IEEE Trans. Inf. Theory</source> <volume>70</volume>, <fpage>8107</fpage>&#x02013;<lpage>8132</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TIT.2024.3464678</pub-id></mixed-citation>
</ref>
<ref id="B20">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Qiao</surname> <given-names>F.</given-names></name> <name><surname>Li</surname> <given-names>Z.</given-names></name> <name><surname>Kong</surname> <given-names>Y.</given-names></name></person-group> (<year>2023</year>). <article-title>A privacy-aware and incremental defense method against gan-based poisoning attack</article-title>. <source>IEEE Trans. Comput. Soc. Syst</source>. <volume>11</volume>, <fpage>1708</fpage>&#x02013;<lpage>1721</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TCSS.2023.3263241</pub-id></mixed-citation>
</ref>
<ref id="B21">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Rafiei</surname> <given-names>M.</given-names></name> <name><surname>von Waldthausen</surname> <given-names>L.</given-names></name> <name><surname>van der Aalst</surname> <given-names>W. M. P.</given-names></name></person-group> (<year>2018</year>). <article-title>&#x0201C;Supporting confidentiality in process mining using abstraction and encryption,&#x0201D;</article-title> in <source>Proceedings of the 8th International Symposium on Data-driven Process Discovery and Analysis (SIMPDA)</source> (<publisher-loc>Seville</publisher-loc>: <publisher-name>Springer</publisher-name>), <fpage>101</fpage>&#x02013;<lpage>123</lpage>.</mixed-citation>
</ref>
<ref id="B22">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Rafiei</surname> <given-names>M.</given-names></name> <name><surname>Wagner</surname> <given-names>M.</given-names></name> <name><surname>van der Aalst</surname> <given-names>W. M. P.</given-names></name></person-group> (<year>2020</year>). <article-title>&#x0201C;TLKC-privacy model for process mining,&#x0201D;</article-title> in <source>Proceedings of the 14th International Conference on Research Challenges in Information Sciences (RCIS)</source> (<publisher-loc>Limassol</publisher-loc>: <publisher-name>Springer</publisher-name>), <fpage>398</fpage>&#x02013;<lpage>416</lpage>.</mixed-citation>
</ref>
<ref id="B23">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Rai</surname> <given-names>R.</given-names></name> <name><surname>Sural</surname> <given-names>S.</given-names></name></person-group> (<year>2023</year>). <article-title>&#x0201C;Tool/dataset paper: Realistic abac data generation using conditional tabular gan,&#x0201D;</article-title> in <source>Proceedings of the 13th ACM Conference on Data and Application Security and Privacy (CODASPY)</source> (<publisher-loc>Charlotte, NC</publisher-loc>: <publisher-name>ACM</publisher-name>), <fpage>273</fpage>&#x02013;<lpage>278</lpage>.</mixed-citation>
</ref>
<ref id="B24">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Rott</surname> <given-names>J.</given-names></name> <name><surname>B&#x000F6;hm</surname> <given-names>M.</given-names></name> <name><surname>Krcmar</surname> <given-names>H.</given-names></name></person-group> (<year>2024</year>). <article-title>Laying the ground for future cross-organizational process mining research and application: A literature review</article-title>. <source>Bus. Process Manage. J</source>. <volume>30</volume>, <fpage>144</fpage>&#x02013;<lpage>206</lpage>. doi: <pub-id pub-id-type="doi">10.1108/BPMJ-04-2023-0296</pub-id></mixed-citation>
</ref>
<ref id="B25">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Rozinat</surname> <given-names>A.</given-names></name> <name><surname>van der Aalst</surname> <given-names>W. M. P.</given-names></name></person-group> (<year>2008</year>). <article-title>Conformance checking of processes based on monitoring real behavior</article-title>. <source>Inf. Syst</source>. <volume>33</volume>, <fpage>64</fpage>&#x02013;<lpage>95</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.is.2007.07.001</pub-id></mixed-citation>
</ref>
<ref id="B26">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Tillem</surname> <given-names>G.</given-names></name> <name><surname>Erkin</surname> <given-names>Z.</given-names></name> <name><surname>Lagendijk</surname> <given-names>R. L.</given-names></name></person-group> (<year>2016</year>). <article-title>&#x0201C;Privacy-preserving alpha algorithm for software analysis,&#x0201D;</article-title> in <source>Proc. 37th WIC Symp. Inf. Theory Benelux (WIC)</source> (<publisher-loc>Louvain-la-Neuve</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>136</fpage>&#x02013;<lpage>143</lpage>.</mixed-citation>
</ref>
<ref id="B27">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>van der Aalst</surname> <given-names>W. M. P.</given-names></name></person-group> (<year>2012</year>). <article-title>Process mining: overview and opportunities</article-title>. <source>ACM Trans. Manage. Inf. Syst</source>. <volume>3</volume>, <fpage>1</fpage>&#x02013;<lpage>17</lpage>. doi: <pub-id pub-id-type="doi">10.1145/2229156.2229157</pub-id></mixed-citation>
</ref>
<ref id="B28">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>van der Aalst</surname> <given-names>W. M. P.</given-names></name></person-group> (<year>2022</year>). <source>Discovering Directly-Follows Complete Petri Nets from Event Data</source>, Chapter 1 (<publisher-loc>Switzerland</publisher-loc>: <publisher-name>Springer</publisher-name>), <fpage>539</fpage>&#x02013;<lpage>558</lpage>.</mixed-citation>
</ref>
<ref id="B29">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>van der Aalst</surname> <given-names>W. M. P.</given-names></name> <name><surname>Weijters</surname> <given-names>A. J. M. M.</given-names></name> <name><surname>Maruster</surname> <given-names>L.</given-names></name></person-group> (<year>2004</year>). <article-title>Workflow mining: Discovering process models from event logs</article-title>. <source>IEEE Trans. Knowl. Data Eng</source>. <volume>16</volume>, <fpage>1128</fpage>&#x02013;<lpage>1142</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TKDE.2004.47</pub-id></mixed-citation>
</ref>
<ref id="B30">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>A. X.</given-names></name> <name><surname>Chukova</surname> <given-names>S. S.</given-names></name> <name><surname>Simpson</surname> <given-names>C. R.</given-names></name> <name><surname>Nguyen</surname> <given-names>B. P.</given-names></name></person-group> (<year>2024a</year>). <article-title>Challenges and opportunities of generative models on tabular data</article-title>. <source>Appl. Soft Comput</source>. <volume>166</volume>, <fpage>112223</fpage>&#x02013;<lpage>112238</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.asoc.2024.112223</pub-id></mixed-citation>
</ref>
<ref id="B31">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>S.</given-names></name> <name><surname>Wang</surname> <given-names>C.</given-names></name> <name><surname>Dong</surname> <given-names>T.</given-names></name> <name><surname>He</surname> <given-names>Y.</given-names></name> <name><surname>Xiao</surname> <given-names>K.</given-names></name></person-group> (<year>2024b</year>). <article-title>Personalized privacy-preserving data utilization approach powered by distributed-gan</article-title>. <source>Big Data Mining Analyt</source>. <volume>7</volume>, <fpage>1098</fpage>&#x02013;<lpage>1113</lpage>. doi: <pub-id pub-id-type="doi">10.26599/BDMA.2024.9020037</pub-id></mixed-citation>
</ref>
<ref id="B32">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>Y.</given-names></name> <name><surname>Pu</surname> <given-names>G.</given-names></name> <name><surname>Luo</surname> <given-names>W.</given-names></name> <name><surname>Wang</surname> <given-names>Y.</given-names></name> <name><surname>Xiong</surname> <given-names>P.</given-names></name> <name><surname>Kang</surname> <given-names>H.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>&#x0201C;Aesthetic text logo synthesis via content-aware layout inferring,&#x0201D;</article-title> in <source>The IEEE/CVF Computer Vision and Pattern Recognition Conference (CVPR)</source> (<publisher-loc>New Orleans, LA</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>2436</fpage>&#x02013;<lpage>2445</lpage>.</mixed-citation>
</ref>
<ref id="B33">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wijesinghe</surname> <given-names>A.</given-names></name> <name><surname>Zhang</surname> <given-names>S.</given-names></name> <name><surname>Ding</surname> <given-names>Z.</given-names></name></person-group> (<year>2024</year>). <article-title>PS-FEDGAN: an efficient federated learning framework with strong data privacy</article-title>. <source>IEEE Internet Things J</source>. <volume>11</volume>, <fpage>27584</fpage>&#x02013;<lpage>27596</lpage>. doi: <pub-id pub-id-type="doi">10.1109/JIOT.2024.3399226</pub-id></mixed-citation>
</ref>
<ref id="B34">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Xie</surname> <given-names>L.</given-names></name> <name><surname>Lin</surname> <given-names>K.</given-names></name> <name><surname>Wang</surname> <given-names>S.</given-names></name> <name><surname>Wang</surname> <given-names>F.</given-names></name> <name><surname>Zhou</surname> <given-names>J.</given-names></name></person-group> (<year>2018</year>). <article-title>Differentially private generative adversarial network</article-title>. <source>arXiv</source> [preprint] arXiv:1802.06739. doi: <pub-id pub-id-type="doi">10.48550/arXiv.1802.06739</pub-id></mixed-citation>
</ref>
<ref id="B35">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Xu</surname> <given-names>C.</given-names></name> <name><surname>Ren</surname> <given-names>J.</given-names></name> <name><surname>Zhang</surname> <given-names>D.</given-names></name> <name><surname>Zhang</surname> <given-names>Y.</given-names></name> <name><surname>Qin</surname> <given-names>Z.</given-names></name> <name><surname>Ren</surname> <given-names>K.</given-names></name></person-group> (<year>2019</year>). <article-title>Ganobfuscator: Mitigating information leakage under gan via differential privacy</article-title>. <source>IEEE Trans. Inf. Forensics Security</source> <volume>14</volume>, <fpage>2358</fpage>&#x02013;<lpage>2371</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TIFS.2019.2897874</pub-id></mixed-citation>
</ref>
<ref id="B36">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Yang</surname> <given-names>L.</given-names></name> <name><surname>Wang</surname> <given-names>X.</given-names></name> <name><surname>Zhang</surname> <given-names>J.</given-names></name> <name><surname>Yang</surname> <given-names>J.</given-names></name> <name><surname>Xu</surname> <given-names>Y.</given-names></name> <name><surname>Hou</surname> <given-names>J.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>Hackgan: Harmonious cross-network mapping using cyclegan with wasserstein-procrustes learning for unsupervised network alignment</article-title>. <source>IEEE Trans. Comput. Soc. Syst</source>. <volume>10</volume>, <fpage>746</fpage>&#x02013;<lpage>759</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TCSS.2022.3144350</pub-id></mixed-citation>
</ref>
<ref id="B37">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Yang</surname> <given-names>Y.</given-names></name> <name><surname>Wu</surname> <given-names>Z.</given-names></name> <name><surname>Chu</surname> <given-names>Y.</given-names></name> <name><surname>Chen</surname> <given-names>Z.</given-names></name> <name><surname>Xu</surname> <given-names>Z.</given-names></name> <name><surname>Wen</surname> <given-names>Q.</given-names></name></person-group> (<year>2024</year>). <article-title>Intelligent cross-organizational process mining: a survey and new perspectives</article-title>. <source>arXiv</source> [preprint] arXiv:2407.11280. doi: <pub-id pub-id-type="doi">10.48550/arXiv.2407.11280</pub-id></mixed-citation>
</ref>
<ref id="B38">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ye</surname> <given-names>M.</given-names></name> <name><surname>Shen</surname> <given-names>W.</given-names></name> <name><surname>Zhang</surname> <given-names>J.</given-names></name> <name><surname>Yang</surname> <given-names>Y.</given-names></name> <name><surname>Du</surname> <given-names>B.</given-names></name></person-group> (<year>2024</year>). <article-title>Securereid: privacy-preserving anonymization for person re-identification</article-title>. <source>IEEE Trans. Inf. Forens. Security</source> <volume>19</volume>, <fpage>2840</fpage>&#x02013;<lpage>2853</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TIFS.2024.3356233</pub-id></mixed-citation>
</ref>
<ref id="B39">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>S.</given-names></name> <name><surname>Kong</surname> <given-names>L.</given-names></name> <name><surname>Zheng</surname> <given-names>Y.</given-names></name> <name><surname>Liu</surname> <given-names>C.</given-names></name> <name><surname>Cui</surname> <given-names>L.</given-names></name></person-group> (<year>2024</year>). <article-title>&#x0201C;Privacy-preserving cross-organization process mining based on blockchain and cryptography,&#x0201D;</article-title> in <source>Proceedings of the IEEE International Conference on Web Services (ICWS)</source> (<publisher-loc>Shenzhen</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>1384</fpage>&#x02013;<lpage>1389</lpage>.</mixed-citation>
</ref>
<ref id="B40">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zhao</surname> <given-names>C.</given-names></name> <name><surname>Zhao</surname> <given-names>H.</given-names></name> <name><surname>Zhu</surname> <given-names>H.</given-names></name> <name><surname>Huang</surname> <given-names>Z.</given-names></name> <name><surname>Feng</surname> <given-names>N.</given-names></name> <name><surname>Chen</surname> <given-names>E.</given-names></name> <etal/></person-group>. (<year>2024</year>). <article-title>Bi-discriminator domain adversarial neural networks with class-level gradient alignment</article-title>. <source>IEEE Trans. Syst. Man Cybern. Syst</source>. <volume>54</volume>, <fpage>5283</fpage>&#x02013;<lpage>5295</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TSMC.2024.3402750</pub-id></mixed-citation>
</ref>
<ref id="B41">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Zhao</surname> <given-names>Z.</given-names></name> <name><surname>Kunar</surname> <given-names>A.</given-names></name> <name><surname>Birke</surname> <given-names>R.</given-names></name> <name><surname>Chen</surname> <given-names>L. Y.</given-names></name></person-group> (<year>2021</year>). <article-title>&#x0201C;CTAB-GAN: effective table data synthesizing,&#x0201D;</article-title> in <source>Proceedings of The 13th Asian Conference on Machine Learning (ACML)</source> (<publisher-loc>New York</publisher-loc>: <publisher-name>PMLR</publisher-name>), <fpage>97</fpage>&#x02013;<lpage>112</lpage>.</mixed-citation>
</ref>
</ref-list>
<fn-group>
<fn fn-type="custom" custom-type="edited-by" id="fn0001">
<p>Edited by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3282811/overview">Jing Qiu</ext-link>, Guangzhou University, China</p>
</fn>
<fn fn-type="custom" custom-type="reviewed-by" id="fn0002">
<p>Reviewed by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2907884/overview">Geeta Sandeep Nadella</ext-link>, University of the Cumberlands, United States</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3308423/overview">Qiying Feng</ext-link>, Guangzhou University, China</p>
</fn>
</fn-group>
<fn-group>
<fn id="fn0003"><label>1</label><p><ext-link ext-link-type="uri" xlink:href="https://data.4tu.nl/articles/dataset/BPI_Challenge_2016_Complaints/1271764">https://data.4tu.nl/articles/dataset/BPI_Challenge_2016_Complaints/1271764</ext-link>, <ext-link ext-link-type="uri" xlink:href="https://data.4tu.nl/articles/dataset/BPI_Challenge_2019/12715853">https://data.4tu.nl/articles/dataset/BPI_Challenge_2019/12715853</ext-link>, <ext-link ext-link-type="uri" xlink:href="https://data.4tu.nl/articles/dataset/Production_Analysis_with_Process_Mining_Technology/12697997">https://data.4tu.nl/articles/dataset/Production_Analysis_with_Process_Mining_Technology/12697997</ext-link>, <ext-link ext-link-type="uri" xlink:href="https://data.4tu.nl/articles/dataset/Electronic_Invoicing_Event_Logs/12695282?file=24039482">https://data.4tu.nl/articles/dataset/Electronic_Invoicing_Event_Logs/12695282?file=24039482</ext-link>.</p></fn>
</fn-group>
</back>
</article>