<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" article-type="systematic-review" dtd-version="1.3" xml:lang="en">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Artif. Intell.</journal-id>
<journal-title-group>
<journal-title>Frontiers in Artificial Intelligence</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Artif. Intell.</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">2624-8212</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/frai.2026.1737790</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Systematic Review</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>An AI-driven conceptual framework for detecting fake news and deepfake content: a systematic review</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Moyo</surname>
<given-names>Bravlyn VC.</given-names>
</name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/3262177"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Tuyikeze</surname>
<given-names>Tite</given-names>
</name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/3330708"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Matsebula</surname>
<given-names>Fezile</given-names>
</name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/3276427"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Obagbuwa</surname>
<given-names>Ibidun C.</given-names>
</name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x002A;</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role>
</contrib>
</contrib-group>
<aff id="aff1"><label>1</label><institution>Department of Computer Science &#x0026; Information Technology, Faculty of Natural and Applied Sciences, Sol Plaatje University</institution>, <city>Kimberley</city>, <country country="ZA">South Africa</country></aff>
<aff id="aff2"><label>2</label><institution>Department of Mathematical Sciences and Computing, Faculty of Natural Sciences, Walter Sisulu University</institution>, <city>Mthatha</city>, <country country="ZA">South Africa</country></aff>
<author-notes>
<corresp id="c001"><label>&#x002A;</label>Correspondence: Ibidun C. Obagbuwa, <email xlink:href="mailto:iobagbuwa@wsu.ac.za">iobagbuwa@wsu.ac.za</email></corresp>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-03-02">
<day>02</day>
<month>03</month>
<year>2026</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2026</year>
</pub-date>
<volume>9</volume>
<elocation-id>1737790</elocation-id>
<history>
<date date-type="received">
<day>02</day>
<month>11</month>
<year>2025</year>
</date>
<date date-type="rev-recd">
<day>27</day>
<month>12</month>
<year>2025</year>
</date>
<date date-type="accepted">
<day>19</day>
<month>01</month>
<year>2026</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x00A9; 2026 Moyo, Tuyikeze, Matsebula and Obagbuwa.</copyright-statement>
<copyright-year>2026</copyright-year>
<copyright-holder>Moyo, Tuyikeze, Matsebula and Obagbuwa</copyright-holder>
<license>
<ali:license_ref start_date="2026-03-02">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<p>The rapid advancement of generative artificial intelligence (AI) has enabled the creation of highly realistic synthetic media, commonly referred to as deepfakes, which are increasingly multimodal and difficult to detect. While these technologies offer creative and commercial potential, they also pose critical challenges related to misinformation, media trust, and societal harm. Despite the growing body of research, existing reviews remain fragmented, often separating technical detection advances from social and governance considerations. This study addresses this gap through a systematic review conducted in accordance with PRISMA guidelines across IEEE Xplore, Scopus, ACM Digital Library, and Web of Science. From an initial set of 120 database records, complemented by citation chaining, 34 studies published between 2014 and 2025 were included for analysis. Eighteen studies focused on deepfake generation and detection models, eight examined social and behavioural implications, and eight addressed ethical and regulatory frameworks. Thematic synthesis reveals a clear methodological shift from convolutional neural networks toward transformer- and CLIP-based architectures, alongside the emergence of large-scale benchmark datasets. However, persistent challenges remain in multimodal detection, cross-dataset generalization, explainability&#x2013;robustness trade-offs, and the translation of governance principles into deployable systems. This review contributes an integrated conceptual framework that operationally connects detection technologies, explainable AI (XAI), and governance mechanisms through explicit feedback loops. Future research directions emphasize robust multimodal benchmarks, retrieval-augmented detection systems, and interdisciplinary approaches that align technical innovation with ethical and policy safeguards.</p>
</abstract>
<kwd-group>
<kwd>deepfakes</kwd>
<kwd>explainable AI (XAI)</kwd>
<kwd>generative artificial intelligence</kwd>
<kwd>media trust</kwd>
<kwd>misinformation</kwd>
<kwd>multimodal detection</kwd>
</kwd-group>
<funding-group>
<funding-statement>The author(s) declared that financial support was not received for this work and/or its publication.</funding-statement>
</funding-group>
<counts>
<fig-count count="2"/>
<table-count count="2"/>
<equation-count count="0"/>
<ref-count count="37"/>
<page-count count="10"/>
<word-count count="6246"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Natural Language Processing</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="sec1">
<label>1</label>
<title>Introduction</title>
<p>Generative AI has emerged as a transformative force in digital content creation. Among its most striking applications are deepfakes&#x2014;synthetically generated or manipulated videos, images, and audio that can convincingly imitate real individuals. Initially conceived as technical demonstrations, deepfakes have evolved into powerful tools with dual-use potential, supporting both creative innovation and malicious activities such as non-consensual sexual content, political misinformation, and reputational harm. Recent studies have investigated deepfake creation and identification in visual, audio, and multimodal domains, as well as the social and cognitive impacts of misinformation (<xref ref-type="bibr" rid="ref6">Chuk-Ke and Dong, 2024</xref>; <xref ref-type="bibr" rid="ref9">Donahue et al., 2019</xref>; <xref ref-type="bibr" rid="ref13">Gowrisankar and Thing, 2024</xref>; <xref ref-type="bibr" rid="ref14">Green and Swets, 1966</xref>; <xref ref-type="bibr" rid="ref18">Kumar et al., 2022</xref>; <xref ref-type="bibr" rid="ref20">Loth et al., 2024</xref>; <xref ref-type="bibr" rid="ref23">Nguyen et al., 2020</xref>, <xref ref-type="bibr" rid="ref24">2022</xref>; <xref ref-type="bibr" rid="ref25">Pearson and Zinets, 2022</xref>; <xref ref-type="bibr" rid="ref26">Rossler et al., 2019</xref>; <xref ref-type="bibr" rid="ref29">Siarohin et al., 2019</xref>; <xref ref-type="bibr" rid="ref28">Shu et al., 2020</xref>; <xref ref-type="bibr" rid="ref30">Sweller, 1988</xref>; <xref ref-type="bibr" rid="ref34">Zhang et al., 2024</xref>).</p>
<p>The implications of deepfakes extend well beyond technical domains, intersecting with media trust, democratic governance, and legal accountability. Social science research has examined misinformation dynamics (<xref ref-type="bibr" rid="ref35">Zhou et al., 2021</xref>; <xref ref-type="bibr" rid="ref15">Idiongo, 2024</xref>), while policymakers have begun formulating governance frameworks such as the EU Digital Services Act (<xref ref-type="bibr" rid="ref10">European Parliament and Council of the European Union (EU DSA), 2022</xref>) and the EU AI Act (<xref ref-type="bibr" rid="ref11">European Parliament and Council of the European Union (EU AI Act), 2024</xref>). In parallel, computer vision research has advanced rapidly, developing datasets and detection methods based on convolutional and transformer architectures (<xref ref-type="bibr" rid="ref9001">Verdoliva et al., 2019</xref>; <xref ref-type="bibr" rid="ref19">Li et al., 2020</xref>).</p>
<p>Despite these parallel developments, current literature remains fragmented. Few studies systematically integrate technical detection methods with social, ethical, and policy perspectives. This review addresses that gap by synthesizing interdisciplinary research to provide a unified understanding of deepfake creation, detection, and governance. Specifically, it seeks to:</p>
<list list-type="roman-lower">
<list-item>
<p>Identify dominant technical and social approaches of deepfake generation and detection.</p>
</list-item>
<list-item>
<p>Evaluate how explainable AI and multimodal architectures enhance detection robustness.</p>
</list-item>
<list-item>
<p>Examine how regulatory and ethical frameworks can inform the design of responsible detection systems.</p>
</list-item>
</list>
<p>By combining insights from computer vision, natural language processing, explainable AI, and social science, this review provides a cross-disciplinary taxonomy of deepfake research and outlines a conceptual framework for integrating detection, explainability, and governance. Through this synthesis, it aims to support the development of transparent, ethical, and technically resilient AI-based systems for mitigating the harms of synthetic media.</p>
<p>In line with contemporary academic consensus, this review avoids treating &#x201C;fake news&#x201D; as a standalone analytical category due to its politicized and ambiguous usage. Rather, the terms misinformation, disinformation, and AI-created misleading content are employed to differentiate unintentional errors, intentional distortion, and artificial media products, respectively. This terminological precision enables clearer alignment between technical detection methods, social impact studies, and regulatory frameworks.</p>
</sec>
<sec sec-type="methods" id="sec2">
<label>2</label>
<title>Methodology</title>
<sec id="sec3">
<label>2.1</label>
<title>Review protocol and literature selection</title>
<p>This study adopted a systematic review approach to synthesize current research on deepfakes, generative Artificial Intelligence (AI), and misinformation. The review followed the Preferred Reporting Items for Systematic Reviews and Meta-Analyses (PRISMA) guidelines to ensure methodological transparency, replicability, and rigor. The process involved six key stages: (i) defining the research scope and questions, (ii) developing search strategies and selecting databases, (iii) applying inclusion and exclusion criteria, (iv) screening and extracting relevant data, (v) assessing methodological quality, and (vi) synthesizing and interpreting the findings.</p>
<p>The scope of this review covered the period 2014 to 2025, reflecting the evolution of deepfake research from its early technical demonstrations to contemporary multimodal and regulatory developments. Both qualitative and quantitative evidence were considered to capture the interdisciplinary nature of deepfake research across technical, social, and policy domains.</p>
</sec>
<sec id="sec4">
<label>2.2</label>
<title>Search strategy and databases</title>
<p>A comprehensive multi-database search strategy was designed to capture the full breadth of deepfake-related research, including studies on generation, detection, misinformation, and regulation. The Boolean search string used (Scopus format) was:</p>
<p>(&#x201C;deepfake&#x002A;&#x201D; OR &#x201C;fake news&#x201D; OR &#x201C;synthetic media&#x201D; OR &#x201C;AI-generated content&#x201D; OR &#x201C;misinformation&#x201D; OR &#x201C;disinformation&#x201D;) AND (&#x201C;artificial intelligence&#x201D; OR &#x201C;machine learning&#x201D; OR &#x201C;deep learning&#x201D; OR &#x201C;neural network&#x002A;&#x201D; OR &#x201C;natural language processing&#x201D; OR &#x201C;computer vision&#x201D;)</p>
<p>This query was executed and adapted for each database, including IEEE Xplore, ACM Digital Library, Scopus, SpringerLink, and Web of Science, yielding an initial pool of 120 database records that was complemented by backward and forward citation chaining. To ensure inclusion of the most recent advances, arXiv preprints were also screened, recognizing that peer review often lags rapid developments in generative AI.</p>
<p>Preprints were included only when they were later cited in peer-reviewed venues or associated with publicly recognized datasets to mitigate quality concerns.</p>
<p>For regulatory and governance perspectives, official EU repositories and government sites were reviewed to obtain key policy texts such as the EU Digital Services Act (<xref ref-type="bibr" rid="ref10">European Parliament and Council of the European Union (EU DSA), 2022</xref>) and the EU AI Act (<xref ref-type="bibr" rid="ref11">European Parliament and Council of the European Union (EU AI Act), 2024</xref>). Reference lists of selected articles were also examined through backward and forward citation chasing to identify additional relevant studies not captured by the initial search.</p>
<p>arXiv preprints were included selectively due to the rapid pace of advances in generative AI and were cross-validated based on subsequent peer-reviewed adoption, dataset impact, or citation prominence.</p>
</sec>
<sec id="sec5">
<label>2.3</label>
<title>Inclusion and exclusion criteria</title>
<p>Inclusion criteria:</p>
<p>Studies were included if they met the following criteria:</p>
<list list-type="roman-lower">
<list-item>
<p>Published between 2014 and 2025;</p>
</list-item>
<list-item>
<p>Peer-reviewed journal articles, conference papers, or preprints focusing on deepfake generation, detection, or governance;</p>
</list-item>
<list-item>
<p>Addressed technical, social, or policy/regulatory dimensions of deepfakes;</p>
</list-item>
<list-item>
<p>Investigated text, audio, image, or video modalities related to synthetic media;</p>
</list-item>
<list-item>
<p>Written in English and available in full-text form.</p>
</list-item>
</list>
<p>Exclusion criteria:</p>
<list list-type="roman-lower">
<list-item>
<p>Studies published in languages other than English;</p>
</list-item>
<list-item>
<p>Duplicate records across multiple databases;</p>
</list-item>
<list-item>
<p>Non-peer-reviewed sources such as blog posts, news articles, and unverified reports (except official institutional or legal documents);</p>
</list-item>
<list-item>
<p>Studies focusing solely on unrelated AI applications (e.g., generative art) without relevance to misinformation or deepfake detection;</p>
</list-item>
<list-item>
<p>Theoretical discussions lacking empirical or methodological depth.</p>
</list-item>
</list>
<p>These criteria ensured that the final selection consisted of studies with direct relevance, methodological rigor, and conceptual clarity.</p>
</sec>
<sec id="sec6">
<label>2.4</label>
<title>Search strategy</title>
<p>The search strategy combined structured database keyword searches with both backward citation chasing (examining the references of included studies) and forward citation chasing (tracking newer works that cited them). Initial queries employed broad terms such as <italic>deepfake detection</italic>, <italic>generative adversarial networks (GANs)</italic>, <italic>synthetic media</italic>, <italic>misinformation</italic>, and <italic>fake news</italic>. Throughout the review, the strategy was refined repeatedly to incorporate new methods and ideas. Other keywords comprised transformers, Vision Transformer (ViT), CLIP, voice generation, multimodal recognition, Explainable AI (XAI), retrieval-augmented generation (RAG), bias, equity, governance, and regulation. Boolean operators (e.g., &#x201C;deepfake AND identification&#x201D;, &#x201C;synthetic media AND false information&#x201D;, &#x201C;transformer OR CLIP AND deepfake&#x201D;) were employed to enhance recall and precision. This iterative approach ensured that both foundational studies and the most recent developments in technical, social, and regulatory domains were systematically captured.</p>
</sec>
<sec id="sec7">
<label>2.5</label>
<title>Screening process</title>
<p>The screening process followed a three-stage procedure to ensure systematic inclusion of high-quality and relevant studies:</p>
<list list-type="order">
<list-item>
<p>Title and abstract screening: all retrieved records were reviewed to exclude irrelevant topics, such as unrelated computer vision tasks or non-AI media analyses.</p>
</list-item>
<list-item>
<p>Full-text review: remaining articles were evaluated for methodological soundness, empirical contribution, and relevance to the research objectives.</p>
</list-item>
<list-item>
<p>Thematic categorization: eligible studies were coded into thematic clusters representing:</p>
</list-item>
</list>
<list list-type="simple">
<list-item>
<p>(i) Dataset creation and benchmark development.</p>
</list-item>
<list-item>
<p>(ii) Detection models and architectures.</p>
</list-item>
<list-item>
<p>(iii) Explainability and adversarial robustness.</p>
</list-item>
<list-item>
<p>(iv) Social trust and misinformation.</p>
</list-item>
<list-item>
<p>(v) Policy and regulatory frameworks.</p>
</list-item>
</list>
<p>At the full-text screening stage, studies were excluded for the following predefined reasons:</p>
<p>Reason 1: Primary focus on AI applications unrelated to misinformation or deceptive synthetic media (<italic>n</italic>&#x202F;=&#x202F;6).</p>
<p>Reason 2: Lack of empirical evaluation, methodological transparency, or reproducible analysis (<italic>n</italic>&#x202F;=&#x202F;5).</p>
<p>Reason 3: Conceptual or opinion-based papers without sufficient analytical depth or evidence synthesis (<italic>n</italic>&#x202F;=&#x202F;7).</p>
<sec id="sec8">
<label>2.5.1</label>
<title>Data extraction process</title>
<p>Data extraction was conducted using a structured data extraction form developed in Microsoft Excel. Two reviewers independently extracted data to minimize bias, with discrepancies resolved through discussion and consensus.</p>
<p>Extracted fields included:</p>
<list list-type="roman-lower">
<list-item>
<p>Author(s), year, and publication type.</p>
</list-item>
<list-item>
<p>Research domain and methodology.</p>
</list-item>
<list-item>
<p>AI model or framework (e.g., GAN, transformer, CLIP).</p>
</list-item>
<list-item>
<p>Dataset characteristics and evaluation metrics.</p>
</list-item>
<list-item>
<p>Key findings and thematic relevance.</p>
</list-item>
</list>
<p>This approach ensured consistency and completeness in capturing both technical and contextual details.</p>
<p>Inter-rater reliability between reviewers was assessed using Cohen&#x2019;s kappa coefficient (<italic>&#x03BA;</italic>), which indicated strong agreement during the screening and data extraction phases.</p>
</sec>
</sec>
<sec id="sec9">
<label>2.6</label>
<title>Number of studies included</title>
<p>The systematic review included a total of 34 studies published between 2014 and 2025, capturing both the historical development and the latest advances in deepfake research. Eighteen of them examined deepfake production, detection methods, and model architectures with an emphasis on computer vision and technical approaches. Eight studies from the social sciences looked at how deepfakes affect user behaviour, media trust, and disinformation in society. The remaining eight studies focused on policy and regulatory frameworks, emphasizing ethical issues, governance strategies, and legal actions. Research was chosen for its capability to offer a broad viewpoint across various media formats&#x2014;text, images, video, and audio&#x2014;and for its role in enhancing knowledge of the technical, social, and policy aspects of deepfakes. This selection ensures a holistic view of the field, combining insights into both innovation and the societal challenges posed by synthetic media.</p>
<p>Although the ultimate set of 34 studies may seem narrow compared to the vastness of generative AI research, this demonstrates the careful use of stringent inclusion standards that prioritize interdisciplinary significance, methodological soundness, and clear involvement with detection, social consequences, or governance. This trade-off prioritizes analytical depth and coherence over exhaustive coverage and is acknowledged as a limitation of the review.</p>
</sec>
<sec id="sec10">
<label>2.7</label>
<title>Quality assessment</title>
<p>The methodological quality and potential risk of bias for each study were evaluated using an adapted version of the Critical Appraisal Skills Program (CASP) checklist. Technical studies were assessed based on criteria such as dataset transparency, model reproducibility, and evaluation robustness, while social science and policy studies were reviewed for methodological clarity, validity of interpretation, and evidence linkage. Each study received a quality rating (high, moderate, or low) based on these criteria, which informed the weight given during synthesis. Of the 34 studies evaluated, 21 were rated high quality, 9 moderate, and 4 low according to the adapted CASP checklist.</p>
</sec>
<sec id="sec11">
<label>2.8</label>
<title>Rationale for selection</title>
<p>The studies included in this review were selected for several key reasons. They first offer a historical basis for comprehending the evolution of generative AI and the rise of misinformation by illustrating the progression of deepfakes from early technical demos to highly advanced synthetic media. Second, they showcase state-of-the-art technological detection techniques, such as multimodal analysis, explainable AI, and machine learning model advancements. Third, the research investigates sociological, ethical, and legal aspects, analysing how deepfakes influence media credibility, public opinion, privacy, and regulatory systems. Finally, the selection deliberately encompasses a range of media modalities&#x2014;text, images, video, and audio&#x2014;to ensure a comprehensive understanding of both the technical challenges and societal implications of deepfakes (see <xref ref-type="fig" rid="fig1">Figure 1</xref>).</p>
<fig position="float" id="fig1">
<label>Figure 1</label>
<caption>
<p>PRISMA flowchart.</p>
</caption>
<graphic xlink:href="frai-09-1737790-g001.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Flowchart detailing the identification, screening, and inclusion of studies. It begins with 120 records from databases. After removing 27 records, 100 are screened, excluding 45. Fifty-five reports sought, with 3 not retrieved. Fifty-two reports assessed, excluding 18 for various reasons. Finally, 34 studies included in the review.</alt-text>
</graphic>
</fig>
</sec>
</sec>
<sec id="sec12">
<label>3</label>
<title>Thematic review of literature</title>
<sec id="sec13">
<label>3.1</label>
<title>Overview of thematic analysis</title>
<p>A thematic review approach was used to organize and interpret the selected studies according to recurring patterns, concepts, and research priorities. Finding commonly discussed subjects, assembling related concepts into clusters, and honing these clusters into cohesive themes were the steps in the analysis process. This approach facilitated the emergence of both chronological and conceptual links, underscoring the progression of research on deepfakes and synthetic media from initial technical trials to wider societal and policy-focused conversations.</p>
<p>From this analysis, four overarching themes were identified:</p>
<list list-type="roman-lower">
<list-item>
<p>Evolution of Deepfake Technologies and Detection Research;</p>
</list-item>
<list-item>
<p>Technical developments in deepfake generation and detection methods;</p>
</list-item>
<list-item>
<p>Explainability, robustness, and evaluation challenges in AI-based detection;</p>
</list-item>
<list-item>
<p>Social and psychological impacts, including misinformation, media trust, and user behaviour;</p>
</list-item>
<list-item>
<p>Governance, ethics, and policy frameworks addressing regulation and accountability.</p>
</list-item>
</list>
</sec>
<sec id="sec14">
<label>3.2</label>
<title>Evolution of deepfake technologies and detection research</title>
<p>The trajectory of deepfake development mirrors broader advances in generative AI. Early methods, notably Generative Adversarial Networks (GANs) (<xref ref-type="bibr" rid="ref12">Goodfellow et al., 2014</xref>) established the adversarial paradigm that underpins synthetic image and video generation. Subsequent variants such as StyleGAN2/3 (Karras et al., NVIDIA) dramatically improved realism and controllability. Meanwhile, transformer architectures (<xref ref-type="bibr" rid="ref31">Vaswani et al., 2017</xref>) and large-scale language models (<xref ref-type="bibr" rid="ref1">Brown et al., 2020</xref>) expanded synthetic generation to text and audio, creating a multimodal threat landscape.</p>
<p>These architectural advances not only enabled high-fidelity media synthesis but also reshaped detection research. As deepfakes grew more realistic, early CNN-based detectors emphasizing pixel-level inconsistencies gave way to transformer and CLIP-based frameworks capable of modelling contextual and semantic relationships. This shift reflects an evolution from surface-level artifact detection toward cross-modal, meaning-aware analysis, mirroring the progression of misinformation itself from isolated falsifications to integrated multimodal narratives.</p>
<p>This architectural transition reflects more than incremental performance improvement. Detectors based on CNNs chiefly leverage low-level visual artifacts, which modern generative models increasingly reduce. Transformer and CLIP-based approaches instead model long-range spatial, temporal, and semantic dependencies, enabling improved robustness to compression and post-processing. Nonetheless, these benefits come with trade-offs in computational expense, data reliance, and interpretability. The literature rarely addresses how such models can be deployed in real-time or resource-constrained environments, revealing a gap between benchmark success and operational feasibility (see <xref ref-type="table" rid="tab1">Table 1</xref>).</p>
<table-wrap position="float" id="tab1">
<label>Table 1</label>
<caption>
<p>Summary of emerging themes in the current state of knowledge.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">Theme</th>
<th align="left" valign="top">Focus area</th>
<th align="left" valign="top">Key findings</th>
<th align="left" valign="top">Representative studies/datasets</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="top">1. Advances in datasets and detection models</td>
<td align="left" valign="top">Development of datasets and model architectures for detection</td>
<td align="left" valign="top">Expansion from CNNs to Transformer-based and CLIP-integrated models; improved generalization via multimodal benchmarks</td>
<td align="left" valign="top">FaceForensics++, DFDC, Celeb-DF, FakeVoices, WaveFake, Deepfake-Eval-2024</td>
</tr>
<tr>
<td align="left" valign="top">2. Explainability and adversarial robustness</td>
<td align="left" valign="middle">Interpretability and resilience of detection systems</td>
<td align="left" valign="top">Use of Grad-CAM and similar methods; emerging risks from explainability-based attacks</td>
<td align="left" valign="top">
<xref ref-type="bibr" rid="ref27">Selvaraju et al. (2017)</xref>
</td>
</tr>
<tr>
<td align="left" valign="top">3. Social, ethical, and policy responses</td>
<td align="left" valign="middle">Societal impacts and governance frameworks</td>
<td align="left" valign="top">Rising misinformation, erosion of trust, regulatory responses via EU AI and DSA Acts</td>
<td align="left" valign="middle"><xref ref-type="bibr" rid="ref15">Idiongo (2024)</xref>, <xref ref-type="bibr" rid="ref22">National Sexual Violence Resource Center (2024)</xref>, and <xref ref-type="bibr" rid="ref10">European Parliament and Council of the European Union (EU DSA) (2022</xref>, <xref ref-type="bibr" rid="ref11">2024)</xref></td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="sec15">
<label>3.3</label>
<title>Comparative analysis</title>
<p>To synthesize and contrast the findings across the selected studies, a comparative analysis was performed focusing on methodological design, evaluation performance, and disciplinary orientation. To find similarities, contrasts, and latest trends, this analysis combines data from computer vision, multimodal AI, social science, and policy research.</p>
<p>A comparative analysis identified three primary methodological clusters &#x2013; CNN-based, Transformer-based, and CLIP-based/multimodal frameworks &#x2013; each presenting unique advantages and disadvantages. CNN architectures (e.g., XceptionNet, EfficientNet) exhibit high accuracy on established benchmarks such as FaceForensics++ and DFDC but demonstrate limited generalization to unseen datasets or manipulation techniques. Although they require more data and have greater computing costs, Vision Transformers (ViT, Swin-T) and hybrid models provide better contextual modeling and modest improvements in cross-dataset generalization. CLIP-based and multimodal methods utilize vision-language pretraining to enable zero-shot and few-shot detection, indicating scalability potential across modalities, yet they continue to be vulnerable to adversarial perturbations and domain shifts.</p>
<p>Recent research has delved deeper into multimodal fusion and extensive pretraining techniques to enhance generalization and resilience in deepfake detection, especially in cross-dataset and real-world scenarios (<xref ref-type="bibr" rid="ref3">Chen et al., 2024a</xref>; <xref ref-type="bibr" rid="ref4">Chen et al., 2024b</xref>).</p>
<p>Foundational deep learning architectures underpinning contemporary detection systems include Xception networks (<xref ref-type="bibr" rid="ref5">Chollet, 2017</xref>), transformer-based language models such as BERT (<xref ref-type="bibr" rid="ref7">Devlin et al., 2019</xref>), and generative adversarial networks including StyleGAN (<xref ref-type="bibr" rid="ref16">Karras et al., 2021</xref>). These architectures have directly impacted the generation pipelines for deepfakes and the detection methods assessed in benchmark datasets like the DeepFake Detection Challenge (DFDC) dataset (<xref ref-type="bibr" rid="ref8">Dolhansky et al., 2019</xref>).</p>
<p>From a disciplinary standpoint, methodological emphasis diverges:</p>
<list list-type="roman-lower">
<list-item>
<p>Computer vision studies prioritize quantitative metrics such as AUC, accuracy, and F1-score, emphasizing model robustness and scalability.</p>
</list-item>
<list-item>
<p>Social science research focuses on user perception, misinformation spread, and trust restoration, prioritizing ecological validity over computational precision.</p>
</list-item>
<list-item>
<p>Policy and governance studies emphasize legal accountability, transparency mechanisms, and platform obligations, focusing less on algorithms and more on institutional enforcement.</p>
</list-item>
</list>
<sec id="sec16">
<label>3.3.1</label>
<title>Quantitative overview of methodological clusters</title>
<p>Among the 34 studies analysed, approximately 59% (<italic>n</italic>&#x202F;=&#x202F;20) employed CNN-based detection models, 26% (<italic>n</italic>&#x202F;=&#x202F;9) used transformer or hybrid architectures, and 15% (<italic>n</italic>&#x202F;=&#x202F;5) adopted multimodal or CLIP-based approaches. Social science and policy-focused studies comprised ~30% of the total dataset, underscoring the growing interdisciplinary scope of deepfake research.</p>
<p>The predominance of CNN-based approaches (59%) reflects both historical inertia and dataset availability, as many widely used benchmarks were designed to expose CNN-detectable artifacts. While this has accelerated short-term performance gains, it may also constrain innovation by incentivizing dataset-specific optimization rather than real-world generalization. This methodological concentration highlights the need for evaluation protocols that reward robustness, multimodal reasoning, and cross-domain adaptability (see <xref ref-type="table" rid="tab2">Table 2</xref>).</p>
<table-wrap position="float" id="tab2">
<label>Table 2</label>
<caption>
<p>Comparative summary of detection approaches and research perspectives.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">Approach domain</th>
<th align="left" valign="top">Representative studies</th>
<th align="left" valign="top">Core techniques</th>
<th align="left" valign="top">Strengths</th>
<th align="left" valign="top">Limitations</th>
<th align="left" valign="top">Evaluation focus metrics</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="middle">CNN-based detection</td>
<td align="left" valign="middle"><xref ref-type="bibr" rid="ref19">Li et al., 2020</xref> and Verdoliva et al. (2019)</td>
<td align="left" valign="middle">XceptionNet, EfficientNet, ResNet</td>
<td align="left" valign="middle">High benchmark accuracy, efficient training</td>
<td align="left" valign="middle">Poor cross-dataset generalization; sensitive to new manipulations</td>
<td align="left" valign="middle">AUC, F1-score, accuracy</td>
</tr>
<tr>
<td align="left" valign="middle">Transformer-based models</td>
<td align="left" valign="middle"><xref ref-type="bibr" rid="ref3">Chen et al. (2024a</xref>, <xref ref-type="bibr" rid="ref4">2024b)</xref> and <xref ref-type="bibr" rid="ref32">Wang et al. (2024)</xref></td>
<td align="left" valign="middle">ViT, Swin Transformer, hybrid CNN-Transformer</td>
<td align="left" valign="middle">Captures long-range dependencies, strong contextual modelling</td>
<td align="left" valign="middle">Data-hungry, computationally expensive</td>
<td align="left" valign="middle">Accuracy, cross-dataset robustness</td>
</tr>
<tr>
<td align="left" valign="middle">CLIP multimodal approaches</td>
<td align="left" valign="middle"><xref ref-type="bibr" rid="ref33">Yermakov et al. (2025)</xref> and <xref ref-type="bibr" rid="ref2">Brown et al., 2025</xref></td>
<td align="left" valign="middle">Vision&#x2013;language pretraining, frozen CLIP features</td>
<td align="left" valign="middle">Zero-shot and multimodal generalization; scalable</td>
<td align="left" valign="middle">Adversarial vulnerability, distribution shift</td>
<td align="left" valign="middle">Zero-shot accuracy, multimodal retrieval performance</td>
</tr>
<tr>
<td align="left" valign="middle">Explainable &#x0026; robust AI</td>
<td align="left" valign="middle"><xref ref-type="bibr" rid="ref17">Kozik et al. (2024)</xref> and <xref ref-type="bibr" rid="ref27">Selvaraju et al. (2017)</xref></td>
<td align="left" valign="middle">Grad-CAM, feature attribution, robustness testing</td>
<td align="left" valign="middle">Transparency, user interpretability</td>
<td align="left" valign="middle">Risk of adversarial exploitation</td>
<td align="left" valign="middle">Interpretability quality, robustness metrics</td>
</tr>
<tr>
<td align="left" valign="middle">Social behavioural studies</td>
<td align="left" valign="middle"><xref ref-type="bibr" rid="ref15">Idiongo (2024)</xref> and <xref ref-type="bibr" rid="ref35">Zhou et al. (2021)</xref></td>
<td align="left" valign="middle">Surveys, experiments, discourse analysis</td>
<td align="left" valign="middle">Insights into trust, misinformation spread</td>
<td align="left" valign="middle">Lack of quantitative precision</td>
<td align="left" valign="middle">User trust, perception, misinformation spread</td>
</tr>
<tr>
<td align="left" valign="middle">Policy &#x0026; regulatory studies</td>
<td align="left" valign="middle"><xref ref-type="bibr" rid="ref10">European Parliament and Council of the European Union (EU DSA) (2022)</xref> and <xref ref-type="bibr" rid="ref11">European Parliament and Council of the European Union (EU AI Act) (2024)</xref></td>
<td align="left" valign="middle">Legislative review, policy analysis</td>
<td align="left" valign="middle">Governance, accountability frameworks</td>
<td align="left" valign="middle">Limited enforcement mechanisms, lag behind tech</td>
<td align="left" valign="middle">Policy compliance, transparency obligations</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="sec17">
<label>3.3.2</label>
<title>Interpretation and implications</title>
<p>The comparative synthesis highlights that while technical advancements in AI detection (e.g., transformers, CLIP) show promise, cross-domain generalization and ethical integration remain unresolved. Complementary insights on trust and governance are offered by social and policy studies, indicating the necessity of interdisciplinary frameworks that combine societal resilience, regulatory oversight, and technical detection accuracy. These comparative insights form the foundation for the conceptual framework presented in the next section, integrating strengths across disciplines to advance comprehensive deepfake mitigation strategies.</p>
</sec>
</sec>
<sec id="sec18">
<label>3.4</label>
<title>Conceptual framework</title>
<p>The framework integrates technical, social, and governance dimensions through feedback loops that connect model transparency, user trust, and regulatory compliance to build accountable and resilient AI detection systems (see <xref ref-type="fig" rid="fig2">Figure 2</xref>).</p>
<fig position="float" id="fig2">
<label>Figure 2</label>
<caption>
<p>Conceptual framework.</p>
</caption>
<graphic xlink:href="frai-09-1737790-g002.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Flowchart depicting four interconnected categories: Policy &#x0026; Governance (Regulation &#x0026; Ethics, Platform Accountability), Explainability Mechanisms (Model Transparency, User Feedback, Joint Influence on AI), Technical (Detection Models, Robust Datasets, Detection Outcomes), and Social (Media Literacy, Trust Perception, Behavioral Impact). Each category is linked with arrows indicating their relationships.</alt-text>
</graphic>
</fig>
<p>The framework is organized in a three-tier system:</p>
<list list-type="order">
<list-item>
<p>Technical layer &#x2013; encompassing detection models, detection outcomes and robust datasets;</p>
</list-item>
<list-item>
<p>Social layer &#x2013; focusing on media literacy, trust perception, and behavioural impact;</p>
</list-item>
<list-item>
<p>Policy &#x0026; governance layer &#x2013; covering regulation, ethics, and platform accountability;</p>
</list-item>
<list-item>
<p>Explainability mechanisms &#x2013; dealing with model transparency, user feedback, perception data, joint influence on responsible AI deployment and societal resilience.</p>
</list-item>
</list>
<p>These components are interconnected through bidirectional feedback loops. Technical detection systems generate transparent outputs that inform social understanding and policy decisions, while governance structures provide ethical and compliance feedback to guide model development and user interaction.</p>
<p>This framework extends prior conceptual models (<xref ref-type="bibr" rid="ref21">Mirsky and Lee, 2021</xref>; <xref ref-type="bibr" rid="ref9002">Zhou et al., 2019</xref>) by explicitly incorporating explainability and feedback dynamics that connect machine learning performance with human trust and policy accountability. Unlike earlier models focusing solely on detection or social impacts, this integrated design emphasizes <italic>mutual reinforcement</italic> between technology, transparency, and governance to create a resilient AI ecosystem for mitigating deepfake threats.</p>
<sec id="sec19">
<label>3.4.1</label>
<title>Operationalization of the conceptual framework</title>
<p>The framework is designed to function as an operational model rather than a static taxonomy. In the technical layer, detection models analyze multimodal content and produce classification results along with explainability indicators like confidence scores or attention maps. These outputs directly influence the social layer by shaping moderation decisions, media literacy interventions, and user trust calibration.</p>
<p>User engagement, such as requests, modifications, and sharing actions, generates feedback data that returns to the technical layer, guiding dataset improvement, bias assessment, and model updates. Simultaneously, the policy and governance layer constrains system behaviour through transparency obligations, accountability requirements, and platform-level enforcement mechanisms, which shape both technical design and social response strategies.</p>
<p>In practical scenarios such as election integrity or non-consensual deepfake mitigation, this closed-loop structure enables continuous alignment between detection accuracy, societal trust, and regulatory compliance.</p>
<p>In contrast to earlier multi-layer frameworks that mainly classify stakeholders or research areas, this framework specifically represents feedback loops among technical outputs, human understanding, and governance limitations. This enables evaluation of how design choices in detection and explainability propagate through social trust and regulatory accountability, making the framework applicable to system design, policy assessment, and interdisciplinary research planning.</p>
</sec>
</sec>
<sec id="sec20">
<label>3.5</label>
<title>Knowledge gaps</title>
<p>Despite significant advances in deepfake detection, several key gaps and contentious issues remain. Generalization is a major challenge, as many detectors perform well on benchmark datasets but struggle to maintain accuracy across new or unseen datasets, limiting their real-world applicability. Multimodal deepfakes, which combine video, audio, and text, are understudied compared to single-modality cases, leaving detection methods less prepared for increasingly sophisticated manipulations. The balance between explainability and security also presents a dilemma: while explainable AI tools improve transparency, they can potentially be exploited by adversaries to bypass detection. Additionally, ethical and legal protections remain insufficient, and harmful uses such as non-consensual deepfake pornography and political disinformation highlight gaps in current laws and enforcement. Finally, the lack of standardized, adversarial-resilient evaluation benchmarks complicates the comparison of detection methods across studies, making it difficult to assess progress and deploy robust solutions in practice.</p>
</sec>
</sec>
<sec id="sec21">
<label>4</label>
<title>Critical discussion</title>
<p>The literature reveals a field advancing rapidly on the technical front while struggling to ensure adequate societal protection. Although detection models have improved significantly, the adversarial dynamics of the problem mean that innovations in synthesis frequently outpace detection capabilities. This ongoing arms race is intensified by dataset biases, inconsistent reporting standards, and fragmented evaluation practices.</p>
<p>In addition, interdisciplinary gaps persist. Social scientists extensively document the social and behavioural consequences of misinformation but often neglect the technical constraints of detection systems. Conversely, many computer vision studies validate models using synthetic benchmarks without evaluating real-world or societal implications. Regulatory initiatives such as the EU AI Act and Digital Services Act introduce frameworks for accountability, yet translating these high-level principles into enforceable technical standards and platform practices remains an open challenge.</p>
<p>Previous surveys and forensic studies established the foundation for contemporary detection research by systematizing manipulation categories and assessment methods (<xref ref-type="bibr" rid="ref9001">Verdoliva et al., 2019</xref>; <xref ref-type="bibr" rid="ref35">Zhou et al., 2021</xref>).</p>
<p>Deployment challenges also include scalability, privacy protection when handling sensitive data, and the ethical management of false positives that could affect legitimate content. These challenges directly relate to the conceptual framework proposed in this review: weaknesses in the <italic>technical domain</italic> (e.g., bias, overfitting) affect <italic>social trust and media literacy</italic>, while insufficient <italic>policy enforcement</italic> weakens governance feedback loops. A more integrated research agenda that unites robust technical evaluation, human-centred design, and co-developed regulatory mechanisms is necessary to ensure that progress in detection contributes meaningfully to societal protection.</p>
<p>Recent regulatory initiatives further underscore the need for accountable AI-driven detection systems. The Digital Services Act (<xref ref-type="bibr" rid="ref10">European Parliament and Council of the European Union (EU DSA), 2022</xref>) of the European Union establishes requirements for platform transparency and risk reduction concerning online misinformation, while the proposed European Union Artificial Intelligence Act (<xref ref-type="bibr" rid="ref11">European Parliament and Council of the European Union (EU AI Act), 2024</xref>) classifies certain AI-driven content moderation and synthetic media systems as high-risk, imposing requirements for explainability, documentation, and human oversight.</p>
</sec>
<sec id="sec22">
<label>5</label>
<title>Future directions</title>
<p>Future research on deepfake detection must move beyond narrow, dataset-driven evaluations toward holistic, context-aware, and ethically aligned systems. Guided by the conceptual framework, four thematic directions &#x2013; namely technical, methodological, ethical, and policy/governance &#x2013; are proposed to structure future development.</p>
<sec id="sec23">
<label>5.1</label>
<title>Technical directions</title>
<p>Future detection models should reflect the complexity of multimodal manipulations and evolving threat landscapes:</p>
<list list-type="roman-lower">
<list-item>
<p>Multimodal Benchmark Development: Construct datasets that jointly assess image, video, audio, and text manipulations to capture cross-modal deepfake narratives.</p>
</list-item>
<list-item>
<p>Cross-Domain Generalization: Mitigate overfitting through domain adaptation, self-supervised learning, and transfer learning to enhance robustness across datasets and manipulation types.</p>
</list-item>
<list-item>
<p>Retrieval-Augmented Detection: Incorporate external evidence (e.g., verified media or fact databases) into detection reasoning for improved factual grounding and reliability.</p>
</list-item>
<list-item>
<p>Explainability and Robustness: Advance interpretable AI methods (e.g., saliency maps, attention visualizations) that clarify decisions and resist adversarial exploitation.</p>
</list-item>
</list>
</sec>
<sec id="sec24">
<label>5.2</label>
<title>Methodological directions</title>
<list list-type="roman-lower">
<list-item>
<p>Adversarial-Resilient Evaluation Protocols: Establish standardized benchmarks that test model performance under realistic, adversarial, and cross-cultural conditions.</p>
</list-item>
<list-item>
<p>Human&#x2013;AI Collaboration: Design hybrid systems where explainable AI tools assist human reviewers, journalists, and policymakers in verifying authenticity and contextual accuracy.</p>
</list-item>
<list-item>
<p>Longitudinal and Cross-Platform Studies: Examine how detection systems perform across time and social media ecosystems to measure real-world efficacy and adaptation.</p>
</list-item>
</list>
</sec>
<sec id="sec25">
<label>5.3</label>
<title>Ethical directions</title>
<list list-type="roman-lower">
<list-item>
<p>Data Privacy and Consent: Ensure deepfake datasets use consensual, privacy-preserving data to avoid reinforcing exploitation or harm.</p>
</list-item>
<list-item>
<p>Bias and Fairness Auditing: Implement fairness checks and demographic audits in training datasets to prevent disproportionate impacts on marginalized groups.</p>
</list-item>
<list-item>
<p>Transparency and Accountability: Promote open reporting of model architectures, performance metrics, and limitations to support reproducibility and ethical oversight.</p>
</list-item>
</list>
</sec>
<sec id="sec26">
<label>5.4</label>
<title>Policy and governance directions</title>
<list list-type="roman-lower">
<list-item>
<p>Regulatory alignment: translate policy instruments such as the <italic>EU AI Act</italic> and <italic>Digital Services Act</italic> into technical compliance standards, platform-level transparency, and auditable mechanisms.</p>
</list-item>
<list-item>
<p>Global governance frameworks: encourage international cooperation to develop harmonized policies for detecting and labelling synthetic media.</p>
</list-item>
<list-item>
<p>Public education and media literacy: foster interdisciplinary collaboration between AI developers, educators, and communication experts to build societal resilience against misinformation.</p>
</list-item>
</list>
<sec id="sec27">
<label>5.4.1</label>
<title>Practical implications</title>
<p>For researchers, this framework encourages the integration of technical and social dimensions, ensuring that advances in model accuracy are matched by attention to transparency and fairness. Policy makers can use it to align AI regulation with technical feasibility, creating compliance standards that are both enforceable and adaptable. Media platforms can operationalize these insights by embedding explainable detection tools within content moderation pipelines. Collectively, these directions emphasize that the sustainability of deepfake detection depends on continuous interaction between innovation, ethics, and governance.</p>
</sec>
</sec>
</sec>
<sec sec-type="conclusions" id="sec28">
<label>6</label>
<title>Conclusion</title>
<p>This review synthesizes technical, social, and policy literature on deepfakes and detection. While there has been commendable progress in detection techniques and benchmark creation, significant challenges remain in generalization, multimodal detection, ethical safeguards, and policy enforcement. Many current models achieve strong results on specific datasets, yet their reliability weakens in real-world contexts where manipulations are more diverse and adversarially adaptive. Multimodal deepfakes, which combine video, audio, and textual fabrications, further complicate detection by exploiting gaps between unimodal approaches.</p>
<p>Beyond technical hurdles, unresolved ethical and legal questions persist. Non-consensual sexual deepfakes and political disinformation highlight the potential for severe personal and societal harm, underscoring the urgency of legal protections, content moderation frameworks, and victim support mechanisms. Policies such as the EU AI Act and Digital Services Act provide valuable blueprints, but enforcement depends on effective translation into technical standards and platform practices. At the same time, overzealous regulation risks constraining legitimate research and creative uses of generative technologies, demanding careful balance.</p>
<p>Addressing these challenges requires more than technical innovation; it demands sustained interdisciplinary collaboration. Engineers, social scientists, policymakers, and ethicists must co-design solutions that are simultaneously robust, explainable, and aligned with democratic values. Evaluation standards should evolve toward adversarial-resilient benchmarks that mirror deployment conditions, while explainability tools must empower human reviewers without exposing new attack surfaces.</p>
<p>The promise and perils of generative AI demand coordinated responses that protect individuals and societies while enabling beneficial innovation. If pursued with transparency, inclusivity, and foresight, the next generation of deepfake detection systems and governance structures can mitigate harms while fostering responsible use of synthetic media in education, entertainment, accessibility, and beyond. The trajectory of this field will depend on whether the global community can act not only to keep pace with technological advances, but also to shape them in the service of public trust and social good.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="sec29">
<title>Data availability statement</title>
<p>Publicly available datasets were analyzed in this study. This data can be found here: FakeNewsNet for text, FaceForensics++ for video.</p>
</sec>
<sec sec-type="author-contributions" id="sec30">
<title>Author contributions</title>
<p>BM: Conceptualization, Investigation, Writing &#x2013; original draft, Writing &#x2013; review &#x0026; editing. TT: Methodology, Supervision, Writing &#x2013; original draft. FM: Formal analysis, Project administration, Writing &#x2013; original draft, Writing &#x2013; review &#x0026; editing. IO: Conceptualization, Formal analysis, Investigation, Writing &#x2013; review &#x0026; editing.</p>
</sec>
<sec sec-type="COI-statement" id="sec31">
<title>Conflict of interest</title>
<p>The author(s) declared that this work was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="ai-statement" id="sec32">
<title>Generative AI statement</title>
<p>The author(s) declared that Generative AI was not used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p>
</sec>
<sec sec-type="disclaimer" id="sec33">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="ref1"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Brown</surname><given-names>T.</given-names></name> <name><surname>Mann</surname><given-names>B.</given-names></name> <name><surname>Ryder</surname><given-names>N.</given-names></name> <name><surname>Subbiah</surname><given-names>M.</given-names></name> <name><surname>Kaplan</surname><given-names>J.</given-names></name> <name><surname>Dhariwal</surname><given-names>P.</given-names></name> <etal/></person-group>. (<year>2020</year>). <article-title>Language models are few-shot learners</article-title>. <source>Adv. Neural Inf. Proc. Syst.</source> <volume>33</volume>, <fpage>1877</fpage>&#x2013;<lpage>1901</lpage>.</mixed-citation></ref>
<ref id="ref2"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Brown</surname><given-names>T.</given-names></name> <name><surname>Murtfeldt</surname><given-names>R.</given-names></name> <name><surname>Qiu</surname><given-names>L.</given-names></name> <name><surname>Karmakar</surname><given-names>A.</given-names></name> <name><surname>Lee</surname><given-names>H.</given-names></name> <name><surname>Tanumihardja</surname><given-names>E.</given-names></name> <etal/></person-group>. (<year>2025</year>). <article-title>Deepfake-Eval-2024: a multi-modal in-the-wild benchmark of deepfakes circulated in 2024</article-title>. <source>arXiv</source>.</mixed-citation></ref>
<ref id="ref3"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Chen</surname><given-names>Y.</given-names></name> <name><surname>Zhang</surname><given-names>L.</given-names></name> <name><surname>Niu</surname><given-names>Y.</given-names></name></person-group> (<year>2024a</year>). <article-title>Forgelens: data-efficient forgery focus for generalizable forgery image detection</article-title>. <source>arXiv</source>.</mixed-citation></ref>
<ref id="ref4"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Chen</surname><given-names>Y.</given-names></name> <name><surname>Zhang</surname><given-names>L.</given-names></name> <name><surname>Niu</surname><given-names>Y.</given-names></name></person-group> (<year>2024b</year>). <article-title>Guided and fused: efficient frozen CLIP-ViT with feature guidance and multi-stage feature fusion for generalizable deepfake detection</article-title>. <source>arXiv</source>.</mixed-citation></ref>
<ref id="ref5"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Chollet</surname><given-names>F.</given-names></name></person-group> (<year>2017</year>). <source>Xception: Deep learning with depthwise separable convolutions</source>: <publisher-name>CVPR</publisher-name>, <fpage>1800</fpage>&#x2013;<lpage>1807</lpage>.</mixed-citation></ref>
<ref id="ref6"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Chuk-Ke</surname><given-names>C.</given-names></name> <name><surname>Dong</surname><given-names>Y.</given-names></name></person-group> (<year>2024</year>). <article-title>Misinformation and literacies in the era of generative artificial intelligence: a brief overview and a call for future research</article-title>. <source>Emerg. Media</source> <volume>2</volume>, <fpage>70</fpage>&#x2013;<lpage>85</lpage>. doi: <pub-id pub-id-type="doi">10.1177/27523543241240285</pub-id></mixed-citation></ref>
<ref id="ref7"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Devlin</surname><given-names>J.</given-names></name> <name><surname>Chang</surname><given-names>M. W.</given-names></name> <name><surname>Lee</surname><given-names>K.</given-names></name> <name><surname>Toutanova</surname><given-names>K.</given-names></name></person-group> (<year>2019</year>). <article-title>BERT: pre-training of deep bidirectional transformers for language understanding</article-title>. <source>NAACL</source>. <volume>417</volume>, <fpage>4171</fpage>&#x2013;<lpage>4186</lpage>. doi: <pub-id pub-id-type="doi">10.18653/v1/N19-1423</pub-id></mixed-citation></ref>
<ref id="ref8"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Dolhansky</surname><given-names>B.</given-names></name> <name><surname>Howes</surname><given-names>R.</given-names></name> <name><surname>Pflaum</surname><given-names>B.</given-names></name> <name><surname>Baram</surname><given-names>N.</given-names></name> <name><surname>Ferrer</surname><given-names>C. C.</given-names></name></person-group> (<year>2019</year>). <article-title>The DeepFake detection challenge dataset</article-title>. <source>arXiv</source>.</mixed-citation></ref>
<ref id="ref9"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Donahue</surname><given-names>C.</given-names></name> <name><surname>McAuley</surname><given-names>J.</given-names></name> <name><surname>Puckette</surname><given-names>M.</given-names></name></person-group> (<year>2019</year>). <article-title>WaveGAN: spectral audio synthesis with generative adversarial networks</article-title>. <source>arXiv</source>.</mixed-citation></ref>
<ref id="ref10"><mixed-citation publication-type="book"><collab id="coll1">European Parliament and Council of the European Union (EU DSA)</collab> (<year>2022</year>). <source>Regulation (EU) 2022/2065 on a single market for digital services</source>: <publisher-name>Official Journal of the European Union</publisher-name>.</mixed-citation></ref>
<ref id="ref11"><mixed-citation publication-type="book"><collab id="coll2">European Parliament and Council of the European Union (EU AI Act)</collab> (<year>2024</year>). <source>Regulation (EU) 2024/1689 laying down harmonised rules on artificial intelligence</source>: <publisher-name>Official Journal of the European Union</publisher-name>.</mixed-citation></ref>
<ref id="ref12"><mixed-citation publication-type="other"><person-group person-group-type="author"><name><surname>Goodfellow</surname><given-names>I.</given-names></name> <name><surname>Pouget-Abadie</surname><given-names>J.</given-names></name> <name><surname>Mirza</surname><given-names>M.</given-names></name> <name><surname>Xu</surname><given-names>B.</given-names></name> <name><surname>Warde-Farley</surname><given-names>D.</given-names></name> <name><surname>Ozair</surname><given-names>S.</given-names></name> <etal/></person-group>. (<year>2014</year>). &#x201C;<article-title>Generative adversarial nets</article-title>&#x201D; in <source>Advances in neural information processing systems</source>. Eds. J. Pouget-Abadie, M. Mirza, B. Xu, D. Warde-Farley, S. Ozair, A. Courville, Y. Bengio, vol. <volume>27</volume>.</mixed-citation></ref>
<ref id="ref13"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Gowrisankar</surname><given-names>B.</given-names></name> <name><surname>Thing</surname><given-names>V. L. L.</given-names></name></person-group> (<year>2024</year>). <article-title>An adversarial attack approach for explainable AI evaluation on deepfake detection models</article-title>. <source>Comput. Secur.</source> <volume>139</volume>:<fpage>103684</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.cose.2023.103684</pub-id></mixed-citation></ref>
<ref id="ref14"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Green</surname><given-names>D. M.</given-names></name> <name><surname>Swets</surname><given-names>J. A.</given-names></name></person-group> (<year>1966</year>). <source>Signal detection theory and psychophysics</source>. <publisher-loc>New York</publisher-loc>: <publisher-name>Wiley</publisher-name>.</mixed-citation></ref>
<ref id="ref15"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Idiongo</surname><given-names>P.</given-names></name></person-group> (<year>2024</year>). <article-title>The impact of fake news on public trust in traditional media outlets</article-title>. <source>J. Commun.</source> <volume>5</volume>. doi: <pub-id pub-id-type="doi">10.47941/jcomm.1984</pub-id></mixed-citation></ref>
<ref id="ref16"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Karras</surname><given-names>T.</given-names></name> <name><surname>Laine</surname><given-names>S.</given-names></name> <name><surname>Aila</surname><given-names>T.</given-names></name></person-group> (<year>2021</year>). <source>Analyzing and improving the image quality of StyleGAN3</source>: <publisher-name>CVPR</publisher-name>, <fpage>8110</fpage>&#x2013;<lpage>8119</lpage>.</mixed-citation></ref>
<ref id="ref17"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kozik</surname><given-names>R.</given-names></name> <name><surname>Ficco</surname><given-names>M.</given-names></name> <name><surname>Pawlicka</surname><given-names>A.</given-names></name> <name><surname>Pawlicki</surname><given-names>M.</given-names></name> <name><surname>Palmieri</surname><given-names>F.</given-names></name> <name><surname>Chora&#x015B;</surname><given-names>M.</given-names></name></person-group> (<year>2024</year>). <article-title>When explainability turns into a threat&#x2014;using XAI to fool a fake news detection method</article-title>. <source>Comput. Secur.</source> <volume>137</volume>:<fpage>103599</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.cose.2023.103599</pub-id></mixed-citation></ref>
<ref id="ref18"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Kumar</surname><given-names>A.</given-names></name> <name><surname>Nguyen</surname><given-names>T.</given-names></name> <name><surname>Li</surname><given-names>J.</given-names></name></person-group> (<year>2022</year>). <source>WaveFake: a dataset for synthetic voice detection</source>: <publisher-name>Interspeech</publisher-name>, <fpage>2110</fpage>&#x2013;<lpage>2114</lpage>.</mixed-citation></ref>
<ref id="ref19"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Li</surname><given-names>Y.</given-names></name> <name><surname>Chang</surname><given-names>M. C.</given-names></name> <name><surname>Lyu</surname><given-names>S.</given-names></name></person-group> (<year>2020</year>). <source>Celeb-DF: A large-scale challenging dataset for deepfake forensics</source>: <publisher-name>CVPR</publisher-name>, <fpage>3207</fpage>&#x2013;<lpage>3216</lpage>.</mixed-citation></ref>
<ref id="ref20"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Loth</surname><given-names>A.</given-names></name> <name><surname>Kappes</surname><given-names>M.</given-names></name> <name><surname>Pahl</surname><given-names>M. O.</given-names></name></person-group> (<year>2024</year>). <article-title>Blessing or curse? A survey on the impact of generative AI on fake news</article-title>. <source>arXiv</source>.</mixed-citation></ref>
<ref id="ref21"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Mirsky</surname><given-names>Y.</given-names></name> <name><surname>Lee</surname><given-names>W.</given-names></name></person-group> (<year>2021</year>). <article-title>The creation and detection of deepfakes: a survey</article-title>. <source>ACM Comput. Surv.</source> <volume>54</volume>, <fpage>1</fpage>&#x2013;<lpage>41</lpage>. doi: <pub-id pub-id-type="doi">10.1145/3425780</pub-id></mixed-citation></ref>
<ref id="ref22"><mixed-citation publication-type="other"><collab id="coll3">National Sexual Violence Resource Center</collab> (<year>2024</year>). <source>Taylor Swift and the dangers of deepfake pornography</source>. Available online at: <ext-link xlink:href="https://www.nsvrc.org/blogs/feminism/taylor-swift-and-dangers-deepfake-pornography" ext-link-type="uri">https://www.nsvrc.org/blogs/feminism/taylor-swift-and-dangers-deepfake-pornography</ext-link></mixed-citation></ref>
<ref id="ref23"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Nguyen</surname><given-names>H.</given-names></name> <name><surname>Fang</surname><given-names>F.</given-names></name> <name><surname>Yamagishi</surname><given-names>J.</given-names></name> <name><surname>Echizen</surname><given-names>I.</given-names></name></person-group> (<year>2020</year>). <article-title>Deep learning for deepfake detection: analysis and challenges</article-title>. <source>IEEE Trans. Inf. Forensics Secur.</source> <volume>15</volume>, <fpage>1879</fpage>&#x2013;<lpage>1893</lpage>.</mixed-citation></ref>
<ref id="ref24"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Nguyen</surname><given-names>T.</given-names></name> <name><surname>Nguyen</surname><given-names>C. M.</given-names></name> <name><surname>Nguyen</surname><given-names>D.</given-names></name> <name><surname>Nahavandi</surname><given-names>S.</given-names></name></person-group> (<year>2022</year>). <article-title>Fakevoices: dataset for synthetic voice detection</article-title>. <source>IEEE Access</source> <volume>10</volume>, <fpage>108046</fpage>&#x2013;<lpage>108066</lpage>. doi: <pub-id pub-id-type="doi">10.1109/ACCESS.2022.3211069</pub-id></mixed-citation></ref>
<ref id="ref25"><mixed-citation publication-type="other"><person-group person-group-type="author"><name><surname>Pearson</surname><given-names>J.</given-names></name> <name><surname>Zinets</surname><given-names>N.</given-names></name></person-group> (<year>2022</year>). <source>Deepfake footage purports to show Ukrainian president capitulating</source>. Available online at: <ext-link xlink:href="https://www.reuters.com/world/europe/deepfake-footage-purports-show-ukrainian-president-capitulating-2022-03-16/" ext-link-type="uri">https://www.reuters.com/world/europe/deepfake-footage-purports-show-ukrainian-president-capitulating-2022-03-16/</ext-link></mixed-citation></ref>
<ref id="ref26"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Rossler</surname><given-names>A.</given-names></name> <name><surname>Cozzolino</surname><given-names>D.</given-names></name> <name><surname>Verdoliva</surname><given-names>L.</given-names></name> <name><surname>Riess</surname><given-names>C.</given-names></name> <name><surname>Thies</surname><given-names>J.</given-names></name> <name><surname>Nie&#x00DF;ner</surname><given-names>M.</given-names></name></person-group> (<year>2019</year>). <source>FaceForensics++: Learning to detect manipulated facial images</source>: <publisher-name>ICCV</publisher-name>, <fpage>1</fpage>&#x2013;<lpage>11</lpage>.</mixed-citation></ref>
<ref id="ref27"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Selvaraju</surname><given-names>R. R.</given-names></name> <name><surname>Cogswell</surname><given-names>M.</given-names></name> <name><surname>Das</surname><given-names>A.</given-names></name> <name><surname>Vedantam</surname><given-names>R.</given-names></name> <name><surname>Parikh</surname><given-names>D.</given-names></name> <name><surname>Batra</surname><given-names>D.</given-names></name></person-group> (<year>2017</year>). <source>Grad-CAM: Visual explanations from deep networks via gradient-based localization</source>: <publisher-name>ICCV</publisher-name>, <fpage>618</fpage>&#x2013;<lpage>626</lpage>.</mixed-citation></ref>
<ref id="ref28"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Shu</surname><given-names>K.</given-names></name> <name><surname>Mahudeswaran</surname><given-names>D.</given-names></name> <name><surname>Wang</surname><given-names>S.</given-names></name> <name><surname>Lee</surname><given-names>D.</given-names></name> <name><surname>Liu</surname><given-names>H.</given-names></name></person-group> (<year>2020</year>). <article-title>FakeNewsNet: a data repository with news content, social context and spatiotemporal information for fake news research</article-title>. <source>Big Data</source> <volume>8</volume>, <fpage>171</fpage>&#x2013;<lpage>188</lpage>. doi: <pub-id pub-id-type="doi">10.1089/big.2020.0062</pub-id>, <pub-id pub-id-type="pmid">32491943</pub-id></mixed-citation></ref>
<ref id="ref29"><mixed-citation publication-type="other"><person-group person-group-type="author"><name><surname>Siarohin</surname><given-names>A.</given-names></name> <name><surname>Sangineto</surname><given-names>E.</given-names></name> <name><surname>Lathuiliere</surname><given-names>S.</given-names></name> <name><surname>Sebe</surname><given-names>N.</given-names></name></person-group> (<year>2019</year>). &#x201C;<article-title>First order motion model for image animation</article-title>&#x201D; in <source>Advances in neural information processing systems</source>, vol. <volume>32</volume>, <fpage>7137</fpage>&#x2013;<lpage>7147</lpage>.</mixed-citation></ref>
<ref id="ref30"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Sweller</surname><given-names>J.</given-names></name></person-group> (<year>1988</year>). <article-title>Cognitive load during problem solving: effects on learning</article-title>. <source>Cogn. Sci.</source> <volume>12</volume>, <fpage>257</fpage>&#x2013;<lpage>285</lpage>. doi: <pub-id pub-id-type="doi">10.1207/s15516709cog1202_4</pub-id></mixed-citation></ref>
<ref id="ref31"><mixed-citation publication-type="other"><person-group person-group-type="author"><name><surname>Vaswani</surname><given-names>A.</given-names></name> <name><surname>Shazeer</surname><given-names>N.</given-names></name> <name><surname>Parmar</surname><given-names>N.</given-names></name> <name><surname>Uszkoreit</surname><given-names>J.</given-names></name> <name><surname>Jones</surname><given-names>L.</given-names></name> <name><surname>Gomez</surname><given-names>A. N.</given-names></name> <etal/></person-group>. (<year>2017</year>). &#x201C;<article-title>Attention is all you need</article-title>&#x201D; in <source>Advances in neural information processing systems</source>, vol. <volume>30</volume>.</mixed-citation></ref>
<ref id="ref9001"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Verdoliva</surname><given-names>L.</given-names></name> <etal/></person-group>. (<year>2019</year>). <article-title>Media forensics and deepfake detection: An overview</article-title>. <source>IEEE Journal of Selected Topics in Signal Processing</source> <volume>14</volume>, <fpage>910</fpage>&#x2013;<lpage>932</lpage>.</mixed-citation></ref>
<ref id="ref32"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname><given-names>Z.</given-names></name> <name><surname>Cheng</surname><given-names>Z.</given-names></name> <name><surname>Xiong</surname><given-names>J.</given-names></name> <name><surname>Xu</surname><given-names>X.</given-names></name> <name><surname>Li</surname><given-names>T.</given-names></name> <name><surname>Veeravalli</surname><given-names>B.</given-names></name> <etal/></person-group>. (<year>2024</year>). <article-title>A timely survey on vision transformer for deepfake detection</article-title>. <source>arXiv</source>.</mixed-citation></ref>
<ref id="ref33"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Yermakov</surname><given-names>A.</given-names></name> <name><surname>Cech</surname><given-names>J.</given-names></name> <name><surname>Matas</surname><given-names>J.</given-names></name></person-group> (<year>2025</year>). <article-title>Unlocking the hidden potential of CLIP in generalizable deepfake detection</article-title>. <source>arXiv</source>.</mixed-citation></ref>
<ref id="ref9002"><mixed-citation publication-type="other"><person-group person-group-type="author"><name><surname>Zhou</surname><given-names>X.</given-names></name> <name><surname>Zafarani</surname><given-names>R.</given-names></name> <name><surname>Shu</surname><given-names>K.</given-names></name> <name><surname>Liu</surname><given-names>H.</given-names></name></person-group> (<year>2019</year>). <article-title>Fake news: Fundamental theories, detection strategies, and challenges</article-title>. <source>ACM Transactions on Information Systems</source> <volume>39</volume>, <fpage>1</fpage>&#x2013;<lpage>40</lpage>.</mixed-citation></ref>
<ref id="ref34"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname><given-names>L.</given-names></name> <name><surname>Li</surname><given-names>H.</given-names></name> <name><surname>Wang</surname><given-names>J.</given-names></name></person-group> (<year>2024</year>). <article-title>Vision transformers in deepfake detection: accuracy, generalization, and practical benchmarks</article-title>. <source>IEEE Trans. Inf. Forensics Secur.</source> <volume>19</volume>, <fpage>4321</fpage>&#x2013;<lpage>4338</lpage>.</mixed-citation></ref>
<ref id="ref35"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zhou</surname><given-names>X.</given-names></name> <name><surname>Zafarani</surname><given-names>R.</given-names></name> <name><surname>Shu</surname><given-names>K.</given-names></name> <name><surname>Liu</surname><given-names>H.</given-names></name></person-group> (<year>2021</year>). <article-title>Fake news: fundamental theories, detection strategies, and challenges</article-title>. <source>ACM Trans. Inf. Syst.</source> <volume>53</volume>, <fpage>1</fpage>&#x2013;<lpage>40</lpage>. doi: <pub-id pub-id-type="doi">10.1145/3395046</pub-id></mixed-citation></ref>
</ref-list>
<fn-group>
<fn fn-type="custom" custom-type="edited-by" id="fn0001">
<p>Edited by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3078615/overview">Xin Wang</ext-link>, University at Albany, United States</p>
</fn>
<fn fn-type="custom" custom-type="reviewed-by" id="fn0002">
<p>Reviewed by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3246631/overview">Gregory Gondwe</ext-link>, California State University, San Bernardino, United States</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3281937/overview">Radhika Baskar</ext-link>, Saveetha University, India</p>
</fn>
</fn-group>
</back>
</article>