<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Imaging</journal-id>
<journal-title>Frontiers in Imaging</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Imaging</abbrev-journal-title>
<issn pub-type="epub">2813-3315</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fimag.2025.1504551</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Imaging</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>High-quality deepfakes have a heart!</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" equal-contrib="yes">
<name><surname>Seibold</surname> <given-names>Clemens</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="author-notes" rid="fn001"><sup>&#x02020;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/2876470/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author" equal-contrib="yes">
<name><surname>Wisotzky</surname> <given-names>Eric L.</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="author-notes" rid="fn001"><sup>&#x02020;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/2876516/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Beckmann</surname> <given-names>Arian</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Kossack</surname> <given-names>Benjamin</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Hilsmann</surname> <given-names>Anna</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/1412299/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Eisert</surname> <given-names>Peter</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x0002A;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/2857023/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
</contrib-group>
<aff id="aff1"><sup>1</sup><institution>Computer Vision &#x00026; Graphics, Vision &#x00026; Imaging Technologies, Fraunhofer Heinrich-Hertz-Institute HHI</institution>, <addr-line>Berlin</addr-line>, <country>Germany</country></aff>
<aff id="aff2"><sup>2</sup><institution>Visual Computing, Department of Computer Science, Humboldt University</institution>, <addr-line>Berlin</addr-line>, <country>Germany</country></aff>
<author-notes>
<fn fn-type="edited-by"><p>Edited by: Matteo Ferrara, University of Bologna, Italy</p></fn>
<fn fn-type="edited-by"><p>Reviewed by: Deepayan Bhowmik, Newcastle University, United Kingdom</p>
<p>Giuseppe Boccignone, University of Milan, Italy</p></fn>
<corresp id="c001">&#x0002A;Correspondence: Peter Eisert <email>peter.eisert&#x00040;hhi.fraunhofer.de</email></corresp>
<fn fn-type="equal" id="fn001"><p>&#x02020;These authors have contributed equally to this work</p></fn></author-notes>
<pub-date pub-type="epub">
<day>30</day>
<month>04</month>
<year>2025</year>
</pub-date>
<pub-date pub-type="collection">
<year>2025</year>
</pub-date>
<volume>4</volume>
<elocation-id>1504551</elocation-id>
<history>
<date date-type="received">
<day>30</day>
<month>09</month>
<year>2024</year>
</date>
<date date-type="accepted">
<day>25</day>
<month>02</month>
<year>2025</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x000A9; 2025 Seibold, Wisotzky, Beckmann, Kossack, Hilsmann and Eisert.</copyright-statement>
<copyright-year>2025</copyright-year>
<copyright-holder>Seibold, Wisotzky, Beckmann, Kossack, Hilsmann and Eisert</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p></license>
</permissions>
<abstract>
<sec>
<title>Introduction</title>
<p>Deepfakes have become ubiquitous in our modern society, with both their quantity and quality increasing. The current evolution of image generation techniques makes the detection of manipulated content through visual inspection increasingly difficult. This challenge has motivated researchers to analyze heart-beat-related signals to distinguish deepfakes from genuine videos.</p></sec>
<sec>
<title>Methods</title>
<p>In this study, we analyze deepfake videos of faces generated with novel methods regarding their heart-beat-related signals using remote photoplethysmography (rPPG). The rPPG signal describes the blood flow, or rather the local blood volume changes, and thus reflects the pulse signal. For our analysis, we present a pipeline that extracts rPPG signals and investigate the origin of the extracted signals in deepfake videos using correlation analyses. To validate our rPPG extraction pipeline and analyze rPPG signals of deepfakes, we captured a dataset of facial videos synchronized with an electrocardiogram (ECG) as a ground-truth pulse signal. Additionally, we generated high-quality deepfakes and incorporated publicly available datasets into our evaluation.</p></sec>
<sec>
<title>Results</title>
<p>We prove that our heart rate extraction pipeline produces valid estimates for genuine videos by comparing the estimated results with ECG reference data. Our high-quality deepfakes exhibit valid heart rates and their rPPG signals show a significant correlation with the corresponding driver video that was used to generate them. Furthermore, we show that this also holds for deepfakes from a publicly available dataset.</p></sec>
<sec>
<title>Discussion</title>
<p>Previous research assumed that the subtle heart-beat-related signals get lost during the deepfake generation process, making them useful for deepfake detection. However, this paper shows that this assumption is no longer valid for current deepfake methods. Nevertheless, preliminary experiments indicate that analyzing the spatial distribution of blood flow regarding its plausibility can still help to detect high-quality deepfakes.</p></sec></abstract>
<kwd-group>
<kwd>deepfakes</kwd>
<kwd>video forensics</kwd>
<kwd>remote photoplethysmography (rPPG)</kwd>
<kwd>biological signals</kwd>
<kwd>remote heart rate estimation</kwd>
<kwd>imaging photoplethysmography (IPPG)</kwd>
</kwd-group>
<counts>
<fig-count count="13"/>
<table-count count="1"/>
<equation-count count="0"/>
<ref-count count="68"/>
<page-count count="15"/>
<word-count count="9583"/>
</counts>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Imaging Applications</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="s1">
<title>1 Introduction</title>
<p>In recent years, deepfakes have emerged as a prominent and concerning phenomenon. Notably, political figures such as Barack Obama, Donald Trump, and Wladimir Klitschko have become targets, drawing significant public attention. The societal and ethical implications of deepfake technology have become increasingly evident. Initial examples were characterized by visible artifacts, particularly when static images were synthesized into video sequences (DeepFakes, <xref ref-type="bibr" rid="B14">2019</xref>). However, advancements in image generation techniques have significantly improved the realism of these manipulations, making it increasingly difficult to detect alterations through visual inspection alone (Ramesh et al., <xref ref-type="bibr" rid="B44">2021</xref>; Karras et al., <xref ref-type="bibr" rid="B26">2020</xref>).</p>
<p>Modern state-of-the-art deepfake detection approaches rely on features learned by convolutional filters sensitive to inconsistencies in both the spatial and the temporal domain (Wang et al., <xref ref-type="bibr" rid="B58">2023</xref>; Haliassos et al., <xref ref-type="bibr" rid="B22">2022</xref>). Despite receiving outstanding performance results on benchmark datasets, these techniques suffer from a lack of explainability. This weakness becomes critical when human supervisors of video-identification systems face potential misclassifications by these detectors, leading to challenges due to their opaque decision-making processes.</p>
<p>However, contemporary deepfake generation techniques, while increasingly sophisticated in their ability to visually mimic real individuals, do not explicitly model physiological signals present in genuine videos. The cardiovascular pulse, inducing individual pulsating blood flow in human skin, causes subtle color variations that are assumed to be plausible in genuine videos only. This inadequacy has been employed by several researchers for leveraging locally resolved signals, such as those captured by techniques like remote photoplethysmography (rPPG), which capture these subtle variations (Yu et al., <xref ref-type="bibr" rid="B63">2021a</xref>). For example, rPPG can extract physiological information, such as pulse rate, from a recorded video, providing valuable data for deepfake detection (Kossack et al., <xref ref-type="bibr" rid="B29">2019a</xref>). Traditional approaches have primarily focused on extracting a global pulse signal from an entire video sequence (Yu et al., <xref ref-type="bibr" rid="B63">2021a</xref>). Detectors leveraging this global rPPG signal have demonstrated promising results concluding that deepfakes do not include such physiologically induced signals. However, contrary findings indicate that deepfakes can indeed exhibit a one-dimensional signal resembling a heart rate (HR), further complicating the detection process (Fernandes et al., <xref ref-type="bibr" rid="B19">2019</xref>). Additionally, recent advancements in synthetic face generation explicitly incorporate pulsation signals (Ciftci and Yin, <xref ref-type="bibr" rid="B9">2019</xref>) or enable the manipulation of physiological signals in facial videos (Chen et al., <xref ref-type="bibr" rid="B4">2022</xref>), thus blurring the distinction between real and fake rPPG signals. 
It is also important to note that rPPG-based deepfake detectors may inadvertently rely on non-physiological cues, such as background artifacts, noise, or comparisons between image pairs, rather than purely detecting pulse-related color changes in the skin (&#x000C7;ift&#x000E7;i et al., <xref ref-type="bibr" rid="B6">2024</xref>; Qi et al., <xref ref-type="bibr" rid="B43">2020</xref>; Ciftci et al., <xref ref-type="bibr" rid="B8">2020b</xref>,<xref ref-type="bibr" rid="B7">a</xref>). For instance, Ciftci et al. (<xref ref-type="bibr" rid="B7">2020a</xref>) demonstrated that filtering rPPG signals with a bandpass filter between 4.68 Hz and 15 Hz (i.e., 180 bpm to 900 bpm) can more effectively distinguish real videos from deepfakes compared to filtering signals based on human heart rate frequencies. This highlights a critical limitation in current deepfake detection approaches that rely on rPPG signals, as they often fail to account for the fact that deepfakes can still produce realistic HR signals.</p>
<p>In this article, we demonstrate that HR signals can indeed be derived from deepfake videos, and, more importantly, these signals closely match those of the original driving video, which defines the head motion and facial expressions. This finding challenges the assumption that deepfakes inherently lack valid physiological signals and emphasizes the need for detection methods that go beyond simple pulse detection. Our contribution provides new insights into the physiological consistency of deepfakes, raising the bar for future detection techniques.</p>
<p>To validate our findings, we propose a pipeline that extracts the pulse rate from videos while incorporating motion compensation and background noise reduction for enhanced robustness. To further substantiate our approach, we collected a dataset consisting of video recordings synchronized with electrocardiogram (ECG) data. Our experiments demonstrate that the HRs extracted from the videos using our pipeline closely align with those from the ECG signal, confirming the accuracy of the rPPG-based extraction process. To explore the origin of the heart beats detected in the rPPG signals of the deepfake videos, we generated a set of deepfakes based on these original video recordings. In our experiments, we show that the HRs derived from the deepfakes significantly overlap with those of the source (or &#x0201C;driver&#x0201D;) videos, highlighting that deepfake HR signals are not random but rather reflect the physiological information present in the driving video. Furthermore, we extend our analysis to older generations of deepfakes by utilizing the publicly available KoDF dataset (Kwon et al., <xref ref-type="bibr" rid="B33">2021</xref>), where we similarly demonstrate the presence of valid HR signals. These results emphasize that even older deepfake methods can carry realistic physiological signals, further complicating traditional detection methods.</p>
<p>The remainder of this paper is organized as follows: In Section 2, we provide an overview of existing work on deepfake generation, deepfake detection, and rPPG. Our proposed method is presented in Section 3. Section 4 outlines the experiments conducted, along with the presentation of our used dataset and results. Thereafter, we discuss our method&#x00027;s limitations and conclude our paper with a summary of our results and findings in Section 6.</p></sec>
<sec id="s2">
<title>2 Related work</title>
<sec>
<title>2.1 Deepfakes</title>
<p>Deepfakes represent a category of manipulated videos and audio files created through deep learning techniques. These manipulations involve altering faces, modifying gestures and facial expressions, and adjusting physical appearances and mouth movements to align with manipulated audio content. The widespread popularity of deepfakes is evident in various applications, with common usage found in AI-based face swapping techniques. Notably, there is a surge in popularity with smartphone apps that facilitate seamless face swapping, demonstrating the accessibility and user-friendly nature of these technologies. These apps leverage advanced voice synthesis, facial synthesis, and video generation methods to produce convincing and often deceptive content.</p>
<p>The development of GANs (Goodfellow et al., <xref ref-type="bibr" rid="B20">2014</xref>), VAEs (Kingma and Welling, <xref ref-type="bibr" rid="B27">2014</xref>) and, lately, diffusion models (Ho et al., <xref ref-type="bibr" rid="B24">2020</xref>) enabled various possibilities for the forgery of digital content. The seminal deepfake generation method utilizes a dual-decoder autoencoder, with each decoder dedicated to one of the set target identities for swapping (DeepFakes, <xref ref-type="bibr" rid="B14">2019</xref>). Subsequently, this foundational method has been enhanced by the integration of adversarial training, application of more sophisticated convolutional neural networks or advanced blending techniques (Perov et al., <xref ref-type="bibr" rid="B40">2020</xref>; Beckmann et al., <xref ref-type="bibr" rid="B1">2023</xref>). Numerous methods have been developed for manipulating face expressions and appearances, with modern approaches capable of synthesizing a face with a given appearance and an expression of choice in the one-shot scenario (Drobyshev et al., <xref ref-type="bibr" rid="B17">2022</xref>; Nirkin et al., <xref ref-type="bibr" rid="B39">2022</xref>; Wang et al., <xref ref-type="bibr" rid="B57">2021b</xref>,<xref ref-type="bibr" rid="B55">a</xref>). Recently, several approaches leverage denoising diffusion models for the generation and manipulation of high-quality face images (Ho et al., <xref ref-type="bibr" rid="B24">2020</xref>; Zhao et al., <xref ref-type="bibr" rid="B68">2023</xref>; Ding et al., <xref ref-type="bibr" rid="B15">2023</xref>; Huang et al., <xref ref-type="bibr" rid="B25">2023</xref>). This continuous evolution of deepfake technologies poses challenges for content authentication and necessitates the development of robust detection mechanisms.</p>
<p>Early approaches to detect deepfakes exploit physical inconsistencies in the behaviour and appearance of the head. Li et al. (<xref ref-type="bibr" rid="B36">2018</xref>) exploit the fact that early deepfake generation approaches merely use training images with opened eyes, by utilizing facial landmarks to identify the eye-blinking behaviour in videos. In Yang et al. (<xref ref-type="bibr" rid="B61">2019</xref>), the authors take advantage of the fact that the process of cropping, aligning and inserting a face onto another head leads to a misalignment of the attributes in the inner face and the head pose. Other approaches aim to manually generate fake training images by simulating the artifacts introduced by warping or blending operations in genuine images (Li and Lyu, <xref ref-type="bibr" rid="B37">2018</xref>; Li et al., <xref ref-type="bibr" rid="B34">2020</xref>). With the rapid increase of quality in visual fake content, research focus shifted from more obvious and explainable artifacts to high dimensional complex convolutional feature maps. In the foundational FaceForensics&#x0002B;&#x0002B; (FF&#x0002B;&#x0002B;) paper (R&#x000F6;ssler et al., <xref ref-type="bibr" rid="B45">2019</xref>), the authors propose a benchmark dataset for the evaluation of deepfake detectors and analyze the detection performance of several CNN based detectors.</p>
<p>While recent and ongoing works on generating better deepfakes focus mostly on making them look more realistic and appealing, the coherence of biological rPPG signals is not considered. This motivated several researchers to work on the promising line of fake detection methods, analyzing the coherence of biological rPPG signals in the spatial and temporal domain and thereby increasing the explainability of the detection process (Ciftci et al., <xref ref-type="bibr" rid="B7">2020a</xref>; Hernandez-Ortega et al., <xref ref-type="bibr" rid="B23">2020</xref>). FakeCatcher (Ciftci et al., <xref ref-type="bibr" rid="B7">2020a</xref>) extracts rPPG signals from three face regions which are subject to various signal transformations. Moreover, the extracted signals are consolidated into image-like PPG maps, which represent the temporal and spatial distribution of biological signals across the analyzed facial regions. Those signal maps are then fed to a CNN for classification. DeepFakeON-Phys (Hernandez-Ortega et al., <xref ref-type="bibr" rid="B23">2020</xref>) adapts the heart rate estimation method proposed in Chen and McDuff (<xref ref-type="bibr" rid="B5">2018</xref>) and modifies it through the usage of a two branch convolutional attention network to assess both appearance and motion related information for deepfake video detection. In Wu et al. (<xref ref-type="bibr" rid="B59">2023</xref>), the authors propose the usage of a temporal transformer in combination with a mask-guided local attention module in order to capture spatial and temporal inconsistencies over long distances in the used PPG maps. Detection methods that specifically pay attention to the heart rate (HR) information extracted from rPPG were proposed in Ciftci et al. (<xref ref-type="bibr" rid="B8">2020b</xref>) and Boccignone et al. (<xref ref-type="bibr" rid="B3">2022</xref>).</p></sec>
<sec>
<title>2.2 rPPG</title>
<p>The extraction of human vital signs from face videos is a rapidly growing and emerging field with numerous recent publications (Poh et al., <xref ref-type="bibr" rid="B42">2010</xref>; De Haan and Jeanne, <xref ref-type="bibr" rid="B13">2013</xref>; Wang et al., <xref ref-type="bibr" rid="B56">2017</xref>; Tulyakov et al., <xref ref-type="bibr" rid="B53">2016</xref>). The medical measurement of the HR typically relies on the optical measuring technique known as photoplethysmography (PPG) (Zaunseder et al., <xref ref-type="bibr" rid="B67">2018</xref>). This technique capitalizes on human blood circulation, where blood&#x00027;s light absorption exceeds that of surrounding tissue. Consequently, variations in blood volume influence light transmission or reflectance accordingly (Tamura et al., <xref ref-type="bibr" rid="B51">2014</xref>). A PPG sensor, commonly used for measuring the human pulse rate, is placed directly on the skin to optically detect changes in blood volume (Tamura et al., <xref ref-type="bibr" rid="B51">2014</xref>). Remote photoplethysmography employs the same principle, allowing for contactless HR measurements using a standard RGB camera (Zaunseder et al., <xref ref-type="bibr" rid="B67">2018</xref>). In this technique, the continuous change in skin color, resulting from blood flow through the circulatory system, is analyzed by rPPG methods to determine HR (Poh et al., <xref ref-type="bibr" rid="B42">2010</xref>; De Haan and Jeanne, <xref ref-type="bibr" rid="B13">2013</xref>; Wang et al., <xref ref-type="bibr" rid="B56">2017</xref>; Tulyakov et al., <xref ref-type="bibr" rid="B53">2016</xref>).</p>
<p>To robustly extract an rPPG signal, irrespective of the subject&#x00027;s skin tone and non-white illumination conditions, the Plane-Orthogonal-to-Skin Transformation (POS) (Wang et al., <xref ref-type="bibr" rid="B56">2017</xref>) of the rPPG signal has been developed for pre-processing the input video sequence.</p>
<p>Given that global model-based methods may be susceptible to noise, compression artifacts, or masking, recent rPPG-related publications leverage deep neural networks for HR extraction from video data (Chen and McDuff, <xref ref-type="bibr" rid="B5">2018</xref>; Yu et al., <xref ref-type="bibr" rid="B66">2019</xref>, <xref ref-type="bibr" rid="B65">2020</xref>). Yang et al. (<xref ref-type="bibr" rid="B62">2021</xref>) conducted a comparative study of three neural networks [Deepphys (Chen and McDuff, <xref ref-type="bibr" rid="B5">2018</xref>), rPPGNet (Yu et al., <xref ref-type="bibr" rid="B66">2019</xref>), and Physnet (Yu et al., <xref ref-type="bibr" rid="B65">2020</xref>)] against model-based approaches [independent component analysis (ICA) (Poh et al., <xref ref-type="bibr" rid="B42">2010</xref>), CHROM (De Haan and Jeanne, <xref ref-type="bibr" rid="B13">2013</xref>), and POS (Wang et al., <xref ref-type="bibr" rid="B56">2017</xref>)] using the publicly available UBFC-rPPG dataset (Bobbia et al., <xref ref-type="bibr" rid="B2">2019</xref>). In these experiments, under constant lighting conditions, deep-learning-based approaches outperformed model-based ones. However, model-based approaches (ICA, CHROM, and POS) exhibited more accurate and robust results in varying lighting conditions (Yang et al., <xref ref-type="bibr" rid="B62">2021</xref>).</p>
<p>The locally analyzed rPPG signal extracted from videos is visualized based on amplitude, velocity, or signal-to-noise ratio (SNR) maps (Kossack et al., <xref ref-type="bibr" rid="B31">2019b</xref>; Yang Jun, Guthier B, <xref ref-type="bibr" rid="B60">2015</xref>; Zaunseder et al., <xref ref-type="bibr" rid="B67">2018</xref>). Particularly, blood flow in facial videos has been scrutinized (Yang Jun, Guthier B, <xref ref-type="bibr" rid="B60">2015</xref>; Kossack et al., <xref ref-type="bibr" rid="B31">2019b</xref>,<xref ref-type="bibr" rid="B29">a</xref>), where blood flow velocity is calculated from the relative phase shift of the frequency component corresponding to HR in the frequency domain. These methods assume that the difference between neighboring phase values directly corresponds to the velocity at that point.</p>
<p>Beyond medical applications (Schraven et al., <xref ref-type="bibr" rid="B46">2023</xref>; Kossack et al., <xref ref-type="bibr" rid="B32">2023</xref>), rPPG analysis has also been employed to detect presentation attacks on authentication systems (Kossack et al., <xref ref-type="bibr" rid="B28">2022</xref>). In multiple studies, rPPG methods are applied to facial videos to discern whether the face is covered by a mask (Li et al., <xref ref-type="bibr" rid="B35">2017</xref>; Kossack et al., <xref ref-type="bibr" rid="B29">2019a</xref>; Yu et al., <xref ref-type="bibr" rid="B64">2021b</xref>). However, deepfake detection proposes another challenge, and as described in Section 2.1, discrepancies between images resulting from deepfake generation disrupt the natural color variations in the skin induced by the heartbeat.</p></sec></sec>
<sec id="s3">
<title>3 Methods and data</title>
<p>We propose a pipeline for extracting and analyzing physiologically related signals, specifically focusing on those associated with the cardiovascular cycle, which typically occur in the frequency range of 0.7 Hz to 3 Hz. To ensure the accurate detection of these signals, the pipeline requires an input video showing the face of a single person for at least 10 s. The proposed pipeline incorporates motion compensation techniques and accounts for frequencies introduced by external factors, such as compression or camera properties, to ensure a robust extraction of physiologically related signals. The details of these components are discussed in the following two sections.</p>
<p>Following the components of our pipeline, we describe the data used for the experiments. This includes the dataset of videos and ECG data that we captured, the method used to generate deepfakes and finally an external dataset that was used for evaluation.</p>
<sec>
<title>3.1 Reference face and temporal alignment</title>
<p>We focus on global rPPG signals over time, specifically the averaged color changes across various spatial positions on the facial surface. To ensure accurate signal extraction, it is essential to compensate for any movements made by the person in the video. To achieve this, each frame of the input video is aligned with a reference face by detecting facial landmarks using MediaPipe (Google, <xref ref-type="bibr" rid="B21">2022</xref>). These landmarks form the basis for Delaunay triangulation (de Berg et al., <xref ref-type="bibr" rid="B12">2008</xref>), generating a 2D mesh over the facial region. The reference 2D mesh consists of 918 triangles, and serves as a foundation for tracking and stabilizing the facial movements across the video. While this approach is easy to implement, it does not consider motion blur, and the accuracy of the registration is constrained by the facial landmarks. To further enhance this important process, we will extend our method in the future by using the approach proposed by Seibold et al. (<xref ref-type="bibr" rid="B47">2017</xref>) for removing motion blur and Seibold et al. (<xref ref-type="bibr" rid="B50">2024</xref>) for a pixel-wise registration.</p>
<p>In each input frame, we track the detected facial landmarks and use the 2D mesh to warp each triangle to its corresponding reference position. This warping process aligns the facial features from the input video to the reference face, as illustrated in <xref ref-type="fig" rid="F1">Figure 1</xref>. The outcome is a motion-compensated image sequence that serves as the foundation for our subsequent analysis, ensuring that the extracted rPPG signals are not affected by facial movements.</p>
<fig id="F1" position="float">
<label>Figure 1</label>
<caption><p>Illustration of the temporal alignment process. A reference mesh, composed of 918 triangles formed from MediaPipe facial landmarks, <bold>(center)</bold> is used to spatially warp each frame from the video sequence <bold>(top)</bold> to a reference position <bold>(bottom)</bold>.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fimag-04-1504551-g0001.tif"/>
</fig></sec>
<sec>
<title>3.2 Encoding of heart rate related features</title>
<p>To extract heart rates, we perform a global analysis of the entire video to obtain a single robust reference rPPG signal, which is associated with the subject&#x00027;s pulse signal (Kossack et al., <xref ref-type="bibr" rid="B30">2021</xref>). For rPPG calculation, we apply the Plane-Orthogonal-to-Skin (POS) transformation (Wang et al., <xref ref-type="bibr" rid="B56">2017</xref>) on a 10 s window, i.e., including 300 frames. A preliminary analysis about the optimal window length showed a standard deviation of the differences between extracted HR and ground truth for the 10 s window of 1.39 bpm, increasing to 3.38 bpm, 3.76 bpm, and 4.15 bpm for 8 s, 6 s, and 5 s windows, respectively. The entire video is processed by sliding this window over the video duration with a step size of one frame, ensuring continuous analysis and accurate pulse signal extraction.</p>
<p>After processing the entire video sequence, the output signal is normalized and filtered with a fifth-order Butterworth digital band-pass filter with a frequency range between 0.7 Hz and 3.0 Hz (corresponding to a HR of 42 bpm to 180 bpm). This filtering produces the final rPPG signals. For each time step, we then transform the rPPG signal from the time domain to the frequency domain using a fast Fourier transform (FFT), mapping the signal magnitudes across all time steps for further analysis.</p>
<p>This processing is applied to the entire face region to capture physiologically relevant signals, as well as to two homogeneous square regions in the background to collect image noise information, see <xref ref-type="fig" rid="F2">Figure 2</xref>. The two background regions are averaged during transformation, resulting in a single FFT map for the background. The two FFT maps&#x02014;one from the face and one from the background&#x02014;are then subtracted based on their intensities to generate a background-free FFT map that focuses on the physiologically induced signals.</p>
<fig id="F2" position="float">
<label>Figure 2</label>
<caption><p>Heart rate extraction pipeline. From the registered video sequence, we calculate a global rPPG signal of the face as well as the background. Following, we determine the magnitudes in frequency space for each signal over time. To robustly extract the heart rate both signals are &#x0201C;subtracted&#x0201D;.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fimag-04-1504551-g0002.tif"/>
</fig>
<p>To determine the HR signal over the entire video duration, the highest magnitude in the subtraction FFT map is identified, and based on this peak, the HR for each time instant is extracted through a single optimization step.</p></sec>
<sec>
<title>3.3 Captured dataset</title>
<p>Given that many of the most popular datasets for deepfake analysis are several years old and deepfake generation techniques have advanced significantly, we created a fully controlled, high-quality dataset to ensure optimal compression and realism. To validate the functionality of our method, we collected recordings of twelve individuals, representing diverse genders, ages and ethnic background in a controlled studio environment. The recordings were captured with participants positioned in front of a white background, under uniform lighting provided by white LED illumination. For each participant, 10-20 frontal view recordings were taken, with the head centered throughout the video. During each recording, participants were asked to perform a range of activities, including talking, reading, and interacting with the recording supervisor. All participants provided written consent for the use of their recordings in this experiment and its subsequent publication.</p>
<p>We used an industry RGB camera<xref ref-type="fn" rid="fn0001"><sup>1</sup></xref> to capture the video recordings. The recordings vary in length, ranging from 10 s up to several minutes, with a frame-rate of 25 fps and a resolution of 2448 &#x000D7; 2048 pixels. In addition to the RGB video, we measured the ECG and PPG of selected subjects. These physiological signals were used to calculate the heart rate (HR) as ground truth for our analysis. Selected frames from our dataset are shown in <xref ref-type="fig" rid="F3">Figure 3A</xref>.</p>
<fig id="F3" position="float">
<label>Figure 3</label>
<caption><p>A subset of our recorded data representing six participants <bold>(A)</bold> and a correlating subset of generated deepfakes <bold>(B)</bold>.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fimag-04-1504551-g0003.tif"/>
</fig></sec>
<sec>
<title>3.4 Creation of high-quality deepfakes</title>
<p>Publicly available datasets have not kept pace with the rapid development in deepfake technology as new techniques and architectures continuously emerge, leading to increasingly realistic and higher-quality deepfakes. This progress likely impacts previous assumptions about deepfakes, particularly the notion that they do not contain HR-related signals. As deepfake generation methods improve, it becomes necessary to reassess these conclusions in light of more sophisticated and physiologically accurate manipulations.</p>
<p>To generate our own set of high-quality deepfakes, we employed a dual-decoder autoencoder architecture along with an advanced blending procedure, as described in Beckmann et al. (<xref ref-type="bibr" rid="B1">2023</xref>). Unlike a standard autoencoder with a single decoder to reconstruct the input image, this model utilizes two decoders. Each decoder is trained to reconstruct the input image, but with the identity of a specific person&#x02014;the source or the target person, respectively. During training, the autoencoder is fed with pairs of images of the source and target person. Once trained, the model can be used to swap faces in images and, accordingly, in videos. The advanced blending procedure enhances the quality of the deepfakes by modifying the mask used for blending. Specifically, the mask is adjusted to create a greater distance between the edges of the face and the boundaries of the mask by &#x0201C;squeezing&#x0201D; it by approximately 15 pixels on each side. This adjustment excludes non-facial regions from the blending process, thereby reducing blending artifacts at the boundaries and improving the overall realism of the generated deepfakes.</p>
<p>Following data collection, we created various identity pairs and trained a separate deepfake autoencoder for each pair. Using these autoencoders, we performed face swaps between all videos for each identity pair, generating a total of 858 identity-specific deepfake videos and 156 unaltered counterparts. <xref ref-type="fig" rid="F3">Figure 3B</xref> shows examples of our deepfakes. For more details on the used method see Beckmann et al. (<xref ref-type="bibr" rid="B1">2023</xref>).</p>
<p>In addition to these deepfakes, we generated additional ones using the open-source tool DeepFaceLive (DFL) (Petrov, <xref ref-type="bibr" rid="B41">2023</xref>). This tool was developed for real-time face swapping. It requires a driver video and swaps the face in the video with that of a target face model, while maintaining the driver&#x00027;s expression and head pose. A set of target face models is provided by the tool. We used four of these provided face models to generate 32 deepfake videos. These videos are used in our experiments to show that the rPPG signal of a deepfake is similar to that of its driver video and also to that of our fakes generated using the same driver video.</p></sec>
<sec>
<title>3.5 External data</title>
<p>In addition to our own dataset, which includes videos with ECG data and corresponding deepfakes, we also utilized publicly available datasets to enhance the scope of our analysis. First, we used the deepfakes generated in Beckmann et al. (<xref ref-type="bibr" rid="B1">2023</xref>) based on the &#x0201C;actors&#x0201D; subset of the deepfake detection dataset (Dufour et al., <xref ref-type="bibr" rid="B18">2019</xref>), its corresponding originals as well as the fakes from that dataset based on the same originals.</p>
<p>Recognizing that many existing deepfake datasets may have limitations in terms of size and diversity, we selected the KoDF dataset (Kwon et al., <xref ref-type="bibr" rid="B33">2021</xref>), which is designed to generalize more effectively to real-world deepfakes compared to other public datasets like FF&#x0002B;&#x0002B; (R&#x000F6;ssler et al., <xref ref-type="bibr" rid="B45">2019</xref>) or Celeb-DF (Dang-Nguyen et al., <xref ref-type="bibr" rid="B11">2020</xref>). KoDF contains 403 Korean subjects and a few tens of thousands of real and fake videos. In addition, KoDF includes six synthesis models for deepfake creation, which brings a large diversity of fakes to the set; in our study, we utilized four of these six methods, selected based on the quality of the fakes.</p>
<p>Finally, we selected 45 videos from the KoDF dataset and generated an additional 45 Deepfake videos using the Picsi.Ai platform<xref ref-type="fn" rid="fn0002"><sup>2</sup></xref>, leveraging its available synthesis methods.</p></sec></sec>
<sec sec-type="results" id="s4">
<title>4 Results</title>
<sec>
<title>4.1 Signal analysis</title>
<p>In the initial phase of our analysis, we focused on our own dataset, where we successfully extracted meaningful heart rate (HR) signals from both genuine and deepfake videos. In all cases, the detected HR corresponded to the face of the subject in the video, regardless of whether the video was real or a deepfake.</p>
<p>The average signal-to-noise ratio (SNR) of the extracted HR was significantly higher in the original videos compared to the deepfake videos with values of -1.97 dB for genuine and -3.35 dB for deepfakes. This difference in SNR highlights the lower quality of rPPG signals in deepfakes, likely due to artifacts introduced during the generation process. As all participants were seated during the recordings and made only slight movements, it is reasonable to assume that the resting heart rate (normally between 60 bpm and 90 bpm) was detected for all participants. A higher HR was only measured for two participants, but this was consistent across all recordings and verified by the ECG measurements, suggesting the reliability of our extraction process. The results of our analysis on four videos, two fake and two genuine, are depicted as examples in <xref ref-type="fig" rid="F4">Figure 4</xref>.</p>
<fig id="F4" position="float">
<label>Figure 4</label>
<caption><p>This illustration presents two pairs of genuine and fake videos. On the <bold>left</bold> of each example, frames from each video sequence are displayed. On the <bold>right</bold>, the extracted reference rPPG signal is plotted for each paired fake and original video. Additionally, the measured heart rate of the person recorded is displayed.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fimag-04-1504551-g0004.tif"/>
</fig>
<p>For the videos with a captured PPG reference signal and deepfakes based on these videos, we further analysed the rPPG in time domain. We calculated the Pearson correlation coefficients for the PPG and rPPG signals for the genuine videos. Since no ground truth PPG signal can be measured for the deepfakes, the signal from the underlying driver video is used instead. In addition to the correlation, we calculated the Mean Squared Error (MSE) for the rPPG signals, using the PPG signals as ground truth. Before calculating the MSE, the mean and variance of both signals were normalized to zero and one, respectively. The results are shown in <xref ref-type="fig" rid="F5">Figure 5</xref>.</p>
<fig id="F5" position="float">
<label>Figure 5</label>
<caption><p>Correlation and deviation of rPPG to PPG signal as well as the absolute difference of the heart rate (HR) between detected and ground truth. The rPPG signal of the deep fake videos generated with DeepFaceLive (DFL) shows a similarly strong correlation to the measured PPG signal as the rPPG signal for the genuine videos. The correlation for the rPPG signal of the deep fake videos generated with the methods of (Beckmann et al., <xref ref-type="bibr" rid="B1">2023</xref>) is slightly weaker but still moderate. The MSE is in a similar range for all types of videos.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fimag-04-1504551-g0005.tif"/>
</fig>
<p>For all types of videos, there is a moderate to strong correlation in most samples. The correlation between the PPG and rPPG signals for the genuine videos and the DeepFaceLive (DFL) fakes shows a similar distribution, while the correlation for deepfakes generated with the method of Beckmann et al. (<xref ref-type="bibr" rid="B1">2023</xref>) is slightly lower. These high correlations between the rPPG signals of the deepfakes and the ground truth PPG signal of the driver videos show that these fakes replicate the rPPG signal of the driver videos. This point is further supported by the HR gained from the rPPG signal. The absolute difference to the ground truth across all videos and time periods is on average 1.80 bpm, 1.85 bpm and 3.24 bpm for the genuine videos, the Beckmann et al. (<xref ref-type="bibr" rid="B1">2023</xref>) and DFL fakes, respectively. It should be noted that rPPG signal extraction from videos includes, alongside the PPG-related signal, additional components induced by body motion and other noise sources, and thus cannot perfectly reflect a true PPG signal.</p>
<p>In addition to comparisons with ground truth PPG signals, we calculated the Pearson correlation coefficients between these deepfakes and their underlying driver videos. The results are shown in <xref ref-type="fig" rid="F6">Figure 6</xref>. While the correlation for videos generated with DFL is strong in most cases, the correlation for those generated with the method of Beckmann et al. (<xref ref-type="bibr" rid="B1">2023</xref>) is moderate for most videos. This provides further evidence that deepfakes mimic the rPPG signal of the driver video.</p>
<fig id="F6" position="float">
<label>Figure 6</label>
<caption><p>Correlation and deviation of rPPG signals of deepfakes to their underlying driver video&#x00027;s rPPG signal. The rPPG signals of the deepfakes generated with DeepFaceLive (DFL) show a strong correlation to the rPPG signal of the underlying driver videos in most cases, while those for the deepfakes generated with the method of (Beckmann et al., <xref ref-type="bibr" rid="B1">2023</xref>) are weaker but, on average, still moderate. The DFL deepfakes outperform also in terms of MSE those of (Beckmann et al., <xref ref-type="bibr" rid="B1">2023</xref>).</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fimag-04-1504551-g0006.tif"/>
</fig>
<p>Building on these results, we further analyzed the generated deepfakes to investigate the origin of their rPPG signals. In the majority of cases, the rPPG signals in the deepfakes closely mirrored those of the original source videos, with only minor variations observed. When comparing the HR measured in the genuine videos with those from their deepfake counterparts, we found that the global HR in the deepfake videos was remarkably similar to the HR of the original source recordings, as well as to the measured ECG ground truth, see examples in <xref ref-type="fig" rid="F7">Figure 7</xref>. For all fakes in our dataset, we found a high correlation to the HR of the original driving video, see <xref ref-type="fig" rid="F8">Figure 8</xref>. The average correlation between the HR of the fakes created by using the method of Beckmann et al. (<xref ref-type="bibr" rid="B1">2023</xref>) is <inline-formula><mml:math id="M1"><mml:mover accent="true"><mml:mrow><mml:mi>r</mml:mi></mml:mrow><mml:mo>&#x00304;</mml:mo></mml:mover><mml:mo>=</mml:mo><mml:mn>0</mml:mn><mml:mo>.</mml:mo><mml:mn>57</mml:mn></mml:math></inline-formula> (median <italic>r</italic> &#x0003D; 0.55) and for the fakes generated with DFL <inline-formula><mml:math id="M2"><mml:mover accent="true"><mml:mrow><mml:mi>r</mml:mi></mml:mrow><mml:mo>&#x00304;</mml:mo></mml:mover><mml:mo>=</mml:mo><mml:mn>0</mml:mn><mml:mo>.</mml:mo><mml:mn>82</mml:mn></mml:math></inline-formula> (median <italic>r</italic> &#x0003D; 0.89). For the other fakes (KoDF dataset, both methods on actor subset), the correlation is above <italic>r</italic> &#x0003D; 0.4. However, for the publicly available FF&#x0002B;&#x0002B; fakes, the deviation is remarkably high (min <italic>r</italic> &#x0003D; &#x02212;0.23 to max <italic>r</italic> &#x0003D; 0.91). 
These findings confirm that the heart rate signals in high-quality deepfakes are often inherited from the source video, further complicating the task of distinguishing between real and fake content based solely on global HR analysis.</p>

<fig id="F7" position="float">
<label>Figure 7</label>
<caption><p>Heart rates of different videos. In each plot the extracted HR of the original video (red), the recorded ECG signal (yellow) and a created high-quality deepfake (blue) using the original video as source video is shown. <bold>(A)</bold> capture ID 04 0100. <bold>(B)</bold> capture ID 04 0101. <bold>(C)</bold> capture ID 011 1100.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fimag-04-1504551-g0007.tif"/>
</fig>
<fig id="F8" position="float">
<label>Figure 8</label>
<caption><p>Correlation and MSE of Heart Rates (HR) of the deepfakes to their underlying driver video&#x00027;s HR. The correlation of the HRs of the deepfakes generated with DeepFaceLive show a strong correlation, while those for the deepfakes generated with the method of Beckmann et al. (<xref ref-type="bibr" rid="B1">2023</xref>) are moderate.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fimag-04-1504551-g0008.tif"/>
</fig>
<p>The FFT maps visualize that the rPPG signal, which can be traced back to physiological properties, clearly originates from the source video. <xref ref-type="fig" rid="F9">Figures 9</xref>, <xref ref-type="fig" rid="F10">10</xref> show two examples with a set of six FFT maps, the background, face and subtraction FFT maps of an original video and a deepfake, where the original served as source. The extracted HRs for both examples can be found in <xref ref-type="fig" rid="F7">Figure 7</xref>.</p>

<fig id="F9" position="float">
<label>Figure 9</label>
<caption><p>FFT maps of capturing <italic>ID_04_0100</italic>. A similar HR can be detected in both cases, original source <bold>(A&#x02013;C)</bold> and deepfake <bold>(D&#x02013;F)</bold> video with about 59 bpm. The correlations between the original and deepfake FFT maps show a strong relationship for all three map pairs: 0.96 for background, 0.77 for face, and 0.78 for subtraction map.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fimag-04-1504551-g0009.tif"/>
</fig>
<fig id="F10" position="float">
<label>Figure 10</label>
<caption><p>FFT maps of capturing <italic>ID_11_1100</italic>. A similar HR can be detected in both cases, original source <bold>(A&#x02013;C)</bold> and deepfake <bold>(D&#x02013;F)</bold> video with about 71 bpm. The correlations between the original and deepfake FFT maps show a moderate to strong relationship for all three map pairs: 0.50 for background (moderate relationship), 0.91 for face (strong), and 0.89 for subtraction map (strong).</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fimag-04-1504551-g0010.tif"/>
</fig>
<p>Example <italic>ID_04_0100</italic> demonstrates the influence of our proposed background analysis on signal detection. In this instance, a strong noise signal around 150 bpm is detectable in the background. Due to the nature of deepfake generation, this noise signal is also present in all fakes where that capture served as source, resulting in a high correlation between the FFT maps of original and deepfakes (with a correlation of 0.96). In the original face video, the physiological signal (at about 59 bpm) is twice as strong as the background noise signal (at 150 bpm), making it easy to extract. However, in the deepfake face, the HR and noise signals are of comparable magnitudes, complicating clear pulse extraction. This issue is resolved by incorporating background analysis, as shown in <xref ref-type="fig" rid="F9">Figure 9</xref>.</p>
<p>The correlation between the original and deepfake FFT maps increases slightly, from 0.7667 for the face FFT maps to 0.7826 for the subtraction maps, further emphasizing that the rPPG signal in the deepfake originates from the source video. This strong relationship between the original and deepfake signals extends to cases where the background signals in the deepfakes differ more significantly from the originals, reinforcing the notion that deepfakes inherit their rPPG signals from the driver video (cf. <xref ref-type="fig" rid="F10">Figure 10</xref>).</p>
<p>Both examples clearly demonstrate that, in the analysis of the face region in deepfakes, the background signal (induced by noise, compression, etc.) plays a significantly stronger role, as the transferred HR signal is reproduced with less intensity compared to the original video. This is also reflected in the corresponding SNRs (see above). Due to the weaker transmission and artificial replication of the pulse signal, a strong correlation between the original and deepfake signal is not always observed, see <xref ref-type="fig" rid="F11">Figure 11</xref> as example for a moderate relationship between original source and deepfake subtraction maps with a correlation of 0.53. However, upon closer examination, a trace of the original video&#x00027;s HR signal can still be detected in the faked face. This &#x02018;signal trace&#x00027; underscores that, despite noise and degradation, elements of the physiological signal from the source video remain present in the deepfake.</p>
<fig id="F11" position="float">
<label>Figure 11</label>
<caption><p>FFT subtraction maps of <bold>(A)</bold> capturing ID_004_0110 and <bold>(B)</bold> a related deepfake. In the original FFT map, a HR can be identified clearly at around 65 bpm. The FFT subtraction map of the deepfake is more noisy but the HR of the underlying original is detectable as well. The correlation between both maps is moderate with 0.53. <bold>(A)</bold> Original. <bold>(B)</bold> Deepfake.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fimag-04-1504551-g0011.tif"/>
</fig></sec>
<sec>
<title>4.2 Analysis on external data</title>
<p>Given the limited size of our dataset, we extended our HR analysis to the KoDF and FF&#x0002B;&#x0002B; datasets. Despite varying compression rates and relatively high image noise, we were able to consistently extract HR signals from all genuine videos. Although some deepfake videos presented challenges due to noise and compression artifacts, we were still able to extract signals in most cases that could be associated with HR (cf. <xref ref-type="fig" rid="F8">Figure 8</xref>). However, as the datasets do not include the participants&#x00027; actual HR data, we were unable to validate these extracted HR signals against ground truth measurements.</p>
<p>A closer look at the quality parameters (<xref ref-type="table" rid="T1">Table 1</xref>) shows an extremely low signal-to-noise ratio (SNR) of the extracted HRs for the external datasets, especially for FF&#x0002B;&#x0002B;, while the deviation of the HR over time is high although all videos involve individuals who are at rest and should therefore have a stable pulse. This indicates that for a certain number of videos, the detected HR is not plausible, i.e., not related to the real HR, although we have selected the videos with the best signal quality and analyzed the corresponding deepfakes. It is important to note that, for the KoDF dataset, it is not possible to identify the exact driver video used for each fake. Therefore, we only looked at whether it is possible to identify a physiologically meaningful HR in both the original and the fake videos. Here, similar results as with our dataset could be achieved (cf. <xref ref-type="fig" rid="F12">Figure 12</xref>). For the deepfakes, a signal which can be related to a HR is in most cases detectable. Further examples of FFT maps of deepfakes from the KoDF dataset are shown in the Appendix.</p>
<table-wrap position="float" id="T1">
<label>Table 1</label>
<caption><p>Signal-to-noise ratio (SNR) and standard deviation (STD) of the extracted HR signal for all included data.</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:#919498;color:#ffffff">
<th valign="top" align="left"><bold>Method</bold></th>
<th valign="top" align="center"><bold>Beckmann et al. (<xref ref-type="bibr" rid="B1">2023</xref>)</bold></th>
<th valign="top" align="center"><bold>DFL</bold></th>
<th valign="top" align="center"><bold>KoDF</bold></th>
<th valign="top" align="center"><bold>Actor our fakes</bold></th>
<th valign="top" align="center"><bold>Actor FF&#x0002B;&#x0002B; fakes</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">SNR originals [dB]</td>
<td valign="top" align="center">5.595</td>
<td valign="top" align="center">8.947</td>
<td valign="top" align="center">4.470</td>
<td valign="top" align="center">2.951</td>
<td valign="top" align="center">1.852</td>
</tr> <tr>
<td valign="top" align="left">SNR fakes [dB]</td>
<td valign="top" align="center">3.566</td>
<td valign="top" align="center">7.838</td>
<td valign="top" align="center">3.027</td>
<td valign="top" align="center">2.730</td>
<td valign="top" align="center">2.345</td>
</tr> <tr>
<td valign="top" align="left">STD originals [bpm]</td>
<td valign="top" align="center">5.033</td>
<td valign="top" align="center">2.779</td>
<td valign="top" align="center">7.892</td>
<td valign="top" align="center">6.929</td>
<td valign="top" align="center">8.291</td>
</tr> <tr>
<td valign="top" align="left">STD fakes [bpm]</td>
<td valign="top" align="center">7.300</td>
<td valign="top" align="center">2.245</td>
<td valign="top" align="center">8.609</td>
<td valign="top" align="center">8.931</td>
<td valign="top" align="center">8.833</td>
</tr></tbody>
</table>
</table-wrap>
<fig id="F12" position="float">
<label>Figure 12</label>
<caption><p>FFT maps of a deepfake taken from the KoDF dataset. <bold>(A)</bold> background, <bold>(B)</bold> face, <bold>(C)</bold> subtraction. The extracted HR is about 69 bpm.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fimag-04-1504551-g0012.tif"/>
</fig>
</sec></sec>
<sec sec-type="discussion" id="s5">
<title>5 Discussion</title>
<p>As discussed in Section 3.5, numerous datasets have been developed to support deepfake research, such as the DeepFake Detection Challenge Dataset (Dolhansky et al., <xref ref-type="bibr" rid="B16">2020</xref>), FF&#x0002B;&#x0002B; (R&#x000F6;ssler et al., <xref ref-type="bibr" rid="B45">2019</xref>), and Celeb-DF (Dang-Nguyen et al., <xref ref-type="bibr" rid="B11">2020</xref>). These datasets have significantly advanced deepfake detection techniques. However, few authors have explored deepfake detection through the analysis of physiologically related signals, such as rPPG. Despite their importance, public datasets present several challenges when used for analyzing rPPG signals in the context of deepfake detection as rPPG is sensitive to video quality.</p>
<p>Many deepfake datasets suffer from compression artifacts, low resolution, inconsistent frame rates, high background noise, and challenging illumination settings (D&#x00027;Amelio et al., <xref ref-type="bibr" rid="B10">2023</xref>; Kwon et al., <xref ref-type="bibr" rid="B33">2021</xref>). These factors can substantially degrade the quality of rPPG signals, making it difficult to reliably extract physiological features (Wang et al., <xref ref-type="bibr" rid="B54">2024</xref>; Zaunseder et al., <xref ref-type="bibr" rid="B67">2018</xref>; McDuff et al., <xref ref-type="bibr" rid="B38">2017</xref>). Consequently, the utility of rPPG analysis in deepfake detection has been limited, particularly in datasets where video quality is compromised.</p>
<p>Previous studies (&#x000C7;ift&#x000E7;i et al., <xref ref-type="bibr" rid="B6">2024</xref>; Qi et al., <xref ref-type="bibr" rid="B43">2020</xref>; Ciftci et al., <xref ref-type="bibr" rid="B8">2020b</xref>,<xref ref-type="bibr" rid="B7">a</xref>; Hernandez-Ortega et al., <xref ref-type="bibr" rid="B23">2020</xref>) concluded that deepfakes do not exhibit a detectable heartbeat (Boccignone et al., <xref ref-type="bibr" rid="B3">2022</xref>), suggesting that this could be used as a reliable marker for deepfake detection. However, much of this research was conducted on datasets of low image quality. In contrast, our study reveals that for recent and high-quality deepfakes, such as those generated using the method described in Beckmann et al. (<xref ref-type="bibr" rid="B1">2023</xref>), DeepFaceLive or present in the KoDF dataset, it is possible to robustly detect a HR signal that originates from the source (driver) video. Our experiments demonstrated that deepfakes can exhibit realistic heart rates, contradicting previous findings. Specifically, in all fake videos from our dataset and most videos from the KoDF dataset, valid HR signals were successfully extracted. This indicates that solely relying on the analysis of global HR signals is no longer sufficient to detect deepfakes.</p>
<p>Another significant challenge in existing deepfake datasets is the lack of reference measurements, such as concurrent ECG or PPG sensor readings, which are crucial for validating the accuracy of extracted rPPG signals. Without these ground truth data, it becomes difficult to assess the reliability of physiological signal extraction and, consequently, the conclusions drawn from them.</p>
<p>To improve the utility of physiological signals for deepfake detection, we propose shifting from global HR analysis to locally resolved signals within the face. Recent advances in video-based vital sign analysis have moved toward capturing local pulse signals from specific facial regions (Kossack et al., <xref ref-type="bibr" rid="B31">2019b</xref>, <xref ref-type="bibr" rid="B30">2021</xref>), which better reflect the anatomical blood flow patterns of the human face. By leveraging these localized physiological patterns, we aim to enhance both the robustness and interpretability of deepfake detection. Building on this idea, we performed initial experiments where we extracted rPPG-related feature maps from a subset of our dataset following the approach described in Schraven et al. (<xref ref-type="bibr" rid="B46">2023</xref>) and trained an EfficientNet-B4 model as a convolutional deepfake detector (Tan and Le, <xref ref-type="bibr" rid="B52">2019</xref>). Our preliminary results (AUROC score of 87.4%) are promising and indicate that these local maps can be used for deepfake detection. Using the rPPG-based features improves interpretability by providing more understandable features, but the detector itself lacks transparency by design. To overcome this issue, we will in future work adapt the concept proposed by Seibold et al. (<xref ref-type="bibr" rid="B49">2021</xref>), which leads to detectors that accurately determine which part of the input contributes to the prediction that an input is a forgery.</p></sec>
<sec sec-type="conclusions" id="s6">
<title>6 Conclusion</title>
<p>In conclusion, our study demonstrates that high-quality deepfakes exhibit rPPG signals that correspond to the HR of the source (driver) video. By comparing the different PPG signals and analyzing the FFT maps as well as the extracted HRs and its correlations, we confirmed that the globally derived rPPG signal originates from the driving video, rather than being artificially generated. This finding challenges previous assumptions that deepfakes inherently lack valid physiological signals, revealing the limitations of using simple HR analysis for detecting high-quality deepfakes.</p>
<p>One of the key contributions of our study is the demonstration that HR signals in deepfakes can closely match those of the source video, making traditional global HR-based detection methods insufficient for distinguishing between real and fake content. By performing our analysis not only on our own dataset but also on fakes created with DeepFaceLive and from the KoDF dataset, we confirmed the generalization of our findings, showing that even older deepfake datasets contain valid HR signals.</p>
<p>To address this limitation, we propose leveraging local blood flow information for deepfake detection. Preliminary experiments indicate that this localized analysis holds significant promise for improving detection accuracy. As part of ongoing work, we are further refining this approach, which also offers the added benefit of enhanced explainability. Visualizing local blood flow patterns could provide clearer insight into the decision-making process of detection algorithms. Another important factor in ensuring robust detection is the availability of good and diverse training data. An attacker may attempt to mimic blood flow patterns to evade detection; therefore, we plan to enhance our deepfake dataset using style-transfer with a temporal component by extending the work on improved image forgeries of Seibold et al. (<xref ref-type="bibr" rid="B48">2019</xref>).</p>
<p>In summary, our contributions include: (1) providing evidence that deepfakes can exhibit realistic heart rate signals, (2) highlighting the insufficiency of global HR analysis for detecting high-quality deepfakes, and (3) proposing the use of localized rPPG signals to enhance both the robustness and explainability of deepfake detection. Our approach could serve as a valuable complement to existing techniques, with the potential to improve the security and integrity of multimedia content across platforms.</p></sec>
</body>
<back>
<sec sec-type="data-availability" id="s7">
<title>Data availability statement</title>
<p>The datasets presented in this article are not readily available because commercial use of the data is not permitted. Requests to access the datasets should be directed to Peter Eisert, <email>peter.eisert&#x00040;hhi.fraunhofer.de</email>.</p>
</sec>
<sec sec-type="ethics-statement" id="s8">
<title>Ethics statement</title>
<p>Written informed consent was obtained from the individual(s) for the publication of any potentially identifiable images or data included in this article.</p>
</sec>
<sec sec-type="author-contributions" id="s9">
<title>Author contributions</title>
<p>CS: Conceptualization, Formal analysis, Validation, Investigation, Writing &#x02013; original draft. EW: Conceptualization, Formal analysis, Validation, Investigation, Writing &#x02013; original draft. AB: Methodology, Investigation. BK: Investigation, Writing &#x02013; original draft. AH: Supervision, Writing &#x02013; review &#x00026; editing. PE: Funding acquisition, Supervision, Writing &#x02013; review &#x00026; editing.</p>
</sec>
<sec sec-type="funding-information" id="s10">
<title>Funding</title>
<p>The author(s) declare that financial support was received for the research and/or publication of this article. This work was funded by the German Federal Ministry of Education and Research (BMBF) under Grant No. 13N15735 (FakeID) and by Horizon Europe under Grant No. 101121280 (Einstein).</p>
</sec>
<sec sec-type="COI-statement" id="conf1">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="ai-statement" id="s11">
<title>Generative AI statement</title>
<p>The author(s) declare that no Gen AI was used in the creation of this manuscript.</p></sec><sec sec-type="disclaimer" id="s12">
<title>Publisher&#x00027;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<fn-group>
<fn id="fn0001"><p><sup>1</sup>ace acA2440-75uc, Basler AG, Germany.</p></fn>
<fn id="fn0002"><p><sup>2</sup><ext-link ext-link-type="uri" xlink:href="https://www.picsi.ai">https://www.picsi.ai</ext-link></p></fn>
</fn-group>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Beckmann</surname> <given-names>A.</given-names></name> <name><surname>Hilsmann</surname> <given-names>A.</given-names></name> <name><surname>Eisert</surname> <given-names>P.</given-names></name></person-group> (<year>2023</year>). <article-title>&#x0201C;Fooling state-of-the-art deepfake detection with high-quality deepfakes,&#x0201D;</article-title> in <source>Proceedings of the 2023 ACM Workshop on Information Hiding and Multimedia Security, IH&#x00026;MMSec &#x00027;23</source> (<publisher-loc>New York, NY</publisher-loc>: <publisher-name>Association for Computing Machinery</publisher-name>), <fpage>175</fpage>&#x02013;<lpage>180</lpage>.</citation>
</ref>
<ref id="B2">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bobbia</surname> <given-names>S.</given-names></name> <name><surname>Macwan</surname> <given-names>R.</given-names></name> <name><surname>Benezeth</surname> <given-names>Y.</given-names></name> <name><surname>Mansouri</surname> <given-names>A.</given-names></name> <name><surname>Dubois</surname> <given-names>J.</given-names></name></person-group> (<year>2019</year>). <article-title>Unsupervised skin tissue segmentation for remote photoplethysmography</article-title>. <source>Pattern Recognit. Lett</source>. <volume>124</volume>, <fpage>82</fpage>&#x02013;<lpage>90</lpage>. <pub-id pub-id-type="doi">10.1016/j.patrec.2017.10.017</pub-id></citation>
</ref>
<ref id="B3">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Boccignone</surname> <given-names>G.</given-names></name> <name><surname>Bursic</surname> <given-names>S.</given-names></name> <name><surname>Cuculo</surname> <given-names>V.</given-names></name> <name><surname>D&#x00027;Amelio</surname> <given-names>A.</given-names></name> <name><surname>Grossi</surname> <given-names>G.</given-names></name> <name><surname>Lanzarotti</surname> <given-names>R.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>&#x0201C;Deepfakes have no heart: A simple rppg-based method to reveal fake videos,&#x0201D;</article-title> in <source>Image Analysis and Processing</source> - <italic>ICIAP 2022: 21st International Conference</italic> (Berlin, Heidelberg: Springer-Verlag), <fpage>186</fpage>&#x02013;<lpage>195</lpage>.</citation>
</ref>
<ref id="B4">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chen</surname> <given-names>M.</given-names></name> <name><surname>Liao</surname> <given-names>X.</given-names></name> <name><surname>Wu</surname> <given-names>M.</given-names></name></person-group> (<year>2022</year>). <article-title>Pulseedit: Editing physiological signals in facial videos for privacy protection</article-title>. <source>IEEE Trans. Inform. Forens. Secur</source>. <volume>17</volume>, <fpage>457</fpage>&#x02013;<lpage>471</lpage>. <pub-id pub-id-type="doi">10.1109/TIFS.2022.3142993</pub-id></citation>
</ref>
<ref id="B5">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Chen</surname> <given-names>W.</given-names></name> <name><surname>McDuff</surname> <given-names>D.</given-names></name></person-group> (<year>2018</year>). <article-title>&#x0201C;Deepphys: Video-based physiological measurement using convolutional attention networks,&#x0201D;</article-title> in <source>Proceedings of the European Conference on Computer Vision (ECCV</source>) (The Computer Vision Foundation (CVF)).</citation>
</ref>
<ref id="B6">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>&#x000C7;ift&#x000E7;i</surname> <given-names>U. A.</given-names></name> <name><surname>Demir</surname> <given-names>&#x00130;.</given-names></name> <name><surname>Yin</surname> <given-names>L.</given-names></name></person-group> (<year>2024</year>). <article-title>Deepfake source detection in a heart beat</article-title>. <source>Vis. Comput</source>. <volume>40</volume>, <fpage>2733</fpage>&#x02013;<lpage>2750</lpage>. <pub-id pub-id-type="doi">10.1007/s00371-023-02981-0</pub-id></citation>
</ref>
<ref id="B7">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Ciftci</surname> <given-names>U. A.</given-names></name> <name><surname>Demir</surname> <given-names>I.</given-names></name> <name><surname>Yin</surname> <given-names>L.</given-names></name></person-group> (<year>2020a</year>). <article-title>&#x0201C;Fakecatcher: Detection of synthetic portrait videos using biological signals,&#x0201D;</article-title> in <source>IEEE Transactions on Pattern Analysis and Machine Intelligence</source> (<publisher-loc>IEEE</publisher-loc>).<pub-id pub-id-type="pmid">32750816</pub-id></citation></ref>
<ref id="B8">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Ciftci</surname> <given-names>U. A.</given-names></name> <name><surname>Demir</surname> <given-names>I.</given-names></name> <name><surname>Yin</surname> <given-names>L.</given-names></name></person-group> (<year>2020b</year>). <article-title>&#x0201C;How do the hearts of deep fakes beat? Deep fake source detection via interpreting residuals with biological signals,&#x0201D;</article-title> in <source>2020 IEEE International Joint Conference on Biometrics (IJCB)</source> (<publisher-loc>Houston, TX</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>1</fpage>&#x02013;<lpage>10</lpage>.</citation>
</ref>
<ref id="B9">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Ciftci</surname> <given-names>U. A.</given-names></name> <name><surname>Yin</surname> <given-names>L.</given-names></name></person-group> (<year>2019</year>). <article-title>&#x0201C;Heart rate based face synthesis for pulse estimation,&#x0201D;</article-title> in <source>Advances in Visual Computing: 14th International Symposium on Visual Computing, ISVC 2019</source> (<publisher-loc>Lake Tahoe</publisher-loc>: <publisher-name>Springer</publisher-name>), <fpage>540</fpage>&#x02013;<lpage>551</lpage>.</citation>
</ref>
<ref id="B10">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>D&#x00027;Amelio</surname> <given-names>A.</given-names></name> <name><surname>Lanzarotti</surname> <given-names>R.</given-names></name> <name><surname>Patania</surname> <given-names>S.</given-names></name> <name><surname>Grossi</surname> <given-names>G.</given-names></name> <name><surname>Cuculo</surname> <given-names>V.</given-names></name> <name><surname>Valota</surname> <given-names>A.</given-names></name> <etal/></person-group>. (<year>2023</year>). <article-title>&#x0201C;On using rppg signals for deepfake detection: a cautionary note,&#x0201D;</article-title> in <source>International Conference on Image Analysis and Processing</source> (<publisher-loc>Cham</publisher-loc>: <publisher-name>Springer</publisher-name>), <fpage>235</fpage>&#x02013;<lpage>246</lpage>.</citation>
</ref>
<ref id="B11">
<citation citation-type="web"><person-group person-group-type="author"><name><surname>Dang-Nguyen</surname> <given-names>D.-T.</given-names></name> <name><surname>Dang-Nguyen</surname> <given-names>D.-S.</given-names></name> <name><surname>Piras</surname> <given-names>L.</given-names></name> <name><surname>Giacinto</surname> <given-names>G.</given-names></name> <name><surname>Boato</surname> <given-names>G.</given-names></name></person-group> (<year>2020</year>). <source>CelebDF: A Large-Scale Challenging Dataset for Deepfake Forensics</source>. Available online at: <ext-link ext-link-type="uri" xlink:href="https://github.com/yuezunli/celeb-deepfakeforensics">https://github.com/yuezunli/celeb-deepfakeforensics</ext-link> (accessed March 7, 2025).</citation>
</ref>
<ref id="B12">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>de Berg</surname> <given-names>M.</given-names></name> <name><surname>Cheong</surname> <given-names>O.</given-names></name> <name><surname>van Kreveld</surname> <given-names>M.</given-names></name> <name><surname>Overmars</surname> <given-names>M.</given-names></name></person-group> (<year>2008</year>). <source>Computational Geometry: Algorithms and Applications</source>. <publisher-loc>Cham</publisher-loc>: <publisher-name>Springer Science &#x00026; Business Media</publisher-name>.</citation>
</ref>
<ref id="B13">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>De Haan</surname> <given-names>G.</given-names></name> <name><surname>Jeanne</surname> <given-names>V.</given-names></name></person-group> (<year>2013</year>). <article-title>Robust pulse rate from chrominance-based rPPG</article-title>. <source>IEEE Trans. Biomed. Eng</source>. <volume>60</volume>, <fpage>2878</fpage>&#x02013;<lpage>2886</lpage>. <pub-id pub-id-type="doi">10.1109/TBME.2013.2266196</pub-id><pub-id pub-id-type="pmid">23744659</pub-id></citation></ref>
<ref id="B14">
<citation citation-type="book"><person-group person-group-type="author"><collab>DeepFakes</collab></person-group> (<year>2019</year>). <source>Faceswap</source>.</citation>
</ref>
<ref id="B15">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Ding</surname> <given-names>Z.</given-names></name> <name><surname>Zhang</surname> <given-names>C.</given-names></name> <name><surname>Xia</surname> <given-names>Z.</given-names></name> <name><surname>Jebe</surname> <given-names>L.</given-names></name> <name><surname>Tu</surname> <given-names>Z.</given-names></name> <name><surname>Zhang</surname> <given-names>X.</given-names></name></person-group> (<year>2023</year>). <article-title>&#x0201C;Diffusionrig: Learning personalized priors for facial appearance editing,&#x0201D;</article-title> in <source>Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition</source> (<publisher-loc>IEEE</publisher-loc>).</citation>
</ref>
<ref id="B16">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Dolhansky</surname> <given-names>B.</given-names></name> <name><surname>Bitton</surname> <given-names>J.</given-names></name> <name><surname>Pflaum</surname> <given-names>B.</given-names></name> <name><surname>Lu</surname> <given-names>J.</given-names></name> <name><surname>Howes</surname> <given-names>R.</given-names></name> <name><surname>Wang</surname> <given-names>M.</given-names></name> <etal/></person-group>. (<year>2020</year>). <article-title>The deepfake detection challenge (DFDC) dataset</article-title>. <source>arXiv</source> [preprint] arXiv:2006.07397. <pub-id pub-id-type="doi">10.48550/arXiv.2006.07397</pub-id></citation>
</ref>
<ref id="B17">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Drobyshev</surname> <given-names>N.</given-names></name> <name><surname>Chelishev</surname> <given-names>J.</given-names></name> <name><surname>Khakhulin</surname> <given-names>T.</given-names></name> <name><surname>Ivakhnenko</surname> <given-names>A.</given-names></name> <name><surname>Lempitsky</surname> <given-names>V.</given-names></name> <name><surname>Zakharov</surname> <given-names>E.</given-names></name></person-group> (<year>2022</year>). <article-title>&#x0201C;Megaportraits: One-shot megapixel neural head avatars,&#x0201D;</article-title> in <source>Proc. of the 30th ACM International Conference on Multimedia</source> (<publisher-loc>New York, NY</publisher-loc>: <publisher-name>Association for Computing Machinery</publisher-name>).</citation>
</ref>
<ref id="B18">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Dufour</surname> <given-names>N.</given-names></name> <name><surname>Gully</surname> <given-names>A.</given-names></name> <name><surname>Karlsson</surname> <given-names>P.</given-names></name> <name><surname>Vorbyov</surname> <given-names>A.</given-names></name> <name><surname>Leung</surname> <given-names>T.</given-names></name> <name><surname>Childs</surname> <given-names>J.</given-names></name> <etal/></person-group>. (<year>2019</year>). <source>Deepfakes Detection Dataset</source>. <publisher-loc>New York</publisher-loc>: <publisher-name>Google and Jigsaw</publisher-name>.</citation>
</ref>
<ref id="B19">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Fernandes</surname> <given-names>S.</given-names></name> <name><surname>Raj</surname> <given-names>S.</given-names></name> <name><surname>Ortiz</surname> <given-names>E.</given-names></name> <name><surname>Vintila</surname> <given-names>I.</given-names></name> <name><surname>Salter</surname> <given-names>M.</given-names></name> <name><surname>Urosevic</surname> <given-names>G.</given-names></name> <etal/></person-group>. (<year>2019</year>). <article-title>&#x0201C;Predicting heart rate variations of deepfake videos using neural ode,&#x0201D;</article-title> in <source>Proceedings of the IEEE/CVF International Conference on Computer Vision Workshops</source> (<publisher-loc>Seoul</publisher-loc>: <publisher-name>IEEE</publisher-name>).</citation>
</ref>
<ref id="B20">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Goodfellow</surname> <given-names>I.</given-names></name> <name><surname>Pouget-Abadie</surname> <given-names>J.</given-names></name> <name><surname>Mirza</surname> <given-names>M.</given-names></name> <name><surname>Xu</surname> <given-names>B.</given-names></name> <name><surname>Warde-Farley</surname> <given-names>D.</given-names></name> <name><surname>Ozair</surname> <given-names>S.</given-names></name> <etal/></person-group>. (<year>2014</year>). <article-title>&#x0201C;Generative adversarial nets,&#x0201D;</article-title> in <source>Advances in Neural Information Processing Systems</source>, eds. Z. Ghahramani, M. Welling, C. Cortes, N. Lawrence, and K. Weinberger (New York: Curran Associates, Inc).</citation>
</ref>
<ref id="B21">
<citation citation-type="web"><person-group person-group-type="author"><collab>Google</collab></person-group> (<year>2022</year>). <source>MediaPipe: A Framework for Building Multimodal Applied Machine Learning Pipelines</source>. Available online at: <ext-link ext-link-type="uri" xlink:href="https://mediapipe.dev/">https://mediapipe.dev/</ext-link> (accessed: March 14, 2024).</citation>
</ref>
<ref id="B22">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Haliassos</surname> <given-names>A.</given-names></name> <name><surname>Mira</surname> <given-names>R.</given-names></name> <name><surname>Petridis</surname> <given-names>S.</given-names></name> <name><surname>Pantic</surname> <given-names>M.</given-names></name></person-group> (<year>2022</year>). <article-title>&#x0201C;Leveraging real talking faces via self-supervision for robust forgery detection,&#x0201D;</article-title> in <source>Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition</source> (<publisher-loc>New Orleans, LA</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>14950</fpage>&#x02013;<lpage>14962</lpage>.</citation>
</ref>
<ref id="B23">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hernandez-Ortega</surname> <given-names>J.</given-names></name> <name><surname>Tolosana</surname> <given-names>R.</given-names></name> <name><surname>Fierrez</surname> <given-names>J.</given-names></name> <name><surname>Morales</surname> <given-names>A.</given-names></name></person-group> (<year>2020</year>). <article-title>Deepfakeson-phys: Deepfakes detection based on heart rate estimation</article-title>. <source>arXiv</source> [preprint] arXiv:2010.00400. <pub-id pub-id-type="doi">10.48550/arXiv.2010.00400</pub-id></citation>
</ref>
<ref id="B24">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ho</surname> <given-names>J.</given-names></name> <name><surname>Jain</surname> <given-names>A.</given-names></name> <name><surname>Abbeel</surname> <given-names>P.</given-names></name></person-group> (<year>2020</year>). <article-title>Denoising diffusion probabilistic models</article-title>. <source>arXiv</source> [preprint] arxiv:2006.11239. <pub-id pub-id-type="doi">10.48550/arXiv.2006.11239</pub-id></citation>
</ref>
<ref id="B25">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Huang</surname> <given-names>Z.</given-names></name> <name><surname>Chan</surname> <given-names>K. C.</given-names></name> <name><surname>Jiang</surname> <given-names>Y.</given-names></name> <name><surname>Liu</surname> <given-names>Z.</given-names></name></person-group> (<year>2023</year>). <article-title>&#x0201C;Collaborative diffusion for multi-modal face generation and editing,&#x0201D;</article-title> in <source>Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition</source> (<publisher-loc>Vancouver, BC</publisher-loc>: <publisher-name>IEEE</publisher-name>).</citation>
</ref>
<ref id="B26">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Karras</surname> <given-names>T.</given-names></name> <name><surname>Laine</surname> <given-names>S.</given-names></name> <name><surname>Aittala</surname> <given-names>M.</given-names></name> <name><surname>Hellsten</surname> <given-names>J.</given-names></name> <name><surname>Lehtinen</surname> <given-names>J.</given-names></name> <name><surname>Aila</surname> <given-names>T.</given-names></name></person-group> (<year>2020</year>). <article-title>&#x0201C;Analyzing and improving the image quality of StyleGAN,&#x0201D;</article-title> in <source>Proceedings of CVPR</source> (<publisher-loc>IEEE/CVF</publisher-loc>).</citation>
</ref>
<ref id="B27">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Kingma</surname> <given-names>D. P.</given-names></name> <name><surname>Welling</surname> <given-names>M.</given-names></name></person-group> (<year>2014</year>). <article-title>&#x0201C;Auto-encoding variational bayes,&#x0201D;</article-title> in <source>2nd International Conference on Learning Representations</source> (<publisher-loc>Banff, AB</publisher-loc>: <publisher-name>Conference Track Proceedings</publisher-name>).<pub-id pub-id-type="pmid">32176273</pub-id></citation></ref>
<ref id="B28">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Kossack</surname> <given-names>B.</given-names></name> <name><surname>Wisotzky</surname> <given-names>E.</given-names></name> <name><surname>Eisert</surname> <given-names>P.</given-names></name> <name><surname>Schraven</surname> <given-names>S. P.</given-names></name> <name><surname>Globke</surname> <given-names>B.</given-names></name> <name><surname>Hilsmann</surname> <given-names>A.</given-names></name></person-group> (<year>2022</year>). <article-title>&#x0201C;Perfusion assessment via local remote photoplethysmography (RPPG),&#x0201D;</article-title> in <source>Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition</source> (<publisher-loc>New Orleans, LA</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>2192</fpage>&#x02013;<lpage>2201</lpage>.</citation>
</ref>
<ref id="B29">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Kossack</surname> <given-names>B.</given-names></name> <name><surname>Wisotzky</surname> <given-names>E. L.</given-names></name> <name><surname>Hilsmann</surname> <given-names>A.</given-names></name> <name><surname>Eisert</surname> <given-names>P.</given-names></name></person-group> (<year>2019a</year>). <article-title>&#x0201C;Local remote photoplethysmography signal analysis for application in presentation attack detection,&#x0201D;</article-title> in <source>Vision, Modeling and Visualization-VMV</source> (<publisher-loc>London</publisher-loc>: <publisher-name>The Eurographics Association</publisher-name>), <fpage>135</fpage>&#x02013;<lpage>142</lpage>.</citation>
</ref>
<ref id="B30">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Kossack</surname> <given-names>B.</given-names></name> <name><surname>Wisotzky</surname> <given-names>E. L.</given-names></name> <name><surname>Hilsmann</surname> <given-names>A.</given-names></name> <name><surname>Eisert</surname> <given-names>P.</given-names></name></person-group> (<year>2021</year>). <article-title>&#x0201C;Automatic region-based heart rate measurement using remote photoplethysmography,&#x0201D;</article-title> in <source>2021 IEEE/CVF International Conference on Computer Vision Workshops (ICCVW)</source> (<publisher-loc>Montreal, BC</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>2755</fpage>&#x02013;<lpage>2759</lpage>.</citation>
</ref>
<ref id="B31">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kossack</surname> <given-names>B.</given-names></name> <name><surname>Wisotzky</surname> <given-names>E. L.</given-names></name> <name><surname>Hilsmann</surname> <given-names>A.</given-names></name> <name><surname>Eisert</surname> <given-names>P.</given-names></name> <name><surname>H&#x000E4;nsch</surname> <given-names>R.</given-names></name></person-group> (<year>2019b</year>). <article-title>Local blood flow analysis and visualization from RGB-video sequences</article-title>. <source>Curr. Direct. Biomed. Eng</source>. <volume>5</volume>:<fpage>1</fpage>. <pub-id pub-id-type="doi">10.1515/cdbme-2019-0094</pub-id></citation>
</ref>
<ref id="B32">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kossack</surname> <given-names>B.</given-names></name> <name><surname>Wisotzky</surname> <given-names>E. L.</given-names></name> <name><surname>Schraven</surname> <given-names>S.</given-names></name> <name><surname>Skopnik</surname> <given-names>L.</given-names></name> <name><surname>Hilsmann</surname> <given-names>A.</given-names></name> <name><surname>Eisert</surname> <given-names>P.</given-names></name></person-group> (<year>2023</year>). <article-title>Modified allen test assessment via imaging photoplethysmography</article-title>. <source>Curr. Direct. Biomed. Eng</source>. <volume>9</volume>, <fpage>571</fpage>&#x02013;<lpage>574</lpage>. <pub-id pub-id-type="doi">10.1515/cdbme-2023-1143</pub-id></citation>
</ref>
<ref id="B33">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Kwon</surname> <given-names>P.</given-names></name> <name><surname>You</surname> <given-names>J.</given-names></name> <name><surname>Nam</surname> <given-names>G.</given-names></name> <name><surname>Park</surname> <given-names>S.</given-names></name> <name><surname>Chae</surname> <given-names>G.</given-names></name></person-group> (<year>2021</year>). <article-title>&#x0201C;Kodf: A large-scale korean deepfake detection dataset,&#x0201D;</article-title> in <source>Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)</source> (<publisher-loc>Montreal, QC</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>10744</fpage>&#x02013;<lpage>10753</lpage>.</citation>
</ref>
<ref id="B34">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>L.</given-names></name> <name><surname>Bao</surname> <given-names>J.</given-names></name> <name><surname>Zhang</surname> <given-names>T.</given-names></name> <name><surname>Yang</surname> <given-names>H.</given-names></name> <name><surname>Chen</surname> <given-names>D.</given-names></name> <name><surname>Wen</surname> <given-names>F.</given-names></name> <etal/></person-group>. (<year>2020</year>). <article-title>&#x0201C;Face x-ray for more general face forgery detection,&#x0201D;</article-title> in <source>Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)</source> (<publisher-loc>Seattle, WA</publisher-loc>: <publisher-name>IEEE</publisher-name>).</citation>
</ref>
<ref id="B35">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>X.</given-names></name> <name><surname>Komulainen</surname> <given-names>J.</given-names></name> <name><surname>Zhao</surname> <given-names>G.</given-names></name> <name><surname>Yuen</surname> <given-names>P. C.</given-names></name> <name><surname>Pietikainen</surname> <given-names>M.</given-names></name></person-group> (<year>2017</year>). <article-title>&#x0201C;Generalized face anti-spoofing by detecting pulse from face videos,&#x0201D;</article-title> in <source>Proceedings</source> - <italic>International Conference on Pattern Recognition</italic> (Cancun: IEEE), <fpage>4244</fpage>&#x02013;<lpage>4249</lpage>.</citation>
</ref>
<ref id="B36">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>Y.</given-names></name> <name><surname>Chang</surname> <given-names>M.-C.</given-names></name> <name><surname>Lyu</surname> <given-names>S.</given-names></name></person-group> (<year>2018</year>). <article-title>&#x0201C;In Ictu Oculi: Exposing ai generated fake face videos by detecting eye blinking,&#x0201D;</article-title> in <source>2018 IEEE International Workshop on Information Forensics and Security (WIFS)</source> (<publisher-loc>Hong Kong</publisher-loc>: <publisher-name>IEEE</publisher-name>).</citation>
</ref>
<ref id="B37">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>Y.</given-names></name> <name><surname>Lyu</surname> <given-names>S.</given-names></name></person-group> (<year>2018</year>). <article-title>Exposing deepfake videos by detecting face warping artifacts</article-title>. <source>arXiv</source> [preprint] arXiv:1811.00656. <pub-id pub-id-type="doi">10.48550/arXiv.1811.00656</pub-id></citation>
</ref>
<ref id="B38">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>McDuff</surname> <given-names>D. J.</given-names></name> <name><surname>Blackford</surname> <given-names>E. B.</given-names></name> <name><surname>Estepp</surname> <given-names>J. R.</given-names></name></person-group> (<year>2017</year>). <article-title>&#x0201C;The impact of video compression on remote cardiac pulse measurement using imaging photoplethysmography,&#x0201D;</article-title> in <source>2017 12th IEEE International Conference on Automatic Face</source> &#x00026; <italic>Gesture Recognition (FG 2017)</italic> (Washington, DC: IEEE), <fpage>63</fpage>&#x02013;<lpage>70</lpage>.</citation>
</ref>
<ref id="B39">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Nirkin</surname> <given-names>Y.</given-names></name> <name><surname>Keller</surname> <given-names>Y.</given-names></name> <name><surname>Hassner</surname> <given-names>T.</given-names></name></person-group> (<year>2022</year>). <source>Fsganv2: Improved Subject Agnostic Face Swapping and Reenactment</source> (<publisher-loc>IEEE</publisher-loc>).<pub-id pub-id-type="pmid">35471874</pub-id></citation></ref>
<ref id="B40">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Perov</surname> <given-names>I.</given-names></name> <name><surname>Gao</surname> <given-names>D.</given-names></name> <name><surname>Chervoniy</surname> <given-names>N.</given-names></name> <name><surname>Liu</surname> <given-names>K.</given-names></name> <name><surname>Marangonda</surname> <given-names>S.</given-names></name> <name><surname>Um&#x000E9;</surname> <given-names>C.</given-names></name> <etal/></person-group>. (<year>2020</year>). <article-title>Deepfacelab: A simple, flexible and extensible face swapping framework</article-title>. <source>arXiv</source> [preprint] arXiv:2005.05535. <pub-id pub-id-type="doi">10.48550/arXiv.2005.05535</pub-id></citation>
</ref>
<ref id="B41">
<citation citation-type="web"><person-group person-group-type="author"><name><surname>Petrov</surname> <given-names>I.</given-names></name></person-group> (<year>2023</year>). <source>DeepFaceLive: Real-Time Face Swap for PC Streaming or Video Calls</source>. Available online at: <ext-link ext-link-type="uri" xlink:href="https://github.com/iperov/DeepFaceLive">https://github.com/iperov/DeepFaceLive</ext-link> (accessed: October 25, 2024).</citation>
</ref>
<ref id="B42">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Poh</surname> <given-names>M.-Z.</given-names></name> <name><surname>McDuff</surname> <given-names>D. J.</given-names></name> <name><surname>Picard</surname> <given-names>R. W.</given-names></name></person-group> (<year>2010</year>). <article-title>Non-contact, automated cardiac pulse measurements using video imaging and blind source separation</article-title>. <source>Opt. Express</source> <volume>18</volume>:<fpage>10762</fpage>. <pub-id pub-id-type="doi">10.1364/OE.18.010762</pub-id><pub-id pub-id-type="pmid">20588929</pub-id></citation></ref>
<ref id="B43">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Qi</surname> <given-names>H.</given-names></name> <name><surname>Guo</surname> <given-names>Q.</given-names></name> <name><surname>Juefei-Xu</surname> <given-names>F.</given-names></name> <name><surname>Xie</surname> <given-names>X.</given-names></name> <name><surname>Ma</surname> <given-names>L.</given-names></name> <name><surname>Feng</surname> <given-names>W.</given-names></name> <etal/></person-group>. (<year>2020</year>). <article-title>&#x0201C;Deeprhythm: exposing deepfakes with attentional visual heartbeat rhythms,&#x0201D;</article-title> in <source>Proceedings of the 28th ACM International Conference on Multimedia</source> (<publisher-loc>New York</publisher-loc>: <publisher-name>ACM</publisher-name>), <fpage>4318</fpage>&#x02013;<lpage>4327</lpage>.</citation>
</ref>
<ref id="B44">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ramesh</surname> <given-names>A.</given-names></name> <name><surname>Pavlov</surname> <given-names>M.</given-names></name> <name><surname>Goh</surname> <given-names>G.</given-names></name> <name><surname>Gray</surname> <given-names>S.</given-names></name> <name><surname>Voss</surname> <given-names>C.</given-names></name> <name><surname>Radford</surname> <given-names>A.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>&#x0201C;Zero-shot text-to-image generation,&#x0201D;</article-title> in <source>Proceedings of the 38th International Conference on Machine Learning</source>, eds. M. Meila, and T. Zhang (New York: PMLR), <fpage>8821</fpage>&#x02013;<lpage>8831</lpage>.</citation>
</ref>
<ref id="B45">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>R&#x000F6;ssler</surname> <given-names>A.</given-names></name> <name><surname>Cozzolino</surname> <given-names>D.</given-names></name> <name><surname>Verdoliva</surname> <given-names>L.</given-names></name> <name><surname>Riess</surname> <given-names>C.</given-names></name> <name><surname>Thies</surname> <given-names>J.</given-names></name> <name><surname>Nie&#x000DF;ner</surname> <given-names>M.</given-names></name></person-group> (<year>2019</year>). <article-title>&#x0201C;FaceForensics&#x0002B;&#x0002B;: Learning to detect manipulated facial images,&#x0201D;</article-title> in <source>International Conference on Computer Vision (ICCV)</source>.<pub-id pub-id-type="pmid">34960275</pub-id></citation></ref>
<ref id="B46">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Schraven</surname> <given-names>S. P.</given-names></name> <name><surname>Kossack</surname> <given-names>B.</given-names></name> <name><surname>Str&#x000FC;der</surname> <given-names>D.</given-names></name> <name><surname>Jung</surname> <given-names>M.</given-names></name> <name><surname>Skopnik</surname> <given-names>L.</given-names></name> <name><surname>Gross</surname> <given-names>J.</given-names></name> <etal/></person-group>. (<year>2023</year>). <article-title>Continuous intraoperative perfusion monitoring of free microvascular anastomosed fasciocutaneous flaps using remote photoplethysmography</article-title>. <source>Sci. Rep</source>. <volume>13</volume>:<fpage>1532</fpage>. <pub-id pub-id-type="doi">10.1038/s41598-023-28277-w</pub-id><pub-id pub-id-type="pmid">36707664</pub-id></citation></ref>
<ref id="B47">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Seibold</surname> <given-names>C.</given-names></name> <name><surname>Hilsmann</surname> <given-names>A.</given-names></name> <name><surname>Eisert</surname> <given-names>P.</given-names></name></person-group> (<year>2017</year>). <article-title>Model-based motion blur estimation for the improvement of motion tracking</article-title>. <source>Comp. Vision Image Understand</source>. <volume>160</volume>, <fpage>45</fpage>&#x02013;<lpage>56</lpage>. <pub-id pub-id-type="doi">10.1016/j.cviu.2017.03.005</pub-id><pub-id pub-id-type="pmid">17356201</pub-id></citation></ref>
<ref id="B48">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Seibold</surname> <given-names>C.</given-names></name> <name><surname>Hilsmann</surname> <given-names>A.</given-names></name> <name><surname>Eisert</surname> <given-names>P.</given-names></name></person-group> (<year>2019</year>). <article-title>&#x0201C;Style your face morph and improve your face morphing attack detector,&#x0201D;</article-title> in <source>2019 International Conference of the Biometrics Special Interest Group (BIOSIG)</source> (<publisher-loc>IEEE</publisher-loc>), <fpage>1</fpage>&#x02013;<lpage>6</lpage>.</citation>
</ref>
<ref id="B49">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Seibold</surname> <given-names>C.</given-names></name> <name><surname>Hilsmann</surname> <given-names>A.</given-names></name> <name><surname>Eisert</surname> <given-names>P.</given-names></name></person-group> (<year>2021</year>). <article-title>Feature focus: towards explainable and transparent deep face morphing attack detectors</article-title>. <source>Computers</source> <volume>10</volume>:<fpage>117</fpage>. <pub-id pub-id-type="doi">10.3390/computers10090117</pub-id></citation>
</ref>
<ref id="B50">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Seibold</surname> <given-names>C.</given-names></name> <name><surname>Hilsmann</surname> <given-names>A.</given-names></name> <name><surname>Eisert</surname> <given-names>P.</given-names></name></person-group> (<year>2024</year>). <article-title>&#x0201C;Towards better morphed face images without ghosting artifacts,&#x0201D;</article-title> in <source>Proceedings of the 19th International Joint Conference on Computer Vision, Imaging and Computer Graphics Theory and Applications</source> (<publisher-loc>Rome</publisher-loc>: <publisher-name>SCITEPRESS</publisher-name>). <pub-id pub-id-type="doi">10.5220/0012302800003660</pub-id></citation>
</ref>
<ref id="B51">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Tamura</surname> <given-names>T.</given-names></name> <name><surname>Maeda</surname> <given-names>Y.</given-names></name> <name><surname>Sekine</surname> <given-names>M.</given-names></name> <name><surname>Yoshida</surname> <given-names>M.</given-names></name></person-group> (<year>2014</year>). <article-title>Wearable Photoplethysmographic sensors&#x02013;past and present</article-title>. <source>Electronics</source> <volume>3</volume>, <fpage>282</fpage>&#x02013;<lpage>302</lpage>. <pub-id pub-id-type="doi">10.3390/electronics3020282</pub-id></citation>
</ref>
<ref id="B52">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Tan</surname> <given-names>M.</given-names></name> <name><surname>Le</surname> <given-names>Q.</given-names></name></person-group> (<year>2019</year>). <article-title>&#x0201C;EfficientNet: Rethinking model scaling for convolutional neural networks,&#x0201D;</article-title> in <source>Proceedings of the 36th International Conference on Machine Learning</source>, eds. K. Chaudhuri, and R. Salakhutdinov (New York: PMLR), <fpage>6105</fpage>&#x02013;<lpage>6114</lpage>.<pub-id pub-id-type="pmid">35077359</pub-id></citation></ref>
<ref id="B53">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Tulyakov</surname> <given-names>S.</given-names></name> <name><surname>Alameda-Pineda</surname> <given-names>X.</given-names></name> <name><surname>Ricci</surname> <given-names>E.</given-names></name> <name><surname>Yin</surname> <given-names>L.</given-names></name> <name><surname>Cohn</surname> <given-names>J. F.</given-names></name> <name><surname>Sebe</surname> <given-names>N.</given-names></name></person-group> (<year>2016</year>). <article-title>&#x0201C;Self-adaptive matrix completion for heart rate estimation from face videos under realistic conditions,&#x0201D;</article-title> in <source>Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)</source> (<publisher-loc>Las Vegas, NV</publisher-loc>: <publisher-name>IEEE</publisher-name>).</citation>
</ref>
<ref id="B54">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>J.</given-names></name> <name><surname>Shan</surname> <given-names>C.</given-names></name> <name><surname>Liu</surname> <given-names>L.</given-names></name> <name><surname>Hou</surname> <given-names>Z.</given-names></name></person-group> (<year>2024</year>). <article-title>Camera-based physiological measurement: Recent advances and future prospects</article-title>. <source>Neurocomputing</source> <volume>2024</volume>:<fpage>127282</fpage>. <pub-id pub-id-type="doi">10.1016/j.neucom.2024.127282</pub-id></citation>
</ref>
<ref id="B55">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>T.-C.</given-names></name> <name><surname>Mallya</surname> <given-names>A.</given-names></name> <name><surname>Liu</surname> <given-names>M.-Y.</given-names></name></person-group> (<year>2021a</year>). <article-title>&#x0201C;One-shot free-view neural talking-head synthesis for video conferencing,&#x0201D;</article-title> in <source>Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition</source> (<publisher-loc>Nashville, TN</publisher-loc>: <publisher-name>IEEE</publisher-name>).</citation>
</ref>
<ref id="B56">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>W.</given-names></name> <name><surname>Den Brinker</surname> <given-names>A. C.</given-names></name> <name><surname>Stuijk</surname> <given-names>S.</given-names></name> <name><surname>De Haan</surname> <given-names>G.</given-names></name></person-group> (<year>2017</year>). <article-title>Algorithmic Principles of Remote PPG</article-title>. <source>IEEE Trans. Biomed. Eng</source>. <volume>64</volume>, <fpage>1479</fpage>&#x02013;<lpage>1491</lpage>. <pub-id pub-id-type="doi">10.1109/TBME.2016.2609282</pub-id><pub-id pub-id-type="pmid">28113245</pub-id></citation></ref>
<ref id="B57">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>Y.</given-names></name> <name><surname>Chen</surname> <given-names>X.</given-names></name> <name><surname>Zhu</surname> <given-names>J.</given-names></name> <name><surname>Chu</surname> <given-names>W.</given-names></name> <name><surname>Tai</surname> <given-names>Y.</given-names></name> <name><surname>Wang</surname> <given-names>C.</given-names></name> <etal/></person-group>. (<year>2021b</year>). <article-title>&#x0201C;Hififace: 3D shape and semantic prior guided high fidelity face swapping,&#x0201D;</article-title> in <source>Proceedings of the Thirtieth International Joint Conference on Artificial Intelligence</source> (<publisher-loc>Montreal</publisher-loc>: <publisher-name>IJCAI-21</publisher-name>).</citation>
</ref>
<ref id="B58">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>Z.</given-names></name> <name><surname>Bao</surname> <given-names>J.</given-names></name> <name><surname>Zhou</surname> <given-names>W.</given-names></name> <name><surname>Wang</surname> <given-names>W.</given-names></name> <name><surname>Li</surname> <given-names>H.</given-names></name></person-group> (<year>2023</year>). <article-title>&#x0201C;Altfreezing for more general video face forgery detection,&#x0201D;</article-title> in <source>Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)</source> (<publisher-loc>Vancouver, BC</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>4129</fpage>&#x02013;<lpage>4138</lpage>.</citation>
</ref>
<ref id="B59">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wu</surname> <given-names>J.</given-names></name> <name><surname>Zhu</surname> <given-names>Y.</given-names></name> <name><surname>Jiang</surname> <given-names>X.</given-names></name> <name><surname>Liu</surname> <given-names>Y.</given-names></name> <name><surname>Lin</surname> <given-names>J.</given-names></name></person-group> (<year>2023</year>). <article-title>Local attention and long-distance interaction of rppg for deepfake detection</article-title>. <source>Vis. Comput</source>. <volume>40</volume>, <fpage>1083</fpage>&#x02013;<lpage>1094</lpage>. <pub-id pub-id-type="doi">10.1007/s00371-023-02833-x</pub-id><pub-id pub-id-type="pmid">37361461</pub-id></citation></ref>
<ref id="B60">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Yang</surname> <given-names>J.</given-names></name> <name><surname>Guthier</surname> <given-names>B.</given-names></name> <name><surname>Saddik</surname> <given-names>E. A.</given-names></name></person-group> (<year>2015</year>). <article-title>&#x0201C;Estimating two-dimensional blood flow velocities from videos,&#x0201D;</article-title> in <source>International Conference on Image Processing (ICIP)</source> (<publisher-loc>Quebec City, QC</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>3768</fpage>&#x02013;<lpage>3772</lpage>.</citation>
</ref>
<ref id="B61">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Yang</surname> <given-names>X.</given-names></name> <name><surname>Li</surname> <given-names>Y.</given-names></name> <name><surname>Lyu</surname> <given-names>S.</given-names></name></person-group> (<year>2019</year>). <article-title>&#x0201C;Exposing deep fakes using inconsistent head poses,&#x0201D;</article-title> in <source>ICASSP 2019-2019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</source> (<publisher-loc>Brighton</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>8261</fpage>&#x02013;<lpage>8265</lpage>.</citation>
</ref>
<ref id="B62">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yang</surname> <given-names>Z.</given-names></name> <name><surname>Wang</surname> <given-names>H.</given-names></name> <name><surname>Lu</surname> <given-names>F.</given-names></name></person-group> (<year>2021</year>). <article-title>Assessment of deep learning-based heart rate estimation using remote photoplethysmography under different illuminations</article-title>. <source>arXiv</source> [preprint] arXiv:2107.13193. <pub-id pub-id-type="doi">10.48550/arXiv.2107.13193</pub-id></citation>
</ref>
<ref id="B63">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yu</surname> <given-names>P.</given-names></name> <name><surname>Xia</surname> <given-names>Z.</given-names></name> <name><surname>Fei</surname> <given-names>J.</given-names></name> <name><surname>Lu</surname> <given-names>Y.</given-names></name></person-group> (<year>2021a</year>). <article-title>A survey on deepfake video detection</article-title>. <source>IET Biomet</source>. <volume>10</volume>, <fpage>607</fpage>&#x02013;<lpage>624</lpage>. <pub-id pub-id-type="doi">10.1049/bme2.12031</pub-id></citation>
</ref>
<ref id="B64">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yu</surname> <given-names>Z.</given-names></name> <name><surname>Li</surname> <given-names>X.</given-names></name> <name><surname>Wang</surname> <given-names>P.</given-names></name> <name><surname>Zhao</surname> <given-names>G.</given-names></name></person-group> (<year>2021b</year>). <article-title>Transrppg: Remote photoplethysmography transformer for 3d mask face presentation attack detection</article-title>. <source>IEEE Signal Process. Lett</source>. <volume>28</volume>, <fpage>1290</fpage>&#x02013;<lpage>1294</lpage>. <pub-id pub-id-type="doi">10.1109/LSP.2021.3089908</pub-id></citation>
</ref>
<ref id="B65">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Yu</surname> <given-names>Z.</given-names></name> <name><surname>Li</surname> <given-names>X.</given-names></name> <name><surname>Zhao</surname> <given-names>G.</given-names></name></person-group> (<year>2020</year>). <article-title>&#x0201C;Remote photoplethysmograph signal measurement from facial videos using spatio-temporal networks,&#x0201D;</article-title> in <source>30th British Machine Vision Conference 2019</source> (<publisher-loc>Glasgow</publisher-loc>: <publisher-name>BMVC</publisher-name>).</citation>
</ref>
<ref id="B66">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Yu</surname> <given-names>Z.</given-names></name> <name><surname>Peng</surname> <given-names>W.</given-names></name> <name><surname>Li</surname> <given-names>X.</given-names></name> <name><surname>Hong</surname> <given-names>X.</given-names></name> <name><surname>Zhao</surname> <given-names>G.</given-names></name></person-group> (<year>2019</year>). <article-title>&#x0201C;Remote heart rate measurement from highly compressed facial videos: an end-to-end deep learning solution with video enhancement,&#x0201D;</article-title> in <source>Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)</source> (<publisher-loc>Seoul</publisher-loc>: <publisher-name>IEEE</publisher-name>).</citation>
</ref>
<ref id="B67">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zaunseder</surname> <given-names>S.</given-names></name> <name><surname>Trumpp</surname> <given-names>A.</given-names></name> <name><surname>Wedekind</surname> <given-names>D.</given-names></name> <name><surname>Malberg</surname> <given-names>H.</given-names></name></person-group> (<year>2018</year>). <article-title>Cardiovascular assessment by imaging photoplethysmography - a review</article-title>. <source>Biomedizinische Technik</source> <volume>2018</volume>, <fpage>1</fpage>&#x02013;<lpage>18</lpage>. <pub-id pub-id-type="doi">10.1515/bmt-2017-0119</pub-id><pub-id pub-id-type="pmid">29897880</pub-id></citation></ref>
<ref id="B68">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Zhao</surname> <given-names>W.</given-names></name> <name><surname>Rao</surname> <given-names>Y.</given-names></name> <name><surname>Shi</surname> <given-names>W.</given-names></name> <name><surname>Liu</surname> <given-names>Z.</given-names></name> <name><surname>Zhou</surname> <given-names>J.</given-names></name> <name><surname>Lu</surname> <given-names>J.</given-names></name></person-group> (<year>2023</year>). <article-title>&#x0201C;Diffswap: high-fidelity and controllable face swapping via 3D-aware masked diffusion,&#x0201D;</article-title> in <source>2023 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)</source> (<publisher-loc>Vancouver, BC</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>8568</fpage>&#x02013;<lpage>8577</lpage>.</citation>
</ref>
</ref-list>
</back>
</article>