%Aigaion2 BibTeX export from Idiap Publications
%Monday 30 December 2024 06:00:26 PM
@inproceedings{Korshunov_AVFAKES_ICML_2019,
  author    = {Korshunov, Pavel and Halstead, Michael and Castan, Diego and Graciarena, Martin and McLaren, Mitchell and Burns, Brian and Lawson, Aaron and Marcel, S{\'{e}}bastien},
  keywords  = {inconsistencies detection, lip-syncing, Video tampering},
  projects  = {Idiap, SAVI},
  month     = jul,
  title     = {Tampered Speaker Inconsistency Detection with Phonetically Aware Audio-visual Features},
  booktitle = {International Conference on Machine Learning},
  series    = {Synthetic Realities: Deep Learning for Detecting AudioVisual Fakes},
  year      = {2019},
  note      = {Best paper award in ICML workshop ``Synthetic Realities: Deep Learning for Detecting AudioVisual Fakes''},
  abstract  = {The recent increase in social media based propaganda, i.e., `fake news', calls for automated methods to detect tampered content. In this paper, we focus on detecting tampering in a video with a person speaking to a camera. This form of manipulation is easy to perform, since one can just replace a part of the audio, dramatically changing the meaning of the video. We consider several detection approaches based on phonetic features and recurrent networks. We demonstrate that by replacing standard MFCC features with embeddings from a DNN trained for automatic speech recognition, combined with mouth landmarks (visual features), we can achieve a significant performance improvement on several challenging publicly available databases of speakers (VidTIMIT, AMI, and GRID), for which we generated sets of tampered data. The evaluations demonstrate a relative equal error rate reduction of 55\% (to 4.5\% from 10.0\%) on the large GRID corpus based dataset and a satisfying generalization of the model on other datasets.},
  pdf       = {https://publications.idiap.ch/attachments/papers/2019/Korshunov_AVFAKESICML_2019.pdf}
}