%Aigaion2 BibTeX export from Idiap Publications
%Monday 30 December 2024 06:00:26 PM
@inproceedings{Korshunov_AVFAKES_ICML_2019,
  author    = {Korshunov, Pavel and Halstead, Michael and Castan, Diego and Graciarena, Martin and McLaren, Mitchell and Burns, Brian and Lawson, Aaron and Marcel, S{\'{e}}bastien},
  keywords  = {inconsistencies detection, lip-syncing, Video tampering},
  projects  = {Idiap, SAVI},
  month     = jul,
  title     = {Tampered Speaker Inconsistency Detection with Phonetically Aware Audio-visual Features},
  booktitle = {International Conference on Machine Learning},
  series    = {Synthetic Realities: Deep Learning for Detecting AudioVisual Fakes},
  year      = {2019},
  note      = {Best paper award in ICML workshop ``Synthetic Realities: Deep Learning for Detecting AudioVisual Fakes''},
  abstract  = {The recent increase in social media based propaganda, i.e., `fake news', calls for automated methods to detect tampered content. In this paper, we focus on detecting tampering in a video with a person speaking to a camera. This form of manipulation is easy to perform, since one can just replace a part of the audio, dramatically changing the meaning of the video. We consider several detection approaches based on phonetic features and recurrent networks. We demonstrate that by replacing standard MFCC features with embeddings from a DNN trained for automatic speech recognition, combined with mouth landmarks (visual features), we can achieve a significant performance improvement on several challenging publicly available databases of speakers (VidTIMIT, AMI, and GRID), for which we generated sets of tampered data. The evaluations demonstrate a relative equal error rate reduction of 55\% (to 4.5\% from 10.0\%) on the large GRID corpus based dataset and a satisfying generalization of the model on other datasets.},
  pdf       = {https://publications.idiap.ch/attachments/papers/2019/Korshunov_AVFAKESICML_2019.pdf}
}