%Aigaion2 BibTeX export from Idiap Publications
%Thursday 21 November 2024 11:38:45 AM

% Interspeech 2010 conference paper. En dashes are written as the ASCII ligature "--"; month uses the standard BibTeX macro.
@INPROCEEDINGS{Garau_INTERSPEECH_2010,
         author = {Garau, Giulia and Dielmann, Alfred and Bourlard, Herv{\'{e}}},
       keywords = {audio--visual speech synchrony, canonical correlation analysis, multimodal speaker diarisation, multiparty meetings, mutual information},
       projects = {Idiap, AMIDA, IM2},
          month = sep,
          title = {Audio--Visual Synchronisation for Speaker Diarisation},
      booktitle = {International Conference on Speech and Language Processing, Interspeech},
           year = {2010},
       location = {Makuhari, Japan},
       abstract = {The role of audio--visual speech synchrony for speaker diarisation is investigated on the multiparty meeting domain. We measured both mutual information and canonical correlation on different sets of audio and video features. As acoustic features we considered energy and MFCCs. As visual features we experimented both with motion intensity features, computed on the whole image, and Kanade Lucas Tomasi motion estimation.
Thanks to KLT we decomposed the motion in its horizontal and vertical components. The vertical component was found to be more reliable for speech synchrony estimation. The mutual information between acoustic energy and KLT vertical motion of skin pixels, not only resulted in a 20\% relative improvement over a MFCC only diarisation system, but also outperformed visual features such as motion intensities and head poses.},
            pdf = {https://publications.idiap.ch/attachments/papers/2010/Garau_INTERSPEECH_2010.pdf}
}