%Aigaion2 BibTeX export from Idiap Publications
%Thursday 21 November 2024 11:36:32 AM

% Conference paper (ACM Multimedia, 2009) on audio-visual speaker localization.
% `projects` is a non-standard field; standard BibTeX styles ignore unknown fields.
@INPROCEEDINGS{Friedland_ACMMM_2009,
  author    = {Friedland, Gerald and Yeo, Chuohao and Hung, Hayley},
  projects  = {Idiap, AMIDA, IM2},
  title     = {Visual Speaker Localization Aided by Acoustic Models},
  booktitle = {ACM Multimedia},
  year      = {2009},
  abstract  = {The following paper presents a novel audio-visual approach for unsupervised speaker locationing. Using recordings from a single, low-resolution room overview camera and a single far-field microphone, a state-of-the-art audio-only speaker localization system (traditionally called speaker diarization) is extended so that both acoustic and visual models are estimated as part of a joint unsupervised optimization problem. The speaker diarization system first automatically determines the number of speakers and estimates ``who spoke when'', then, in a second step, the visual models are used to infer the location of the speakers in the video. The experiments were performed on real-world meetings using 4.5 hours of the publicly available AMI meeting corpus. The proposed system is able to exploit audio-visual integration to not only improve the accuracy of a state-of-the-art (audio-only) speaker diarization, but also adds visual speaker locationing at little incremental engineering and computation costs.}
}