%Aigaion2 BibTeX export from Idiap Publications
%Thursday 21 November 2024 11:36:32 AM

% Conference paper (ACM Multimedia, 2009) on audio-visual speaker diarization and localization.
@INPROCEEDINGS{Friedland_ACMMM_2009,
         author = {Friedland, Gerald and Yeo, Chuohao and Hung, Hayley},
       projects = {Idiap, AMIDA, IM2},
          title = {Visual Speaker Localization Aided by Acoustic Models},
      booktitle = {{ACM} Multimedia},
           year = {2009},
       abstract = {The following paper presents a novel audio-visual approach
for unsupervised speaker locationing. Using recordings from
a single, low-resolution room overview camera and a single
far-field microphone, a state-of-the-art audio-only speaker
localization system (traditionally called speaker diarization)
is extended so that both acoustic and visual models are estimated
as part of a joint unsupervised optimization problem.
The speaker diarization system first automatically determines
the number of speakers and estimates ``who spoke
when'', then, in a second step, the visual models are used to
infer the location of the speakers in the video. The experiments
were performed on real-world meetings using 4.5 hours
of the publicly available AMI meeting corpus. The proposed
system is able to exploit audio-visual integration to
not only improve the accuracy of a state-of-the-art (audio-only)
speaker diarization, but also adds visual speaker locationing
at little incremental engineering and computation
costs.}
}