%Aigaion2 BibTeX export from Idiap Publications
%Friday 05 December 2025 03:37:38 PM
% Conference (ICMI 2005) version of the paper. Fields not set here are
% inherited from the report entry `gatica05c` through `crossref`.
@INPROCEEDINGS{gatica05c-conf,
author = {Gatica-Perez, Daniel and Lathoud, Guillaume and Odobez, Jean-Marc and McCowan, Iain A.},
projects = {Idiap},
title = {Multimodal Multispeaker Probabilistic Tracking in Meetings},
booktitle = {Proc. Int. Conf. on Multimodal Interfaces (ICMI)},
month = oct,
year = {2005},
crossref = {gatica05c},
abstract = {Tracking speakers in multiparty conversations constitutes a fundamental task for automatic meeting analysis. In this paper, we present a probabilistic approach to jointly track the location and speaking activity of multiple speakers in a multisensor meeting room, equipped with a small microphone array and multiple uncalibrated cameras. Our framework is based on a mixed-state dynamic graphical model defined on a multiperson state-space, which includes the explicit definition of a proximity-based interaction model. The model integrates audio-visual (AV) data through a novel observation model. Audio observations are derived from a source localization algorithm. Visual observations are based on models of the shape and spatial structure of human heads. Approximate inference in our model, needed given its complexity, is performed with a Markov Chain Monte Carlo particle filter (MCMC-PF), which results in high sampling efficiency. We present results -based on an objective evaluation procedure- that show that our framework (1) is capable of locating and tracking the position and speaking activity of multiple meeting participants engaged in real conversations with good accuracy; (2) can deal with cases of visual clutter and partial occlusion; and (3) significantly outperforms a traditional sampling-based approach.},
pdf = {https://publications.idiap.ch/attachments/reports/2004/rr-04-66.pdf},
postscript = {ftp://ftp.idiap.ch/pub/reports/2004/rr-04-66.ps.gz},
ipdmembership = {speech, vision},
}
% Cross-referenced publications:
% Idiap research report version of the paper. This entry is the `crossref`
% target of `gatica05c-conf`; classic BibTeX requires a cross-referenced
% entry to appear after every entry that refers to it, so keep it below.
@TECHREPORT{gatica05c,
author = {Gatica-Perez, Daniel and Lathoud, Guillaume and Odobez, Jean-Marc and McCowan, Iain A.},
projects = {Idiap},
title = {Multimodal Multispeaker Probabilistic Tracking in Meetings},
type = {Idiap-RR},
number = {Idiap-RR-66-2004},
year = {2004},
institution = {IDIAP},
address = {Martigny, Switzerland},
note = {in Proc. Int. Conf. on Multimodal Interfaces (ICMI), Trento, Oct. 2005.},
abstract = {Tracking speakers in multiparty conversations constitutes a fundamental task for automatic meeting analysis. In this paper, we present a probabilistic approach to jointly track the location and speaking activity of multiple speakers in a multisensor meeting room, equipped with a small microphone array and multiple uncalibrated cameras. Our framework is based on a mixed-state dynamic graphical model defined on a multiperson state-space, which includes the explicit definition of a proximity-based interaction model. The model integrates audio-visual (AV) data through a novel observation model. Audio observations are derived from a source localization algorithm. Visual observations are based on models of the shape and spatial structure of human heads. Approximate inference in our model, needed given its complexity, is performed with a Markov Chain Monte Carlo particle filter (MCMC-PF), which results in high sampling efficiency. We present results -based on an objective evaluation procedure- that show that our framework (1) is capable of locating and tracking the position and speaking activity of multiple meeting participants engaged in real conversations with good accuracy; (2) can deal with cases of visual clutter and partial occlusion; and (3) significantly outperforms a traditional sampling-based approach.},
pdf = {https://publications.idiap.ch/attachments/reports/2004/rr-04-66.pdf},
postscript = {ftp://ftp.idiap.ch/pub/reports/2004/rr-04-66.ps.gz},
ipdmembership = {speech, vision},
language = {English},
}