%Aigaion2 BibTeX export from Idiap Publications
%Thursday 04 December 2025 11:00:49 PM

@INPROCEEDINGS{lathoud04c,
                      author = {Lathoud, Guillaume and Odobez, Jean-Marc and Gatica-Perez, Daniel},
                    projects = {Idiap},
                       title = {{AV16.3}: an {Audio-Visual} {Corpus} for {Speaker} {Localization} and {Tracking}},
                   booktitle = {Proceedings of the 2004 MLMI Workshop},
                      editor = {Bengio, Samy and Bourlard, Herv{\'e}},
                   publisher = {Springer-Verlag},
                        year = {2005},
                        note = {IDIAP-RR 04-28},
                    crossref = {lathoud-rr-04-28},
                    abstract = {Assessing the quality of a speaker localization or tracking algorithm on a few short examples is difficult, especially when the ground-truth is absent or not well defined. One step towards systematic performance evaluation of such algorithms is to provide time-continuous speaker location annotation over a series of real recordings, covering various test cases. Areas of interest include audio, video and audio-visual speaker localization and tracking. The desired location annotation can be either 2-dimensional (image plane) or 3-dimensional (physical space). This paper motivates and describes a corpus of audio-visual data called ``AV16.3'', along with a method for 3-D location annotation based on calibrated cameras. ``16.3'' stands for 16 microphones and 3 cameras, recorded in a fully synchronized manner, in a meeting room. Part of this corpus has already been successfully used to report research results.},
                         pdf = {https://publications.idiap.ch/attachments/papers/2004/lathoud04c.pdf},
                  postscript = {ftp://ftp.idiap.ch/pub/papers/2004/lathoud04c.ps.gz},
                     ipdinar = {2004},
               ipdmembership = {speech, vision},
}



crossreferenced publications: 
@TECHREPORT{lathoud-rr-04-28,
                      author = {Lathoud, Guillaume and Odobez, Jean-Marc and Gatica-Perez, Daniel},
                    projects = {Idiap},
                       title = {{AV16.3}: an {Audio-Visual} {Corpus} for {Speaker} {Localization} and {Tracking}},
                        type = {Idiap-RR},
                      number = {Idiap-RR-28-2004},
                        year = {2004},
                 institution = {IDIAP},
                     address = {Martigny, Switzerland},
                        note = {Published in ``Proceedings of the 2004 MLMI Workshop''},
                    abstract = {Assessing the quality of a speaker localization or tracking algorithm on a few short examples is difficult, especially when the ground-truth is absent or not well defined. One step towards systematic performance evaluation of such algorithms is to provide time-continuous speaker location annotation over a series of real recordings, covering various test cases. Areas of interest include audio, video and audio-visual speaker localization and tracking. The desired location annotation can be either 2-dimensional (image plane) or 3-dimensional (physical space). This paper motivates and describes a corpus of audio-visual data called ``AV16.3'', along with a method for 3-D location annotation based on calibrated cameras. ``16.3'' stands for 16 microphones and 3 cameras, recorded in a fully synchronized manner, in a meeting room. Part of this corpus has already been successfully used to report research results.},
                         pdf = {https://publications.idiap.ch/attachments/reports/2004/rr-04-28.pdf},
                  postscript = {ftp://ftp.idiap.ch/pub/reports/2004/rr-04-28.ps.gz},
                     ipdinar = {2004},
               ipdmembership = {speech, vision},
                    language = {English},
}