%Aigaion2 BibTeX export from Idiap Publications
%Thursday 21 November 2024 12:27:28 PM

% Conference paper (IEEE ICME, 2005). Its note and crossref fields both point
% at the technical report hari-rr-05-03 defined further down in this file.
% NOTE(review): crossref targets a TECHREPORT entry. Classic BibTeX copies the
% parent's fields that are missing here (type, number, institution, address,
% ...) into this entry, which may surface in the rendered conference citation.
% Confirm this inheritance is intended, or replace it with a non-inheriting link.
@INPROCEEDINGS{hari-rr-05-03b,
  author        = {McCowan, Iain A. and Krishna, Maganti Hari and Gatica-Perez, Daniel and Moore, Darren and Ba, Sil{\`{e}}ye O.},
  projects      = {Idiap},
  title         = {Speech Acquisition in Meetings with an Audio-Visual Sensor Array},
  booktitle     = {Proc. IEEE ICME},
  year          = {2005},
  note          = {IDIAP-RR 05-03},
  crossref      = {hari-rr-05-03},
  abstract      = {Close-talk headset microphones have been traditionally used for speech acquisition in a number of applications, as they naturally provide a higher signal-to-noise ratio -needed for recognition tasks- than single distant microphones. However, in multi-party conversational settings like meetings, microphone arrays represent an important alternative to close-talking microphones, as they allow for localisation and tracking of speakers and signal-independent enhancement, while providing a non-intrusive, hands-free operation mode. In this article, we investigate the joint use of a small table-top microphone array and a camera array for speaker tracking and speech enhancement in meetings. Our methodology first fuses audio and video for person tracking, and then integrates the output of the tracker with a beamformer for speech enhancement. We compare and discuss the features of the resulting speech signal with respect to that obtained from single close-talking and table-top microphones.},
  pdf           = {https://publications.idiap.ch/attachments/reports/2005/hari-icme05.pdf},
  postscript    = {ftp://ftp.idiap.ch/pub/reports/2005/rr-05-03.ps.gz},
  ipdmembership = {speech},
}

% crossreferenced publications:

% Technical report version of the paper above. It is the crossref parent of
% hari-rr-05-03b and is kept after that entry, as classic BibTeX requires for
% cross-referenced parents.
@TECHREPORT{hari-rr-05-03,
  author        = {McCowan, Iain A. and Krishna, Maganti Hari and Gatica-Perez, Daniel and Moore, Darren and Ba, Sil{\`{e}}ye O.},
  projects      = {Idiap},
  title         = {Speech Acquisition in Meetings with an Audio-Visual Sensor Array},
  type          = {Idiap-RR},
  number        = {Idiap-RR-03-2005},
  year          = {2005},
  institution   = {IDIAP},
  address       = {Martigny, Switzerland},
  note          = {Published in ``Proc. IEEE ICME'', July, 2005},
  abstract      = {Close-talk headset microphones have been traditionally used for speech acquisition in a number of applications, as they naturally provide a higher signal-to-noise ratio -needed for recognition tasks- than single distant microphones. However, in multi-party conversational settings like meetings, microphone arrays represent an important alternative to close-talking microphones, as they allow for localisation and tracking of speakers and signal-independent enhancement, while providing a non-intrusive, hands-free operation mode. In this article, we investigate the joint use of a small table-top microphone array and a camera array for speaker tracking and speech enhancement in meetings. Our methodology first fuses audio and video for person tracking, and then integrates the output of the tracker with a beamformer for speech enhancement. We compare and discuss the features of the resulting speech signal with respect to that obtained from single close-talking and table-top microphones.},
  pdf           = {https://publications.idiap.ch/attachments/reports/2005/rr-05-03.pdf},
  postscript    = {ftp://ftp.idiap.ch/pub/reports/2005/rr-05-03.ps.gz},
  ipdinar       = {2005},
  ipdmembership = {speech},
  language      = {English},
}