<?xml version="1.0" encoding="UTF-8"?>
<collection xmlns="http://www.loc.gov/MARC21/slim">
	<record>
		<datafield tag="980" ind1=" " ind2=" ">
			<subfield code="a">CONF</subfield>
		</datafield>
		<datafield tag="970" ind1=" " ind2=" ">
			<subfield code="a">hari-rr-05-03b/IDIAP</subfield>
		</datafield>
		<datafield tag="245" ind1=" " ind2=" ">
			<subfield code="a">Speech Acquisition in Meetings with an Audio-Visual Sensor Array</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">McCowan, Iain A.</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Krishna, Maganti Hari</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Gatica-Perez, Daniel</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Moore, Darren</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Ba, Silèye O.</subfield>
		</datafield>
		<datafield tag="856" ind1="4" ind2="0">
			<subfield code="i">EXTERNAL</subfield>
			<subfield code="u">http://publications.idiap.ch/attachments/reports/2005/hari-icme05.pdf</subfield>
			<subfield code="x">PUBLIC</subfield>
		</datafield>
		<datafield tag="856" ind1="4" ind2=" ">
			<subfield code="u">http://publications.idiap.ch/index.php/publications/showcite/hari-rr-05-03</subfield>
			<subfield code="z">Related documents</subfield>
		</datafield>
		<datafield tag="711" ind1="2" ind2=" ">
			<subfield code="a">Pro. IEEE ICME</subfield>
		</datafield>
		<datafield tag="260" ind1=" " ind2=" ">
			<subfield code="c">2005</subfield>
		</datafield>
		<datafield tag="500" ind1=" " ind2=" ">
			<subfield code="a">IDIAP-RR 05-03</subfield>
		</datafield>
		<datafield tag="520" ind1=" " ind2=" ">
			<subfield code="a">Close-talk headset microphones have been traditionally used for speech acquisition in a number of applications, as they naturally provide a higher signal-to-noise ratio -needed for recognition tasks- than single distant microphones. However, in multi-party conversational settings like meetings, microphone arrays represent an important alternative to close-talking microphones, as they allow for localisation and tracking of speakers and signal-independent enhancement, while providing a non-intrusive, hands-free operation mode. In this article, we investigate the joint use of a small table-top microphone array and a camera array for speaker tracking and speech enhancement in meetings. Our methodology first fuses audio and video for person tracking, and then integrates the output of the tracker with a beamformer for speech enhancement. We compare and discuss the features of the resulting speech signal with respect to that obtained from single close-talking and table-top microphones.</subfield>
		</datafield>
	</record>
	<record>
		<datafield tag="980" ind1=" " ind2=" ">
			<subfield code="a">REPORT</subfield>
		</datafield>
		<datafield tag="970" ind1=" " ind2=" ">
			<subfield code="a">hari-rr-05-03/IDIAP</subfield>
		</datafield>
		<datafield tag="245" ind1=" " ind2=" ">
			<subfield code="a">Speech Acquisition in Meetings with an Audio-Visual Sensor Array</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">McCowan, Iain A.</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Krishna, Maganti Hari</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Gatica-Perez, Daniel</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Moore, Darren</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Ba, Silèye O.</subfield>
		</datafield>
		<datafield tag="856" ind1="4" ind2="0">
			<subfield code="i">EXTERNAL</subfield>
			<subfield code="u">http://publications.idiap.ch/attachments/reports/2005/rr-05-03.pdf</subfield>
			<subfield code="x">PUBLIC</subfield>
		</datafield>
		<datafield tag="088" ind1=" " ind2=" ">
			<subfield code="a">Idiap-RR-03-2005</subfield>
		</datafield>
		<datafield tag="260" ind1=" " ind2=" ">
			<subfield code="c">2005</subfield>
			<subfield code="b">IDIAP</subfield>
			<subfield code="a">Martigny, Switzerland</subfield>
		</datafield>
		<datafield tag="500" ind1=" " ind2=" ">
			<subfield code="a">Published in ``Prof. IEEE ICME'', July, 2005</subfield>
		</datafield>
		<datafield tag="520" ind1=" " ind2=" ">
			<subfield code="a">Close-talk headset microphones have been traditionally used for speech acquisition in a number of applications, as they naturally provide a higher signal-to-noise ratio -needed for recognition tasks- than single distant microphones. However, in multi-party conversational settings like meetings, microphone arrays represent an important alternative to close-talking microphones, as they allow for localisation and tracking of speakers and signal-independent enhancement, while providing a non-intrusive, hands-free operation mode. In this article, we investigate the joint use of a small table-top microphone array and a camera array for speaker tracking and speech enhancement in meetings. Our methodology first fuses audio and video for person tracking, and then integrates the output of the tracker with a beamformer for speech enhancement. We compare and discuss the features of the resulting speech signal with respect to that obtained from single close-talking and table-top microphones.</subfield>
		</datafield>
	</record>
</collection>