<?xml version="1.0" encoding="UTF-8"?>
<collection xmlns="http://www.loc.gov/MARC21/slim">
	<record>
		<datafield tag="980" ind1=" " ind2=" ">
			<subfield code="a">ARTICLE</subfield>
		</datafield>
		<datafield tag="970" ind1=" " ind2=" ">
			<subfield code="a">Duffner_IJCEE_2012/IDIAP</subfield>
		</datafield>
		<datafield tag="245" ind1=" " ind2=" ">
			<subfield code="a">The TA2 Database – A Multi-Modal Database From Home Entertainment</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Duffner, Stefan</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Motlicek, Petr</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Korchagin, Danil</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">high-definition video-conferencing</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">multi-face tracking</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">multi-modal database</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">voice-activity detection</subfield>
		</datafield>
		<datafield tag="856" ind1="4" ind2="0">
			<subfield code="i">EXTERNAL</subfield>
			<subfield code="u">http://publications.idiap.ch/attachments/papers/2012/Duffner_IJCEE_2012.pdf</subfield>
			<subfield code="x">PUBLIC</subfield>
		</datafield>
		<datafield tag="773" ind1=" " ind2=" ">
			<subfield code="p">International Journal of Computer and Electrical Engineering</subfield>
			<subfield code="v">4</subfield>
			<subfield code="n">5</subfield>
			<subfield code="c">670-673</subfield>
			<subfield code="x">1793-8163</subfield>
		</datafield>
		<datafield tag="260" ind1=" " ind2=" ">
			<subfield code="c">2012</subfield>
		</datafield>
		<datafield tag="856" ind1="4" ind2=" ">
			<subfield code="u">http://www.ijcee.org/</subfield>
			<subfield code="z">URL</subfield>
		</datafield>
		<datafield tag="520" ind1=" " ind2=" ">
			<subfield code="a">This paper presents a new database containing high-definition audio and video recordings in a rather unconstrained video-conferencing-like environment. The database consists of recordings of people sitting around a table in two separate rooms communicating and playing online games with each other. Extensive annotation of head positions, voice activity and word transcription has been performed on the dataset, making it especially useful for evaluating automatic speech-recognition, voice activity detection, speaker localisation, multi-face detection and tracking, and other audio-visual analysis algorithms.</subfield>
		</datafield>
	</record>
</collection>