<?xml version="1.0" encoding="UTF-8"?>
<collection xmlns="http://www.loc.gov/MARC21/slim">
	<record>
		<datafield tag="980" ind1=" " ind2=" ">
			<subfield code="a">CONF</subfield>
		</datafield>
		<datafield tag="970" ind1=" " ind2=" ">
			<subfield code="a">Dielmann_INTERSPEECH,SEPTEMBER2010_2010/IDIAP</subfield>
		</datafield>
		<datafield tag="245" ind1=" " ind2=" ">
			<subfield code="a">Floor Holder Detection and End of Speaker Turn Prediction in Meetings</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Dielmann, Alfred</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Garau, Giulia</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Bourlard, Hervé</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">Dynamic Bayesian Network</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">floor control</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">Multiparty Conversation</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">non-verbal features</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">speaker turn</subfield>
		</datafield>
		<datafield tag="856" ind1="4" ind2="0">
			<subfield code="i">EXTERNAL</subfield>
			<subfield code="u">http://publications.idiap.ch/attachments/papers/2010/Dielmann_INTERSPEECH,SEPTEMBER2010_2010.pdf</subfield>
			<subfield code="x">PUBLIC</subfield>
		</datafield>
		<datafield tag="711" ind1="2" ind2=" ">
			<subfield code="a">International Conference on Speech and Language Processing, Interspeech</subfield>
			<subfield code="c">Makuhari, Japan</subfield>
		</datafield>
		<datafield tag="260" ind1=" " ind2=" ">
			<subfield code="c">2010</subfield>
			<subfield code="b">ISCA</subfield>
		</datafield>
		<datafield tag="771" ind1="2" ind2=" ">
			<subfield code="d">September 2010</subfield>
		</datafield>
		<datafield tag="520" ind1=" " ind2=" ">
			<subfield code="a">We propose a novel fully automatic framework to detect which meeting participant is currently holding the conversational floor and when the current speaker turn is going to finish. Two sets of experiments were conducted on a large collection of multiparty conversations: the AMI meeting corpus. Unsupervised speaker turn detection was performed by post-processing the speaker diarization and the speech activity detection outputs. A supervised end-of-speaker-turn prediction framework, based on Dynamic Bayesian Networks and automatically extracted multimodal features (related to prosody, overlapping speech, and visual motion,',','),
 was also investigated. These novel approaches resulted in good floor holder detection rates (13:2% Floor Error Rate,',','),
 attaining state of the art end-of-speaker-turn prediction performances.</subfield>
		</datafield>
	</record>
</collection>