<?xml version="1.0" encoding="UTF-8"?>
<collection xmlns="http://www.loc.gov/MARC21/slim">
	<record>
		<datafield tag="980" ind1=" " ind2=" ">
			<subfield code="a">CONF</subfield>
		</datafield>
		<datafield tag="970" ind1=" " ind2=" ">
			<subfield code="a">Valente_ICASSP_2012/IDIAP</subfield>
		</datafield>
		<datafield tag="245" ind1=" " ind2=" ">
			<subfield code="a">Automatic Speaker Role Labeling in AMI Meetings: Recognition of Formal and Social Roles</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Sapru, A.</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Valente, Fabio</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">AMI Meetings</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">Speaker Role Labeling</subfield>
		</datafield>
		<datafield tag="856" ind1="4" ind2="0">
			<subfield code="i">EXTERNAL</subfield>
			<subfield code="u">http://publications.idiap.ch/attachments/papers/2012/Valente_ICASSP_2012.pdf</subfield>
			<subfield code="x">PUBLIC</subfield>
		</datafield>
		<datafield tag="711" ind1="2" ind2=" ">
			<subfield code="a">Proceedings IEEE International Conference on Acoustics, Speech and Signal Processing, Kyoto, Japan, 2012</subfield>
		</datafield>
		<datafield tag="260" ind1=" " ind2=" ">
			<subfield code="c">2012</subfield>
		</datafield>
		<datafield tag="520" ind1=" " ind2=" ">
			<subfield code="a">This work aims at investigating the automatic recognition of speaker
role in meeting conversations from the AMI corpus. Two types of
roles are considered: formal roles, fixed over the meeting duration
and recognized at recording level, and social roles related to the way
participants interact between themselves, recognized at speaker turn
level. Various structural, lexical and prosodic features as well as Dialog
Act tags are exhaustively investigated and combined for this purpose.
Results reveal an accuracy of 74% in recognizing the speakers'
formal roles and an accuracy of 66% (percentage of time) in correctly
labeling the social roles. Feature analysis reveals that lexical
features provide the higher performances in formal/functional
role recognition while prosodic features provide the higher performances
in social role recognition. Furthermore results reveal that
social role recognition in case of rare roles in the corpus can be improved
through the use of lexical and Dialog Act information combined
over short time windows.</subfield>
		</datafield>
	</record>
</collection>