<?xml version="1.0" encoding="UTF-8"?>
<collection xmlns="http://www.loc.gov/MARC21/slim">
	<record>
		<datafield tag="980" ind1=" " ind2=" ">
			<subfield code="a">CONF</subfield>
		</datafield>
		<datafield tag="970" ind1=" " ind2=" ">
			<subfield code="a">Purohit_ICML_2022/IDIAP</subfield>
		</datafield>
		<datafield tag="245" ind1=" " ind2=" ">
			<subfield code="a">Comparing supervised and self-supervised embedding for ExVo Multi-Task learning track</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Purohit, Tilak</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Ben Mahmoud, Imen</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Vlasenko, Bogdan</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Magimai-Doss, Mathew</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">Emotion Recognition</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">Expressive Vocalizations</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">Multi-task learning</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">Self-supervised embedding</subfield>
		</datafield>
		<datafield tag="856" ind1="4" ind2="0">
			<subfield code="i">EXTERNAL</subfield>
			<subfield code="u">http://publications.idiap.ch/attachments/papers/2022/Purohit_ICML_2022.pdf</subfield>
			<subfield code="x">PUBLIC</subfield>
		</datafield>
		<datafield tag="711" ind1="2" ind2=" ">
			<subfield code="a">Proceedings of the ICML Expressive Vocalizations Workshop held in conjunction with the 39th International Conference on Machine Learning</subfield>
			<subfield code="c">Maryland, USA</subfield>
		</datafield>
		<datafield tag="260" ind1=" " ind2=" ">
			<subfield code="c">2022</subfield>
		</datafield>
		<datafield tag="520" ind1=" " ind2=" ">
			<subfield code="a">The ICML Expressive Vocalizations (ExVo) Multi-task challenge 2022, focuses on understanding the emotional facets of the non-linguistic vocalizations (vocal bursts (VB)). The objective of this challenge is to predict emotional intensities for VB, being a multi-task challenge it also requires to predict speakers' age and native-country. For this challenge we study and compare two distinct embedding spaces namely, self-supervised learning (SSL) based embeddings and task-specific supervised learning based embeddings. Towards that, we investigate feature representations obtained from several pre-trained SSL neural networks and task-specific supervised classification neural networks. Our studies show that the best performance is obtained with an hybrid approach, where predictions derived via both SSL and task-specific supervised learning are used. Our best system on test-set surpass the ComPARE baseline (harmonic-mean of all sub-task scores i.e., S_MLT) by a relative 13% margin.</subfield>
		</datafield>
	</record>
</collection>