<?xml version="1.0" encoding="UTF-8"?>
<collection xmlns="http://www.loc.gov/MARC21/slim">
	<record>
		<datafield tag="980" ind1=" " ind2=" ">
			<subfield code="a">ARTICLE</subfield>
		</datafield>
		<datafield tag="970" ind1=" " ind2=" ">
			<subfield code="a">Siegfried_TOMM_2021/IDIAP</subfield>
		</datafield>
		<datafield tag="245" ind1=" " ind2=" ">
			<subfield code="a">Robust Unsupervised Gaze Calibration using Conversation and Manipulation Attention Priors</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Siegfried, Remy</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Odobez, Jean-Marc</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">conversation</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">Gaze estimation</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">manipulation</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">online calibration.</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">remote sensor</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">RGB-D camera</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">unsupervised calibration</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">visual focus of attention</subfield>
		</datafield>
		<datafield tag="856" ind1="4" ind2="0">
			<subfield code="i">EXTERNAL</subfield>
			<subfield code="u">http://publications.idiap.ch/attachments/papers/2022/Siegfried_TOMM_2021.pdf</subfield>
			<subfield code="x">PUBLIC</subfield>
		</datafield>
		<datafield tag="773" ind1=" " ind2=" ">
			<subfield code="p">ACM Transactions on Multimedia Computing, Communications, and Applications</subfield>
			<subfield code="v">18</subfield>
			<subfield code="n">1</subfield>
			<subfield code="c">26</subfield>
			<subfield code="x">1551-6857</subfield>
		</datafield>
		<datafield tag="260" ind1=" " ind2=" ">
			<subfield code="c">2022</subfield>
		</datafield>
		<datafield tag="856" ind1="4" ind2=" ">
			<subfield code="u">https://doi.org/10.1145/3472622</subfield>
			<subfield code="z">URL</subfield>
		</datafield>
		<datafield tag="024" ind1="7" ind2=" ">
			<subfield code="a">10.1145/3472622</subfield>
			<subfield code="2">doi</subfield>
		</datafield>
		<datafield tag="520" ind1=" " ind2=" ">
			<subfield code="a">Gaze estimation is a difficult task, even for humans. However, as humans, we are good at understanding a situation and exploiting it to guess the expected visual focus of attention (VFOA) of people, and we usually use this information to retrieve people’s gaze. In this paper, we propose to leverage such situation-based expectation about people’s VFOA to collected weakly labeled gaze samples and perform person-specific calibration of gaze estimators in an unsupervised and online way. In this context, our contributions are the following: i) we show how task contextual attention priors can be used to gather reference gaze samples, which is a cumbersome process otherwise; ii) we propose a robust estimation framework to exploit these weak labels for the estimation of the calibration model parameters; iii) we demonstrate the applicability of this approach on two Human-Human and Human-Robot interaction settings, namely conversation, and manipulation. Experiments on three datasets validate our approach, providing insights on the effectiveness of the prior and on the impact of different calibration models, in particular the usefulness of taking head pose into account.</subfield>
		</datafield>
	</record>
</collection>