<?xml version="1.0" encoding="UTF-8"?>
<collection xmlns="http://www.loc.gov/MARC21/slim">
	<record>
		<!-- Local-use fields. 9XX tags are not defined by MARC 21 itself;
		     their meaning is set by the repository that ingests this record.
		     NOTE(review): 980 $a looks like a document-type/collection code
		     ("CONF") and 970 $a like a source-system record key; confirm
		     against the target repository's field mapping. -->
		<datafield tag="980" ind1=" " ind2=" ">
			<subfield code="a">CONF</subfield>
		</datafield>
		<datafield tag="970" ind1=" " ind2=" ">
			<subfield code="a">Apicella_ICCV_ACVR_2023/IDIAP</subfield>
		</datafield>
		<!-- 245 $a: title of the work. -->
		<datafield tag="245" ind1=" " ind2=" ">
			<subfield code="a">Affordance segmentation of hand-occluded containers from exocentric images</subfield>
		</datafield>
		<!-- 700 $a: one personal-name entry per author, written as
		     "Surname, Forename". The visible fields contain no 100 (main entry)
		     field; every author is carried as a repeated 700.
		     NOTE(review): blank indicators on 245/700 are not values defined by
		     MARC 21; presumably the importing system ignores them. Confirm
		     before normalising. -->
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Apicella, Tommaso</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Xompero, Alessio</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Ragusa, Edoardo</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Berta, Riccardo</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Cavallaro, Andrea</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Gastaldo, Paolo</subfield>
		</datafield>
		<!-- 653 $a: uncontrolled index terms (free-text keywords), one term per
		     repeated field. In MARC 21, ind1="1" marks a term as "primary".
		     Capitalisation of the terms is kept exactly as supplied. -->
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">Affordances</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">computer vision</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">Convolutional Neural Networks</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">Fuses</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">Geometry</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">Hand occlusion</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">image segmentation</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">training</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">virtual reality</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">Visualization</subfield>
		</datafield>
		<!-- 711 $a: meeting/conference name; ind1="2" means the name is given
		     in direct order. The value here is the proceedings title. -->
		<datafield tag="711" ind1="2" ind2=" ">
			<subfield code="a">Proceedings of the IEEE/CVF International Conference on Computer Vision Workshops (ICCVW)</subfield>
		</datafield>
		<!-- 260 $c: date of publication (year only). -->
		<datafield tag="260" ind1=" " ind2=" ">
			<subfield code="c">2023</subfield>
		</datafield>
		<!-- 856: electronic location. ind1="4" = access via HTTP; $u is the
		     URI and $z is the public note shown with the link. -->
		<datafield tag="856" ind1="4" ind2=" ">
			<subfield code="u">https://ieeexplore.ieee.org/document/10350499</subfield>
			<subfield code="z">URL</subfield>
		</datafield>
		<!-- 024: other standard identifier. ind1="7" = the identifier scheme is
		     named in $2 (here "doi"); $a holds the bare DOI without a resolver
		     prefix. -->
		<datafield tag="024" ind1="7" ind2=" ">
			<subfield code="a">10.1109/ICCVW60793.2023.00204</subfield>
			<subfield code="2">doi</subfield>
		</datafield>
		<datafield tag="520" ind1=" " ind2=" ">
			<subfield code="a">Visual affordance segmentation identifies the surfaces of an object an agent can interact with. Common challenges for the identification of affordances are the variety of the geometry and physical properties of these surfaces as well as occlusions. In this paper, we focus on occlusions of an object that is hand-held by a person manipulating it. To address this challenge, we propose an affordance segmentation model that uses auxiliary branches to process the object and hand regions separately. The proposed model learns affordance features under hand-occlusion by weighting the feature map through hand and object segmentation. To train the model, we annotated the visual affordances of an existing dataset with mixed-reality images of hand-held containers in third-person (exocentric) images. Experiments on both real and mixed-reality images show that our model achieves better affordance segmentation and generalisation than existing models.</subfield>
		</datafield>
	</record>
</collection>