<?xml version="1.0" encoding="UTF-8"?>
<collection xmlns="http://www.loc.gov/MARC21/slim">
	<record>
		<datafield tag="980" ind1=" " ind2=" ">
			<subfield code="a">REPORT</subfield>
		</datafield>
		<datafield tag="970" ind1=" " ind2=" ">
			<subfield code="a">Habibi_Idiap-RR-14-2012/IDIAP</subfield>
		</datafield>
		<datafield tag="245" ind1=" " ind2=" ">
			<subfield code="a">Using Crowdsourcing to Compare Document Recommendation Strategies for Conversations</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Habibi, Maryam</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Popescu-Belis, Andrei</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">Amazon Mechanical Turk</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">comparative evaluation</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">Crowdsourcing</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">Document recommender system</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">user initiative</subfield>
		</datafield>
		<datafield tag="856" ind1="4" ind2="0">
			<subfield code="i">EXTERNAL</subfield>
			<subfield code="u">http://publications.idiap.ch/attachments/reports/2012/Habibi_Idiap-RR-14-2012.pdf</subfield>
			<subfield code="x">PUBLIC</subfield>
		</datafield>
		<datafield tag="088" ind1=" " ind2=" ">
			<subfield code="a">Idiap-RR-14-2012</subfield>
		</datafield>
		<datafield tag="260" ind1=" " ind2=" ">
			<subfield code="c">2012</subfield>
			<subfield code="b">Idiap</subfield>
		</datafield>
		<datafield tag="771" ind1="2" ind2=" ">
			<subfield code="d">June 2012</subfield>
		</datafield>
		<datafield tag="520" ind1=" " ind2=" ">
			<subfield code="a">This paper explores a crowdsourcing approach to the evaluation of a document recommender system intended for use in meetings. The system uses words from the conversation to perform just-in-time document retrieval. We compare several versions of the system, including the use of keywords, retrieval using semantic similarity, and the possibility for user initiative. The system's results are submitted for comparative evaluations to workers recruited via a crowdsourcing platform, Amazon's Mechanical Turk. We introduce a new method, Pearson Correlation Coefficient-Information Entropy (PCC-H), to abstract over the quality of the workers' judgments and produce system-level scores. We measure the workers' reliability by the inter-rater agreement of each of them against the others, and use entropy to weight the difficulty of each comparison task. The proposed evaluation method is shown to be reliable, and demonstrates that adding user initiative improves the relevance of recommendations.</subfield>
		</datafield>
	</record>
</collection>