<?xml version="1.0" encoding="UTF-8"?>
<collection xmlns="http://www.loc.gov/MARC21/slim">
	<record>
		<datafield tag="980" ind1=" " ind2=" ">
			<subfield code="a">REPORT</subfield>
		</datafield>
		<datafield tag="970" ind1=" " ind2=" ">
			<subfield code="a">odobez-rr-05-11/IDIAP</subfield>
		</datafield>
		<datafield tag="245" ind1=" " ind2=" ">
			<subfield code="a">OCR Based Slide Retrieval</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Daddaoua, Nabil</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Odobez, Jean-Marc</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Vinciarelli, Alessandro</subfield>
		</datafield>
		<datafield tag="856" ind1="4" ind2="0">
			<subfield code="i">EXTERNAL</subfield>
			<subfield code="u">http://publications.idiap.ch/attachments/reports/2005/rr05-11.pdf</subfield>
			<subfield code="x">PUBLIC</subfield>
		</datafield>
		<datafield tag="088" ind1=" " ind2=" ">
			<subfield code="a">Idiap-RR-11-2005</subfield>
		</datafield>
		<datafield tag="260" ind1=" " ind2=" ">
			<subfield code="c">2005</subfield>
			<subfield code="b">IDIAP</subfield>
			<subfield code="a">Martigny, Switzerland</subfield>
		</datafield>
		<datafield tag="500" ind1=" " ind2=" ">
			<subfield code="a">Submitted for publication</subfield>
		</datafield>
		<datafield tag="520" ind1=" " ind2=" ">
			<subfield code="a">This work addresses the problem of acquiring, indexing and retrieving slides in the context of automatic oral presentation processing. Since the most suitable acquisition technique, in such a context, is the use of a framegrabber (a device capturing as images the slides displayed on a screen,',','),
 the slides must be transcribed with an Optical Character Recognition system. Retrieval experiments performed on a corpus of 570 slides (26 presentations) gathered at a workshop show that performance obtained with the OCR transcriptions are close to those obtained by extracting the text from the electronic version (pdf or ppt) of the slides (through apposite API's).</subfield>
		</datafield>
	</record>
</collection>