<?xml version="1.0" encoding="UTF-8"?>
<collection xmlns="http://www.loc.gov/MARC21/slim">
	<!-- MARC21 "slim" XML collection containing one bibliographic record:
	     a 2007 Idiap research report (see fields 088, 260 and 980 below).
	     Datafields are not in ascending tag order; the order is left exactly as exported. -->
	<record>
		<!-- 980: locally defined field (9XX range). Carries the value REPORT,
		     presumably the record/collection type used by the consuming system. TODO confirm. -->
		<datafield tag="980" ind1=" " ind2=" ">
			<subfield code="a">REPORT</subfield>
		</datafield>
		<!-- 970: locally defined field (9XX range). Looks like the source system's
		     record key (citation key plus origin). TODO confirm against the exporter. -->
		<datafield tag="970" ind1=" " ind2=" ">
			<subfield code="a">pinto:rr07-32/IDIAP</subfield>
		</datafield>
		<!-- 245 $a: title of the report. -->
		<datafield tag="245" ind1=" " ind2=" ">
			<subfield code="a">Comparing Different Word Lattice Rescoring Approaches Towards Keyword Spotting</subfield>
		</datafield>
		<!-- 700 $a: personal names, one datafield per author, in "Surname, Forename" form.
		     This record has no 100 (main entry) field; every author, including the first,
		     is carried in a 700. -->
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Pinto, Joel Praveen</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Bourlard, Hervé</subfield>
		</datafield>
		<!-- NOTE(review): the surname particle appears misplaced in the next entry;
		     the intended form is likely "De Greve, Zacharie". Confirm against the
		     report's title page before correcting. -->
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Greve, Zacharie De</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Hermansky, Hynek</subfield>
		</datafield>
		<!-- 856: electronic location of the full text. In MARC 21, ind1="4" means HTTP access
		     and ind2="0" means the link points to the resource itself. $u is the URL.
		     $i (EXTERNAL) and $x (PUBLIC) look like hosting/visibility flags whose meaning
		     is defined by the consuming system. TODO confirm. -->
		<datafield tag="856" ind1="4" ind2="0">
			<subfield code="i">EXTERNAL</subfield>
			<subfield code="u">http://publications.idiap.ch/attachments/reports/2007/pinto-idiap-rr-07-32.pdf</subfield>
			<subfield code="x">PUBLIC</subfield>
		</datafield>
		<!-- 088 $a: report number. -->
		<datafield tag="088" ind1=" " ind2=" ">
			<subfield code="a">Idiap-RR-32-2007</subfield>
		</datafield>
		<!-- 260: publication information; $c is the date, $b is the publisher name.
		     NOTE(review): $c precedes $b here, the reverse of the conventional MARC order.
		     Harmless for code-based lookups, but verify if any consumer renders subfields
		     in document order. -->
		<datafield tag="260" ind1=" " ind2=" ">
			<subfield code="c">2007</subfield>
			<subfield code="b">IDIAP</subfield>
		</datafield>
		<!-- 500 $a: general note (publication status). -->
		<datafield tag="500" ind1=" " ind2=" ">
			<subfield code="a">Submitted for publication</subfield>
		</datafield>
		<!-- 520 $a: abstract. Kept on a single line because whitespace inside the
		     subfield text is part of the data. -->
		<datafield tag="520" ind1=" " ind2=" ">
			<subfield code="a">In this paper, we further investigate the large vocabulary continuous speech recognition approach to keyword spotting. Given a speech utterance, recognition is performed to obtain a word lattice. The posterior probability of keyword hypotheses in the lattice is computed and used to derive a confidence measure to accept/reject the keyword. We extend this framework and replace the acoustic likelihoods in the lattice obtained from a Gaussian mixture model (GMM) with likelihoods derived from a multilayered perceptron (MLP). We compare the two rescoring techniques on the conversational telephone speech database distributed by NIST for the spoken term detection evaluation. Experimental results show that GMM lattices still perform better than the rescored lattices for short and medium length keywords, but on longer keywords, the MLP rescored word lattices perform slightly better.</subfield>
		</datafield>
	</record>
</collection>