<?xml version="1.0" encoding="UTF-8"?>
<collection xmlns="http://www.loc.gov/MARC21/slim">
	<record>
		<datafield tag="980" ind1=" " ind2=" ">
			<subfield code="a">CONF</subfield>
		</datafield>
		<datafield tag="970" ind1=" " ind2=" ">
			<subfield code="a">Motlicek_ICASSP_2012/IDIAP</subfield>
		</datafield>
		<datafield tag="245" ind1=" " ind2=" ">
			<subfield code="a">IMPROVING ACOUSTIC BASED KEYWORD SPOTTING USING LVCSR LATTICES</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Motlicek, Petr</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Valente, Fabio</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Szoke, Igor</subfield>
		</datafield>
		<datafield tag="856" ind1="4" ind2=" ">
			<subfield code="u">http://publications.idiap.ch/index.php/publications/showcite/Motlicek_Idiap-RR-36-2012</subfield>
			<subfield code="z">Related documents</subfield>
		</datafield>
		<datafield tag="711" ind1="2" ind2=" ">
			<subfield code="a">IEEE - Proceedings on IEEE International Conference on Acoustics, Speech and Signal Processing</subfield>
			<subfield code="c">Japan</subfield>
		</datafield>
		<datafield tag="260" ind1=" " ind2=" ">
			<subfield code="c">2012</subfield>
		</datafield>
		<datafield tag="773" ind1=" " ind2=" ">
			<subfield code="c">4413-4416</subfield>
		</datafield>
		<datafield tag="520" ind1=" " ind2=" ">
			<subfield code="a">This paper investigates detection of English keywords in a conversational scenario using a combination of acoustic and LVCSR based keyword spotting systems. Acoustic KWS systems search predefined words in parameterized spoken data. Corresponding confidences are represented by likelihood ratios given the keyword models and a background model. First, due to the especially high number of false-alarms, the acoustic KWS system is augmented with confidence measures estimated from corresponding LVCSR lattices. Then, various strategies to combine scores estimated by the acoustic and several LVCSR based KWS systems are explored. We show that a linear regression based combination significantly outperforms other (model-based) techniques. Due to that, the relative number of false-alarms of the combined KWS system decreased by more than 50% compared to the acoustic KWS system. Finally, an attention is also paid to the complexities of the KWS systems enabling them to potentially be exploited in real-detection tasks.</subfield>
		</datafield>
	</record>
	<record>
		<datafield tag="980" ind1=" " ind2=" ">
			<subfield code="a">REPORT</subfield>
		</datafield>
		<datafield tag="970" ind1=" " ind2=" ">
			<subfield code="a">Motlicek_Idiap-RR-36-2012/IDIAP</subfield>
		</datafield>
		<datafield tag="245" ind1=" " ind2=" ">
			<subfield code="a">IMPROVING ACOUSTIC BASED KEYWORD SPOTTING USING LVCSR LATTICES</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Motlicek, Petr</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Valente, Fabio</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Szoke, Igor</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">Confidence Measure (CM)</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">KeyWord Spotting (KWS)</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">Spoken Term Detection (STD)</subfield>
		</datafield>
		<datafield tag="856" ind1="4" ind2="0">
			<subfield code="i">EXTERNAL</subfield>
			<subfield code="u">http://publications.idiap.ch/attachments/reports/2012/Motlicek_Idiap-RR-36-2012.pdf</subfield>
			<subfield code="x">PUBLIC</subfield>
		</datafield>
		<datafield tag="088" ind1=" " ind2=" ">
			<subfield code="a">Idiap-RR-36-2012</subfield>
		</datafield>
		<datafield tag="260" ind1=" " ind2=" ">
			<subfield code="c">2012</subfield>
			<subfield code="b">Idiap</subfield>
			<subfield code="a">Rue Marconi 19</subfield>
		</datafield>
		<datafield tag="771" ind1="2" ind2=" ">
			<subfield code="d">December 2012</subfield>
		</datafield>
		<datafield tag="520" ind1=" " ind2=" ">
			<subfield code="a">This paper investigates detection of English keywords in a conversational
scenario using a combination of acoustic and LVCSR based
keyword spotting systems. Acoustic KWS systems search predefined
words in parameterized spoken data. Corresponding confidences
are represented by likelihood ratios given the keyword models
and a background model. First, due to the especially high number
of false-alarms, the acoustic KWS system is augmented with
confidence measures estimated from corresponding LVCSR lattices.
Then, various strategies to combine scores estimated by the acoustic
and several LVCSR based KWS systems are explored. We show
that a linear regression based combination significantly outperforms
other (model-based) techniques. Due to that, the relative number of
false-alarms of the combined KWS system decreased by more than
50% compared to the acoustic KWS system. Finally, an attention is
also paid to the complexities of the KWS systems enabling them to
potentially be exploited in real-detection tasks.</subfield>
		</datafield>
	</record>
</collection>