<?xml version="1.0" encoding="UTF-8"?>
<collection xmlns="http://www.loc.gov/MARC21/slim">
	<record>
		<datafield tag="980" ind1=" " ind2=" ">
			<subfield code="a">CONF</subfield>
		</datafield>
		<datafield tag="970" ind1=" " ind2=" ">
			<subfield code="a">aradilla:simpe:2007/IDIAP</subfield>
		</datafield>
		<datafield tag="245" ind1=" " ind2=" ">
			<subfield code="a">Detection and Recognition of Number Sequences in Spoken Utterances</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Aradilla, Guillermo</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Ajmera, Jitendra</subfield>
		</datafield>
		<datafield tag="856" ind1="4" ind2="0">
			<subfield code="i">EXTERNAL</subfield>
			<subfield code="u">http://publications.idiap.ch/attachments/papers/2007/aradilla-simpe-2007.pdf</subfield>
			<subfield code="x">PUBLIC</subfield>
		</datafield>
		<datafield tag="856" ind1="4" ind2=" ">
			<subfield code="u">http://publications.idiap.ch/index.php/publications/showcite/aradilla:rr07-42</subfield>
			<subfield code="z">Related documents</subfield>
		</datafield>
		<datafield tag="711" ind1="2" ind2=" ">
			<subfield code="a">2nd Workshop on Speech in Mobile and Pervasive Environments (SiMPE)</subfield>
		</datafield>
		<datafield tag="260" ind1=" " ind2=" ">
			<subfield code="c">2007</subfield>
		</datafield>
		<datafield tag="500" ind1=" " ind2=" ">
			<subfield code="a">IDIAP-RR 07-42</subfield>
		</datafield>
		<datafield tag="520" ind1=" " ind2=" ">
			<subfield code="a">In this paper we investigate the detection and recognition of sequences of numbers in spoken utterances. This is done in two steps: first, the entire utterance is decoded assuming that only numbers were spoken. In the second step, non-number segments (garbage) are detected based on word confidence measures. We compare this approach to conventional garbage models. Also, a comparison of several phone posterior based confidence measures is presented in this paper. The work is evaluated in terms of detection task (hit rate and false alarms) and recognition task (word accuracy) within detected number sequences. The proposed method is tested on German continuous spoken utterances where target content (numbers) is only 20\%.</subfield>
		</datafield>
	</record>
	<record>
		<datafield tag="980" ind1=" " ind2=" ">
			<subfield code="a">REPORT</subfield>
		</datafield>
		<datafield tag="970" ind1=" " ind2=" ">
			<subfield code="a">aradilla:rr07-42/IDIAP</subfield>
		</datafield>
		<datafield tag="245" ind1=" " ind2=" ">
			<subfield code="a">Detection and Recognition of Number Sequences in Spoken Utterances</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Aradilla, Guillermo</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Ajmera, Jitendra</subfield>
		</datafield>
		<datafield tag="856" ind1="4" ind2="0">
			<subfield code="i">EXTERNAL</subfield>
			<subfield code="u">http://publications.idiap.ch/attachments/reports/2007/aradilla-idiap-rr-07-42.pdf</subfield>
			<subfield code="x">PUBLIC</subfield>
		</datafield>
		<datafield tag="088" ind1=" " ind2=" ">
			<subfield code="a">Idiap-RR-42-2007</subfield>
		</datafield>
		<datafield tag="260" ind1=" " ind2=" ">
			<subfield code="c">2007</subfield>
			<subfield code="b">IDIAP</subfield>
		</datafield>
		<datafield tag="520" ind1=" " ind2=" ">
			<subfield code="a">In this paper we investigate the detection and recognition of sequences of numbers in spoken utterances. This is done in two steps: first, the entire utterance is decoded assuming that only numbers were spoken. In the second step, non-number segments (garbage) are detected based on word confidence measures. We compare this approach to conventional garbage models. Also, a comparison of several phone posterior based confidence measures is presented in this paper. The work is evaluated in terms of detection task (hit rate and false alarms) and recognition task (word accuracy) within detected number sequences. The proposed method is tested on German continuous spoken utterances where target content (numbers) is only 20\%.</subfield>
		</datafield>
	</record>
</collection>