<?xml version="1.0" encoding="UTF-8"?>
<collection xmlns="http://www.loc.gov/MARC21/slim">
	<record>
		<datafield tag="980" ind1=" " ind2=" ">
			<subfield code="a">CONF</subfield>
		</datafield>
		<datafield tag="970" ind1=" " ind2=" ">
			<subfield code="a">McGreevy04b/IDIAP</subfield>
		</datafield>
		<datafield tag="245" ind1=" " ind2=" ">
			<subfield code="a">Pseudo-Syntactic Language Modeling for Disfluent Speech Recognition</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">McGreevy, Michael</subfield>
		</datafield>
		<datafield tag="856" ind1="4" ind2="0">
			<subfield code="i">EXTERNAL</subfield>
			<subfield code="u">http://publications.idiap.ch/attachments/papers/2004/mcgreevy-sst04.pdf</subfield>
			<subfield code="x">PUBLIC</subfield>
		</datafield>
		<datafield tag="856" ind1="4" ind2=" ">
			<subfield code="u">http://publications.idiap.ch/index.php/publications/showcite/mcgreevy04a</subfield>
			<subfield code="z">Related documents</subfield>
		</datafield>
		<datafield tag="711" ind1="2" ind2=" ">
			<subfield code="a">Proceedings of SST 2004 (10th Australian International Conference on Speech Science &amp; Technology,',','),
 Sydney, Australia, 2004</subfield>
		</datafield>
		<datafield tag="260" ind1=" " ind2=" ">
			<subfield code="c">2004</subfield>
		</datafield>
		<datafield tag="771" ind1="2" ind2=" ">
			<subfield code="d">December 2004</subfield>
		</datafield>
		<datafield tag="500" ind1=" " ind2=" ">
			<subfield code="a">IDIAP-RR 04-55</subfield>
		</datafield>
		<datafield tag="520" ind1=" " ind2=" ">
			<subfield code="a">Language models for speech recognition are generally trained on text corpora. Since these corpora do not contain the disfluencies found in natural speech, there is a train/test mismatch when these models are applied to conversational speech. In this work we investigate a language model (LM) designed to model these disfluencies as a syntactic process. By modeling self-corrections we obtain an improvement over our baseline syntactic model. We also obtain a 30\% relative reduction in perplexity from the best performing standard {N-gram} model when we interpolate it with our syntactically derived models.</subfield>
		</datafield>
	</record>
	<record>
		<datafield tag="980" ind1=" " ind2=" ">
			<subfield code="a">REPORT</subfield>
		</datafield>
		<datafield tag="970" ind1=" " ind2=" ">
			<subfield code="a">McGreevy04a/IDIAP</subfield>
		</datafield>
		<datafield tag="245" ind1=" " ind2=" ">
			<subfield code="a">Pseudo-Syntactic Language Modeling for Disfluent Speech Recognition</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">McGreevy, Michael</subfield>
		</datafield>
		<datafield tag="856" ind1="4" ind2="0">
			<subfield code="i">EXTERNAL</subfield>
			<subfield code="u">http://publications.idiap.ch/attachments/reports/2004/rr04-55.pdf</subfield>
			<subfield code="x">PUBLIC</subfield>
		</datafield>
		<datafield tag="088" ind1=" " ind2=" ">
			<subfield code="a">Idiap-RR-55-2004</subfield>
		</datafield>
		<datafield tag="260" ind1=" " ind2=" ">
			<subfield code="c">2004</subfield>
			<subfield code="b">IDIAP</subfield>
		</datafield>
		<datafield tag="500" ind1=" " ind2=" ">
			<subfield code="a">Published in Proceedings of SST, 2004</subfield>
		</datafield>
		<datafield tag="520" ind1=" " ind2=" ">
			<subfield code="a">Language models for speech recognition are generally trained on text corpora. Since these corpora do not contain the disfluencies found in natural speech, there is a train/test mismatch when these models are applied to conversational speech. In this work we investigate a language model (LM) designed to model these disfluencies as a syntactic process. By modeling self-corrections we obtain an improvement over our baseline syntactic model. We also obtain a 30\% relative reduction in perplexity from the best performing standard {N-gram} model when we interpolate it with our syntactically derived models.</subfield>
		</datafield>
	</record>
</collection>