<?xml version="1.0" encoding="UTF-8"?>
<collection xmlns="http://www.loc.gov/MARC21/slim">
	<record>
		<datafield tag="980" ind1=" " ind2=" ">
			<subfield code="a">CONF</subfield>
		</datafield>
		<datafield tag="970" ind1=" " ind2=" ">
			<subfield code="a">stephenson02c/IDIAP</subfield>
		</datafield>
		<datafield tag="245" ind1=" " ind2=" ">
			<subfield code="a">Dynamic Bayesian Network Based Speech Recognition with Pitch and Energy as Auxiliary Variables</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Stephenson, Todd Andrew</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Escofet, Jaume</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Magimai-Doss, Mathew</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Bourlard, Hervé</subfield>
		</datafield>
		<datafield tag="856" ind1="4" ind2="0">
			<subfield code="i">EXTERNAL</subfield>
			<subfield code="u">http://publications.idiap.ch/attachments/papers/2002/todd-nnsp2002.pdf</subfield>
			<subfield code="x">PUBLIC</subfield>
		</datafield>
		<datafield tag="856" ind1="4" ind2=" ">
			<subfield code="u">http://publications.idiap.ch/index.php/publications/showcite/stephenson02b</subfield>
			<subfield code="z">Related documents</subfield>
		</datafield>
		<datafield tag="711" ind1="2" ind2=" ">
			<subfield code="a">2002 IEEE International Workshop on Neural Networks for Signal Processing (NNSP 2002)</subfield>
		</datafield>
		<datafield tag="260" ind1=" " ind2=" ">
			<subfield code="c">2002</subfield>
			<subfield code="a">Martigny, Switzerland</subfield>
		</datafield>
		<datafield tag="771" ind1="2" ind2=" ">
			<subfield code="d">September 2002</subfield>
		</datafield>
		<datafield tag="773" ind1=" " ind2=" ">
			<subfield code="c">637-646</subfield>
		</datafield>
		<datafield tag="520" ind1=" " ind2=" ">
			<subfield code="a">Pitch and energy are two fundamental features describing speech, having importance in human speech recognition. However, when incorporated as features in automatic speech recognition (ASR), they usually result in a significant degradation on recognition performance due to the noise inherent in estimating or modeling them. In this paper, we show experimentally how this can be corrected by either conditioning the emission distributions upon these features or by marginalizing out these features in recognition. Since this is not obvious to do with standard hidden Markov models (HMMs), this work has been performed in the framework of dynamic Bayesian networks (DBNs), resulting in more flexibility in defining the topology of the emission distributions and in specifying whether variables should be marginalized out.</subfield>
		</datafield>
	</record>
	<record>
		<datafield tag="980" ind1=" " ind2=" ">
			<subfield code="a">REPORT</subfield>
		</datafield>
		<datafield tag="970" ind1=" " ind2=" ">
			<subfield code="a">stephenson02b/IDIAP</subfield>
		</datafield>
		<datafield tag="245" ind1=" " ind2=" ">
			<subfield code="a">Dynamic Bayesian Network Based Speech Recognition with Pitch and Energy as Auxiliary Variables</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Stephenson, Todd Andrew</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Escofet, Jaume</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Magimai-Doss, Mathew</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Bourlard, Hervé</subfield>
		</datafield>
		<datafield tag="856" ind1="4" ind2="0">
			<subfield code="i">EXTERNAL</subfield>
			<subfield code="u">http://publications.idiap.ch/attachments/reports/2002/rr02-24.pdf</subfield>
			<subfield code="x">PUBLIC</subfield>
		</datafield>
		<datafield tag="088" ind1=" " ind2=" ">
			<subfield code="a">Idiap-RR-24-2002</subfield>
		</datafield>
		<datafield tag="260" ind1=" " ind2=" ">
			<subfield code="c">2002</subfield>
			<subfield code="b">IDIAP</subfield>
		</datafield>
		<datafield tag="500" ind1=" " ind2=" ">
			<subfield code="a">In "2002 IEEE International Workshop on Neural Networks for Signal Processing (NNSP 2002)", 2002</subfield>
		</datafield>
		<datafield tag="520" ind1=" " ind2=" ">
			<subfield code="a">Pitch and energy are two fundamental features describing speech, having importance in human speech recognition. However, when incorporated as features in automatic speech recognition (ASR), they usually result in a significant degradation on recognition performance due to the noise inherent in estimating or modeling them. In this paper, we show experimentally how this can be corrected by either conditioning the emission distributions upon these features or by marginalizing out these features in recognition. Since this is not obvious to do with standard hidden Markov models (HMMs), this work has been performed in the framework of dynamic Bayesian networks (DBNs), resulting in more flexibility in defining the topology of the emission distributions and in specifying whether variables should be marginalized out.</subfield>
		</datafield>
	</record>
</collection>