<?xml version="1.0" encoding="UTF-8"?>
<collection xmlns="http://www.loc.gov/MARC21/slim">
	<record>
		<datafield tag="980" ind1=" " ind2=" ">
			<subfield code="a">REPORT</subfield>
		</datafield>
		<datafield tag="970" ind1=" " ind2=" ">
			<subfield code="a">Cernak_Idiap-RR-24-2013/IDIAP</subfield>
		</datafield>
		<datafield tag="245" ind1=" " ind2=" ">
			<subfield code="a">Syllable-based Pitch Encoding for Low Bit Rate Speech Coding   with Recognition/Synthesis Architecture</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Cernak, Milos</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Na, Xingyu</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Garner, Philip N.</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">pitch analysis</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">speech coding</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">speech synthesis</subfield>
		</datafield>
		<datafield tag="856" ind1="4" ind2="0">
			<subfield code="i">EXTERNAL</subfield>
			<subfield code="u">http://publications.idiap.ch/attachments/reports/2013/Cernak_Idiap-RR-24-2013.pdf</subfield>
			<subfield code="x">PUBLIC</subfield>
		</datafield>
		<datafield tag="088" ind1=" " ind2=" ">
			<subfield code="a">Idiap-RR-24-2013</subfield>
		</datafield>
		<datafield tag="260" ind1=" " ind2=" ">
			<subfield code="c">2013</subfield>
			<subfield code="b">Idiap</subfield>
		</datafield>
		<datafield tag="771" ind1="2" ind2=" ">
			<subfield code="d">June 2013</subfield>
		</datafield>
		<datafield tag="520" ind1=" " ind2=" ">
			<subfield code="a">Current HMM-based low bit rate speech coding systems work with
  phonetic vocoders. Pitch contour coding (on frame or phoneme level)
  is usually fairly orthogonal to other speech coding parameters. We
  make an assumption in our work that the speech signal contains
  supra-segmental cues. Hence, we present encoding of the pitch on the
  syllable level, used in the framework of a recognition/synthesis
  speech coder with phonetic vocoder. The results imply that high
  accuracy pitch contour reconstruction with negligible speech quality
  degradation is possible. The proposed pitch encoding technique
  operates on 30 - 35 bits per second.</subfield>
		</datafield>
	</record>
</collection>