<?xml version="1.0" encoding="UTF-8"?>
<collection xmlns="http://www.loc.gov/MARC21/slim">
	<record>
		<!-- Bibliographic record for the report numbered Idiap-RR-05-2010 (see field 088). -->
		<!-- The title and author list are the same as in the Liang_ICASSP_2010 record of this collection; the two records link to each other through their second 856 field. -->
		<!-- 9XX tags are reserved for local use in MARC 21. Here 980 $a presumably holds the document type and 970 $a the local record identifier; confirm against the importing system. -->
		<datafield tag="980" ind1=" " ind2=" ">
			<subfield code="a">REPORT</subfield>
		</datafield>
		<datafield tag="970" ind1=" " ind2=" ">
			<subfield code="a">Liang_Idiap-RR-05-2010/IDIAP</subfield>
		</datafield>
		<!-- 245 $a: title. -->
		<datafield tag="245" ind1=" " ind2=" ">
			<subfield code="a">A Comparison of Supervised and Unsupervised Cross-Lingual Speaker Adaptation Approaches for HMM-Based Speech Synthesis</subfield>
		</datafield>
		<!-- Authors: every author, including the first, is recorded as a 700 added entry; this record has no 100 main entry. -->
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Liang, Hui</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Dines, John</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Saheer, Lakshmi</subfield>
		</datafield>
		<!-- 856 (ind1=4: HTTP access): $u is the URL of the PDF. $i EXTERNAL and $x PUBLIC look like local storage/visibility flags; TODO confirm their meaning with the consumer. -->
		<datafield tag="856" ind1="4" ind2="0">
			<subfield code="i">EXTERNAL</subfield>
			<subfield code="u">http://publications.idiap.ch/attachments/reports/2010/Liang_Idiap-RR-05-2010.pdf</subfield>
			<subfield code="x">PUBLIC</subfield>
		</datafield>
		<!-- 856 with $z "Related documents": the URL ends in Liang_ICASSP_2010, the identifier of the other record in this collection. -->
		<datafield tag="856" ind1="4" ind2=" ">
			<subfield code="u">http://publications.idiap.ch/index.php/publications/showcite/Liang_ICASSP_2010</subfield>
			<subfield code="z">Related documents</subfield>
		</datafield>
		<!-- 088 $a: report number. -->
		<datafield tag="088" ind1=" " ind2=" ">
			<subfield code="a">Idiap-RR-05-2010</subfield>
		</datafield>
		<!-- 260: publication statement; $c is the date and $b the publisher name. -->
		<datafield tag="260" ind1=" " ind2=" ">
			<subfield code="c">2010</subfield>
			<subfield code="b">Idiap</subfield>
		</datafield>
		<!-- NOTE(review): 771 does not appear to be a standard MARC 21 linking field; here $d carries a month-and-year string, presumably the publication date. Confirm with the consumer. -->
		<datafield tag="771" ind1="2" ind2=" ">
			<subfield code="d">February 2010</subfield>
		</datafield>
		<!-- 520 $a: abstract. -->
		<datafield tag="520" ind1=" " ind2=" ">
			<subfield code="a">The EMIME project aims to build a personalized speech-to-speech translator, such that spoken input of a user in one language is used to produce spoken output that still sounds like the user's voice however in another language. This distinctiveness makes unsupervised cross-lingual speaker adaptation one key to the project's success. So far, research has been conducted into unsupervised and cross-lingual cases separately by means of decision tree marginalization and HMM state mapping respectively. In this paper we combine the two techniques to perform unsupervised cross-lingual speaker adaptation. The performance of eight speaker adaptation systems (supervised vs. unsupervised, intra-lingual vs. cross-lingual) are compared using objective and subjective evaluations. Experimental results show the performance of unsupervised cross-lingual speaker adaptation is comparable to that of the supervised case in terms of spectrum adaptation in the EMIME scenario, even though automatically obtained transcriptions have a very high phoneme error rate.</subfield>
		</datafield>
	</record>
	<record>
		<!-- Bibliographic record for a conference paper (980 $a = CONF) with local identifier Liang_ICASSP_2010/IDIAP. -->
		<!-- The title and author list are the same as in the Liang_Idiap-RR-05-2010 record of this collection; the two records link to each other through their second 856 field. -->
		<!-- 9XX tags are reserved for local use in MARC 21. Here 980 $a presumably holds the document type and 970 $a the local record identifier; confirm against the importing system. -->
		<datafield tag="980" ind1=" " ind2=" ">
			<subfield code="a">CONF</subfield>
		</datafield>
		<datafield tag="970" ind1=" " ind2=" ">
			<subfield code="a">Liang_ICASSP_2010/IDIAP</subfield>
		</datafield>
		<!-- 245 $a: title. -->
		<datafield tag="245" ind1=" " ind2=" ">
			<subfield code="a">A Comparison of Supervised and Unsupervised Cross-Lingual Speaker Adaptation Approaches for HMM-Based Speech Synthesis</subfield>
		</datafield>
		<!-- Authors: every author, including the first, is recorded as a 700 added entry; this record has no 100 main entry. -->
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Liang, Hui</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Dines, John</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Saheer, Lakshmi</subfield>
		</datafield>
		<!-- 653: uncontrolled index terms (keywords), one term per field. -->
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">decision tree marginalization</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">HMM state mapping</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">unsupervised cross-lingual speaker adaptation</subfield>
		</datafield>
		<!-- 856 (ind1=4: HTTP access): $u is the URL of the PDF. $i EXTERNAL and $x PUBLIC look like local storage/visibility flags; TODO confirm their meaning with the consumer. -->
		<datafield tag="856" ind1="4" ind2="0">
			<subfield code="i">EXTERNAL</subfield>
			<subfield code="u">http://publications.idiap.ch/attachments/papers/2009/Liang_ICASSP_2010.pdf</subfield>
			<subfield code="x">PUBLIC</subfield>
		</datafield>
		<!-- 856 with $z "Related documents": the URL ends in Liang_Idiap-RR-05-2010, the identifier of the other record in this collection. -->
		<datafield tag="856" ind1="4" ind2=" ">
			<subfield code="u">http://publications.idiap.ch/index.php/publications/showcite/Liang_Idiap-RR-05-2010</subfield>
			<subfield code="z">Related documents</subfield>
		</datafield>
		<!-- 711: meeting name; $a holds the proceedings/conference title and $c the meeting location. -->
		<datafield tag="711" ind1="2" ind2=" ">
			<subfield code="a">Proceedings of IEEE International Conference on Acoustics, Speech, and Signal Processing</subfield>
			<subfield code="c">Dallas, U.S.A.</subfield>
		</datafield>
		<!-- 260 $c: publication year; no publisher ($b) is given for this record. -->
		<datafield tag="260" ind1=" " ind2=" ">
			<subfield code="c">2010</subfield>
		</datafield>
		<!-- NOTE(review): 771 does not appear to be a standard MARC 21 linking field; here $d carries a month-and-year string, presumably the publication or conference date. Confirm with the consumer. -->
		<datafield tag="771" ind1="2" ind2=" ">
			<subfield code="d">March 2010</subfield>
		</datafield>
		<!-- NOTE(review): 773 $c is not a standard MARC 21 subfield of the host item entry; the value looks like a page range in the proceedings. Confirm with the consumer. -->
		<datafield tag="773" ind1=" " ind2=" ">
			<subfield code="c">4598-4601</subfield>
		</datafield>
		<!-- 520 $a: abstract. -->
		<datafield tag="520" ind1=" " ind2=" ">
			<subfield code="a">The EMIME project aims to build a personalized speech-to-speech translator, such that spoken input of a user in one language is used to produce spoken output that still sounds like the user's voice however in another language. This distinctiveness makes unsupervised cross-lingual speaker adaptation one key to the project's success. So far, research has been conducted into unsupervised and cross-lingual cases separately by means of decision tree marginalization and HMM state mapping respectively. In this paper we combine the two techniques to perform unsupervised cross-lingual speaker adaptation. The performance of eight speaker adaptation systems (supervised vs. unsupervised, intra-lingual vs. cross-lingual) are compared using objective and subjective evaluations. Experimental results show the performance of unsupervised cross-lingual speaker adaptation is comparable to that of the supervised case in terms of spectrum adaptation in the EMIME scenario, even though automatically obtained transcriptions have a very high phoneme error rate.</subfield>
		</datafield>
	</record>
</collection>