<?xml version="1.0" encoding="UTF-8"?>
<collection xmlns="http://www.loc.gov/MARC21/slim">
	<record>
		<datafield tag="980" ind1=" " ind2=" ">
			<subfield code="a">CONF</subfield>
		</datafield>
		<datafield tag="970" ind1=" " ind2=" ">
			<subfield code="a">Ullmann_INTERSPEECH_2015/IDIAP</subfield>
		</datafield>
		<datafield tag="245" ind1=" " ind2=" ">
			<subfield code="a">Objective Intelligibility Assessment of Text-to-Speech Systems Through Utterance Verification</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Ullmann, Raphael</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Rasipuram, Ramya</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Magimai-Doss, Mathew</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Bourlard, Hervé</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">KL-divergence</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">KL-HMM</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">objective measures</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">Speech intelligibility</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">text-to-speech synthesis</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">utterance verification</subfield>
		</datafield>
		<datafield tag="856" ind1="4" ind2="0">
			<subfield code="i">EXTERNAL</subfield>
			<subfield code="u">http://publications.idiap.ch/attachments/papers/2015/Ullmann_INTERSPEECH_2015.pdf</subfield>
			<subfield code="x">PUBLIC</subfield>
		</datafield>
		<datafield tag="856" ind1="4" ind2=" ">
			<subfield code="u">http://publications.idiap.ch/index.php/publications/showcite/Ullmann_Idiap-RR-06-2015</subfield>
			<subfield code="z">Related documents</subfield>
		</datafield>
		<datafield tag="711" ind1="2" ind2=" ">
			<subfield code="a">Proceedings of Interspeech</subfield>
			<subfield code="c">Dresden, Germany</subfield>
		</datafield>
		<datafield tag="260" ind1=" " ind2=" ">
			<subfield code="c">2015</subfield>
		</datafield>
		<datafield tag="773" ind1=" " ind2=" ">
			<subfield code="c">3501-3505</subfield>
		</datafield>
		<datafield tag="856" ind1="4" ind2=" ">
			<subfield code="u">http://isca-speech.org/archive/interspeech_2015/i15_3501.html</subfield>
			<subfield code="z">URL</subfield>
		</datafield>
		<datafield tag="520" ind1=" " ind2=" ">
			<subfield code="a">Objective assessment of synthetic speech intelligibility can be a useful tool for the development of text-to-speech (TTS) systems, as it provides a reproducible and inexpensive alternative to subjective listening tests. In a recent work, it was shown that the intelligibility of synthetic speech could be assessed objectively by comparing two sequences of phoneme class conditional probabilities, corresponding to instances of synthetic and human reference speech, respectively. In this paper, we build on those findings to propose a novel approach that formulates objective intelligibility assessment as an utterance verification problem using hidden Markov models, thereby alleviating the need for human reference speech. Specifically, given each text input to the TTS system, the proposed approach automatically verifies the words in the output synthetic speech signal and estimates an intelligibility score based on word recall statistics. We evaluate the proposed approach on the 2011 Blizzard Challenge data, and show that the estimated scores and the subjective intelligibility scores are highly correlated (Pearson’s |R| = 0.94).</subfield>
		</datafield>
	</record>
	<record>
		<datafield tag="980" ind1=" " ind2=" ">
			<subfield code="a">REPORT</subfield>
		</datafield>
		<datafield tag="970" ind1=" " ind2=" ">
			<subfield code="a">Ullmann_Idiap-RR-06-2015/IDIAP</subfield>
		</datafield>
		<datafield tag="245" ind1=" " ind2=" ">
			<subfield code="a">Objective Intelligibility Assessment of Text-to-Speech Systems Through Utterance Verification</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Ullmann, Raphael</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Rasipuram, Ramya</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Magimai-Doss, Mathew</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Bourlard, Hervé</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">KL-divergence</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">KL-HMM</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">objective measures</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">Speech intelligibility</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">text-to-speech synthesis</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">utterance verification</subfield>
		</datafield>
		<datafield tag="856" ind1="4" ind2="0">
			<subfield code="i">EXTERNAL</subfield>
			<subfield code="u">http://publications.idiap.ch/attachments/reports/2015/Ullmann_Idiap-RR-06-2015.pdf</subfield>
			<subfield code="x">PUBLIC</subfield>
		</datafield>
		<datafield tag="088" ind1=" " ind2=" ">
			<subfield code="a">Idiap-RR-06-2015</subfield>
		</datafield>
		<datafield tag="260" ind1=" " ind2=" ">
			<subfield code="c">2015</subfield>
			<subfield code="b">Idiap</subfield>
		</datafield>
		<datafield tag="771" ind1="2" ind2=" ">
			<subfield code="d">April 2015</subfield>
		</datafield>
		<datafield tag="520" ind1=" " ind2=" ">
			<subfield code="a">Objective assessment of synthetic speech intelligibility can be a useful tool for the development of text-to-speech (TTS) systems, as it provides a reproducible and inexpensive alternative to subjective listening tests. In a recent work, it was shown that the intelligibility of synthetic speech could be assessed objectively by comparing two sequences of phoneme class conditional probabilities, corresponding to instances of synthetic and human reference speech, respectively. In this paper, we build on those findings to propose a novel approach that formulates objective intelligibility assessment as an utterance verification problem using hidden Markov models, thereby alleviating the need for human reference speech. Specifically, given each text input to the TTS system, the proposed approach automatically verifies the words in the output synthetic speech signal and estimates an intelligibility score based on word recall statistics. We evaluate the proposed approach on the 2011 Blizzard Challenge data, and show that the estimated scores and the subjective intelligibility scores are highly correlated (Pearson’s |R| = 0.94).</subfield>
		</datafield>
	</record>
</collection>