<?xml version="1.0" encoding="UTF-8"?>
<collection xmlns="http://www.loc.gov/MARC21/slim">
	<record>
		<datafield tag="980" ind1=" " ind2=" ">
			<subfield code="a">CONF</subfield>
		</datafield>
		<datafield tag="970" ind1=" " ind2=" ">
			<subfield code="a">Janbakhshi_ICASSP_2021/IDIAP</subfield>
		</datafield>
		<datafield tag="245" ind1=" " ind2=" ">
			<subfield code="a">AUTOMATIC DYSARTHRIC SPEECH DETECTION EXPLOITING PAIRWISE DISTANCE-BASED CONVOLUTIONAL NEURAL NETWORKS</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Janbakhshi, Parvaneh</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Kodrasi, Ina</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Bourlard, Hervé</subfield>
		</datafield>
		<datafield tag="856" ind1="4" ind2="0">
			<subfield code="i">EXTERNAL</subfield>
			<subfield code="u">http://publications.idiap.ch/attachments/papers/2021/Janbakhshi_ICASSP_2021.pdf</subfield>
			<subfield code="x">PUBLIC</subfield>
		</datafield>
		<datafield tag="856" ind1="4" ind2=" ">
			<subfield code="u">http://publications.idiap.ch/index.php/publications/showcite/Janbakhshi_Idiap-RR-32-2020</subfield>
			<subfield code="z">Related documents</subfield>
		</datafield>
		<datafield tag="711" ind1="2" ind2=" ">
			<subfield code="a">45th International Conference on Acoustics, Speech, and Signal Processing</subfield>
			<subfield code="c">Toronto, Canada</subfield>
		</datafield>
		<datafield tag="260" ind1=" " ind2=" ">
			<subfield code="c">2021</subfield>
		</datafield>
		<datafield tag="773" ind1=" " ind2=" ">
			<subfield code="c">7328–7332</subfield>
		</datafield>
	</record>
	<record>
		<datafield tag="980" ind1=" " ind2=" ">
			<subfield code="a">REPORT</subfield>
		</datafield>
		<datafield tag="970" ind1=" " ind2=" ">
			<subfield code="a">Janbakhshi_Idiap-RR-32-2020/IDIAP</subfield>
		</datafield>
		<datafield tag="245" ind1=" " ind2=" ">
			<subfield code="a">AUTOMATIC DYSARTHRIC SPEECH DETECTION EXPLOITING PAIRWISE DISTANCE-BASED CONVOLUTIONAL NEURAL NETWORKS</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Janbakhshi, Parvaneh</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Kodrasi, Ina</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Bourlard, Hervé</subfield>
		</datafield>
		<datafield tag="856" ind1="4" ind2="0">
			<subfield code="i">EXTERNAL</subfield>
			<subfield code="u">http://publications.idiap.ch/attachments/reports/2020/Janbakhshi_Idiap-RR-32-2020.pdf</subfield>
			<subfield code="x">PUBLIC</subfield>
		</datafield>
		<datafield tag="088" ind1=" " ind2=" ">
			<subfield code="a">Idiap-RR-32-2020</subfield>
		</datafield>
		<datafield tag="260" ind1=" " ind2=" ">
			<subfield code="c">2020</subfield>
			<subfield code="b">Idiap</subfield>
		</datafield>
		<datafield tag="771" ind1="2" ind2=" ">
			<subfield code="d">December 2020</subfield>
		</datafield>
		<datafield tag="500" ind1=" " ind2=" ">
			<subfield code="a">Submitted</subfield>
		</datafield>
		<datafield tag="520" ind1=" " ind2=" ">
			<subfield code="a">Automatic dysarthric speech detection can provide reliable and cost-effective computer-aided tools to assist the clinical diagnosis and management of dysarthria. In this paper we propose a novel automatic dysarthric speech detection approach based on analyses of pairwise distance matrices using convolutional neural networks (CNNs). We represent utterances through articulatory posteriors and consider pairs of phonetically-balanced representations, with one representation from a healthy speaker (i.e., the reference
representation) and the other representation from the test speaker (i.e., test representation). Given such pairs of reference and test representations, features are first extracted using a feature extraction front-end, a frame-level distance matrix is computed, and the obtained distance matrix is considered as an image by a CNN-based binary classifier. The feature extraction, distance matrix computation, and CNN-based classifier are jointly optimized in an end-to-end framework. Experimental results on two databases of healthy and dysarthric speakers for different languages and pathologies show that the proposed approach yields a high dysarthric speech detection performance, outperforming other CNN-based baseline approaches.</subfield>
		</datafield>
	</record>
</collection>