% Aigaion2 BibTeX export from Idiap Publications
% Thursday 21 November 2024 04:45:17 PM
%
% NOTE(review): `projects` and `pdf` are non-standard fields (ignored by
% standard styles; presumably consumed by the Aigaion/Idiap toolchain).
% Crossref parent entry is placed AFTER the child, as classic BibTeX requires.

@article{Motlicek_EURASIP_2015,
  author   = {Motlicek, Petr and Imseng, David and Potard, Blaise and Garner, Philip N. and Himawan, Ivan},
  projects = {Idiap, SAMSUNG, DBOX, SIIP},
  month    = jun,
  % Acronyms braced so sentence-casing styles keep their capitalisation.
  title    = {Exploiting foreign resources for {DNN}-based {ASR}},
  journal  = {{EURASIP} Journal on Audio, Speech, and Music Processing},
  % NOTE(review): "2015:17" is the journal's article identifier (volume:article);
  % kept verbatim as exported.
  number   = {2015:17},
  year     = {2015},
  doi      = {10.1186/s13636-015-0058-5},
  crossref = {Motlicek_Idiap-RR-27-2015},
  abstract = {Manual transcription of audio databases for the development of automatic speech recognition (ASR) systems is a costly and time-consuming process. In the context of deriving acoustic models adapted to a specific application, or in low-resource scenarios, it is therefore essential to explore alternatives capable of improving speech recognition results. In this paper, we investigate the relevance of foreign data characteristics, in particular domain and language, when using this data as an auxiliary data source for training ASR acoustic models based on deep neural networks (DNNs). The acoustic models are evaluated on a challenging bilingual database within the scope of the MediaParl project. Experimental results suggest that in-language (but out-of-domain) data is more beneficial than in-domain (but out-of-language) data when employed in either supervised or semi-supervised training of DNNs. The best performing ASR system, an HMM/GMM acoustic model that exploits DNN as a discriminatively trained feature extractor outperforms the best performing HMM/DNN hybrid by about 5 \% relative (in terms of WER). An accumulated relative gain with respect to the MFCC-HMM/GMM baseline is about 30 \% WER.},
  pdf      = {https://publications.idiap.ch/attachments/papers/2015/Motlicek_EURASIP_2015.pdf},
}

% crossreferenced publications:

@techreport{Motlicek_Idiap-RR-27-2015,
  author      = {Motlicek, Petr and Imseng, David and Potard, Blaise and Garner, Philip N. and Himawan, Ivan},
  keywords    = {Acoustic model adaptation, Automatic Speech Recognition, Deep learning for speech, Semi-supervised training},
  projects    = {Idiap, SAMSUNG, DBOX, SIIP},
  % Fixed: was `month = {7}` (braced numeral); use the standard month macro
  % for July so styles render it per their own convention.
  month       = jul,
  title       = {Exploiting foreign resources for {DNN}-based {ASR}},
  type        = {Idiap-RR},
  number      = {Idiap-RR-27-2015},
  year        = {2015},
  institution = {Idiap},
  % NOTE(review): street-only address as exported (Idiap is in Martigny,
  % Switzerland) — confirm before adding the city.
  address     = {Rue Marconi 19},
  abstract    = {Manual transcription of audio databases for the development of automatic speech recognition (ASR) systems is a costly and time-consuming process. In the context of deriving acoustic models adapted to a specific application, or in low-resource scenarios, it is therefore essential to explore alternatives capable of improving speech recognition results. In this paper, we investigate the relevance of foreign data characteristics, in particular domain and language, when using this data as an auxiliary data source for training ASR acoustic models based on deep neural networks (DNNs). The acoustic models are evaluated on a challenging bilingual database within the scope of the MediaParl project. Experimental results suggest that in-language (but out-of-domain) data is more beneficial than in-domain (but out-of-language) data when employed in either supervised or semi-supervised training of DNNs. The best performing ASR system, an HMM/GMM acoustic model that exploits DNN as a discriminatively trained feature extractor outperforms the best performing HMM/DNN hybrid by about 5\% relative (in terms of WER). An accumulated relative gain with respect to the MFCC-HMM/GMM baseline is about 30\% WER.},
  pdf         = {https://publications.idiap.ch/attachments/reports/2015/Motlicek_Idiap-RR-27-2015.pdf},
}