%Aigaion2 BibTeX export from Idiap Publications
%Thursday 21 November 2024 12:32:22 PM

% Conference paper entry. The abstract below has had PDF line-break
% hyphenation removed and the curly apostrophe replaced by ASCII so the
% entry is safe for classic (8-bit) BibTeX. "projects" is a non-standard
% field that standard styles ignore.
@INPROCEEDINGS{Dey_INTERSPEECH2019_2019,
  author    = {Dey, Subhadeep and Motlicek, Petr and Bui, Trung and Dernoncourt, Franck},
  projects  = {Innosuisse-SM2},
  title     = {Exploiting semi-supervised training through a dropout regularization in end-to-end speech recognition},
  booktitle = {Proc. of Interspeech 2019},
  year      = {2019},
  abstract  = {In this paper, we explore various approaches for semi-supervised learning in an end-to-end automatic speech recognition (ASR) framework. The first step in our approach involves training a seed model on the limited amount of labelled data. Additional unlabelled speech data is employed through a data-selection mechanism to obtain the best hypothesized output, further used to retrain the seed model. However, uncertainties of the model may not be well captured with a single hypothesis. As opposed to this technique, we apply a dropout mechanism to capture the uncertainty by obtaining multiple hypothesized text transcripts of an speech recording. We assume that the diversity of automatically generated transcripts for an utterance will implicitly increase the reliability of the model. Finally, the data-selection process is also applied on these hypothesized transcripts to reduce the uncertainty. Experiments on freely-available TEDLIUM corpus and proprietary Adobe's internal dataset show that the proposed approach significantly reduces ASR errors, compared to the baseline model.}
}