%Aigaion2 BibTeX export from Idiap Publications
%Thursday 21 November 2024 12:44:01 PM
@INPROCEEDINGS{Linke_LREC_2022,
  author       = {Linke, Julian and Garner, Philip N. and Kubin, Gernot and Schuppler, Barbara},
  projects     = {Idiap},
  month        = jun,
  title        = {Conversational Speech Recognition Needs Data? Experiments with Austrian German},
  booktitle    = {Proceedings of the 13th Language Resources and Evaluation Conference},
  year         = {2022},
  pages        = {4684--4691},
  organization = {European Language Resources Association},
  address      = {Marseille, France},
  url          = {http://www.lrec-conf.org/proceedings/lrec2022/pdf/2022.lrec-1.500.pdf},
  abstract     = {Conversational speech represents one of the most complex of automatic speech recognition (ASR) tasks owing to the high inter-speaker variation in both pronunciation and conversational dynamics. Such complexity is particularly sensitive to low-resourced (LR) scenarios. Recent developments in self-supervision have allowed such scenarios to take advantage of large amounts of otherwise unrelated data. In this study, we characterise an LR Austrian German conversational task. We begin with a non-pre-trained baseline and show that fine-tuning of a model pre-trained using self-supervision leads to improvements consistent with those in the literature; this extends to cases where a lexicon and language model are included. We also show that the advantage of pre-training indeed arises from the larger database rather than the self-supervision. Further, by use of a leave-one-conversation-out technique, we demonstrate that robustness problems remain with respect to inter-speaker and inter-conversation variation. This serves to guide where future research might best be focused in light of the current state-of-the-art.}
}