%Aigaion2 BibTeX export from Idiap Publications
%Thursday 21 November 2024 12:44:01 PM

@INPROCEEDINGS{Linke_LREC_2022,
         author = {Linke, Julian and Garner, Philip N. and Kubin, Gernot and Schuppler, Barbara},
       projects = {Idiap},
          month = jun,
          title = {Conversational Speech Recognition Needs Data? Experiments with Austrian German},
      booktitle = {Proceedings of the 13th Language Resources and Evaluation Conference},
           year = {2022},
          pages = {4684--4691},
   organization = {European Language Resources Association},
        address = {Marseille, France},
            url = {http://www.lrec-conf.org/proceedings/lrec2022/pdf/2022.lrec-1.500.pdf},
       abstract = {Conversational speech represents one of the most complex of automatic speech recognition (ASR) tasks owing to the high inter-speaker variation in both pronunciation and conversational dynamics. Such complexity is particularly sensitive to low-resourced (LR) scenarios. Recent developments in self-supervision have allowed such scenarios to take advantage of large amounts of otherwise unrelated data. In this study, we characterise an LR Austrian German conversational task. We begin with a non-pre-trained baseline and show that fine-tuning of a model pre-trained using self-supervision leads to improvements consistent with those in the literature; this extends to cases where a lexicon and language model are included. We also show that the advantage of pre-training indeed arises from the larger database rather than the self-supervision. Further, by use of a leave-one-conversation-out technique, we demonstrate that robustness problems remain with respect to inter-speaker and inter-conversation variation. This serves to guide where future research might best be focused in light of the current state of the art.}
}