%Aigaion2 BibTeX export from Idiap Publications
%Sunday 27 April 2025 02:07:02 AM

% Conference paper on the ATCO2 automatic processing pipeline for air-traffic voice data.
% The exported author list is truncated; the trailing "and others" is BibTeX's et-al token.
@INPROCEEDINGS{Kocour_9THOPENSKYSYMPOSIUM2020_2021,
         author = {Kocour, Martin and Vesely, Karel and Szoke, Igor and Kesiraju, Santosh and Zuluaga-Gomez, Juan and Blatt, Alexander and Prasad, Amrutha and Nigmatulina, Iuliia and Motlicek, Petr and others},
       keywords = {Air traffic control, Automatic Speech Recognition, Contextual Adaptation, language identification, named entity recognition, OpenSky Network},
       projects = {Idiap, EC H2020- ATCO2},
          month = nov,
          title = {Automatic processing pipeline for collecting and annotating air-traffic voice communication data},
      booktitle = {Proceedings of 9th OpenSky Symposium 2020},
           year = {2021},
          pages = {1--9},
      publisher = {MDPI},
       location = {Brussels, Belgium},
   organization = {OpenSky Network},
       abstract = {This document describes our pipeline for automatic processing of the ATCO -- pilot audio communication we developed as part of the ATCO2 project. So far we collected two thousand hours of audio recordings that we either pre-process for the transcribers or use the data for semi-supervised training. Both ways of using the collected data can further improve our pipeline by retraining our models.
The proposed automatic processing pipeline is a cascade of many stand-alone components, namely: a) segmentation, b) volume control, c) signal-to-noise ratio filtering, d) diarization, e) 'speech-to-text' (ASR) module, f) English language detection, g) call-sign code recognition, h) ATCO -- pilot classification and i) highlighting the commands and values.
The key component of the pipeline is a speech-to-text transcription system that has to be trained with the real-world ATC data, otherwise, the performance is poor. To further improve the speech-to-text performance, we apply both the semi-supervised training with our recordings, and the contextual adaptation that uses a list of plausible call-signs from surveillance data as auxiliary information. The downstream NLP/NLU tasks are important from the application point of view. These application tasks need accurate models operating on top of the real speech-to-text output, so there is a need for more data too. And creating the ATC data is the main aspiration of the ATCO$^2$ project. At the end of the project, the data will be packaged and distributed by ELDA.},
            pdf = {https://publications.idiap.ch/attachments/papers/2021/Kocour_9THOPENSKYSYMPOSIUM2020_2021.pdf}
}