%Aigaion2 BibTeX export from Idiap Publications
%Thursday 21 November 2024 01:19:12 PM

@ARTICLE{Cernak_TASLP_2015,
         author = {Cernak, Milos and Garner, Philip N. and Lazaridis, Alexandros and Motlicek, Petr and Na, Xingyu},
       projects = {Idiap},
          month = jun,
          title = {Incremental Syllable-Context Phonetic Vocoding},
        journal = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
         volume = {23},
         number = {6},
           year = {2015},
            url = {https://www.idiap.ch/paper/3107},
       crossref = {Cernak_Idiap-RR-05-2015},
            pdf = {https://publications.idiap.ch/attachments/papers/2015/Cernak_TASLP_2015.pdf}
}



% Cross-referenced publications:
@TECHREPORT{Cernak_Idiap-RR-05-2015,
         author = {Cernak, Milos and Garner, Philip N. and Lazaridis, Alexandros and Motlicek, Petr and Na, Xingyu},
       keywords = {parametric speech synthesis, very low bit rate speech coding},
       projects = {Idiap, armasuisse},
          month = feb,
          title = {Incremental Syllable-Context Phonetic Vocoding},
           type = {Idiap-RR},
         number = {Idiap-RR-05-2015},
           year = {2015},
    institution = {Idiap},
       abstract = {Current very low bit rate speech coders are, due to complexity limitations, designed to work off-line.
This paper investigates incremental speech coding that operates in real time and incrementally (i.e., encoded speech depends only on already-uttered speech, without the need for future speech information).
Since human speech communication is asynchronous (i.e., different information flows are processed simultaneously), we hypothesised that such an incremental speech coder should also operate asynchronously.
To accomplish this task, we describe speech coding that reflects the human cortical temporal sampling that packages information, in parallel, into units of different temporal granularity, such as phonemes and syllables.
More specifically, a phonetic vocoder (cascaded speech recognition and synthesis systems) extended with syllable-based information transmission mechanisms is investigated.
Two main aspects are evaluated in this work: synchronous and asynchronous coding.
Synchronous coding refers to the case where the phonetic vocoder and the speech generation process depend on the syllable boundaries during encoding and decoding, respectively.
Asynchronous coding, on the other hand, refers to the case where the phonetic encoding and speech generation processes are carried out independently of the syllable boundaries.
Our experiments confirmed that asynchronous incremental speech coding performs better in terms of intelligibility and overall speech quality, mainly due to better alignment of the segmental and prosodic information.
The proposed vocoding operates at an uncompressed bit rate of 213 bits/sec and achieves an average communication delay of 243 ms.},
            pdf = {https://publications.idiap.ch/attachments/reports/2014/Cernak_Idiap-RR-05-2015.pdf}
}