%Aigaion2 BibTeX export from Idiap Publications
%Thursday 21 November 2024 11:59:24 AM

% Idiap research report Idiap-RR-12-2016 (April 2016).
% The keywords, projects, abstract and pdf fields are not standard techreport
% fields; standard styles ignore unknown fields, so they serve as catalogue metadata.
@TECHREPORT{Lazaridis_Idiap-RR-12-2016,
         author = {Lazaridis, Alexandros and Cernak, Milos and Garner, Philip N.},
       keywords = {deep neural networks, probabilistic amplitude demodulation, speech prosody, speech synthesis},
       projects = {Idiap, SIWIS, SCOPES-SP2},
          month = apr,
          title = {Probabilistic Amplitude Demodulation features in Speech Synthesis for Improving Prosody},
           type = {Idiap-RR},
         number = {Idiap-RR-12-2016},
           year = {2016},
    institution = {Idiap},
       abstract = {Amplitude demodulation (AM) is a signal decomposition technique by which a signal can be decomposed to a product of two signals, i.e., a quickly varying carrier and a slowly varying modulator. In this work, the probabilistic amplitude demodulation (PAD) features are used to improve prosody in speech synthesis. The PAD is applied iteratively for generating syllable and stress amplitude modulations in a cascade manner. The PAD features are used as a secondary input scheme along with the standard text-based input features in statistical parametric speech synthesis. Specifically, deep neural network (DNN)-based speech synthesis is used to evaluate the importance of these features. Objective evaluation has shown that the proposed system using the PAD features has improved mainly prosody modelling; it outperforms the baseline system by approximately 5\% in terms of relative reduction in root mean square error (RMSE) of the fundamental frequency (F0). The significance of this improvement is validated by subjective evaluation of the overall speech quality, achieving 38.6\% over 19.5\% preference score in respect to the baseline system, in an ABX test.},
            pdf = {https://publications.idiap.ch/attachments/reports/2016/Lazaridis_Idiap-RR-12-2016.pdf}
}