%Aigaion2 BibTeX export from Idiap Publications
%Saturday 21 December 2024 05:08:33 PM

% Journal article: Schnell and Garner, Speech Communication 138 (2022).
% The acronym TTS in the title is brace-protected so that styles which
% sentence-case titles do not lowercase it.
% The projects and pdf fields are not standard BibTeX fields; standard
% styles ignore unknown fields, so they serve as catalogue metadata only.
@ARTICLE{Schnell_SPECOM_2022,
         author = {Schnell, Bastian and Garner, Philip N.},
       projects = {NAST, MASS},
          month = mar,
          title = {Investigating a neural all pass warp in modern {TTS} applications},
        journal = {Speech Communication},
         volume = {138},
           year = {2022},
          pages = {26--37},
           note = {Open Access},
            doi = {10.1016/j.specom.2021.12.002},
       abstract = {We present a neural implementation of the all pass warp (APW) previously used for vocal tract length normalisation. This includes an efficient back-propagation, which can easily be integrated in modern neural network frameworks. The APW offers a low-dimensional control to alter the spectrum, which by design generalises over different speakers. We investigate the APW in two tasks required for future dialogue or translation agents, and provide a fairly thorough literature review for both: (1) Zero-shot speaker adaptation to allow keeping the source speaker identity with very small amounts of data. Experiments show increased speaker similarity and prove that the APW increases the generalisability of a multi-speaker model. (2) Emotional speech synthesis to translate or produce affective cues. To the best of our knowledge this is the first attempt on emotional speech synthesis with an APW. While the APW is not able to increase expressiveness or audio quality, our analysis shows that the warping correlates with the level of valence in the emotion. This work should enable future research on emotion translation during machine translation.},
            pdf = {https://publications.idiap.ch/attachments/papers/2022/Schnell_SPECOM_2022.pdf}
}