%Aigaion2 BibTeX export from Idiap Publications
%Saturday 06 December 2025 12:57:45 AM
% Conference paper (Interspeech, 2025) by Vlasenko and Magimai-Doss on multimodal prosody modeling for multilingual sentence mode prediction.
@inproceedings{Vlasenko_INTERSPEECH_2025,
  author = {Vlasenko, Bogdan and Magimai-Doss, Mathew},
  title = {Multimodal Prosody Modeling: A Use Case for Multilingual Sentence Mode Prediction},
  booktitle = {Proceedings of Interspeech},
  year = {2025},
  keywords = {emotional prosody, Multilingual, Multimodal, sentence mode prediction},
  abstract = {Prosody modeling has garnered significant attention from the speech processing community. Recent developments in multilingual latent spaces for representing linguistic and acoustic information have become a new trend in various research directions. Therefore, we decided to evaluate the ability of multilingual acoustic neural embeddings and knowledge-based features to preserve sentence-mode-related information at the suprasegmental level. For linguistic information modeling, we selected neural embeddings based on word- and phoneme-level latent space representations. The experimental study was conducted using Italian, French, and German audiobook recordings, as well as emotional speech samples from EMO-DB. Both intra- and inter-language experimental protocols were used to assess classification performance for uni- and multimodal (early fusion approach) features. For comparison, we used a sentence mode prediction system built on top of automatically generated WHISPER-based transcripts.},
  pdf = {https://publications.idiap.ch/attachments/papers/2025/Vlasenko_INTERSPEECH_2025.pdf},
  projects = {Idiap, IICT},
  mainresearchprogram = {AI for Everyone},
  additionalresearchprograms = {Human-AI Teaming, AI for Everyone},
}