%Aigaion2 BibTeX export from Idiap Publications
%Friday 05 December 2025 03:40:19 PM
% Idiap research report Idiap-RR-09-2025 (Rangappa, Prasad, Madikeri, Motlicek; 2025).
% The month is given as the predefined macro aug (unquoted) so that the
% bibliography style expands it to the month name instead of printing a bare number.
% The fields keywords, projects, mainresearchprogram, abstract and pdf are not part of
% the standard techreport field set; standard styles ignore fields they do not know.
@TECHREPORT{Rangappa_Idiap-RR-09-2025,
  author              = {Rangappa, Pradeep and Prasad, Amrutha and Madikeri, Srikanth and Motlicek, Petr},
  title               = {Enhancing Speaker Diarization using Correlation-Based Clustering Initialization},
  type                = {Idiap-RR},
  number              = {Idiap-RR-09-2025},
  institution         = {Idiap},
  year                = {2025},
  month               = aug,
  keywords            = {DISPLACE-2, ECAPA-TDNN embedding, local speaker segmentation, Speaker Diarization},
  projects            = {EC H2020-ROXANNE, ELOQUENCE},
  mainresearchprogram = {Human-AI Teaming},
  abstract            = {Speaker diarization becomes challenging in multilingual and code-switched speech due to frequent speaker changes and acoustic variability. While PyAnnote achieves state-of-the-art performance on standard benchmarks, its effectiveness drops on complex datasets like DISPLACE-2. To address this issue, we propose to improve the performance of the global agglomerative clustering by improving the input embeddings. Specifically, we enhance the embeddings by analyzing their pairwise correlations and averaging highly correlated embeddings. This approach improves speaker representation for highly correlated embeddings while reducing speaker confusion and improving clustering accuracy. Evaluated on DISPLACE-2 Track-1 (multilingual speaker diarization), our method shows a 3\% relative DER improvement over the baseline, and 8\% when combined with segmentation fine-tuning. Notably, the approach reduces DER in rapid turn-taking and language transition regions, improving robustness in code-mixed speech.},
  pdf                 = {https://publications.idiap.ch/attachments/reports/2025/Rangappa_Idiap-RR-09-2025.pdf}
}