%Aigaion2 BibTeX export from Idiap Publications
%Thursday 21 November 2024 11:53:21 AM

@INPROCEEDINGS{Imseng_ICASSP_2010,
         author = {Imseng, David and Friedland, Gerald},
       keywords = {Gaussian Mixture Models, Prosodic features, Speaker Diarization},
       projects = {Idiap, IM2, AMIDA},
          month = mar,
          title = {An Adaptive Initialization Method for Speaker Diarization based on Prosodic Features},
      booktitle = {Proceedings {IEEE} International Conference on Acoustics, Speech and Signal Processing},
           year = {2010},
          pages = {4946--4949},
       location = {Dallas, USA},
       crossref = {Imseng_Idiap-RR-02-2010},
       abstract = {The following article presents a novel, adaptive initialization scheme that can be applied to most state-of-the-art Speaker Diarization algorithms, i.e. algorithms that use agglomerative hierarchical clustering with Bayesian Information Criterion (BIC) and Gaussian Mixture Models (GMMs) of frame-based cepstral features (MFCCs). The initialization method is a combination of the recently proposed ``adaptive seconds per Gaussian'' (ASPG) method and a new pre-clustering and number of initial clusters estimation method based on prosodic features. The presented initialization method has two important advantages. First, the method requires no manual tuning and is robust against file length and speaker count variations. Second, the method outperforms our previously used initialization methods on all benchmark files that were presented in the 2006, 2007, and 2009 NIST Rich Transcription (RT) evaluations and results in a Diarization Error Rate (DER) improvement of up to 67\% (relative).},
            pdf = {https://publications.idiap.ch/attachments/papers/2010/Imseng_ICASSP_2010.pdf}
}



% Cross-referenced publications:
@TECHREPORT{Imseng_Idiap-RR-02-2010,
         author = {Imseng, David and Friedland, Gerald},
       projects = {Idiap, IM2, AMIDA},
          month = jan,
          title = {An Adaptive Initialization Method for Speaker Diarization based on Prosodic Features},
           type = {Idiap-RR},
         number = {Idiap-RR-02-2010},
           year = {2010},
    institution = {Idiap},
       abstract = {The following article presents a novel, adaptive initialization scheme that can be applied to most state-of-the-art Speaker Diarization algorithms, i.e. algorithms that use agglomerative hierarchical clustering with Bayesian Information Criterion (BIC) and Gaussian Mixture Models (GMMs) of frame-based cepstral features (MFCCs). The initialization method is a combination of the recently proposed ``adaptive seconds per Gaussian'' (ASPG) method and a new pre-clustering and number of initial clusters estimation method based on prosodic features. The presented initialization method has two important advantages. First, the method requires no manual tuning and is robust against file length and speaker count variations. Second, the method outperforms our previously used initialization methods on all benchmark files that were presented in the 2006, 2007, and 2009 NIST Rich Transcription (RT) evaluations and results in a Diarization Error Rate (DER) improvement of up to 67\% (relative).},
            pdf = {https://publications.idiap.ch/attachments/reports/2009/Imseng_Idiap-RR-02-2010.pdf}
}