% Aigaion2 BibTeX export from Idiap Publications
% Thursday 21 November 2024 12:18:03 PM
%
% Conference paper. The same work also exists as an Idiap research report
% (second entry below). The link to that report is kept in the `xref` field
% instead of `crossref`: the report is not the proceedings volume containing
% this paper, and `crossref` would copy the report's type, number and
% institution fields into this entry.

@inproceedings{ajmera2002icslp,
  author        = {Ajmera, Jitendra and Bourlard, Herv{\'{e}} and Lapidot, I. and McCowan, Iain A.},
  title         = {Unknown-Multiple Speaker clustering using {HMM}},
  booktitle     = {ICSLP},
  year          = {2002},
  address       = {Denver, Colorado},
  note          = {IDIAP-RR 02-07},
  xref          = {ajmera-rr-02-07},
  projects      = {Idiap},
  abstract      = {An HMM-based speaker clustering framework is presented, where the number of speakers and segmentation boundaries are unknown \emph{a priori}. Ideally, the system aims to create one pure cluster for each speaker. The HMM is ergodic in nature with a minimum duration topology. The final number of clusters is determined automatically by merging closest clusters and retraining this new cluster, until a decrease in likelihood is observed. In the same framework, we also examine the effect of using only the features from highly voiced frames as a means of improving the robustness and computational complexity of the algorithm. The proposed system is assessed on the 1996 HUB-4 evaluation test set in terms of both cluster and speaker purity. It is shown that the number of clusters found often correspond to the actual number of speakers.},
  pdf           = {https://publications.idiap.ch/attachments/reports/2002/ajmera2002icslp.pdf},
  postscript    = {ftp://ftp.idiap.ch/pub/reports/2002/ajmera2002icslp.ps},
  ipdmembership = {speech},
}

% Cross-referenced publications:
% Research-report version of the conference paper above.

@techreport{ajmera-rr-02-07,
  author        = {Ajmera, Jitendra and Bourlard, Herv{\'{e}} and Lapidot, I. and McCowan, Iain A.},
  title         = {Unknown-Multiple Speaker clustering using {HMM}},
  type          = {Idiap-RR},
  number        = {Idiap-RR-07-2002},
  institution   = {IDIAP},
  address       = {Martigny, Switzerland},
  year          = {2002},
  note          = {ICSLP, Denver, Colorado, 2002},
  projects      = {Idiap},
  abstract      = {An HMM-based speaker clustering framework is presented, where the number of speakers and segmentation boundaries are unknown \emph{a priori}. Ideally, the system aims to create one pure cluster for each speaker. The HMM is ergodic in nature with a minimum duration topology. The final number of clusters is determined automatically by merging closest clusters and retraining this new cluster, until a decrease in likelihood is observed. In the same framework, we also examine the effect of using only the features from highly voiced frames as a means of improving the robustness and computational complexity of the algorithm. The proposed system is assessed on the 1996 HUB-4 evaluation test set in terms of both cluster and speaker purity. It is shown that the number of clusters found often correspond to the actual number of speakers.},
  pdf           = {https://publications.idiap.ch/attachments/reports/2002/rr02-07.pdf},
  postscript    = {ftp://ftp.idiap.ch/pub/reports/2002/rr02-07.ps.gz},
  ipdinar       = {2002},
  ipdmembership = {speech},
  language      = {English},
}