%Aigaion2 BibTeX export from Idiap Publications
%Thursday 18 July 2024 03:48:23 PM

% Conference-paper record for "Unknown-Multiple Speaker clustering using HMM"
% (booktitle ICSLP, 2002). The same work also exists as a report entry under
% the key ajmera-rr-02-07. That link is stored in the non-inheriting xref
% field rather than crossref, so the report's type/number/institution fields
% are not merged into this conference entry when the bibliography is built.
@INPROCEEDINGS{ajmera2002icslp,
         author = {Ajmera, Jitendra and Bourlard, Herv{\'{e}} and Lapidot, I. and McCowan, Iain A.},
       projects = {Idiap},
          title = {Unknown-Multiple Speaker clustering using {HMM}},
      booktitle = {ICSLP},
           year = {2002},
        address = {Denver, Colorado},
           note = {IDIAP-RR 02-07},
           xref = {ajmera-rr-02-07},
       abstract = {An HMM-based speaker clustering framework is presented, where the number of speakers and segmentation boundaries are unknown \emph{a priori}. Ideally, the system aims to create one pure cluster for each speaker. The HMM is ergodic in nature with a minimum duration topology. The final number of clusters is determined automatically by merging closest clusters and retraining this new cluster, until a decrease in likelihood is observed. In the same framework, we also examine the effect of using only the features from highly voiced frames as a means of improving the robustness and computational complexity of the algorithm. The proposed system is assessed on the 1996 HUB-4 evaluation test set in terms of both cluster and speaker purity. It is shown that the number of clusters found often correspond to the actual number of speakers.},
            pdf = {https://publications.idiap.ch/attachments/reports/2002/ajmera2002icslp.pdf},
     postscript = {ftp://ftp.idiap.ch/pub/reports/2002/ajmera2002icslp.ps},
  ipdmembership = {speech},
}



% Cross-referenced publications:
% Technical-report record (type Idiap-RR, number Idiap-RR-07-2002) for
% "Unknown-Multiple Speaker clustering using HMM". The note field records the
% conference venue of the same work. The acronym in the title is brace-protected
% so sentence-casing bibliography styles do not lowercase it.
@TECHREPORT{ajmera-rr-02-07,
         author = {Ajmera, Jitendra and Bourlard, Herv{\'{e}} and Lapidot, I. and McCowan, Iain A.},
       projects = {Idiap},
          title = {Unknown-Multiple Speaker clustering using {HMM}},
           type = {Idiap-RR},
         number = {Idiap-RR-07-2002},
           year = {2002},
    institution = {IDIAP},
        address = {Martigny, Switzerland},
           note = {ICSLP, Denver, Colorado, 2002},
       abstract = {An HMM-based speaker clustering framework is presented, where the number of speakers and segmentation boundaries are unknown \emph{a priori}. Ideally, the system aims to create one pure cluster for each speaker. The HMM is ergodic in nature with a minimum duration topology. The final number of clusters is determined automatically by merging closest clusters and retraining this new cluster, until a decrease in likelihood is observed. In the same framework, we also examine the effect of using only the features from highly voiced frames as a means of improving the robustness and computational complexity of the algorithm. The proposed system is assessed on the 1996 HUB-4 evaluation test set in terms of both cluster and speaker purity. It is shown that the number of clusters found often correspond to the actual number of speakers.},
            pdf = {https://publications.idiap.ch/attachments/reports/2002/rr02-07.pdf},
     postscript = {ftp://ftp.idiap.ch/pub/reports/2002/rr02-07.ps.gz},
        ipdinar = {2002},
  ipdmembership = {speech},
       language = {English},
}