%Aigaion2 BibTeX export from Idiap Publications
%Thursday 21 November 2024 03:23:09 PM

% Conference paper (EUROSPEECH'03).
% Its crossref field points at the Idiap research report entry defined
% further down, so the value is written with exactly the same letter case
% as that entry's key: case-sensitive tools (e.g. Biber) would otherwise
% fail to resolve it.
% Classic BibTeX requires the cross-referenced entry to appear AFTER the
% entry that refers to it; keep this ordering when editing the file.
@inproceedings{BenZeghiba_eurospeech-03,
  author        = {BenZeghiba, Mohamed Faouzi and Bourlard, Herv{\'{e}}},
  projects      = {Idiap},
  title         = {On the {C}ombination of {S}peech and {S}peaker {R}ecognition},
  booktitle     = {European Conference On Speech, Communication and Technology (EUROSPEECH'03)},
  year          = {2003},
  address       = {Geneva, Switzerland},
  note          = {IDIAP-RR 03-19},
  crossref      = {BenZeghiba-03-19},
  abstract      = {This paper investigates an approach that maximizes the joint posterior probability of the pronounced word and the speaker identity given the observed data. This probability can be expressed as a product of the posterior probability of the pronounced word estimated through an artificial neural network (ANN), and the likelihood of the data estimated through a Gaussian mixture model (GMM). We show that the posterior probabilities estimated through a speaker-dependent ANN, as usually done in the hybrid HMM/ANN systems, are reliable for speech recognition but they are less reliable for speaker recognition. To alleviate this problem, we thus study how this posterior probability can be combined with the likelihood derived from a speaker-dependent GMM model to improve the speaker recognition performance. We thus end up with a joint model that can be used for text-dependent speaker identification and for speech recognition (and mutually benefiting from each other).},
  pdf           = {https://publications.idiap.ch/attachments/reports/2003/rr03-19.pdf},
  postscript    = {ftp://ftp.idiap.ch/pub/reports/2003/rr03-19.ps.gz},
  ipdmembership = {speech},
}

% crossreferenced publications:

% Idiap research report version of the same work; this is the entry that
% the conference paper above names in its crossref field.
@techreport{BenZeghiba-03-19,
  author        = {BenZeghiba, Mohamed Faouzi and Bourlard, Herv{\'{e}}},
  projects      = {Idiap},
  title         = {On the {C}ombination of {S}peech and {S}peaker {R}ecognition},
  type          = {Idiap-RR},
  number        = {Idiap-RR-19-2003},
  year          = {2003},
  institution   = {IDIAP},
  note          = {in Proceedings of the 8th European Conference on Speech, Communication and Technology (EUROSPEECH'03)},
  abstract      = {This paper investigates an approach that maximizes the joint posterior probability of the pronounced word and the speaker identity given the observed data. This probability can be expressed as a product of the posterior probability of the pronounced word estimated through an artificial neural network (ANN), and the likelihood of the data estimated through a Gaussian mixture model (GMM). We show that the posterior probabilities estimated through a speaker-dependent ANN, as usually done in the hybrid HMM/ANN systems, are reliable for speech recognition but they are less reliable for speaker recognition. To alleviate this problem, we thus study how this posterior probability can be combined with the likelihood derived from a speaker-dependent GMM model to improve the speaker recognition performance. We thus end up with a joint model that can be used for text-dependent speaker identification and for speech recognition (and mutually benefiting from each other).},
  pdf           = {https://publications.idiap.ch/attachments/reports/2003/rr03-19.pdf},
  postscript    = {ftp://ftp.idiap.ch/pub/reports/2003/rr03-19.ps.gz},
  ipdmembership = {speech},
}