% Aigaion2 BibTeX export from Idiap Publications
% Saturday 21 December 2024 05:52:03 PM
%
% Journal article; it cross-references the Idiap research report defined
% further down, which has to stay after this entry for classic BibTeX.
% NOTE(review): under classic BibTeX, crossref copies every field this entry
% does not set itself (e.g. note, type, institution) from the report --
% confirm that this inheritance is intended.

@article{weber-ar-01-42,
  author          = {Weber, Katrin and Ikbal, Shajith and Bengio, Samy and Bourlard, Herv{\'{e}}},
  projects        = {Idiap},
  title           = {{Robust} {Speech} {Recognition} and {Feature} Extraction Using {HMM2}},
  journal         = {Computer Speech \& Language},
  volume          = {17},
  number          = {2--3},
  year            = {2003},
  crossref        = {weber-rr-01-42},
  abstract        = {This paper presents the theoretical basis and preliminary experimental results of a new HMM model, referred to as HMM2, which can be considered as a mixture of HMMs. In this new model, the emission probabilities of the temporal (primary) HMM are estimated through secondary, state specific, HMMs working in the acoustic feature space. Thus, while the primary HMM is performing the usual time warping and integration, the secondary HMMs are responsible for extracting/modeling the possible feature dependencies, while performing frequency warping and integration. Such a model has several potential advantages, such as a more flexible modeling of the time/frequency structure of the speech signal. When working with spectral features, such a system can also perform nonlinear spectral warping, effectively implementing a form of nonlinear vocal tract normalization. Furthermore, it will be shown that HMM2 can be used to extract noise robust features, supposed to correspond to formant regions, which can be used as extra features for traditional HMM recognizers to improve their performance.
These issues are evaluated in the present paper, and different experimental results are reported on the Numbers95 database.},
  ipdmembership   = {speech},
}

% Cross-referenced publications:
% Idiap research report version of the article above (the crossref parent).

@techreport{weber-rr-01-42,
  author          = {Weber, Katrin and Ikbal, Shajith and Bengio, Samy and Bourlard, Herv{\'{e}}},
  projects        = {Idiap},
  title           = {{Robust} {Speech} {Recognition} and {Feature} Extraction Using {HMM2}},
  type            = {Idiap-RR},
  number          = {Idiap-RR-42-2001},
  year            = {2001},
  institution     = {IDIAP},
  address         = {Martigny, Switzerland},
  note            = {Published in Computer Speech and Language},
  abstract        = {This paper presents the theoretical basis and preliminary experimental results of a new HMM model, referred to as HMM2, which can be considered as a mixture of HMMs. In this new model, the emission probabilities of the temporal (primary) HMM are estimated through secondary, state specific, HMMs working in the acoustic feature space. Thus, while the primary HMM is performing the usual time warping and integration, the secondary HMMs are responsible for extracting/modeling the possible feature dependencies, while performing frequency warping and integration. Such a model has several potential advantages, such as a more flexible modeling of the time/frequency structure of the speech signal. When working with spectral features, such a system can also perform nonlinear spectral warping, effectively implementing a form of nonlinear vocal tract normalization. Furthermore, it will be shown that HMM2 can be used to extract noise robust features, supposed to correspond to formant regions, which can be used as extra features for traditional HMM recognizers to improve their performance.
These issues are evaluated in the present paper, and different experimental results are reported on the Numbers95 database.},
  pdf             = {https://publications.idiap.ch/attachments/reports/2001/rr01-42.pdf},
  postscript      = {ftp://ftp.idiap.ch/pub/reports/2001/rr01-42.ps.gz},
  ipdinar         = {2001},
  ipdmembership   = {speech},
  language        = {English},
}