%Aigaion2 BibTeX export from Idiap Publications
%Thursday 21 November 2024 12:01:08 PM

% Conference paper. It points at its technical-report version (defined
% further down in this file) through the crossref field.
% NOTE(review): with classic BibTeX, crossref copies every field that is
% missing here from the referenced entry (type, number and institution in
% this case), and the referenced entry has to stay below this one. Confirm
% that field inheritance, rather than a plain "see also" link, is intended.
@inproceedings{aradilla:icslp:2006,
  author        = {Aradilla, Guillermo and Vepa, Jithendra and Bourlard, Herv{\'{e}}},
  projects      = {Idiap},
  title         = {Using Posterior-Based Features in Template Matching for Speech Recognition},
  booktitle     = {International Conference on Spoken Language Processing},
  year          = {2006},
  note          = {IDIAP-RR 06-23},
  crossref      = {aradilla:rr06-23},
  abstract      = {Given the availability of large speech corpora, as well as the increasing of memory and computational resources, the use of template matching approaches for automatic speech recognition (ASR) have recently attracted new attention. In such template-based approaches, speech is typically represented in terms of acoustic vector sequences, using spectral-based features such as MFCC of PLP, and local distances are usually based on Euclidean or Mahalanobis distances. In the present paper, we further investigate template-based ASR and show (on a continuous digit recognition task) that the use of posterior-based features significantly improves the standard template-based approaches, yielding to systems that are very competitive to state-of-the-art HMMs, even when using a very limited number (e.g., 10) of reference templates.
Since those posteriors-based features can also be interpreted as a probability distribution, we also show that using Kullback-Leibler (KL) divergence as a local distance further improves the performance of the template-based approach, now beating state-of-the-art of more complex posterior-based HMMs systems (usually referred to as ``Tandem'').},
  pdf           = {https://publications.idiap.ch/attachments/papers/2006/aradilla-icslp-2006.pdf},
  postscript    = {ftp://ftp.idiap.ch/pub/papers/2006/aradilla-icslp-2006.ps.gz},
  ipdmembership = {speech},
}

% crossreferenced publications:

% Technical-report version of the conference paper above; it is the target of
% that entry's crossref field, so it is kept after it in the file.
@techreport{aradilla:rr06-23,
  author        = {Aradilla, Guillermo and Vepa, Jithendra and Bourlard, Herv{\'{e}}},
  projects      = {Idiap},
  title         = {Using Posterior-Based Features in Template Matching for Speech Recognition},
  type          = {Idiap-RR},
  number        = {Idiap-RR-23-2006},
  year          = {2006},
  institution   = {IDIAP},
  note          = {Published in ICSLP 2006},
  abstract      = {Given the availability of large speech corpora, as well as the increasing of memory and computational resources, the use of template matching approaches for automatic speech recognition (ASR) have recently attracted new attention. In such template-based approaches, speech is typically represented in terms of acoustic vector sequences, using spectral-based features such as MFCC of PLP, and local distances are usually based on Euclidean or Mahalanobis distances. In the present paper, we further investigate template-based ASR and show (on a continuous digit recognition task) that the use of posterior-based features significantly improves the standard template-based approaches, yielding to systems that are very competitive to state-of-the-art HMMs, even when using a very limited number (e.g., 10) of reference templates.
Since those posteriors-based features can also be interpreted as a probability distribution, we also show that using Kullback-Leibler (KL) divergence as a local distance further improves the performance of the template-based approach, now beating state-of-the-art of more complex posterior-based HMMs systems (usually referred to as ``Tandem'').},
  pdf           = {https://publications.idiap.ch/attachments/reports/2006/aradilla-idiap-rr-06-23.pdf},
  postscript    = {ftp://ftp.idiap.ch/pub/reports/2006/aradilla-idiap-rr-06-23.ps.gz},
  ipdmembership = {speech},
}