% Aigaion2 BibTeX export from Idiap Publications
% Sunday 22 December 2024 02:50:49 AM

% Conference paper (ICSLP 2002, volume 4 of the proceedings).
% The technical-report version of the same work is `stephenson02d` below.
% It is linked with `xref` rather than `crossref`: `crossref` would copy the
% report's `type`, `number`, `institution` and `note` fields into this entry,
% whereas `xref` only records the relation (and is ignored by classic BibTeX).
@inproceedings{stephenson02e,
  author        = {Stephenson, Todd Andrew and Magimai.-Doss, Mathew and Bourlard, Herv{\'{e}}},
  projects      = {Idiap},
  month         = sep,
  title         = {Auxiliary Variables in Conditional {Gaussian} Mixtures for Automatic Speech Recognition},
  booktitle     = {Seventh International Conference on Spoken Language Processing (ICSLP~2002)},
  volume        = {4},
  year          = {2002},
  address       = {Denver, CO, USA},
  xref          = {stephenson02d},
  abstract      = {In previous work, we presented a case study using an estimated pitch value as the conditioning variable in conditional Gaussians that showed the utility of hiding the pitch values in certain situations or in modeling it independently of the hidden state in others. Since only single conditional Gaussians were used in that work, we extend that work here to using conditional Gaussian mixtures in the emission distributions to make this work more comparable to state-of-the-art automatic speech recognition. We also introduce a rate-of-speech (ROS) variable within the conditional Gaussian mixtures. We find that, under the current methods, using observed pitch or ROS in the recognition phase does not provide improvement. However, systems trained on pitch or ROS may provide improvement in the recognition phase over the baseline when the pitch or ROS is marginalized out.},
  pdf           = {https://publications.idiap.ch/attachments/papers/2002/todd-icslp2002.pdf},
  postscript    = {ftp://ftp.idiap.ch/pub/papers/2002/todd-icslp2002.ps.gz},
  ipdmembership = {speech},
}

% Cross-referenced publications:

% Idiap research report with the same authors, title and abstract as the
% conference paper `stephenson02e` above; its `note` names the conference.
@techreport{stephenson02d,
  author        = {Stephenson, Todd Andrew and Magimai.-Doss, Mathew and Bourlard, Herv{\'{e}}},
  projects      = {Idiap},
  title         = {Auxiliary Variables in Conditional {Gaussian} Mixtures for Automatic Speech Recognition},
  type          = {Idiap-RR},
  number        = {Idiap-RR-25-2002},
  year          = {2002},
  institution   = {IDIAP},
  note          = {In ``Seventh International Conference on Spoken Language Processing (ICSLP~2002)'', 2002},
  abstract      = {In previous work, we presented a case study using an estimated pitch value as the conditioning variable in conditional Gaussians that showed the utility of hiding the pitch values in certain situations or in modeling it independently of the hidden state in others. Since only single conditional Gaussians were used in that work, we extend that work here to using conditional Gaussian mixtures in the emission distributions to make this work more comparable to state-of-the-art automatic speech recognition. We also introduce a rate-of-speech (ROS) variable within the conditional Gaussian mixtures. We find that, under the current methods, using observed pitch or ROS in the recognition phase does not provide improvement. However, systems trained on pitch or ROS may provide improvement in the recognition phase over the baseline when the pitch or ROS is marginalized out.},
  pdf           = {https://publications.idiap.ch/attachments/reports/2002/rr02-25.pdf},
  postscript    = {ftp://ftp.idiap.ch/pub/reports/2002/rr02-25.ps.gz},
  ipdmembership = {speech},
}