%Aigaion2 BibTeX export from Idiap Publications
%Thursday 21 November 2024 11:54:41 AM

% Workshop paper. It cross-references the technical-report entry
% kurimo-esca99 defined further down in this file.
@inproceedings{kurimo-esca99b,
  author        = {Kurimo, Mikko and Mokbel, Chafic},
  title         = {Latent Semantic Indexing by Self-Organizing Map},
  booktitle     = {{ESCA} {ETRW} workshop on Accessing Information in Spoken Audio},
  address       = {Cambridge, UK},
  month         = apr,
  year          = {1999},
  note          = {IDIAP-RR 99-12},
  crossref      = {kurimo-esca99},
  projects      = {Idiap},
  abstract      = {An important problem for the information retrieval from spoken documents is how to extract those relevant documents which are poorly decoded by the speech recognizer. In this paper we propose a stochastic index for the documents based on the Latent Semantic Analysis (LSA) of the decoded document contents. The original LSA approach uses Singular Value Decomposition to reduce the dimensionality of the documents. As an alternative, we propose a computationally more feasible solution using Random Mapping (RM) and Self-Organizing Maps (SOM). The motivation for clustering the documents by SOM is to reduce the effect of recognition errors and to extract new characteristic index terms. Experimental indexing results are presented using relevance judgments for the retrieval results of test queries and using a document perplexity defined in this paper to measure the power of the index models.},
  pdf           = {https://publications.idiap.ch/attachments/papers/1999/kurimo99.pdf},
  postscript    = {ftp://ftp.idiap.ch/pub/papers/speech/kurimo-esca99.ps.gz},
  ipdmembership = {speech},
}

% crossreferenced publications:
% Parent entry for the crossref in kurimo-esca99b. Classic BibTeX requires a
% cross-referenced entry to appear after every entry that refers to it, so
% keep this entry below kurimo-esca99b.
@techreport{kurimo-esca99,
  author        = {Kurimo, Mikko and Mokbel, Chafic},
  title         = {Latent Semantic Indexing by Self-Organizing Map},
  type          = {Idiap-RR},
  number        = {Idiap-RR-12-1999},
  institution   = {IDIAP},
  year          = {1999},
  note          = {Published in Proceedings of the ESCA ETRW workshop on Accessing Information in Spoken Audio, Cambridge, UK, 1999},
  projects      = {Idiap},
  abstract      = {An important problem for the information retrieval from spoken documents is how to extract those relevant documents which are poorly decoded by the speech recognizer. In this paper we propose a stochastic index for the documents based on the Latent Semantic Analysis (LSA) of the decoded document contents. The original LSA approach uses Singular Value Decomposition to reduce the dimensionality of the documents. As an alternative, we propose a computationally more feasible solution using Random Mapping (RM) and Self-Organizing Maps (SOM). The motivation for clustering the documents by SOM is to reduce the effect of recognition errors and to extract new characteristic index terms. Experimental indexing results are presented using relevance judgments for the retrieval results of test queries and using a document perplexity defined in this paper to measure the power of the index models.},
  pdf           = {https://publications.idiap.ch/attachments/reports/1999/rr99-12.pdf},
  postscript    = {ftp://ftp.idiap.ch/pub/reports/1999/rr99-12.ps.gz},
  ipdmembership = {speech},
}