%Aigaion2 BibTeX export from Idiap Publications
%Monday 04 March 2024 09:58:47 AM

         author = {Grangier, David and Vinciarelli, Alessandro},
       projects = {Idiap},
          title = {Noisy Text Clustering},
           type = {Idiap-RR},
         number = {Idiap-RR-31-2004},
           year = {2004},
    institution = {IDIAP},
       abstract = {This work presents document clustering experiments performed over noisy texts (i.e. texts that have been extracted through an automatic process like speech or character recognition). The effect of recognition errors on different clustering techniques is measured through the comparison of the results obtained with clean (manually typed texts) and noisy (automatic speech transcripts affected by $30\%$ Word Error Rate) versions of the TDT2 corpus ($\sim600$ hours of spoken data from broadcast news). The results suggest that clustering can be performed over noisy data with an acceptable performance degradation.},
            pdf = {https://publications.idiap.ch/attachments/reports/2004/rr04-31.pdf},
     postscript = {ftp://ftp.idiap.ch/pub/reports/2004/rr04-31.ps.gz},