%Aigaion2 BibTeX export from Idiap Publications
%Monday 27 July 2026 10:19:59 PM

% Idiap-Com report Idiap-Com-02-2024 (Naderi, 2024) on LLM-based post-hoc
% correction of ASR transcripts.
% - month uses the predefined BibTeX macro (jul) rather than a braced number,
%   so the style decides how the month is rendered.
% - {ASR} is brace-protected in the title so that sentence-casing styles do
%   not lowercase the acronym.
% - keywords, projects, abstract and pdf are not among the fields used by the
%   classic standard styles; they are kept verbatim from the Aigaion2 export.
@TECHREPORT{Naderi_Idiap-Com-02-2024,
                      author = {Naderi, Maryam},
                    keywords = {Automatic Speech Recognition, Large Language Models, Prompting},
                    projects = {Idiap},
                       month = jul,
                       title = {Integrating large language models and {ASR} systems using confidence measures and prompting},
                        type = {Idiap-Com},
                      number = {Idiap-Com-02-2024},
                        year = {2024},
                 institution = {Idiap},
                    abstract = {As large language models (LLMs) grow in parameter size and capabilities, such as interaction
through prompting, they open up new ways of interfacing with automatic speech recognition
(ASR) systems beyond rescoring n-best lists. This work investigates post-hoc correction of ASR
transcripts with LLMs. To avoid introducing errors into likely accurate transcripts, we propose
a variety of confidence-based filtering methods: sentence-level confidence, lowest-word
confidence, and correction of specific low-confidence words.
In the first method, sentence-level confidence, we utilize LLM to correct transcription errors
if the sentence confidence score of the transcription falls below a certain threshold. For the
lowest-word confidence method, we submit transcriptions for correction by LLM only if the
lowest-word confidence in the sentence is below a certain threshold. Finally, in the last filtering
method, we instruct the LLM to correct only certain words with confidence scores lower than
a specific threshold.
We determine the optimal threshold for each approach using the Librispeech dev-clean and
dev-other datasets and evaluate them using the Librispeech test-clean and test-other
datasets. Additionally, we examine the impact of various prompts, language models (LMs),
and Whisper models on the quality of corrected ASR transcriptions. Our findings suggest that
these methods can enhance the performance of less competitive ASR systems. Furthermore,
we conduct an error analysis and explore the detailed modifications made by the LLM and the
corresponding parts of speech in the sentence, evaluating whether each modification corrects
existing errors or introduces new errors into the transcription.},
                         pdf = {https://publications.idiap.ch/attachments/reports/2024/Naderi_Idiap-Com-02-2024.pdf}
}