<?xml version="1.0" encoding="UTF-8"?>
<collection xmlns="http://www.loc.gov/MARC21/slim">
	<!-- One bibliographic record: the Idiap research report identified below as Idiap-RR-05-2020.
	     Tag and indicator meanings in the comments follow MARC 21 Bibliographic; anything marked
	     NOTE(review) is an assumption about the consuming system that SOURCE does not establish. -->
	<record>
		<!-- 980 $a: 9XX tags are locally defined in MARC 21.
		     NOTE(review): presumably the document type / collection in the importing system; confirm. -->
		<datafield tag="980" ind1=" " ind2=" ">
			<subfield code="a">REPORT</subfield>
		</datafield>
		<!-- 970 $a: locally defined field.
		     NOTE(review): looks like the source-system record identifier plus an "/IDIAP" suffix; confirm. -->
		<datafield tag="970" ind1=" " ind2=" ">
			<subfield code="a">Prasad_Idiap-RR-05-2020/IDIAP</subfield>
		</datafield>
		<!-- 245 $a: title statement. -->
		<datafield tag="245" ind1=" " ind2=" ">
			<subfield code="a">Language model domain adaptation for automatic speech recognition</subfield>
		</datafield>
		<!-- 700 $a (repeated): personal names as added entries, written "Surname, Forename".
		     NOTE(review): all three names are in 700 and the record has no 100 main entry;
		     presumably the importer treats element order as author order; confirm. -->
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Prasad, Amrutha</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Motlicek, Petr</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Nanchen, Alexandre</subfield>
		</datafield>
		<!-- 653 $a (repeated): uncontrolled index terms (keywords), one per datafield.
		     ind1="1" is the MARC 21 "primary" level of the term. -->
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">Automatic Speech Recognition</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">domain adaptation</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">language modeling</subfield>
		</datafield>
		<!-- 856: electronic location and access. ind1="4" = HTTP access, ind2="0" = the link is the
		     resource itself. $u holds the URI.
		     NOTE(review): $i EXTERNAL and $x PUBLIC look like local flags for the importing system; confirm.
		     NOTE(review): the URL path contains "reports/2018" while 088 and 260 $c say 2020;
		     confirm the link resolves before changing anything. -->
		<datafield tag="856" ind1="4" ind2="0">
			<subfield code="i">EXTERNAL</subfield>
			<subfield code="u">http://publications.idiap.ch/attachments/reports/2018/Prasad_Idiap-RR-05-2020.pdf</subfield>
			<subfield code="x">PUBLIC</subfield>
		</datafield>
		<!-- 088 $a: report number. -->
		<datafield tag="088" ind1=" " ind2=" ">
			<subfield code="a">Idiap-RR-05-2020</subfield>
		</datafield>
		<!-- 260: publication information. $c = date, $b = publisher name, $a = place of publication.
		     NOTE(review): subfields appear here in c, b, a order rather than the usual a, b, c;
		     subfield order is data in MARC, so it is left untouched; confirm the importer does not care. -->
		<datafield tag="260" ind1=" " ind2=" ">
			<subfield code="c">2020</subfield>
			<subfield code="b">Idiap</subfield>
			<subfield code="a">Rue Marconi 19, Martigny, Switzerland</subfield>
		</datafield>
		<!-- 771 $d: holds the month and year "January 2020".
		     NOTE(review): 771 does not appear to be a standard MARC 21 bibliographic tag, so this is
		     presumably a local convention of the importing system; confirm the meaning of tag and ind1="2". -->
		<datafield tag="771" ind1="2" ind2=" ">
			<subfield code="d">January 2020</subfield>
		</datafield>
		<!-- 520 $a: summary (abstract) text, kept verbatim as catalogued. -->
		<datafield tag="520" ind1=" " ind2=" ">
			<subfield code="a">This report provides an overview of the work carried out in improving Language Model (LM) development used during the decoding of an Automatic Speech Recognition (ASR) system. The goal of this work is to develop a robust language model that can be adapted to multiple domains (ex: talks), offering better accuracies of the ASR system when applied to an adapted domain. By exploring and exploiting various datasets like Common Crawl, Europarl, news and TEDLIUM and by experimenting different techniques in training a model, we achieve the goal of adapting a general purpose LM to a domain like talks. This also significantly improves the ASR performance compared to the existing (generic version) LM.</subfield>
		</datafield>
	</record>
</collection>