<?xml version="1.0" encoding="UTF-8"?>
<collection xmlns="http://www.loc.gov/MARC21/slim">
	<record>
		<datafield tag="980" ind1=" " ind2=" ">
			<subfield code="a">REPORT</subfield>
		</datafield>
		<datafield tag="970" ind1=" " ind2=" ">
			<subfield code="a">Li_Idiap-RR-17-2014/IDIAP</subfield>
		</datafield>
		<datafield tag="245" ind1=" " ind2=" ">
			<subfield code="a">Feature Mapping of Multiple Beamformed Sources for Robust Overlapping Speech Recognition Using a Microphone Array</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Li, Weifeng</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Wang, Longbiao</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Zhou, Yicong</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Dines, John</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Magimai-Doss, Mathew</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Bourlard, Hervé</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Liao, Qingmin</subfield>
		</datafield>
		<datafield tag="088" ind1=" " ind2=" ">
			<subfield code="a">Idiap-RR-17-2014</subfield>
		</datafield>
		<datafield tag="260" ind1=" " ind2=" ">
			<subfield code="c">2014</subfield>
			<subfield code="b">Idiap</subfield>
		</datafield>
		<datafield tag="771" ind1="2" ind2=" ">
			<subfield code="d">October 2014</subfield>
		</datafield>
		<datafield tag="500" ind1=" " ind2=" ">
			<subfield code="a">IEEE/ACM Trans. on Audio, Speech and Language Processing</subfield>
		</datafield>
		<datafield tag="520" ind1=" " ind2=" ">
			<subfield code="a">This paper introduces a non-linear vector-based feature mapping approach to extract robust features for automatic speech recognition (ASR) of overlapping speech using a microphone array. We explore different configurations and additional sources of information to improve the effectiveness of the feature mapping. Firstly, we investigate the full-vector based mapping of different sources in a log mel-filterbank energy (log MFBE) domain, and demonstrate that re-training the acoustic model using the generated training data can help improve the recognition performance. Then we investigate the feature mapping between different domains. Finally in order to improve the qualities of the mapping inputs we propose a non-linear mapping of the features from multiple beamformed sources, which are directed at the target and interfering speakers respectively. We demonstrate the effectiveness of the proposed approach through extensive evaluations on the MONC corpus, which includes non-overlapping single speaker and overlapping multi-speaker conditions.</subfield>
		</datafield>
	</record>
</collection>