<?xml version="1.0" encoding="UTF-8"?>
<collection xmlns="http://www.loc.gov/MARC21/slim">
	<record>
		<datafield tag="980" ind1=" " ind2=" ">
			<subfield code="a">CONF</subfield>
		</datafield>
		<datafield tag="970" ind1=" " ind2=" ">
			<subfield code="a">Taghizadeh_HSCMA_2011/IDIAP</subfield>
		</datafield>
		<datafield tag="245" ind1=" " ind2=" ">
			<subfield code="a">An Integrated Framework for Multi-Channel Multi-Source Localization and Voice Activity Detection</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Taghizadeh, Mohammad J.</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Garner, Philip N.</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Bourlard, Hervé</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Abutalebi, Hamid Reza</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Asaei, Afsaneh</subfield>
		</datafield>
		<datafield tag="856" ind1="4" ind2="0">
			<subfield code="i">EXTERNAL</subfield>
			<subfield code="u">http://publications.idiap.ch/attachments/papers/2011/Taghizadeh_HSCMA_2011.pdf</subfield>
			<subfield code="x">PUBLIC</subfield>
		</datafield>
		<datafield tag="856" ind1="4" ind2=" ">
			<subfield code="u">http://publications.idiap.ch/index.php/publications/showcite/Taghizadeh_Idiap-RR-16-2011</subfield>
			<subfield code="z">Related documents</subfield>
		</datafield>
		<datafield tag="711" ind1="2" ind2=" ">
			<subfield code="a">The Third Joint Workshop on Hands-free Speech Communication and Microphone Arrays</subfield>
		</datafield>
		<datafield tag="260" ind1=" " ind2=" ">
			<subfield code="c">2011</subfield>
		</datafield>
		<datafield tag="520" ind1=" " ind2=" ">
			<subfield code="a">Two of the major challenges in microphone array based adaptive beamforming, speech enhancement and distant speech recognition, are robust and accurate source localization and voice activity detection. This paper introduces a spatial gradient steered response power using the phase transform (SRP-PHAT) method which is capable of localization of competing speakers in overlapping conditions. We further investigate the behaviour of the SRP function and characterize theoretically a fixed point in its search space for the diffuse noise field. We call this fixed point the null position in the SRP search space. Building on this evidence, we propose a technique for multi- channel voice activity detection (MVAD) based on detection of a maximum power corresponding to the null position. The gradient SRP-PHAT in tandem with the MVAD form an integrated framework of multi-source localization and voice activity detection. The experiments carried out on real data recordings show that this framework is very effective in practical applications of hands-free communication.</subfield>
		</datafield>
	</record>
	<record>
		<datafield tag="980" ind1=" " ind2=" ">
			<subfield code="a">REPORT</subfield>
		</datafield>
		<datafield tag="970" ind1=" " ind2=" ">
			<subfield code="a">Taghizadeh_Idiap-RR-16-2011/IDIAP</subfield>
		</datafield>
		<datafield tag="245" ind1=" " ind2=" ">
			<subfield code="a">AN INTEGRATED FRAMEWORK FOR MULTI-CHANNEL MULTI-SOURCE LOCALIZATION AND VOICE ACTIVITY DETECTION</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Taghizadeh, Mohammad J.</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Garner, Philip N.</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Bourlard, Hervé</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Abutalebi, Hamid Reza</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Asaei, Afsaneh</subfield>
		</datafield>
		<datafield tag="856" ind1="4" ind2="0">
			<subfield code="i">EXTERNAL</subfield>
			<subfield code="u">http://publications.idiap.ch/attachments/reports/2011/Taghizadeh_Idiap-RR-16-2011.pdf</subfield>
			<subfield code="x">PUBLIC</subfield>
		</datafield>
		<datafield tag="088" ind1=" " ind2=" ">
			<subfield code="a">Idiap-RR-16-2011</subfield>
		</datafield>
		<datafield tag="260" ind1=" " ind2=" ">
			<subfield code="c">2011</subfield>
			<subfield code="b">Idiap</subfield>
		</datafield>
		<datafield tag="771" ind1="2" ind2=" ">
			<subfield code="d">June 2011</subfield>
		</datafield>
		<datafield tag="520" ind1=" " ind2=" ">
			<subfield code="a">Two of the major challenges in microphone array based adap-
tive beamforming, speech enhancement and distant speech
recognition, are robust and accurate source localization and
voice activity detection. This paper introduces a spatial gra-
dient steered response power using the phase transform (SRP-
PHAT) method which is capable of localization of competing
speakers in overlapping conditions. We further investigate the
behavior of the SRP function and characterize theoretically a
fixed point in its search space for the diffuse noise field. We
call this fixed point the null position in the SRP search space.
Building on this evidence, we propose a technique for multi-
channel voice activity detection (MVAD) based on detection
of a maximum power corresponding to the null position. The
gradient SRP-PHAT in tandem with the MVAD form an inte-
grated framework of multi-source localization and voice ac-
tivity detection. The experiments carried out on real data
recordings show that this framework is very effective in prac-
tical applications of hands-free communication.</subfield>
		</datafield>
	</record>
</collection>