<?xml version="1.0" encoding="UTF-8"?>
<collection xmlns="http://www.loc.gov/MARC21/slim">
	<record>
		<!-- Bibliographic record for the paper "A Probabilistic Framework for
		     Multiple Speaker Localization" (980 $a = CONF, 260 $c = 2013). -->
		<!-- 980 and 970 are locally defined 9XX fields. NOTE(review): 980 $a
		     presumably selects the record type/collection and 970 $a is the local
		     record identifier; confirm against the importing system's mapping. -->
		<datafield tag="980" ind1=" " ind2=" ">
			<subfield code="a">CONF</subfield>
		</datafield>
		<datafield tag="970" ind1=" " ind2=" ">
			<subfield code="a">Oualil_ICASSP2013_2013/IDIAP</subfield>
		</datafield>
		<!-- 245 $a: title. -->
		<datafield tag="245" ind1=" " ind2=" ">
			<subfield code="a">A Probabilistic Framework for Multiple Speaker Localization</subfield>
		</datafield>
		<!-- 700 $a: personal names in "Surname, Forename" form, one datafield per
		     person. All four names are 700 entries; this record has no 100 field. -->
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Oualil, Youssef</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Magimai-Doss, Mathew</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Faubel, Friedrich</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Klakow, Dietrich</subfield>
		</datafield>
		<!-- 856 (ind1 = 4, HTTP): $u is the link target and $z its public label.
		     The URL ends in Oualil_Idiap-RR-37-2012, the identifier used in
		     970 $a of the technical-report record for the same title. -->
		<datafield tag="856" ind1="4" ind2=" ">
			<subfield code="u">http://publications.idiap.ch/index.php/publications/showcite/Oualil_Idiap-RR-37-2012</subfield>
			<subfield code="z">Related documents</subfield>
		</datafield>
		<!-- 711 (ind1 = 2, name in direct order): meeting-name entry. The value
		     stored here is the proceedings title rather than a bare meeting name. -->
		<datafield tag="711" ind1="2" ind2=" ">
			<subfield code="a">Proceedings of the IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)</subfield>
		</datafield>
		<!-- 260 $c: publication year. -->
		<datafield tag="260" ind1=" " ind2=" ">
			<subfield code="c">2013</subfield>
		</datafield>
		<!-- 520 $a: abstract. The line breaks inside the subfield are part of the
		     text content, so do not re-indent or re-wrap this element. -->
		<datafield tag="520" ind1=" " ind2=" ">
			<subfield code="a">This paper presents a novel probabilistic framework for localizing multiple
speakers with a microphone array. In this framework, the generalized cross
correlation function (GCC) of each microphone pair is interpreted as a probability
distribution of the time difference of arrival (TDOA) and subsequently approximated as
a Gaussian mixture. The distribution parameters are estimated with a weighted expectation maximization algorithm. Then, the joint distribution of the TDOA Gaussian
mixtures is mapped to a multimodal distribution in the location space, where
each mode represents a potential source location. The approach taken here
performs the localization by 1) reducing the search space to some regions
that are likely to contain a source and then 2) extracting the actual
speaker locations with a numerical optimization algorithm. The effectiveness
of the proposed approach is shown using the AV16.3 corpus.</subfield>
		</datafield>
	</record>
	<record>
		<!-- Bibliographic record for the report "A Probabilistic Framework for
		     Multiple Speaker Localization" (980 $a = REPORT, 088 $a =
		     Idiap-RR-37-2012, 260 $c = 2012). -->
		<!-- 980 and 970 are locally defined 9XX fields. NOTE(review): 980 $a
		     presumably selects the record type/collection and 970 $a is the local
		     record identifier; confirm against the importing system's mapping. -->
		<datafield tag="980" ind1=" " ind2=" ">
			<subfield code="a">REPORT</subfield>
		</datafield>
		<datafield tag="970" ind1=" " ind2=" ">
			<subfield code="a">Oualil_Idiap-RR-37-2012/IDIAP</subfield>
		</datafield>
		<!-- 245 $a: title. -->
		<datafield tag="245" ind1=" " ind2=" ">
			<subfield code="a">A Probabilistic Framework for Multiple Speaker Localization</subfield>
		</datafield>
		<!-- 700 $a: personal names in "Surname, Forename" form, one datafield per
		     person. This record has no 100 field. -->
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Oualil, Youssef</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Magimai-Doss, Mathew</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Faubel, Friedrich</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Klakow, Dietrich</subfield>
		</datafield>
		<!-- NOTE(review): the next two 700 fields repeat the first two names with
		     $e (relator term) set to "Ed.". Confirm that listing them both as
		     plain entries and as editors is intended by the exporter. -->
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Oualil, Youssef</subfield>
			<subfield code="e">Ed.</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Magimai-Doss, Mathew</subfield>
			<subfield code="e">Ed.</subfield>
		</datafield>
		<!-- 653 (ind1 = 1, primary level): uncontrolled keywords, one per field. -->
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">Gaussian mixture</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">localization</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">microphone arrays</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">multiple speakers</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">Steered response power</subfield>
		</datafield>
		<!-- 856 (ind1 = 4, HTTP; ind2 = 0, the resource itself): $u is the URL of
		     the report PDF. NOTE(review): $i = EXTERNAL and $x = PUBLIC look like
		     site-specific storage/visibility flags; confirm with the importer. -->
		<datafield tag="856" ind1="4" ind2="0">
			<subfield code="i">EXTERNAL</subfield>
			<subfield code="u">http://publications.idiap.ch/attachments/reports/2012/Oualil_Idiap-RR-37-2012.pdf</subfield>
			<subfield code="x">PUBLIC</subfield>
		</datafield>
		<!-- 088 $a: report number. -->
		<datafield tag="088" ind1=" " ind2=" ">
			<subfield code="a">Idiap-RR-37-2012</subfield>
		</datafield>
		<!-- 260: publication statement; $c is the year and $b the publisher. -->
		<datafield tag="260" ind1=" " ind2=" ">
			<subfield code="c">2012</subfield>
			<subfield code="b">Idiap</subfield>
		</datafield>
		<!-- NOTE(review): 771 carries only $d with the month and year. The meaning
		     of this tag here is presumably site-specific; verify against the
		     consuming system before relying on it. -->
		<datafield tag="771" ind1="2" ind2=" ">
			<subfield code="d">December 2012</subfield>
		</datafield>
		<!-- 500 $a: general note. -->
		<datafield tag="500" ind1=" " ind2=" ">
			<subfield code="a">Submitted to ICASSP'13</subfield>
		</datafield>
		<!-- 520 $a: abstract. The line breaks (and any trailing spaces) inside the
		     subfield are part of the text content, so do not re-indent or re-wrap
		     this element. -->
		<datafield tag="520" ind1=" " ind2=" ">
			<subfield code="a">This paper presents a novel probabilistic framework for localizing multiple
speakers with a microphone array. In this framework, the generalized cross
correlation function (GCC) of each microphone pair is interpreted as a probability 
distribution of the time difference of arrival (TDOA) and subsequently approximated as 
a Gaussian mixture. The distribution parameters are estimated with a weighted expectation
maximization algorithm. Then, the joint distribution of the TDOA Gaussian
mixtures is mapped to a multimodal distribution in the location space, where
each mode represents a potential source location. The approach taken here
performs the localization by 1) reducing the search space to some regions
that are likely to contain a source and then 2) extracting the actual
speaker locations with a numerical optimization algorithm. The effectiveness
of the proposed approach is shown using the AV16.3 corpus.</subfield>
		</datafield>
	</record>
</collection>