%Aigaion2 BibTeX export from Idiap Publications
%Thursday 21 November 2024 12:42:39 PM

% Workshop paper. The non-standard fields "projects" and "pdf" come from the
% exporter and are ignored by the standard bibliography styles.
% NOTE(review): "crossref" below points at the tech-report version of this
% paper, not at a parent proceedings volume. BibTeX copies any field missing
% here (month, number, type, institution, location) from that report into
% this entry -- confirm this inheritance is intended; a non-inheriting link
% such as biblatex's "xref" may be a better fit.
@INPROCEEDINGS{Taghizadeh_HSCMA_2011,
  author    = {Taghizadeh, Mohammad J. and Garner, Philip N. and Bourlard, Herv{\'{e}} and Abutalebi, Hamid Reza and Asaei, Afsaneh},
  projects  = {Idiap, IM2},
  title     = {An Integrated Framework for Multi-Channel Multi-Source Localization and Voice Activity Detection},
  booktitle = {The Third Joint Workshop on Hands-free Speech Communication and Microphone Arrays},
  year      = {2011},
  crossref  = {Taghizadeh_Idiap-RR-16-2011},
  abstract  = {Two of the major challenges in microphone array based adaptive beamforming, speech enhancement and distant speech recognition, are robust and accurate source localization and voice activity detection. This paper introduces a spatial gradient steered response power using the phase transform (SRP-PHAT) method which is capable of localization of competing speakers in overlapping conditions. We further investigate the behaviour of the SRP function and characterize theoretically a fixed point in its search space for the diffuse noise field. We call this fixed point the null position in the SRP search space. Building on this evidence, we propose a technique for multi-channel voice activity detection (MVAD) based on detection of a maximum power corresponding to the null position. The gradient SRP-PHAT in tandem with the MVAD form an integrated framework of multi-source localization and voice activity detection. The experiments carried out on real data recordings show that this framework is very effective in practical applications of hands-free communication.},
  pdf       = {https://publications.idiap.ch/attachments/papers/2011/Taghizadeh_HSCMA_2011.pdf}
}

% crossreferenced publications:

% Tech-report version of the paper above. It must stay after the entry that
% cross-references it, as classic BibTeX requires. "booktitle" and "location"
% are not standard fields for a tech report (standard styles ignore them);
% they record the workshop the report was submitted to. "month" uses the
% predefined macro so that styles can format it.
@TECHREPORT{Taghizadeh_Idiap-RR-16-2011,
  author      = {Taghizadeh, Mohammad J. and Garner, Philip N. and Bourlard, Herv{\'{e}} and Abutalebi, Hamid Reza and Asaei, Afsaneh},
  projects    = {Idiap, IM2},
  month       = jun,
  title       = {An Integrated Framework for Multi-Channel Multi-Source Localization and Voice Activity Detection},
  type        = {Idiap-RR},
  booktitle   = {IEEE Workshop on Hands-free Speech Communication and Microphone Arrays},
  number      = {Idiap-RR-16-2011},
  year        = {2011},
  location    = {30 May -- 1 June 2011, Edinburgh, Scotland},
  institution = {Idiap},
  abstract    = {Two of the major challenges in microphone array based adaptive beamforming, speech enhancement and distant speech recognition, are robust and accurate source localization and voice activity detection. This paper introduces a spatial gradient steered response power using the phase transform (SRP-PHAT) method which is capable of localization of competing speakers in overlapping conditions. We further investigate the behavior of the SRP function and characterize theoretically a fixed point in its search space for the diffuse noise field. We call this fixed point the null position in the SRP search space. Building on this evidence, we propose a technique for multi-channel voice activity detection (MVAD) based on detection of a maximum power corresponding to the null position. The gradient SRP-PHAT in tandem with the MVAD form an integrated framework of multi-source localization and voice activity detection. The experiments carried out on real data recordings show that this framework is very effective in practical applications of hands-free communication.},
  pdf         = {https://publications.idiap.ch/attachments/reports/2011/Taghizadeh_Idiap-RR-16-2011.pdf}
}