%Aigaion2 BibTeX export from Idiap Publications
%Friday 05 December 2025 07:53:23 AM
% Conference paper (ICASSP 2010). The related Idiap report is linked through
% the non-inheriting `xref` field, so report-only fields (type, number,
% institution) are not copied into this entry the way `crossref` would do.
% The conference city lives in `venue`; `address` is reserved for the
% publisher's city and is therefore left unset.
@inproceedings{Korchagin_ICASSP_2010,
  author        = {Korchagin, Danil and Garner, Philip N. and Dines, John},
  title         = {Automatic Temporal Alignment of {AV} Data with Confidence Estimation},
  booktitle     = {Proceedings {IEEE} International Conference on Acoustics, Speech and Signal Processing},
  year          = {2010},
  month         = mar,
  venue         = {Dallas, USA},
  xref          = {Korchagin_Idiap-RR-40-2009},
  keywords      = {pattern matching, reliability estimation, time synchronization, time-frequency analysis},
  projects      = {Idiap, TA2},
  internal-note = {Authors' affiliation address as exported (previously stored in the address field): P.O. Box 592, CH-1920 Martigny, Switzerland},
  abstract      = {In this paper, we propose a new approach for the automatic audio-based temporal alignment with confidence estimation of audio-visual data, recorded by different cameras, camcorders or mobile phones during social events. All recorded data is temporally aligned based on ASR-related features with a common master track, recorded by a reference camera, and the corresponding confidence of alignment is estimated. The core of the algorithm is based on perceptual time-frequency analysis with a precision of 10 ms. The results show correct alignment in 99\% of cases for a real life dataset and surpass the performance of cross correlation while keeping lower system requirements.},
  pdf           = {https://publications.idiap.ch/attachments/papers/2009/Korchagin_ICASSP_2010.pdf}
}
% Cross-referenced publications:
% Idiap research report Idiap-RR-40-2009 (December 2009).
% `type` carries the human-readable report label and `number` the full report
% identifier, so styles print "<type> <number>" without repeating the
% "Idiap-RR" prefix twice.
@techreport{Korchagin_Idiap-RR-40-2009,
  author      = {Korchagin, Danil and Garner, Philip N. and Dines, John},
  title       = {Automatic Temporal Alignment of {AV} Data with Confidence Estimation},
  type        = {Idiap Research Report},
  number      = {Idiap-RR-40-2009},
  institution = {Idiap},
  address     = {CH-1920 Martigny, Switzerland},
  year        = {2009},
  month       = dec,
  keywords    = {pattern matching, reliability estimation, time synchronisation, time-frequency analysis},
  projects    = {Idiap, TA2},
  abstract    = {In this paper, we propose a new approach for the automatic audio-based temporal alignment with confidence estimation of audio-visual data, recorded by different cameras, camcorders or mobile phones during social events. All recorded data is temporally aligned based on ASR-related features with a common master track, recorded by a reference camera, and the corresponding confidence of alignment is estimated. The core of the algorithm is based on perceptual time-frequency analysis with a precision of 10 ms. The results show correct alignment in 99\% of cases for a real life dataset and surpass the performance of cross correlation while keeping lower system requirements.},
  pdf         = {https://publications.idiap.ch/attachments/reports/2009/Korchagin_Idiap-RR-40-2009.pdf}
}