%Aigaion2 BibTeX export from Idiap Publications
%Monday 29 April 2024 03:13:18 AM

@INPROCEEDINGS{gatica05a-conf,
         author = {Gatica-Perez, Daniel and McCowan, Iain A. and Zhang, Dong and Bengio, Samy},
       projects = {Idiap},
          title = {Detecting Group Interest-level in Meetings},
      booktitle = {{IEEE} Int. Conf. on Acoustics, Speech, and Signal Processing ({ICASSP})},
           year = {2005},
       crossref = {gatica-rr-04-51},
       abstract = {Finding relevant segments in meeting recordings is important for summarization, browsing, and retrieval purposes. In this paper, we define relevance as the interest-level that meeting participants manifest as a group during the course of their interaction (as perceived by an external observer), and investigate the automatic detection of segments of high-interest from audio-visual cues. This is motivated by the assumption that there is a relationship between segments of interest to participants, and those of interest to the end user, e.g. of a meeting browser. We first address the problem of human annotation of group interest-level. On a 50-meeting corpus, recorded in a room equipped with multiple cameras and microphones, we found that the annotations generated by multiple people exhibit a good degree of consistency, providing a stable ground-truth for automatic methods. For the automatic detection of high-interest segments, we investigate a methodology based on Hidden Markov Models (HMMs) and a number of audio and visual features. Single- and multi-stream approaches were studied. Using precision and recall as performance measures, the results suggest that the automatic detection of group interest-level is promising, and that while audio in general constitutes the predominant modality in meetings, the use of a multi-modal approach is beneficial.},
            pdf = {https://publications.idiap.ch/attachments/reports/2004/gatica-rr-04-51.pdf},
     postscript = {ftp://ftp.idiap.ch/pub/reports/2004/gatica-rr-04-51.ps.gz},
  ipdmembership = {speech, learning, vision},
}



crossreferenced publications: 
@TECHREPORT{gatica-rr-04-51,
         author = {Gatica-Perez, Daniel and McCowan, Iain A. and Zhang, Dong and Bengio, Samy},
       projects = {Idiap},
          title = {Detecting Group Interest-level in Meetings},
           type = {Idiap-RR},
         number = {Idiap-RR-51-2004},
           year = {2004},
    institution = {IDIAP},
        address = {Martigny, Switzerland},
           note = {Submitted for publication.},
       abstract = {Finding relevant segments in meeting recordings is important for summarization, browsing, and retrieval purposes. In this paper, we define relevance as the interest-level that meeting participants manifest as a group during the course of their interaction (as perceived by an external observer), and investigate the automatic detection of segments of high-interest from audio-visual cues. This is motivated by the assumption that there is a relationship between segments of interest to participants, and those of interest to the end user, e.g. of a meeting browser. We first address the problem of human annotation of group interest-level. On a 50-meeting corpus, recorded in a room equipped with multiple cameras and microphones, we found that the annotations generated by multiple people exhibit a good degree of consistency, providing a stable ground-truth for automatic methods. For the automatic detection of high-interest segments, we investigate a methodology based on Hidden Markov Models (HMMs) and a number of audio and visual features. Single- and multi-stream approaches were studied. Using precision and recall as performance measures, the results suggest that (i) the automatic detection of group interest-level is promising, and (ii) while audio in general constitutes the predominant modality in meetings, the use of a multi-modal approach is beneficial.},
            pdf = {https://publications.idiap.ch/attachments/reports/2004/gatica-rr-04-51.pdf},
     postscript = {ftp://ftp.idiap.ch/pub/reports/2004/gatica-rr-04-51.ps.gz},
  ipdmembership = {speech, learning, vision},
       language = {English},
}