%Aigaion2 BibTeX export from Idiap Publications
%Sunday 22 December 2024 04:13:06 AM

@INPROCEEDINGS{zhang-rr-04-24b,
         author = {Zhang, Dong and Gatica-Perez, Daniel and Bengio, Samy and McCowan, Iain A. and Lathoud, Guillaume},
       projects = {Idiap},
          title = {Multimodal Group Action Clustering in Meetings},
      booktitle = {{ACM} 2nd International Workshop on Video Surveillance \& Sensor Networks in conjunction with 12th {ACM} International Conference on Multimedia},
           year = {2004},
           note = {IDIAP-RR 04-24},
       crossref = {zhang-rr-04-24},
       abstract = {We address the problem of clustering multimodal group actions in meetings using a two-layer HMM framework. Meetings are structured as sequences of group actions. Our approach aims at creating one cluster for each group action, where the number of group actions and the action boundaries are unknown a priori. In our framework, the first layer models typical actions of individuals in meetings using supervised HMM learning and low-level audio-visual features. A number of options that explicitly model certain aspects of the data (e.g., asynchrony) were considered. The second layer models the group actions using unsupervised HMM learning. The two layers are linked by a set of probability-based features produced by the individual action layer as input to the group action layer. The methodology was assessed on a set of multimodal turn-taking group actions, using a public five-hour meeting corpus. The results show that the use of multiple modalities and the layered framework are advantageous, compared to various baseline methods.},
            pdf = {https://publications.idiap.ch/attachments/reports/2004/zhang-acm-04.pdf},
     postscript = {ftp://ftp.idiap.ch/pub/reports/2004/rr-04-24.ps.gz},
  ipdmembership = {vision},
}



% crossreferenced publications:
@TECHREPORT{zhang-rr-04-24,
         author = {Zhang, Dong and Gatica-Perez, Daniel and Bengio, Samy and McCowan, Iain A. and Lathoud, Guillaume},
       projects = {Idiap},
          title = {Multimodal Group Action Clustering in Meetings},
           type = {Idiap-RR},
         number = {Idiap-RR-24-2004},
           year = {2004},
    institution = {IDIAP},
        address = {Martigny, Switzerland},
           note = {Published in ``{ACM} 2nd International Workshop on Video Surveillance \& Sensor Networks in conjunction with 12th {ACM} International Conference on Multimedia'', October, 2004},
       abstract = {We address the problem of clustering multimodal group actions in meetings using a two-layer HMM framework. Meetings are structured as sequences of group actions. Our approach aims at creating one cluster for each group action, where the number of group actions and the action boundaries are unknown a priori. In our framework, the first layer models typical actions of individuals in meetings using supervised HMM learning and low-level audio-visual features. A number of options that explicitly model certain aspects of the data (e.g., asynchrony) were considered. The second layer models the group actions using unsupervised HMM learning. The two layers are linked by a set of probability-based features produced by the individual action layer as input to the group action layer. The methodology was assessed on a set of multimodal turn-taking group actions, using a public five-hour meeting corpus. The results show that the use of multiple modalities and the layered framework are advantageous, compared to various baseline methods.},
            pdf = {https://publications.idiap.ch/attachments/reports/2004/rr-04-24.pdf},
     postscript = {ftp://ftp.idiap.ch/pub/reports/2004/rr-04-24.ps.gz},
         ipdinar = {2004},
  ipdmembership = {vision},
       language = {English},
}