%Aigaion2 BibTeX export from Idiap Publications
%Thursday 26 December 2024 11:42:34 AM

% motlicek24_odyssey: conference paper (Odyssey 2024) introducing the ROXSD dataset.
% Notes for maintainers:
%   - The last author is a corporate name; it is wrapped in its own brace group so
%     BibTeX treats it as one indivisible name instead of splitting First/Last.
%   - Acronyms in the title are braced so sentence-casing styles keep their capitals.
%   - Accented letters use brace-wrapped LaTeX escapes, keeping the entry pure ASCII.
@INPROCEEDINGS{motlicek24_odyssey,
         author = {Motlicek, Petr and Dikici, Erinc and Madikeri, Srikanth and Rangappa, Pradeep and Janosik, Miroslav and Backfried, Gerhard and Thomas-Aniola, Dorothea and Schurz, Maximilian and Rohdin, Johan and Schwarz, Petr and Kovac, Marek and Mal{\'{y}}, Kv{\v{e}}toslav and Bobo{\v{s}}, Dominik and Leibiger, Mathias and Kalogiros, Costas and Alexopoulos, Andreas and Kudenko, Daniel and Ahmadi, Zahra and Nguyen, Hoang H. and Krishnan, Aravind and Zhu, Dawei and Klakow, Dietrich and Jofre, Maria and Calderoni, Francesco and Marraud, Denis and Koutras, Nikolaos and Nikolau, Nikos and Apostiki, Christiana and Douris, Panagiotis and Gkountas, Konstantinos and Sergidou, Eleni and Bosma, Wauter and Hughues, Joshua and {Hellenic Police Team}},
       keywords = {ROXANNE, ROXSD},
       projects = {EC H2020-ROXANNE, TRACY},
          month = jun,
          title = {{ROXSD}: The {ROXANNE} Multimodal and Simulated Dataset for Advancing Criminal Investigations},
      booktitle = {Odyssey 2024: The Speaker and Language Recognition Workshop},
           year = {2024},
          pages = {17--24},
            url = {https://www.isca-archive.org/odyssey_2024/motlicek24_odyssey.pdf},
            doi = {10.21437/odyssey.2024-3},
       abstract = {The ROXANNE project, conducted under the European Union's Horizon 2020 Programme, aimed to revolutionize criminal investigations by integrating speech, language, and video technologies with criminal network analysis. Despite the success in technology development, the project faced evaluation challenges due to the scarcity and legal restrictions surrounding real-world criminal activity datasets. In response, we introduce ROXSD, a simulated dataset of communication in organized crime. ROXSD is a set of wiretapped conversations (collected through communication service providers) between drug dealing suspects, following a realistic screenplay (incl. realistic conditions and constraints of a real investigation) prepared by Law Enforcement Agencies (LEAs). With a focus on multimodality and multilinguality, the dataset comprises 20 hours of telephone and video conversations involving 104 speakers, and is further aligned with ground-truth annotations for each modality involved, enabling precise evaluation and development of technologies. In addition, the multimodal data are enhanced with metadata and prior knowledge (e.g., suspects' biometric profiles) which is typically available as a result of lawfully intercepted communication. This paper introduces ROXSD as a pivotal resource for advancing technology in criminal research (specifically in domain of speech, text and network analysis). ROXSD not only facilitates in the domain of technology development and evaluation but also showcases the potential of simulated datasets in advancing the field of organized crime analytics, emphasizing the importance of such datasets in the absence of comprehensive real-world alternatives.},
            pdf = {https://publications.idiap.ch/attachments/papers/2024/Motlicek_ODYSSEY-2024_2024.pdf}
}