         author = {Mohammadshahi, Alireza and Aberer, Karl and Lebret, R{\'{e}}mi},
       projects = {Idiap},
          month = nov,
          title = {Aligning Multilingual Word Embeddings for Cross-Modal Retrieval Task},
      booktitle = {Proceedings of the Second Workshop on Fact Extraction and VERification (FEVER)},
           year = {2019},
          pages = {27--33},
      publisher = {Association for Computational Linguistics},
       location = {Hong Kong},
        address = {Hong Kong, China},
            url = {https://www.aclweb.org/anthology/D19-6605},
            doi = {10.18653/v1/D19-6605},
       abstract = {In this paper, we propose a new approach to learn multimodal multilingual embeddings for matching images and their relevant captions in two languages. We combine two existing objective functions to make images and captions close in a joint embedding space while adapting the alignment of word embeddings between existing languages in our model. We show that our approach enables better generalization, achieving state-of-the-art performance in text-to-image and image-to-text retrieval task, and caption-caption similarity task. Two multimodal multilingual datasets are used for evaluation: Multi30k with German and English captions and Microsoft-COCO with English and Japanese captions.}