<?xml version="1.0" encoding="UTF-8"?>
<collection xmlns="http://www.loc.gov/MARC21/slim">
	<record>
		<datafield tag="980" ind1=" " ind2=" ">
			<subfield code="a">REPORT</subfield>
		</datafield>
		<datafield tag="970" ind1=" " ind2=" ">
			<subfield code="a">Mrini_Idiap-RR-26-2017/IDIAP</subfield>
		</datafield>
		<datafield tag="245" ind1=" " ind2=" ">
			<subfield code="a">Cross-lingual Transfer for News Article Labeling: Benchmarking Statistical and Neural Models</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Mrini, Khalil</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Pappas, Nikolaos</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Popescu-Belis, Andrei</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">document labeling</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">multilingual hierarchical networks</subfield>
		</datafield>
		<datafield tag="856" ind1="4" ind2="0">
			<subfield code="i">EXTERNAL</subfield>
			<subfield code="u">http://publications.idiap.ch/attachments/reports/2017/Mrini_Idiap-RR-26-2017.pdf</subfield>
			<subfield code="x">PUBLIC</subfield>
		</datafield>
		<datafield tag="088" ind1=" " ind2=" ">
			<subfield code="a">Idiap-RR-26-2017</subfield>
		</datafield>
		<datafield tag="260" ind1=" " ind2=" ">
			<subfield code="c">2017</subfield>
			<subfield code="b">Idiap</subfield>
			<subfield code="a">Rue Marconi 19, CH-1920 Martigny</subfield>
		</datafield>
		<datafield tag="771" ind1="2" ind2=" ">
			<subfield code="d">September 2017</subfield>
		</datafield>
		<datafield tag="500" ind1=" " ind2=" ">
			<subfield code="a">Report of EPFL semester project done by Khalil Mrini (1st year I&amp;C MSc student), supervised by N. Pappas and A. Popescu-Belis.</subfield>
		</datafield>
		<datafield tag="520" ind1=" " ind2=" ">
			<subfield code="a">Cross-lingual transfer has been shown to increase the performance of a text classification model thanks to the use of Multilingual Hierarchical Attention Networks (MHAN), on which this work is based.  Firstly, we compared the performance of monolingual and multilingual HANs with three types of bag-of-words models.  We found that the Binary Unigram model outperforms the HAN model with Dense encoders on the full vocabulary in 6 out of 8 languages, and ties against MHAN with the Dense encoders, when it uses the full vocabulary i.e., many more parameters than neural models. However, this is not true when we limit the number of parameters and (or) we increase the sophistication of the neural encoders to GRU or biGRU. Secondly, new configurations of parameter sharing were tested.  We found that sharing attention at the sentence level was the best configuration by a small margin when transferring from 5 out of 7 languages to English, as well as for cross-lingual transfer between English and Spanish, Russian, and Arabic. The tests were performed on the Deutsche Welle news corpus with 8 languages and 600k documents.</subfield>
		</datafield>
	</record>
</collection>