<?xml version="1.0" encoding="UTF-8"?>
<collection xmlns="http://www.loc.gov/MARC21/slim">
	<record>
		<datafield tag="980" ind1=" " ind2=" ">
			<subfield code="a">CONF</subfield>
		</datafield>
		<datafield tag="970" ind1=" " ind2=" ">
			<subfield code="a">Vasquez-Rodriguez_SWISSTEXT2026_2026/IDIAP</subfield>
		</datafield>
		<datafield tag="245" ind1=" " ind2=" ">
			<subfield code="a">Skill Extraction from Resumes and Job Offers across Six Languages</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Vásquez-Rodríguez, Laura</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Audrin, Bertrand</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Michel, Samuel</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Galli, Samuele</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Rogenhofer, Julneth</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Negro Cusa, Jacopo</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">van der Plas, Lonneke</subfield>
		</datafield>
		<datafield tag="856" ind1="4" ind2="0">
			<subfield code="i">EXTERNAL</subfield>
			<subfield code="u">http://publications.idiap.ch/attachments/papers/2026/Vasquez-Rodriguez_SWISSTEXT2026_2026.pdf</subfield>
			<subfield code="x">PUBLIC</subfield>
		</datafield>
		<datafield tag="711" ind1="2" ind2=" ">
			<subfield code="a">Proceedings of the 11th edition of the Swiss Text Analytics Conference</subfield>
		</datafield>
		<datafield tag="260" ind1=" " ind2=" ">
			<subfield code="c">2026</subfield>
		</datafield>
		<datafield tag="520" ind1=" " ind2=" ">
			<subfield code="a">We comprehensively evaluate multiple skill extraction approaches, including rule-based, semantic, and supervised methods, using resumes and job offers in English, French, German, Italian, Spanish, and Portuguese. Due to inherent privacy concerns in Human Resources (HR) data and the high cost of manual annotations, research on identifying relevant skills for the job market remains limited, often restricted to specific domains, datasets, and entity types, and is available in only a few languages. In the context of an industrial project, we have annotated 1,200 job offers and resumes across diverse domains and six languages, through a multidisciplinary collaboration among HR researchers, NLP researchers, and HR tech professionals. Our evaluation assesses the effectiveness of these systems in a multilingual, multidomain setting, capturing both standardized job offers and highly variable resumes. The results show that supervised models achieve F1 scores of up to 0.6, while rule-based methods offer better interpretability. Furthermore, we find large differences between how skills are formulated in job offers and resumes, while the latter is understudied in academic research.</subfield>
		</datafield>
	</record>
</collection>