%Aigaion2 BibTeX export from Idiap Publications
%Saturday 21 December 2024 05:35:38 PM
%
% NOTE(review): this paper appeared in "Findings of the Association for
% Computational Linguistics: ACL 2023" (a conference findings volume), so the
% correct entry type is @inproceedings with the venue in booktitle. The
% original export used @article with the venue split across journal/volume,
% which misrenders under standard styles. The redundant note (a duplicate of
% the landing-page URL) was folded into url/pdf, matching the convention the
% crossreferenced entry below already uses.
@inproceedings{Fajcik_ACL2023_2023,
  author    = {Fajcik, Martin and Motlicek, Petr and Smrz, Pavel},
  projects  = {Idiap, CRITERIA},
  month     = jul,
  title     = {{Claim-Dissector}: An Interpretable Fact-Checking System with Joint Re-ranking and Veracity Prediction},
  booktitle = {Findings of the Association for Computational Linguistics: ACL 2023},
  year      = {2023},
  pages     = {10184--10205},
  url       = {https://aclanthology.org/2023.findings-acl.647/},
  pdf       = {https://aclanthology.org/2023.findings-acl.647.pdf},
  crossref  = {Fajcik_Idiap-Com-03-2022},
  abstract  = {We present Claim-Dissector: a novel latent variable model for fact-checking and analysis, which given a claim and a set of retrieved evidence jointly learns to identify: (i) the relevant evidences to the given claim (ii) the veracity of the claim. We propose to disentangle the per-evidence relevance probability and its contribution to the final veracity probability in an interpretable way — the final veracity probability is proportional to a linear ensemble of per-evidence relevance probabilities. In this way, the individual contributions of evidences towards the final predicted probability can be identified. In per-evidence relevance probability, our model can further distinguish whether each relevant evidence is supporting (S) or refuting (R) the claim. This allows to quantify how much the S/R probability contributes to final verdict or to detect disagreeing evidence. Despite its interpretable nature, our system achieves results competetive with state-of-the-art on the FEVER dataset, as compared to typical two-stage system pipelines, while using significantly fewer parameters. Furthermore, our analysis shows that our model can learn fine-grained relevance cues while using coarse-grained supervision and we demonstrate it in 2 ways.
(i) We show that our model can achieve competitive sentence recall while using only paragraph-level relevance supervision. (ii) Traversing towards the finest granularity of relevance, we show that our model is capable of identifying relevance at the token level. To do this, we present a new benchmark TLR-FEVER focusing on token-level interpretability — humans annotate tokens in relevant evidences they considered essential when making their judgment. Then we measure how similar are these annotations to the tokens our model is focusing on.},
}

% Text outside any @entry is ignored by BibTeX; this label is kept verbatim
% from the original export.
crossreferenced publications:

% Crossref parent: in classic BibTeX the crossref target must appear AFTER
% all entries that reference it, as it does here.
@techreport{Fajcik_Idiap-Com-03-2022,
  author      = {Fajcik, Martin and Motlicek, Petr and Smrz, Pavel},
  editor      = {Fajcik, Martin},
  keywords    = {claim verification, fact checking, Interpretability, verification},
  projects    = {Idiap, CRITERIA},
  month       = sep,
  title       = {{Claim-Dissector}: An Interpretable Fact-Checking System with Joint Re-ranking and Veracity Prediction},
  type        = {Idiap-Com},
  number      = {Idiap-Com-03-2022},
  year        = {2022},
  institution = {Idiap},
  url         = {https://arxiv.org/abs/2207.14116},
  abstract    = {We present Claim-Dissector: a novel latent variable model for fact-checking and fact-analysis, which given a claim and a set of retrieved provenances allows learning jointly: (i) what are the relevant provenances to this claim (ii) what is the veracity of this claim. We propose to disentangle the per-provenance relevance probability and its contribution to the final veracity probability in an interpretable way - the final veracity probability is proportional to a linear ensemble of per-provenance relevance probabilities. This way, it can be clearly identified the relevance of which sources contributes to what extent towards the final probability. We show that our system achieves state-of-the-art results on FEVER dataset comparable to two-stage systems typically used in traditional fact-checking pipelines, while it often uses significantly less parameters and computation.
Our analysis shows that proposed approach further allows to learn not just which provenances are relevant, but also which provenances lead to supporting and which toward denying the claim, without direct supervision. This not only adds interpretability, but also allows to detect claims with conflicting evidence automatically. Furthermore, we study whether our model can learn fine-grained relevance cues while using coarse-grained supervision. We show that our model can achieve competitive sentence-recall while using only paragraph-level relevance supervision. Finally, traversing towards the finest granularity of relevance, we show that our framework is capable of identifying relevance at the token-level. To do this, we present a new benchmark focusing on token-level interpretability - humans annotate tokens in relevant provenances they considered essential when making their judgement. Then we measure how similar are these annotations to tokens our model is focusing on. Our code, and dataset will be released online.},
  pdf         = {https://publications.idiap.ch/attachments/reports/2022/Fajcik_Idiap-Com-03-2022.pdf},
}