@misc{johansson-stymne-2025-proceedings-350486,
  title = {Proceedings of the Joint 25th Nordic Conference on Computational Linguistics and 11th Baltic Conference on Human Language Technologies (NoDaLiDa/Baltic-HLT 2025)},
  author = {Johansson, Richard and Stymne, Sara},
  year = {2025},
  publisher = {University of Tartu Library},
  address = {Tartu},
  ISBN = {978-9908-53-109-0},
}

@inProceedings{hagstrom-etal-2025-language-352367,
  title = {Language Model Re-rankers are Fooled by Lexical Similarities},
  abstract = {Language model (LM) re-rankers are used to refine retrieval results for retrieval-augmented generation (RAG). They are more expensive than lexical matching methods like BM25, but are assumed to better process semantic information and the relations between the query and the retrieved answers. To understand whether LM re-rankers always live up to this assumption, we evaluate 6 different LM re-rankers on the NQ, LitQA2 and DRUID datasets. Our results show that LM re-rankers struggle to outperform a simple BM25 baseline on DRUID. Leveraging a novel separation metric based on BM25 scores, we explain and identify re-ranker errors stemming from lexical dissimilarities. We also investigate different methods to improve LM re-ranker performance and find these methods mainly useful for NQ. Taken together, our work identifies and explains weaknesses of LM re-rankers and points to the need for more adversarial and realistic datasets for their evaluation.},
  booktitle = {Proceedings of the Eighth Fact Extraction and VERification Workshop (FEVER)},
  author = {Hagström, Lovisa and Nie, Ercong and Halifa, Ruben and Schmid, Helmut and Johansson, Richard and Junge, Alexander},
  year = {2025},
  publisher = {Association for Computational Linguistics},
  address = {Vienna, Austria},
  pages = {18--33},
}

@inProceedings{saynova-etal-2025-fact-352276,
  title = {Fact Recall, Heuristics or Pure Guesswork? Precise Interpretations of Language Models for Fact Completion},
  abstract = {Language models (LMs) can make a correct prediction based on many possible signals in a prompt, not all corresponding to recall of factual associations. However, current interpretations of LMs fail to take this into account. For example, given the query “Astrid Lindgren was born in” with the corresponding completion “Sweden”, no difference is made between whether the prediction was based on knowing where the author was born or on assuming that a person with a Swedish-sounding name was born in Sweden. In this paper, we present a model-specific recipe - PrISM - for constructing datasets with examples of four different prediction scenarios: generic language modeling, guesswork, heuristics recall and exact fact recall. We apply two popular interpretability methods to the scenarios: causal tracing (CT) and information flow analysis. We find that both yield distinct results for each scenario. Results for the exact fact recall and generic language modeling scenarios confirm previous conclusions about the importance of mid-range MLP sublayers for fact recall, while results for guesswork and heuristics recall indicate a critical role of late MLP sublayers at the last token position. In summary, we contribute resources for a more extensive and granular study of fact completion in LMs, together with analyses that provide a more nuanced understanding of how LMs process fact-related queries.},
  booktitle = {Findings of the Association for Computational Linguistics: ACL 2025},
  author = {Saynova, Denitsa and Hagström, Lovisa and Johansson, Moa and Johansson, Richard and Kuhlmann, Marco},
  year = {2025},
  publisher = {Association for Computational Linguistics},
  address = {Vienna, Austria},
  pages = {18322--18349},
}

@edited_book{johansson-2025-from-350489,
  title = {From Electrophoresis to Wikidata: Festschrift in honor of Pierre Nugues},
  abstract = {This Festschrift is a tribute to Professor Pierre Nugues on the occasion of his 65th birthday.},
  editor = {Johansson, Richard},
  year = {2025},
  publisher = {University of Tartu Library},
  address = {Tartu},
  ISBN = {978-9908-53-286-8},
}