% Conference paper in the eLex 2025 proceedings (Bled, 18--20 November 2025).
% Editors, publisher and publisher location were split out of the original
% free-text booktitle string; the location is kept as given there (verify
% against the printed proceedings if it matters).
@inproceedings{schlechtweg-etal-2025-automatic-356608,
  author    = {Schlechtweg, Dominik and Sköldberg, Emma and Virk, Shafqat Mumtaz and White, James and Hengchen, Simon},
  title     = {Automatic Non-recorded Sense Detection for {Swedish} through Word Sense Induction with fine-tuned Word-in-Context models},
  booktitle = {Electronic lexicography in the 21st century ({eLex} 2025): Intelligent lexicography. Proceedings of the {eLex} 2025 conference. Bled, 18--20 November 2025},
  editor    = {Kosem, I. and Jakubíček, M. and Medveď, M. and Zgaga, K. and Arhar Holdt, Š. and Munda, T. and Salgado, A.},
  publisher = {Lexical Computing CZ s.r.o.},
  address   = {Bled},
  year      = {2025},
  pages     = {159--173},
  abstract  = {Finding non-recorded senses is important for dictionary maintenance, where using automatic methods helps reduce manual efforts. We use automatic Word Sense Induction (WSI) to compare recorded sense numbers among a sample of headwords in a comprehensive Swedish monolingual dictionary with induced sense numbers for the same words in a Swedish corpus. We propose this as a simple technique to find words to prioritize for post-hoc manual checks, which can be done in a simple Online-User-Interface bypassing the need for programming knowledge. We perform a thorough manual evaluation of the proposed methodology enabling us to show statistically that using automatic WSI increases the odds of finding non-recorded senses compared to a random selection of words. We further (i) evaluate predictions according to potential inclusion in the dictionary providing strong evidence for usefulness in practical lexicography, and (ii) analyze model predictions in-depth to point towards future improvements. We, finally, integrate lessons learned from our analysis into a large-scale prediction effort, providing the first high-quality large-scale WSI predictions for Swedish. These are a valuable resource for future research in Swedish lexicography.},
}