% Journal article: Transactions of the Association for Computational Linguistics, vol. 13 (2025), pp. 690--708.
@article{cassotti-tahmasebi-2025-sense-353573,
  author   = {Cassotti, Pierluigi and Tahmasebi, Nina},
  title    = {Sense-specific Historical Word Usage Generation},
  journal  = {Transactions of the Association for Computational Linguistics},
  year     = {2025},
  volume   = {13},
  pages    = {690--708},
  abstract = {Large-scale sense-annotated corpora are important for a range of tasks but are hard to come by. Dictionaries that record and describe the vocabulary of a language often offer a small set of real-world example sentences for each sense of a word. However, on their own, these sentences are too few to be used as diachronic sense-annotated corpora. We propose a targeted strategy for training and evaluating generative models producing historically and semantically accurate word usages given any word, sense definition, and year triple. Our results demonstrate that fine-tuned models can generate usages with the same properties as real-world example sentences from a reference dictionary. Thus the generated usages will be suitable for training and testing computational models where large-scale sense-annotated corpora are needed but currently unavailable.},
}