% Publications from 2021: three chapters of the edited volume
% "Computational approaches to semantic change", the volume itself,
% one conference paper and one journal article.
%
% Conventions: lowercase entry types and field names, one field per line,
% names in "Last, First" form joined by " and ", page ranges with "--".
%
% NOTE(review): the entries for the edited volume carry two different ISBNs
% (978-3-96110-312-6 and 978-3-98554-008-2). Presumably these are two
% editions/formats of the same book -- confirm and pick one if a single
% ISBN is wanted.

% --- Chapters in the edited volume -----------------------------------------

@incollection{tahmasebi-etal-2021-survey-307058,
  author    = {Tahmasebi, Nina and Borin, Lars and Jatowt, Adam},
  title     = {Survey of computational approaches to lexical semantic change detection},
  booktitle = {Computational approaches to semantic change},
  editor    = {Tahmasebi, Nina and Borin, Lars and Jatowt, Adam and Xu, Yang and Hengchen, Simon},
  year      = {2021},
  publisher = {Language Science Press},
  address   = {Berlin},
  pages     = {1--91},
  isbn      = {978-3-96110-312-6},
  abstract  = {Our languages are in constant flux driven by external factors such as cultural, societal and technological changes, as well as by only partially understood internal motivations. Words acquire new meanings and lose old senses, new words are coined or borrowed from other languages and obsolete words slide into obscurity. Understanding the characteristics of shifts in the meaning and in the use of words is useful for those who work with the content of historical texts, the interested general public, but also in and of itself. The findings from automatic lexical semantic change detection and the models of diachronic conceptual change are also currently being incorporated in approaches for measuring document across-time similarity, information retrieval from long-term document archives, the design of OCR algorithms, and so on. In recent years we have seen a surge in interest in the academic community in computational methods and tools supporting inquiry into diachronic conceptual change and lexical replacement. This article provides a comprehensive survey of recent computational techniques to tackle both.},
}

@incollection{hengchen-etal-2021-challenges-306972,
  author    = {Hengchen, Simon and Tahmasebi, Nina and Schlechtweg, Dominik and Dubossarsky, Haim},
  title     = {Challenges for computational lexical semantic change},
  booktitle = {Computational approaches to semantic change},
  editor    = {Tahmasebi, Nina and Borin, Lars and Jatowt, Adam and Xu, Yang and Hengchen, Simon},
  year      = {2021},
  publisher = {Language Science Press},
  address   = {Berlin},
  pages     = {341--372},
  isbn      = {978-3-98554-008-2},
  abstract  = {The computational study of lexical semantic change (LSC) has taken off in the past few years and we are seeing increasing interest in the field, from both computational sciences and linguistics. Most of the research so far has focused on methods for modelling and detecting semantic change using large diachronic textual data, with the majority of the approaches employing neural embeddings. While methods that offer easy modelling of diachronic text are one of the main reasons for the spiking interest in LSC, neural models leave many aspects of the problem unsolved. The field has several open and complex challenges. In this chapter, we aim to describe the most important of these challenges and outline future directions.},
}

@incollection{jatowt-etal-2021-computational-307061,
  author    = {Jatowt, Adam and Tahmasebi, Nina and Borin, Lars},
  title     = {Computational approaches to lexical semantic change: Visualization systems and novel applications},
  booktitle = {Computational approaches to semantic change},
  editor    = {Tahmasebi, Nina and Borin, Lars and Jatowt, Adam and Xu, Yang and Hengchen, Simon},
  year      = {2021},
  publisher = {Language Science Press},
  address   = {Berlin},
  pages     = {311--339},
  isbn      = {978-3-96110-312-6},
  abstract  = {The purpose of this chapter is to survey visualization and user interface solutions for understanding lexical semantic change as well as to survey a number of applications of techniques developed in computational analysis of lexical semantic change. We first overview approaches aiming to develop systems that support understanding semantic change in an interactive and visual way. It is generally accepted that computational techniques developed for analyzing and uncovering semantic change are beneficial to linguists, historians, sociologists, and practitioners in numerous related fields, especially within the humanities. However, quite a few non-professional users are equally interested in the histories of words. Developing interactive, visual, engaging, and easy-to-understand systems can help them to acquire relevant knowledge. Second, we believe that other fields could benefit from the research outcomes of computational approaches to lexical semantic change. In general, properly representing the meaning of terms used in the past should be important for a range of natural language processing, information retrieval and other tasks that operate on old texts. In the latter part of the chapter, we then focus on current and potential applications related to computer and information science with the underlying question: “How can modeling semantic change benefit wider downstream applications in these disciplines?”},
}

% --- The edited volume itself ----------------------------------------------
% An edited book is a "book" entry with an "editor" field and no "author".

@book{tahmasebi-etal-2021-computational-306968,
  editor    = {Tahmasebi, Nina and Borin, Lars and Jatowt, Adam and Xu, Yang and Hengchen, Simon},
  title     = {Computational approaches to semantic change},
  year      = {2021},
  publisher = {Language Science Press},
  address   = {Berlin},
  isbn      = {978-3-98554-008-2},
  abstract  = {Semantic change — how the meanings of words change over time — has preoccupied scholars since well before modern linguistics emerged in the late 19th and early 20th century, ushering in a new methodological turn in the study of language change. Compared to changes in sound and grammar, semantic change is the least understood. Ever since, the study of semantic change has progressed steadily, accumulating a vast store of knowledge for over a century, encompassing many languages and language families. Historical linguists also early on realized the potential of computers as research tools, with papers at the very first international conferences in computational linguistics in the 1960s. Such computational studies still tended to be small-scale, method-oriented, and qualitative. However, recent years have witnessed a sea-change in this regard. Big-data empirical quantitative investigations are now coming to the forefront, enabled by enormous advances in storage capability and processing power. Diachronic corpora have grown beyond imagination, defying exploration by traditional manual qualitative methods, and language technology has become increasingly data-driven and semantics-oriented. These developments present a golden opportunity for the empirical study of semantic change over both long and short time spans. A major challenge presently is to integrate the hard-earned knowledge and expertise of traditional historical linguistics with cutting-edge methodology explored primarily in computational linguistics. The idea for the present volume came out of a concrete response to this challenge. The 1st International Workshop on Computational Approaches to Historical Language Change (LChange'19), at ACL 2019, brought together scholars from both fields. This volume offers a survey of this exciting new direction in the study of semantic change, a discussion of the many remaining challenges that we face in pursuing it, and considerably updated and extended versions of a selection of the contributions to the LChange'19 workshop, addressing both more theoretical problems — e.g., discovery of "laws of semantic change" — and practical applications, such as information retrieval in longitudinal text archives.},
}

% --- Conference paper --------------------------------------------------------
% Braces in the title keep the resource name and the language name from being
% lowercased by styles that apply sentence casing.

@inproceedings{hengchen-tahmasebi-2021-supersim-305157,
  author    = {Hengchen, Simon and Tahmasebi, Nina},
  title     = {{SuperSim}: a test set for word similarity and relatedness in {Swedish}},
  booktitle = {Proceedings of the 23rd Nordic Conference on Computational Linguistics (NoDaLiDa), May 31-June 2 2021, Reykjavik, Iceland (online)},
  year      = {2021},
  publisher = {Linköping Electronic Conference Proceedings},
  address   = {Linköping},
  isbn      = {978-91-7929-614-8},
  abstract  = {Language models are notoriously difficult to evaluate. We release SuperSim, a large-scale similarity and relatedness test set for Swedish built with expert human judgments. The test set is composed of 1,360 word-pairs independently judged for both relatedness and similarity by five annotators. We evaluate three different models (Word2Vec, fastText, and GloVe) trained on two separate Swedish datasets, namely the Swedish Gigaword corpus and a Swedish Wikipedia dump, to provide a baseline for future comparison. We release the fully annotated test set, code, baseline models, and data.},
}

% --- Journal article ---------------------------------------------------------

@article{hengchen-tahmasebi-2021-collection-301262,
  author   = {Hengchen, Simon and Tahmasebi, Nina},
  title    = {A Collection of {Swedish} Diachronic Word Embedding Models Trained on Historical Newspaper Data},
  journal  = {Journal of Open Humanities Data},
  year     = {2021},
  volume   = {7},
  number   = {2},
  pages    = {1--7},
  abstract = {This paper describes the creation of several word embedding models based on a large collection of diachronic Swedish newspaper material available through Språkbanken Text, the Swedish language bank. This data was produced in the context of Språkbanken Text’s continued mission to collaborate with humanities and natural language processing (NLP) researchers and to provide freely available language resources, for the development of state-of-the-art NLP methods and tools.},
}