Skip to main content


	title        = {Retrieving Occurrences of Grammatical Constructions},
	abstract     = {Finding authentic examples of grammatical constructions is central in constructionist approaches to linguistics, language processing, and second language learning. In this paper, we address this problem as an information retrieval (IR) task. To facilitate research in this area, we built a benchmark collection by annotating the occurrences of six constructions in a Swedish corpus. Furthermore, we implemented a simple and flexible retrieval system for finding construction occurrences, in which the user specifies a ranking function using lexical-semantic similarities (lexicon-based or distributional). The system was evaluated using standard IR metrics on the new benchmark, and we saw that lexical-semantical rerankers improve significantly over a purely surface-oriented system, but must be carefully tailored for each individual construction.},
	booktitle    = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers, December 11–17; Osaka, Japan},
	author       = {Ehrlemark, Anna and Johansson, Richard and Lyngfelt, Benjamin},
	year         = {2016},
	ISBN         = {978-4-87974-702-0},

	title        = {Embedding Senses for Efficient Graph-based Word Sense Disambiguation},
	abstract     = {We propose a simple graph-based method for word sense disambiguation (WSD) where sense and context embeddings are constructed by applying the Skip-gram method to random walks over the sense graph. We used this method to build a WSD system for Swedish using the SALDO lexicon, and evaluated it on six different annotated test sets. In all cases, our system was several orders of magnitude faster than a state-of-the-art PageRank-based system, while outperforming a random baseline soundly.},
	booktitle    = {Proceedings of TextGraphs-10: the Workshop on Graph-based Methods for Natural Language Processing},
	author       = {Nieto Piña, Luis and Johansson, Richard},
	year         = {2016},
	publisher    = {Association for Computational Linguistics},

	title        = {A Multi-domain Corpus of Swedish Word Sense Annotation},
	abstract     = {We describe the word sense annotation layer in Eukalyptus, a freely available five-domain corpus of contemporary Swedish with several annotation layers. The annotation uses the SALDO lexicon to define the sense inventory, and allows word sense annotation of compound segments and multiword units. We give an overview of the new annotation tool developed for this project, and finally present an analysis of the inter-annotator agreement between two annotators.},
	booktitle    = {10th edition of the Language Resources and Evaluation Conference, 23–28 May 2016, Portorož (Slovenia)},
	author       = {Johansson, Richard and Adesam, Yvonne and Bouma, Gerlof and Hedberg, Karin},
	year         = {2016},
	publisher    = {European Language Resources Association},
	ISBN         = {978-2-9517408-9-1},