% BibTeX entries

% Technical report: overview of the manual and automatic annotations in Språkbanken's corpora and of the annotation tools used by the Sparv pipeline.
% NOTE(review): the citation key contains a non-ASCII character; it is kept unchanged so existing citations still resolve, but 8-bit BibTeX toolchains may reject it.
% NOTE(review): institution is inferred from the abstract (the report describes itself as a Språkbanken project) - confirm the issuing body.
@techreport{Ljunglöf-Peter2019-281222,
	title        = {Assessing the quality of {Språkbanken’s} annotations},
	abstract     = {Most of the corpora in Språkbanken Text consist of unannotated plain text, such as almost all newspaper texts, social media texts, novels and official documents. We also have some corpora that are manually annotated in different ways, such as Talbanken (annotated for part-of-speech and syntactic structure), and the Stockholm Umeå Corpus (annotated for part-of-speech). Språkbanken’s annotation pipeline Sparv aims to automatise the work of automatically annotating all our corpora, while still keeping the manual annotations intact. When all corpora are annotated, they can be made available, e.g., in the corpus search tools Korp and Strix. Until now there has not been any comprehensive overview of the annotation tools and models that Sparv has been using for the last eight years. Some of them have not been updated since the start, such as the part-of-speech tagger Hunpos and the dependency parser MaltParser. There are also annotation tools that we still have not included, such as a constituency-based parser.
Therefore Språkbanken initiated a project with the aim of conducting such an overview. This document is the outcome of that project, and it contains descriptions of the types of manual and automatic annotations that we currently have in Språkbanken, as well as an incomplete overview of the state-of-the-art with regards to annotation tools and models.},
	author       = {Ljunglöf, Peter and Zechner, Niklas and Nieto Piña, Luis and Adesam, Yvonne and Borin, Lars},
	institution  = {Språkbanken Text},
	year         = {2019},
}

% Workshop paper (Globalex Workshop at LREC 2020) on disambiguating and extending Bring's thesaurus with the help of the Saldo lexicon.
% NOTE(review): the keywords field holds a term list that was fused into the abstract text in the export - confirm against the published paper.
@inproceedings{Zechner-Niklas2020-296900,
	title        = {Towards a {Swedish} {Roget}-Style Thesaurus for {NLP}},
	abstract     = {Bring’s thesaurus (Bring) is a Swedish counterpart of Roget, and its digitized version could make a valuable language resource for use in many and diverse natural language processing (NLP) applications. From the literature we know that Roget-style thesauruses and wordnets have complementary strengths in this context, so both kinds of lexical-semantic resource are good to have. However, Bring was published in 1930, and its lexical items are in the form of lemma–POS pairings. In order to be useful in our NLP systems, polysemous lexical items need to be disambiguated, and a large amount of modern vocabulary must be added in the proper places in Bring. The work presented here describes experiments aiming at automating these two tasks, at least in part, where we use the structure of an existing Swedish semantic lexicon – Saldo – both for disambiguation of ambiguous Bring entries and for addition of new entries to Bring.},
	keywords     = {lexicon, word sense disambiguation, topic detection},
	booktitle    = {Proceedings of the 2020 Globalex Workshop on Linked Lexicography. Language Resources and Evaluation Conference (LREC 2020), Marseille, 11–16 May 2020},
	author       = {Zechner, Niklas and Borin, Lars},
	year         = {2020},
	publisher    = {European Language Resources Association},
	address      = {Paris},
	isbn         = {979-10-95546-46-7},
}

% Chapter in the edited volume on the Swedish FrameNet++ (John Benjamins, 2021).
% The volume editors are stored in the editor field rather than appended to booktitle, so styles can format them.
@incollection{Borin-Lars2021-311385,
	title        = {{Swedish} {FrameNet++} – lexical samsara},
	booktitle    = {The {Swedish} {FrameNet++}: Harmonization, integration, method development and practical language technology applications},
	editor       = {Dannélls, Dana and Borin, Lars and Friberg Heppin, Karin},
	author       = {Borin, Lars and Forsberg, Markus and Lönngren, Lennart and Zechner, Niklas},
	year         = {2021},
	publisher    = {John Benjamins},
	address      = {Amsterdam},
	isbn         = {9789027209900},
	pages        = {69--95},
}

% Journal article on derivatives of regular expressions extended with cut and iterated-cut operators.
@article{Zechner-Niklas2020-303708,
	title        = {Derivatives of regular expressions with cuts},
	abstract     = {Derivatives of regular expressions are an operation which for a given expression produces an expression for what remains after a specific symbol has been read. This can be used as a step in the process of transforming an expression into a finite string automaton. Cuts are an extension of the ordinary regular expressions; the cut operator is essentially a concatenation without backtracking, formalising a behaviour found in many programming languages. Just as for concatenation, we can also define an iterated cut operator. We show and derive expressions for the derivatives of regular expressions with cuts and iterated cuts. © Institut für Informatik · Justus-Liebig-Universität Giessen.},
	journal      = {Journal of Automata, Languages and Combinatorics},
	author       = {Zechner, Niklas},
	year         = {2020},
	volume       = {25},
	number       = {4},
	pages        = {349--355},
}