Skip to main content
Språkbanken Text is a department within Språkbanken.

BibTeX

% Conference paper (KONVENS 2024) on using unary and binary inverted indexes to speed up corpus queries.
@inProceedings{ljunglof-etal-2024-binary-342402,
	author    = {Ljunglöf, Peter and Smallbone, Nicholas and Thoresson, Mijo and Salomonsson, Victor},
	title     = {Binary indexes for optimising corpus queries},
	booktitle = {Proceedings of the 20th Conference on Natural Language Processing (KONVENS 2024), September 10-13, 2024, Vienna, Austria},
	year      = {2024},
	publisher = {Association for Computational Linguistics},
	ISBN      = {9798331304843},
	abstract  = {To be able to search for patterns in annotated text corpora is crucial for many different research disciplines. However, searching for complex patterns in large corpora can take long time – sometimes several minutes or even hours.

We investigate how inverted indexes can be used for efficient searching in large annotated corpora, and in particular binary indexes. We show how corpus queries are translated into lookups in unary and binary inverted indexes, and give efficient strategies for combining the results using efficient set operations. In addition we discuss how to make use of binary indexes for more complex query types.},
}

% Workshop paper presenting MoCCA, a model of comparative concepts for aligning Constructicons across languages.
% The acronym is brace-protected in the title so that sentence-casing styles do not lowercase it to "Mocca".
@inProceedings{lorenzi-etal-2024-mocca-338189,
	title        = {{MoCCA}: A Model of Comparative Concepts for Aligning Constructicons},
	abstract     = {This paper presents MoCCA, a Model of Comparative Concepts for Aligning Constructicons under development by a consortium of research groups building Constructicons of different languages including Brazilian Portuguese, English, German and Swedish. The Constructicons will be aligned by using comparative concepts (CCs) providing language-neutral definitions of linguistic properties. The CCs are drawn from typological research on grammatical categories and constructions, and from FrameNet frames, organized in a conceptual network. Language-specific constructions are linked to the CCs in accordance with general principles. MoCCA is organized into files of two types: a largely static CC Database file and multiple Linking files containing relations between constructions in a Constructicon and the CCs. Tools are planned to facilitate visualization of the CC network and linking of constructions to the CCs. All files and guidelines will be versioned, and a mechanism is set up to report cases where a language-specific construction cannot be easily linked to existing CCs.},
	booktitle    = {Proceedings of the 20th Joint ACL - ISO Workshop on Interoperable Semantic Annotation @LREC-COLING-2024, 20 May, 2024, Torino, Italia},
	author       = {Lorenzi, Arthur and Ljunglöf, Peter and Lyngfelt, Benjamin and Torrent, Tiago Timponi and Croft, William and Ziem, Alexander and Böbel, Nina and Bäckström, Linnéa and Uhrig, Peter and Matos, Ely},
	year         = {2024},
	publisher    = {ELRA},
	ISBN         = {978-2-493814-32-6},
}

% Paper in the proceedings of the thirty-eighth Svenskans beskrivning meeting (Örebro, 4–6 May 2022; published 2024).
% Title and booktitle are in Swedish and are kept verbatim.
@inProceedings{lyngfelt-etal-2024-flersprakig-338191,
	title        = {Flerspråkig konstruktikografi med hjälp av språkneutrala jämförelsebegrepp},
	booktitle    = {Svenskans beskrivning. Förhandlingar vid trettioåttonde sammankomsten, del 1, Örebro 4–6 maj 2022},
	author       = {Lyngfelt, Benjamin and Andréasson, Maia and Blensenius, Kristian and Bäckström, Linnéa and Höder, Steffen and Ljunglöf, Peter and Uppström, Jonatan},
	year         = {2024},
	publisher    = {Örebro universitet},
	address      = {Örebro},
	ISBN         = {978-91-87789-89-2},
}