% Bibliography database: 16 entries, each listing Malin Ahlberg as author or
% co-author (conference papers plus two journal articles, 2012-2018).
%
% Conventions used below:
%   * One field per line; entry types and field names in lowercase.
%   * Citation keys are unchanged, so existing citations keep resolving.
%   * Proper nouns in titles are brace-protected against style recasing.
%   * LaTeX specials (ampersand, percent sign) are escaped in field text.
%   * Non-ASCII characters are written directly; presumably the file is
%     UTF-8 and is processed by a Unicode-aware toolchain -- TODO confirm.

@inproceedings{ahlberg-etal-2016-karp-246072,
  author    = {Ahlberg, Malin and Borin, Lars and Forsberg, Markus and Olsson, Olof and Schumacher, Anne and Uppström, Jonatan},
  title     = {{Karp}: {Språkbanken’s} Open Lexical Infrastructure},
  booktitle = {Globalex 2016, May 24, Portorož, Slovenia},
  year      = {2016},
}

@inproceedings{ahlberg-etal-2013-korp-178355,
  author    = {Ahlberg, Malin and Borin, Lars and Forsberg, Markus and Hammarstedt, Martin and Olsson, Leif-Jöran and Olsson, Olof and Roxendal, Johan and Uppström, Jonatan},
  title     = {{Korp} and {Karp} – a bestiary of language resources: the research infrastructure of {Språkbanken}},
  booktitle = {Proceedings of the 19th Nordic Conference of Computational Linguistics (NODALIDA 2013), May 22–24, 2013, Oslo University, Norway. NEALT Proceedings Series 16},
  year      = {2013},
  publisher = {Linköping University Electronic Press},
  address   = {Linköping},
  abstract  = {A central activity in Språkbanken, an R\&D unit at the University of Gothenburg, is the systematic construction of a research infrastructure based on interoperability and widely accepted standards for metadata and data. The two main components of this infrastructure deal with text corpora and with lexical resources. For modularity and flexibility, both components have a backend, or server-side part, accessed through an API made up of a set of well-defined web services. This means that there can be any number of different user interfaces to these components, corresponding, e.g., to different research needs. Here, we will demonstrate the standard corpus and lexicon search interfaces, designed primarily for linguistic searches: Korp and Karp.},
}

% NOTE(review): the key below contains an apostrophe. It is kept as-is for
% backward compatibility; some tools may reject it -- verify with your toolchain.
@inproceedings{ahlberg-etal-2016-sprakbanken's-246063,
  author    = {Ahlberg, Malin and Borin, Lars and Forsberg, Markus and Olsson, Olof and Schumacher, Anne and Uppström, Jonatan},
  title     = {{Språkbanken’s} Open Lexical Infrastructure},
  booktitle = {SLTC 2016. The Sixth Swedish Language Technology Conference. Umeå University, 17-18 November, 2016},
  year      = {2016},
  abstract  = {Karp is an open lexical infrastructure and a web based tool for searching, exploring and developing lexical resources. Språkbanken currently hosts a number of lexicons in Karp and on-going work aims at broadening the type of resources that can be developed in the system. This abstract gives a short overview of Karp's basic functionality, and describes some current projects and on-going work.},
}

@inproceedings{malm-etal-2018-uneek-267351,
  author    = {Malm, Per and Ahlberg, Malin and Rosén, Dan},
  title     = {{Uneek}: a Web Tool for Comparative Analysis of Annotated Texts},
  booktitle = {Proceedings of the LREC 2018 Workshop International FrameNet Workshop 2018: Multilingual Framenets and Constructicons, 7-12 May 2018, Miyazaki (Japan) / [ed] Tiago Timponi Torrent, Lars Borin \& Collin F. Baker, 2018},
  year      = {2018},
  isbn      = {979-10-95546-04-7},
  abstract  = {In this paper, we present Uneek, a web based linguistic tool that performs set operations on raw or annotated texts. The tool may be used for automatic distributional analysis, and for disambiguating polysemy with a method that we refer to as semi-automatic uniqueness differentiation (SUDi). Uneek outputs the intersection and differences between their listed attributes, e.g. POS, dependencies, word forms, frame elements. This makes it an ideal supplement to methods for lumping or splitting in frame development processes. In order to make some of Uneek’s functions more clear, we employ SUDi on a small data set containing the polysemous verb ``bake''. As of now, Uneek may only run two files at a time, but there are plans to develop the tool so that it may simultaneously operate on multiple files. Finally, we relate the developmental plans for added functionality, to how such functions may support FrameNet work in the future.},
}

% NOTE(review): the editor list in the booktitle had misplaced commas; it is
% now punctuated as three names -- confirm against the proceedings front matter.
@inproceedings{adesam-etal-2018-fsvreader-267311,
  author    = {Adesam, Yvonne and Ahlberg, Malin and Bouma, Gerlof},
  title     = {{FSvReader} – Exploring {Old} {Swedish} Cultural Heritage Texts},
  booktitle = {CEUR Workshop Proceedings, vol. 2084. Proceedings of the Digital Humanities in the Nordic Countries 3rd Conference Helsinki, Finland, March 7-9, 2018. Edited by Eetu Mäkelä, Mikko Tolonen, Jouni Tuominen},
  year      = {2018},
  publisher = {University of Helsinki, Faculty of Arts},
  address   = {Helsinki},
  abstract  = {This paper describes FSvReader, a tool for easier access to Old Swedish (13th–16th century) texts. Through automatic fuzzy linking of words in a text to a dictionary describing the language of the time, the reader has direct access to dictionary pop-up definitions, in spite of the large amount of morphological and spelling variation. The linked dictionary entries can also be used for simple searches in the text, highlighting possible further instances of the same entry.},
}

% NOTE(review): the third author's name is spelled here as in the two 2014
% entries of this file (it was accented in this entry only) -- confirm
% against the publication.
@inproceedings{ahlberg-etal-2015-paradigm-217987,
  author    = {Ahlberg, Malin and Forsberg, Markus and Hulden, Mans},
  title     = {Paradigm classification in supervised learning of morphology},
  booktitle = {Proceedings of the 2015 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies},
  year      = {2015},
  abstract  = {Supervised morphological paradigm learning by identifying and aligning the longest common subsequence found in inflection tables has recently been proposed as a simple yet competitive way to induce morphological patterns. We combine this non-probabilistic strategy of inflection table generalization with a discriminative classifier to permit the reconstruction of complete inflection tables of unseen words. Our system learns morphological paradigms from labeled examples of inflection patterns (inflection tables) and then produces inflection tables from unseen lemmas or base forms. We evaluate the approach on datasets covering 11 different languages and show that this approach results in consistently higher accuracies vis-a-vis other methods on the same task, thus indicating that the general method is a viable approach to quickly creating high-accuracy morphological resources.},
}

% NOTE(review): address holds the publisher's city, matching the other
% Linköping University Electronic Press entry in this file (it previously
% held the university name) -- confirm.
@inproceedings{ahlberg-etal-2015-case-217988,
  author    = {Ahlberg, Malin and Andersson, Peter and Forsberg, Markus and Tahmasebi, Nina},
  title     = {A case study on supervised classification of {Swedish} pseudo-coordination},
  booktitle = {Proceedings of the 20th Nordic Conference of Computational Linguistics, NODALIDA 2015, May 11-13, 2015, Vilnius, Lithuania},
  year      = {2015},
  publisher = {Linköping University Electronic Press},
  address   = {Linköping},
  isbn      = {978-91-7519-098-3},
  abstract  = {We present a case study on supervised classification of Swedish pseudo-coordination (SPC). The classification is attempted on the type-level with data collected from two data sets: a blog corpus and a fiction corpus. Two small experiments were designed to evaluate the feasibility of this task. The first experiment explored a classifier’s ability to discriminate pseudo-coordinations from ordinary verb coordinations, given a small labeled data set created during the experiment. The second experiment evaluated how well the classifier performed at detecting and ranking SPCs in a set of unlabeled verb coordinations, to investigate if it could be used as a semi-automatic discovery procedure to find new SPCs.},
}

@article{adesam-etal-2016-sprakteknologi-237884,
  author   = {Adesam, Yvonne and Ahlberg, Malin and Andersson, Peter and Borin, Lars and Bouma, Gerlof and Forsberg, Markus},
  title    = {Språkteknologi för svenska språket genom tiderna},
  journal  = {Kungliga Skytteanska Samfundets Handlingar},
  year     = {2016},
  volume   = {76},
  number   = {Studier i svensk språkhistoria 13},
  pages    = {65--87},
  abstract = {Språkbanken, the Swedish Language Bank, is a language technology research unit at the Department of Swedish, University of Gothenburg. We develop language resources – such as corpora, lexical resources, and analytical tools – for all variants of Swedish, from Old Swedish laws to present-day social media. Historical texts offer exciting theoretical and methodological challenges for language technology because they often defy the assumption inherent in most automatic analysis tools that the texts contain a standardized written language. In this article, we describe our ongoing work on the development of annotated historical corpora, as well as our efforts on linking various resources (both corpora and lexical resources). This research advances the state of the art of language technology as well as enables new research for scholars in other disciplines.},
}

@inproceedings{ahlberg-etal-2014-swedish-210083,
  author    = {Ahlberg, Malin and Borin, Lars and Dannélls, Dana and Forsberg, Markus and Toporowska Gronostaj, Maria and Friberg Heppin, Karin and Johansson, Richard and Kokkinakis, Dimitrios and Olsson, Leif-Jöran and Uppström, Jonatan},
  title     = {{Swedish} {FrameNet++} The Beginning of the End and the End of the Beginning},
  booktitle = {Proceedings of the Fifth Swedish Language Technology Conference, Uppsala, 13-14 November 2014},
  year      = {2014},
}

@inproceedings{adesam-etal-2014-computer-198794,
  author    = {Adesam, Yvonne and Ahlberg, Malin and Andersson, Peter and Bouma, Gerlof and Forsberg, Markus and Hulden, Mans},
  title     = {Computer-aided Morphology Expansion for {Old} {Swedish}},
  booktitle = {Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC'14) May 26-31, 2014 Reykjavik, Iceland},
  year      = {2014},
  pages     = {1102--1105},
  isbn      = {978-2-9517408-8-4},
  abstract  = {In this paper we describe and evaluate a tool for paradigm induction and lexicon extraction that has been applied to Old Swedish. The tool is semi-supervised and uses a small seed lexicon and unannotated corpora to derive full inflection tables for input lemmata. In the work presented here, the tool has been modified to deal with the rich spelling variation found in Old Swedish texts. We also present some initial experiments, which are the first steps towards creating a large-scale morphology for Old Swedish.},
}

@inproceedings{ahlberg-etal-2014-semi-198791,
  author    = {Ahlberg, Malin and Forsberg, Markus and Hulden, Mans},
  title     = {Semi-supervised learning of morphological paradigms and lexicons},
  booktitle = {Proceedings of the 14th Conference of the European Chapter of the Association for Computational Linguistics, Gothenburg, Sweden 26–30 April 2014},
  year      = {2014},
  pages     = {569--578},
  isbn      = {978-1-937284-78-7},
  abstract  = {We present a semi-supervised approach to the problem of paradigm induction from inflection tables. Our system extracts generalizations from inflection tables, representing the resulting paradigms in an abstract form. The process is intended to be language-independent, and to provide human-readable generalizations of paradigms. The tools we provide can be used by linguists for the rapid creation of lexical resources. We evaluate the system through an inflection table reconstruction task using Wiktionary data for German, Spanish, and Finnish. With no additional corpus information available, the evaluation yields per word form accuracy scores on inflecting unseen base forms in different languages ranging from 87.81\% (German nouns) to 99.52\% (Spanish verbs); with additional unlabeled text corpora available for training the scores range from 91.81\% (German nouns) to 99.58\% (Spanish verbs). We separately evaluate the system in a simulated task of Swedish lexicon creation, and show that on the basis of a small number of inflection tables, the system can accurately collect from a list of noun forms a lexicon with inflection information ranging from 100.0\% correct (collect 100 words), to 96.4\% correct (collect 1000 words).},
}

@inproceedings{ahlberg-bouma-2012-best-172769,
  author    = {Ahlberg, Malin and Bouma, Gerlof},
  title     = {A best-first anagram hashing filter for approximate string matching with generalized edit distance},
  booktitle = {24th International Conference on Computational Linguistics COLING, 8-15 December 2012, Mumbai, India. Proceedings},
  year      = {2012},
  abstract  = {This paper presents an efficient method for approximate string matching against a lexicon. We define a filter that for each source word selects a small set of target lexical entries, from which the best match is then selected using generalized edit distance, where edit operations can be assigned an arbitrary weight. The filter combines a specialized hash function with best-first search. Our work extends and improves upon a previously proposed hash-based filter, developed for matching with uniform-weight edit distance. We evaluate an approximate matching system implemented with the new best-first filter, by conducting several experiments on a historical corpus and a set of weighted rules taken from the literature. We present running times and discuss how performance varies using different stopping criteria and target lexica. The results show that the filter is suitable for large rule sets and million word corpora, and encourage further development.},
}

@article{andersson-ahlberg-2013-towards-181972,
  author  = {Andersson, Peter and Ahlberg, Malin},
  title   = {Towards automatic tracking of lexical change: linking historical lexical resources},
  journal = {NEALT Proceedings Series},
  year    = {2013},
  volume  = {18},
}

@inproceedings{ahlberg-enache-2012-type-166722,
  author    = {Ahlberg, Malin and Enache, Ramona},
  title     = {A Type-Theoretical Wide-Coverage Computational Grammar for {Swedish}},
  booktitle = {Proceedings of the 15th International Conference, TSD (Text, Speech and Dialogue) 2012, Brno, Czech Republic, September 3-7, 2012, LNCS series ``Text, Speech and Dialogue''},
  year      = {2012},
  volume    = {7499},
  pages     = {183--190},
  isbn      = {978-3-642-32790-2},
}

@inproceedings{adesam-etal-2012-processing-166657,
  author    = {Adesam, Yvonne and Ahlberg, Malin and Bouma, Gerlof},
  title     = {Processing spelling variation in historical text},
  booktitle = {Proceedings of the Fourth Swedish Language Technology Conference (SLTC)},
  year      = {2012},
}

@inproceedings{adesam-etal-2012-bokstaffua-163218,
  author    = {Adesam, Yvonne and Ahlberg, Malin and Bouma, Gerlof},
  title     = {bokstaffua, bokstaffwa, bokstafwa, bokstaua, bokstawa... Towards lexical link-up for a corpus of {Old} {Swedish}},
  booktitle = {Proceedings of the LTHist workshop at Konvens},
  year      = {2012},
}