@comment{
	Språkbanken Text is a department within Språkbanken.
	BibTeX
}

% Conference paper (NODALIDA 2013) presenting the Korp and Karp search interfaces.
% The proceedings series name and number live in `series`/`number` instead of
% being appended to `booktitle`; proper nouns in the title are brace-protected
% so sentence-casing styles keep their capitals; `&` is escaped for LaTeX.
@inProceedings{ahlberg-etal-2013-korp-178355,
	title        = {{Korp} and {Karp} – a bestiary of language resources: the research infrastructure of {Språkbanken}},
	abstract     = {A central activity in Språkbanken, an R\&D unit at the University of Gothenburg, is the systematic construction of a research infrastructure based on interoperability and widely accepted standards for metadata and data. The two main components of this infrastructure deal with text corpora and with lexical resources. For modularity and flexibility, both components have a backend, or server-side part, accessed through an API made up of a set of well-defined web services. This means that there can be any number of different user interfaces to these components, corresponding, e.g., to different research needs. Here, we will demonstrate the standard corpus and lexicon search interfaces, designed primarily for linguistic searches: Korp and Karp.},
	booktitle    = {Proceedings of the 19th Nordic Conference of Computational Linguistics (NODALIDA 2013), May 22–24, 2013, Oslo University, Norway},
	series       = {NEALT Proceedings Series},
	number       = {16},
	author       = {Ahlberg, Malin and Borin, Lars and Forsberg, Markus and Hammarstedt, Martin and Olsson, Leif-Jöran and Olsson, Olof and Roxendal, Johan and Uppström, Jonatan},
	year         = {2013},
	publisher    = {Linköping University Electronic Press},
	address      = {Linköping},
}

% Conference paper (eLex 2013) on Karp, the lexical infrastructure of Språkbanken.
% The volume editors are stored in `editor` (names separated by " and ") rather
% than being embedded in `booktitle`; stray whitespace in the ISBN, publisher
% and abstract has been removed.
@inProceedings{borin-etal-2013-lexical-186032,
	title        = {The lexical editing system of {Karp}},
	abstract     = {Karp is the open lexical infrastructure of Språkbanken (the Swedish Language Bank). The infrastructure has three main functions: (1) to support the work on creating, curating, and integrating our various lexical resources; (2) to publish the resources, making them searchable and downloadable; and (3) to offer advanced editing functionalities. An important feature of the lexical infrastructure is also that we maintain a strong bidirectional connection to our corpus infrastructure. At the heart of the infrastructure is the SweFN++ project with the goal to create free Swedish lexical resources geared towards language technology applications. The infrastructure currently hosts 23 Swedish lexical resources. The resources are integrated through links to a pivot lexical resource, SALDO, a large morphological and lexical-semantic resource for modern Swedish.},
	booktitle    = {Electronic lexicography in the 21st century: thinking outside the paper. Proceedings of the eLex 2013 conference, 17-19 October 2013, Tallinn, Estonia},
	editor       = {Kosem, I. and Kallas, J. and Gantar, P. and Krek, S. and Langemets, M. and Tuulik, M.},
	author       = {Borin, Lars and Forsberg, Markus and Olsson, Leif-Jöran and Olsson, Olof and Uppström, Jonatan},
	year         = {2013},
	publisher    = {Trojina, Institute for Applied Slovene Studies / Eesti Keele Instituut},
	address      = {Ljubljana/Tallinn},
	ISBN         = {978-961-93594-0-2},
}

% Workshop paper (UnstructureNLP 2013, held at CIKM 2013) on a knowledge-based
% approach to culturomics for Swedish text.
% The redundant one-day range "28 October 2013 through 28 October 2013" in
% `booktitle` has been collapsed to the single date.
@inProceedings{borin-etal-2013-mining-188846,
	title        = {Mining semantics for culturomics: towards a knowledge-based approach},
	abstract     = {The massive amounts of text data made available through the Google Books digitization project have inspired a new field of big-data textual research. Named culturomics, this field has attracted the attention of a growing number of scholars over recent years. However, initial studies based on these data have been criticized for not referring to relevant work in linguistics and language technology. This paper provides some ideas, thoughts and first steps towards a new culturomics initiative, based this time on Swedish data, which pursues a more knowledge-based approach than previous work in this emerging field. The amount of new Swedish text produced daily and older texts being digitized in cultural heritage projects grows at an accelerating rate. These volumes of text being available in digital form have grown far beyond the capacity of human readers, leaving automated semantic processing of the texts as the only realistic option for accessing and using the information contained in them. The aim of our recently initiated research program is to advance the state of the art in language technology resources and methods for semantic processing of Big Swedish text and focus on the theoretical and methodological advancement of the state of the art in extracting and correlating information from large volumes of Swedish text using a combination of knowledge-based and statistical methods.},
	booktitle    = {2013 ACM International Workshop on Mining Unstructured Big Data Using Natural Language Processing, UnstructureNLP 2013, Held at 22nd ACM International Conference on Information and Knowledge Management, CIKM 2013; San Francisco, CA; United States; 28 October 2013},
	author       = {Borin, Lars and Dubhashi, Devdatt and Forsberg, Markus and Johansson, Richard and Kokkinakis, Dimitrios and Nugues, Pierre},
	year         = {2013},
	ISBN         = {978-1-4503-2415-1},
	pages        = {3--10},
}

% Journal article describing SALDO and comparing it with Princeton WordNet.
% "SALDO" and "WordNet's" are brace-protected against sentence-casing styles;
% the journal name is given in title case; the stray line break that split
% "PWN-style" in the abstract has been removed.
@article{borin-etal-2013-saldo-188604,
	title        = {{SALDO}: a touch of yin to {WordNet's} yang},
	abstract     = {The English-language Princeton WordNet (PWN) and some wordnets for other languages have been extensively used as lexical–semantic knowledge sources in language technology applications, due to their free availability and their size. The ubiquitousness of PWN-type wordnets tends to overshadow the fact that they represent one out of many possible choices for structuring a lexical-semantic resource, and it could be enlightening to look at a differently structured resource both from the point of view of theoretical–methodological considerations and from the point of view of practical text processing requirements. The resource described here—SALDO—is such a lexical–semantic resource, intended primarily for use in language technology applications, and offering an alternative organization to PWN-style wordnets. We present our work on SALDO, compare it with PWN, and discuss some implications of the differences. We also describe an integrated infrastructure for computational lexical resources where SALDO forms the central component.},
	journal      = {Language Resources and Evaluation},
	author       = {Borin, Lars and Forsberg, Markus and Lönngren, Lennart},
	year         = {2013},
	volume       = {47},
	number       = {4},
	pages        = {1191--1211},
}

% Conference paper (eLex 2013) introducing the Swedish Constructicon (SweCxn).
% Editors, publisher and publisher location are split out of `booktitle` into
% their own fields; trailing whitespace and blank lines in the title and
% abstract have been removed; the resource name in the title is brace-protected.
@inProceedings{skoldberg-etal-2013-between-186041,
	title        = {Between Grammars and Dictionaries: a {Swedish Constructicon}},
	abstract     = {This paper introduces the Swedish Constructicon (SweCxn), a database of Swedish constructions currently under development. We also present a small study of the treatment of constructions in Swedish (paper) dictionaries, thus illustrating the need for a constructionist approach, and discuss three different methods used to identify potential constructions for inclusion in the constructicon. SweCxn is a freely available electronic resource, with a particular focus on semi-general linguistic patterns of the type that are difficult to account for from a purely lexicographic or a purely grammatical perspective, and which therefore have tended to be neglected in both dictionaries and grammars. Far from being a small set of borderline cases, such constructions are both numerous and common. They are also quite problematic for second language acquisition as well as LT applications. Accordingly, various kinds of multi-word units have received more attention in recent years, not least from a lexicographic perspective. The coverage, however, is only partial, and the productivity of many constructions is hard to capture from a lexical viewpoint. To identify constructions for SweCxn, we use a combination of methods, such as working from existing construction descriptions for Swedish and other languages, applying LT tools to discover recurring patterns in texts, and extrapolating constructional information from dictionaries.},
	booktitle    = {Electronic lexicography in the 21st century: thinking outside the paper. Proceedings of the eLex 2013 conference, 17-19 October 2013, Tallinn, Estonia},
	editor       = {Kosem, I. and Kallas, J. and Gantar, P. and Krek, S. and Langemets, M. and Tuulik, M.},
	author       = {Sköldberg, Emma and Bäckström, Linnéa and Borin, Lars and Forsberg, Markus and Lyngfelt, Benjamin and Olsson, Leif-Jöran and Prentice, Julia and Rydstedt, Rudolf and Tingsell, Sofia and Uppström, Jonatan},
	year         = {2013},
	publisher    = {Trojina, Institute for Applied Slovene Studies / Eesti Keele Instituut},
	address      = {Ljubljana/Tallinn},
	pages        = {310--327},
}

% Edited proceedings volume of the NODALIDA 2013 workshop on lexical semantic
% resources for NLP.
% `@edited_book` is not a standard entry type (classic BibTeX styles skip
% unknown types), so the record uses `@proceedings`. Series name and number are
% those given for this volume in the booktitle of a paper published in it.
@proceedings{borin-etal-2013-proceedings-190260,
	title        = {Proceedings of the workshop on lexical semantic resources for {NLP} at {NODALIDA} 2013, {May} 22-24, 2013, {Oslo}, {Norway}},
	editor       = {Borin, Lars and Fjeld, Ruth Vatvedt and Forsberg, Markus and Nimb, Sanni and Nugues, Pierre and Pedersen, Bolette Sandford},
	year         = {2013},
	series       = {NEALT Proceedings Series},
	number       = {19},
	publisher    = {Linköping University Electronic Press},
	address      = {Linköping},
	ISBN         = {978-91-7519-586-5},
}

% Journal article on theoretical and conceptual issues in the Swedish
% FrameNet++ (SweFN++) project and the planned Swedish constructicon.
% `&` in "R&D" is escaped for LaTeX; the project name in the title is
% brace-protected; the trailing newline inside the abstract has been removed.
@article{borin-etal-2013-close-187063,
	title        = {Close encounters of the fifth kind: Some linguistic and computational aspects of the {Swedish FrameNet++} project},
	abstract     = {The Swedish FrameNet++ (SweFN++) project aims at developing an integrated Swedish lexical macro-resource to be used primarily in language technology R\&D to build natural language processing (NLP) applications. Most of the component resources making up SweFN++ are existing digital lexical resources; in their case the central project effort is directed at making them interoperable on as many levels as possible. An important new resource being created in the project is a Swedish framenet. Now a sister project is starting with the aim of adding a Swedish constructicon (SweCxn) to the macro-resource. In this paper, we discuss some theoretical and conceptual issues which have arisen in the course of our work on the SweFN++ and the planning of the SweCxn, in the close encounter between the practical requirements of NLP and the theory and practice of linguistic – lexical and grammatical – description.},
	journal      = {Veredas},
	author       = {Borin, Lars and Forsberg, Markus and Lyngfelt, Benjamin},
	year         = {2013},
	volume       = {17},
	number       = {1},
	pages        = {28--43},
}

% Conference paper (NODALIDA 2013) comparing Nordic and Baltic wordnets aligned
% via Princeton Core WordNet, with the "WordTies" web interface.
% `number` needs a matching `series` (standard styles warn about a number
% without a series), so the series name is moved out of `booktitle` into
% `series`. Proper nouns in the title are brace-protected.
@inProceedings{pedersen-etal-2013-nordic-178357,
	title        = {{Nordic} and {Baltic} wordnets aligned and compared through “{WordTies}”},
	abstract     = {During the last few years, extensive wordnets have been built locally for the Nordic and Baltic languages applying very different compilation strategies. The aim of the present investigation is to consolidate and examine these wordnets through an alignment via Princeton Core WordNet and thereby compare them along the measures of taxonomical structure, synonym structure, and assigned relations to approximate to a best practice. A common web interface and visualizer “WordTies” is developed to facilitate this purpose. Four bilingual wordnets are automatically processed and evaluated exposing interesting differences between the wordnets. Even if the alignments are judged to be of a good quality, the precision of the translations vary due to considerable differences in hyponymy depth and interpretation of the synset. All seven monolingual and four bilingual wordnets as well as WordTies have been made available via META-SHARE through the META-NORD project.},
	booktitle    = {Proceedings of the 19th Nordic Conference of Computational Linguistics (NODALIDA 2013), May 22–24, 2013, Oslo University, Norway},
	series       = {NEALT Proceedings Series},
	number       = {16},
	author       = {Pedersen, Bolette and Borin, Lars and Forsberg, Markus and Kahusk, Neeme and Lindén, Krister and Niemi, Jyrki and Nisbeth, Niklas and Nygaard, Lars and Orav, Heili and Rögnvaldsson, Eiríkur and Seaton, Mitchel and Vider, Kadri and Voionmaa, Kaarlo},
	year         = {2013},
	pages        = {147--162},
}

% Workshop paper (NODALIDA 2013 workshop on lexical semantic resources for NLP)
% on extracting construction candidates for a Swedish constructicon using
% hybrid n-grams.
% The proceedings series name and number are stored in `series`/`number`
% rather than appended to `booktitle`; "Swedish" in the title is brace-protected.
@inProceedings{backstrom-etal-2013-automatic-178351,
	title        = {Automatic identification of construction candidates for a {Swedish} constructicon},
	abstract     = {We present an experiment designed for extracting construction candidates for a Swedish constructicon from text corpora. We have explored the use of hybrid n-grams with the practical goal to discover previously undescribed partially schematic constructions. The experiment was successful, in that quite a few new constructions were discovered. The precision is low, but as a push-button tool for construction discovery, it has proven a valuable tool for the work on a Swedish constructicon.},
	booktitle    = {Proceedings of the workshop on lexical semantic resources for NLP at NODALIDA 2013, May 22-24, 2013, Oslo, Norway},
	series       = {NEALT Proceedings Series},
	number       = {19},
	author       = {Bäckström, Linnéa and Borin, Lars and Forsberg, Markus and Lyngfelt, Benjamin and Prentice, Julia and Sköldberg, Emma},
	year         = {2013},
	pages        = {2--11},
}