% Publication records (every entry in this section is dated 2014).
% Layout convention: one field per line, values brace-delimited, UTF-8 text.
% Text outside entries is ignored by BibTeX; never put an at-sign in these notes.

% Workshop paper: sentence similarity from continuous vector representations.
@inproceedings{kageback-etal-2014-extractive-210878,
  author    = {Kågebäck, Mikael and Mogren, Olof and Tahmasebi, Nina and Dubhashi, Devdatt},
  title     = {Extractive Summarization using Continuous Vector Space Models},
  booktitle = {Proceedings of the 2nd Workshop on Continuous Vector Space Models and their Compositionality (CVSC) EACL, April 26-30, 2014 Gothenburg, Sweden},
  year      = {2014},
  pages     = {31--39},
  isbn      = {978-1-937284-94-7},
  abstract  = {Automatic summarization can help users extract the most important pieces of information from the vast amount of text digitized into electronic form everyday. Central to automatic summarization is the notion of similarity between sentences in text. In this paper we propose the use of continuous vector representations for semantically aware representations of sentences as a basis for measuring similarity. We evaluate different compositions for sentence representation on a standard dataset using the ROUGE evaluation measures. Our experiments show that the evaluated methods improve the performance of a state-of-the-art summarization framework and strongly indicate the benefits of continuous word vector representations for automatic summarization.},
}

% NOTE(review): pages = {3} looks like a page count rather than a page range -- confirm
% against the proceedings before relying on it.
@inproceedings{kokkinakis-etal-2014-vocation-209808,
  author    = {Kokkinakis, Dimitrios and Ighe, Ann and Malm, Mats},
  title     = {Vocation Identification in {Swedish} Fiction},
  booktitle = {Proceedings of the Fifth Swedish Language Technology Conference (SLTC)},
  year      = {2014},
  pages     = {3},
  abstract  = {This paper presents a system for automatic annotation of vocational signals in 19th century Swedish prose fiction. Besides vocation identification, the system assigns gender (male, female, unknown) to the vocation words. Since gender is a prominent attribute of first names, we apply a named-entity recognizer (NER) that uses first name gazetteers where each name has been pre-assigned gender, which aids gender assignment to vocations with unknown gender if appropriate context is available. We also use a statistical modelling method, conditional random fields (CRF), for learning gender-assigned vocations in combination with the results of the NER and other pattern matching techniques. The purpose of this work is to develop and apply tools to literature as means to expand our understanding of history in the area of literature-based gender studies, e.g. investigate how women enter literature, which functions do they assume and their working patterns. Vocation identification can be used as one such indicator for achieving some these goals.},
}

% NOTE(review): pages = {6} looks like a page count rather than a page range -- confirm.
@inproceedings{kokkinakis-etal-2014-semantics-209802,
  author    = {Kokkinakis, Dimitrios and Malm, Mats and Bergenmar, Jenny and Ighe, Ann},
  title     = {Semantics in Storytelling in {Swedish} Fiction},
  booktitle = {Proceedings of the Digital Access to textual Cultural Heritage (DATeCH)},
  year      = {2014},
  pages     = {6},
  isbn      = {978-1-4503-2588-2},
  abstract  = {In this paper, we aim to define foundations and research questions for future large scale exploration of various types of semantic relationships in literature, namely Swedish prose fiction. More specifically, we are interested to get an in-depth understanding of storytelling in Swedish fiction by analyzing and mining the narrative discourse in a small sample of such data, focusing on interpersonal relationships and answering various questions such as how to recognize and assess gender patterns. Our intention is to apply our findings into a much larger scale in the near future in order to obtain useful insights about the social relations, structures, behavior and everyday life of characters found in literary works, thus enhancing the use of prose fiction as a source for research within the humanities and social sciences. Our work is inspired by the notions of distant reading and macroanalysis, a relatively new and often contested paradigm of literary research. In order to achieve our goal we strive for a combination of natural language processing techniques and simple visualizations that allow the user to rapidly focus on key areas of interest and provide the ability to discover latent semantic patterns and structures.},
}

% Journal article; proper nouns in the title are brace-protected against style recasing.
@article{johansson-2014-automatic-201874,
  author   = {Johansson, Richard},
  title    = {Automatic Expansion of the {Swedish} {FrameNet} Lexicon},
  journal  = {Constructions and Frames},
  year     = {2014},
  volume   = {6},
  number   = {1},
  pages    = {92--113},
  abstract = {We evaluate several lexicon-based and corpus-based methods to automatically induce new lexical units for the Swedish FrameNet, and we see that the best-performing setup uses a combination of both types of methods. A particular challenge for Swedish is the absence of a lexical resource such as WordNet; however, we show that the semantic network SALDO, which is organized according to lexicographical principles quite different from those of WordNet, is very useful for our purposes.},
}

% NOTE(review): "Historia i en digital värld" reads like a book title, and the entry has
% no volume, number or pages -- presumably a chapter in an edited volume; verify and, if
% so, convert to an incollection entry with booktitle and publisher.
@article{borin-johansson-2014-kulturomik-192931,
  author  = {Borin, Lars and Johansson, Richard},
  title   = {Kulturomik: Att spana efter språkliga och kulturella förändringar i digitala textarkiv},
  journal = {Historia i en digital värld},
  year    = {2014},
}

% Conference paper; percent signs in the abstract are escaped so that styles which
% typeset the abstract do not treat them as LaTeX comment starts.
@inproceedings{ahlberg-etal-2014-semi-198791,
  author    = {Ahlberg, Malin and Forsberg, Markus and Hulden, Mans},
  title     = {Semi-supervised learning of morphological paradigms and lexicons},
  booktitle = {Proceedings of the 14th Conference of the European Chapter of the Association for Computational Linguistics, Gothenburg, Sweden 26–30 April 2014},
  year      = {2014},
  pages     = {569--578},
  isbn      = {978-1-937284-78-7},
  abstract  = {We present a semi-supervised approach to the problem of paradigm induction from inflection tables. Our system extracts generalizations from inflection tables, representing the resulting paradigms in an abstract form. The process is intended to be language-independent, and to provide human-readable generalizations of paradigms. The tools we provide can be used by linguists for the rapid creation of lexical resources. We evaluate the system through an inflection table reconstruction task using Wiktionary data for German, Spanish, and Finnish. With no additional corpus information available, the evaluation yields per word form accuracy scores on inflecting unseen base forms in different languages ranging from 87.81\% (German nouns) to 99.52\% (Spanish verbs); with additional unlabeled text corpora available for training the scores range from 91.81\% (German nouns) to 99.58\% (Spanish verbs). We separately evaluate the system in a simulated task of Swedish lexicon creation, and show that on the basis of a small number of inflection tables, the system can accurately collect from a list of noun forms a lexicon with inflection information ranging from 100.0\% correct (collect 100 words), to 96.4\% correct (collect 1000 words).},
}