% Journal article on knowledge-based culturomics.
% NOTE(review): the article entry type requires a `journal` field, which this
% export does not provide; confirm the venue and add it.
@article{Tahmasebi-Nina2015-212969,
  author   = {Tahmasebi, Nina and Borin, Lars and Capannini, Gabriele and Dubhashi, Devdatt and Exner, Peter and Forsberg, Markus and Gossen, Gerhard and Johansson, Fredrik and Johansson, Richard and Kågebäck, Mikael and Mogren, Olof and Nugues, Pierre and Risse, Thomas},
  title    = {Visions and open challenges for a knowledge-based culturomics},
  year     = {2015},
  volume   = {15},
  number   = {2-4},
  pages    = {169--187},
  abstract = {The concept of culturomics was born out of the availability of massive amounts of textual data and the interest to make sense of cultural and language phenomena over time. Thus far however, culturomics has only made use of, and shown the great potential of, statistical methods. In this paper, we present a vision for a knowledge-based culturomics that complements traditional culturomics. We discuss the possibilities and challenges of combining knowledge-based methods with statistical methods and address major challenges that arise due to the nature of the data; diversity of sources, changes in language over time as well as temporal dynamics of information in general. We address all layers needed for knowledge-based culturomics, from natural language processing and relations to summaries and opinions.},
}

% Corpus resource (dialog recordings); proper nouns in the title are braced so
% that sentence-casing styles keep their capitals.
@misc{Andersen-Christiane2015-215757,
  author    = {Andersen, Christiane and Forsberg, Markus and Hammarstedt, Martin and Pankow, Alexander},
  title     = {Sibirientyska kvinnor ({Siberian} {German} women)},
  year      = {2015},
  publisher = {University of Gothenburg},
  address   = {Göteborg},
  abstract  = {Siberian German women The corpus consists of dialogs between four women born in 1927 to 1937 in the Soviet Volga Republic. Their mother tongue is a German variety spoken in Russia since the second half of the 18th century. Since the end of the Second World War, the women have lived in the region of Krasnoyarsk. They talk about their backgrounds and their everyday lives in the village. The corpus consists of about 16 000 words. Russian words and hybrids are given in [brackets], the turns of the interviewers are in {brackets}; all verb forms have got the attribute FINIT or INFINIT.
More information on the research project see Syntax in contact.},
}

% Conference paper; the misspelled `adress` field is corrected to `address`
% (value kept exactly as exported).
@inproceedings{Ahlberg-Malin2015-217988,
  author    = {Ahlberg, Malin and Andersson, Peter and Forsberg, Markus and Tahmasebi, Nina},
  title     = {A case study on supervised classification of {Swedish} pseudo-coordination},
  booktitle = {Proceedings of the 20th Nordic Conference of Computational Linguistics, NODALIDA 2015, May 11-13, 2015, Vilnius, Lithuania},
  year      = {2015},
  publisher = {Linköping University Electronic Press},
  address   = {Linköpings universitet},
  isbn      = {978-91-7519-098-3},
  abstract  = {We present a case study on supervised classification of Swedish pseudo-coordination (SPC). The classification is attempted on the type-level with data collected from two data sets: a blog corpus and a fiction corpus. Two small experiments were designed to evaluate the feasability of this task. The first experiment explored a classifier’s ability to discriminate pseudo-coordinations from ordinary verb coordinations, given a small labeled data set created during the experiment. The second experiment evaluated how well the classifier performed at detecting and ranking SPCs in a set of unlabeled verb coordinations, to investigate if it could be used as a semi-automatic discovery procedure to find new SPCs.},
}

% Conference paper on supervised learning of morphological paradigms.
@inproceedings{Ahlberg-Malin2015-217987,
  author    = {Ahlberg, Malin and Forsberg, Markus and Huldén, Måns},
  title     = {Paradigm classification in supervised learning of morphology},
  booktitle = {Proceedings of the 2015 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies},
  year      = {2015},
  abstract  = {Supervised morphological paradigm learning by identifying and aligning the longest common subsequence found in inflection tables has recently been proposed as a simple yet competitive way to induce morphological patterns. We combine this non-probabilistic strategy of inflection table generalization with a discriminative classifier to permit the reconstruction of complete inflection tables of unseen words.
Our system learns morphological paradigms from labeled examples of inflection patterns (inflection tables) and then produces inflection tables from unseen lemmas or base forms. We evaluate the approach on datasets covering 11 different languages and show that this approach results in consistently higher accuracies vis-a-vis other methods on the same task, thus indicating that the general method is a viable approach to quickly creating high-accuracy morphological resources.},
}