Språkbanken Text är en avdelning inom Språkbanken.
BibTeX

% Conference paper in the NODALIDA 2015 proceedings. No page range is recorded for this entry.
% address is the publisher's city; the conference venue stays in booktitle.
@inproceedings{ahlberg-etal-2015-case-217988,
	author       = {Ahlberg, Malin and Andersson, Peter and Forsberg, Markus and Tahmasebi, Nina},
	title        = {A case study on supervised classification of {Swedish} pseudo-coordination},
	booktitle    = {Proceedings of the 20th Nordic Conference of Computational Linguistics, NODALIDA 2015, May 11-13, 2015, Vilnius, Lithuania},
	year         = {2015},
	publisher    = {Linköping University Electronic Press},
	address      = {Linköping},
	isbn         = {978-91-7519-098-3},
	abstract     = {We present a case study on supervised classification of Swedish pseudo-coordination (SPC). The classification is attempted on the type-level with data collected from two data sets: a blog corpus and a fiction corpus. Two small experiments were designed to evaluate the feasibility of this task. The first experiment explored a classifier’s ability to discriminate pseudo-coordinations from ordinary verb coordinations, given a small labeled data set created during the experiment. The second experiment evaluated how well the classifier performed at detecting and ranking SPCs in a set of unlabeled verb coordinations, to investigate if it could be used as a semi-automatic discovery procedure to find new SPCs.},
}

% Journal article; volume 15, combined issue 2 to 4.
% The issue range uses a double hyphen, the same as the page range, so it typesets as an en dash.
@article{tahmasebi-etal-2015-visions-212969,
	author       = {Tahmasebi, Nina and Borin, Lars and Capannini, Gabriele and Dubhashi, Devdatt and Exner, Peter and Forsberg, Markus and Gossen, Gerhard and Johansson, Fredrik and Johansson, Richard and Kågebäck, Mikael and Mogren, Olof and Nugues, Pierre and Risse, Thomas},
	title        = {Visions and open challenges for a knowledge-based culturomics},
	journal      = {International Journal on Digital Libraries},
	year         = {2015},
	volume       = {15},
	number       = {2--4},
	pages        = {169--187},
	abstract     = {The concept of culturomics was born out of the availability of massive amounts of textual data and the interest to make sense of cultural and language phenomena over time. Thus far however, culturomics has only made use of, and shown the great potential of, statistical methods. In this paper, we present a vision for a knowledge-based culturomics that complements traditional culturomics. We discuss the possibilities and challenges of combining knowledge-based methods with statistical methods and address major challenges that arise due to the nature of the data; diversity of sources, changes in language over time as well as temporal dynamics of information in general. We address all layers needed for knowledge-based culturomics, from natural language processing and relations to summaries and opinions.},
}

% Conference paper in the NAACL-HLT 2015 proceedings. No page range is recorded for this entry.
@inproceedings{ahlberg-etal-2015-paradigm-217987,
	author       = {Ahlberg, Malin and Forsberg, Markus and Huldén, Måns},
	title        = {Paradigm classification in supervised learning of morphology},
	booktitle    = {Proceedings of the 2015 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies},
	year         = {2015},
	abstract     = {Supervised morphological paradigm learning by identifying and aligning the longest common subsequence found in inflection tables has recently been proposed as a simple yet competitive way to induce morphological patterns. We combine this non-probabilistic strategy of inflection table generalization with a discriminative classifier to permit the reconstruction of complete inflection tables of unseen words. Our system learns morphological paradigms from labeled examples of inflection patterns (inflection tables) and then produces inflection tables from unseen lemmas or base forms. We evaluate the approach on datasets covering 11 different languages and show that this approach results in consistently higher accuracies vis-a-vis other methods on the same task, thus indicating that the general method is a viable approach to quickly creating high-accuracy morphological resources.},
}

% Conference paper in the NAACL-HLT 2015 proceedings, pages 1428 to 1433.
% The abstract is stored as two paragraphs; hard line wraps from the source export are removed.
@inproceedings{johansson-nietopina-2015-embedding-217863,
	author       = {Johansson, Richard and Nieto Piña, Luis},
	title        = {Embedding a Semantic Network in a Word Space},
	booktitle    = {Proceedings of the 2015 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies. Denver, United States, May 31 – June 5, 2015},
	year         = {2015},
	pages        = {1428--1433},
	isbn         = {978-1-941643-49-5},
	abstract     = {We present a framework for using continuous-space vector representations of word meaning to derive new vectors representing the meaning of senses listed in a semantic network. It is a post-processing approach that can be applied to several types of word vector representations. It uses two ideas: first, that vectors for polysemous words can be decomposed into a convex combination of sense vectors; secondly, that the vector for a sense is kept similar to those of its neighbors in the network. This leads to a constrained optimization problem, and we present an approximation for the case when the distance function is the squared Euclidean.

We applied this algorithm on a Swedish semantic network, and we evaluate the quality of the resulting sense representations extrinsically by showing that they give large improvements when used in a classifier that creates lexical units for FrameNet frames.},
}

% Workshop paper, 1st Workshop on Vector Space Modeling for NLP, pages 25 to 32.
% The percent sign in the abstract is escaped so it is not read as a TeX comment when typeset.
@inproceedings{kageback-etal-2015-neural-217864,
	author       = {Kågebäck, Mikael and Johansson, Fredrik and Johansson, Richard and Dubhashi, Devdatt},
	title        = {Neural context embeddings for automatic discovery of word senses},
	booktitle    = {Proceedings of the 1st Workshop on Vector Space Modeling for Natural Language Processing. Denver, United States},
	year         = {2015},
	pages        = {25--32},
	abstract     = {Word sense induction (WSI) is the problem of automatically building an inventory of senses for a set of target words using only a text corpus. We introduce a new method for embedding word instances and their context, for use in WSI. The method, Instance-context embedding (ICE), leverages neural word embeddings, and the correlation statistics they capture, to compute high quality embeddings of word contexts. In WSI, these context embeddings are clustered to find the word senses present in the text. ICE is based on a novel method for combining word embeddings using continuous Skip-gram, based on both semantic and a temporal aspects of context words. ICE is evaluated both in a new system, and in an extension to a previous system for WSI. In both cases, we surpass previous state-of-the-art, on the WSI task of SemEval-2013, which highlights the generality of ICE. Our proposed system achieves a 33\% relative improvement.},
}

% Workshop paper at NODALIDA 2015, pages 1 to 11, volume 112 of the proceedings series.
% The word after the question mark in the title is braced so sentence-casing styles keep its capital.
% NOTE(review): the date text "May 11, 13-18 2015" in booktitle looks garbled; confirm against the publisher record.
@inproceedings{borin-etal-2015-here-217351,
	author       = {Borin, Lars and Nieto Piña, Luis and Johansson, Richard},
	title        = {Here be dragons? {The} perils and promises of inter-resource lexical-semantic mapping},
	booktitle    = {Linköping Electronic Conference Proceedings. Semantic resources and semantic annotation for Natural Language Processing and the Digital Humanities. Workshop at NODALIDA, May 11, 13-18 2015, Vilnius},
	year         = {2015},
	volume       = {112},
	pages        = {1--11},
	isbn         = {978-91-7519-049-5},
	abstract     = {Lexical-semantic knowledge sources are a stock item in the language technologist’s toolbox, having proved their practical worth in many and diverse natural language processing (NLP) applications. In linguistics, lexical semantics comes in many flavors, but in the NLP world, wordnets reign more or less supreme. There has been some promising work utilizing Roget-style thesauruses instead, but wider experimentation is hampered by the limited availability of such resources. The work presented here is a first step in the direction of creating a freely available Roget-style lexical resource for modern Swedish. Here, we explore methods for automatic disambiguation of interresource mappings with the longer-term goal of utilizing similar techniques for automatic enrichment of lexical-semantic resources.},
}

% Conference paper in the NODALIDA 2015 proceedings, pages 69 to 78.
% Series, volume and publisher were previously appended to booktitle; they now sit in their own fields.
% The abstract is stored as two paragraphs; hard line wraps from the source export are removed.
@inproceedings{johansson-nietopina-2015-combining-216865,
	author       = {Johansson, Richard and Nieto Piña, Luis},
	title        = {Combining Relational and Distributional Knowledge for Word Sense Disambiguation},
	booktitle    = {Proceedings of the 20th Nordic Conference of Computational Linguistics, May 12-13, Vilnius, Lithuania},
	series       = {Linköping Electronic Conference Proceedings},
	volume       = {109},
	year         = {2015},
	publisher    = {Linköping University Electronic Press},
	pages        = {69--78},
	isbn         = {978-91-7519-098-3},
	abstract     = {We present a new approach to word sense disambiguation derived from recent ideas in distributional semantics. The input to the algorithm is a large unlabeled corpus and a graph describing how senses are related; no sense-annotated corpus is needed. The fundamental idea is to embed meaning representations of senses in the same continuous-valued vector space as the representations of words. In this way, the knowledge encoded in the lexical resource is combined with the information derived by the distributional methods. Once this step has been carried out, the sense representations can be plugged back into e.g. the skip-gram model, which allows us to compute scores for the different possible senses of a word in a given context.

We evaluated the new word sense disambiguation system on two Swedish test sets annotated with senses defined by the SALDO lexical resource. In both evaluations, our system soundly outperformed random and first-sense baselines. Its accuracy was slightly above that of a well-known graph-based system, while being computationally much more efficient.},
}
Sidansvarig: sb-webb