Skip to main content
Språkbanken Text is a department within Språkbanken.

BibTeX

@comment{
  Conference paper entry. The conference venue and dates are kept inside
  booktitle, as in the exported record. The abstract is stored on a single
  line so that no word is split by a line break, and literal percent signs
  are backslash-escaped so the field can be typeset by LaTeX.
}
@inproceedings{ahlberg-etal-2014-semi-198791,
	author       = {Ahlberg, Malin and Forsberg, Markus and Hulden, Mans},
	title        = {Semi-supervised learning of morphological paradigms and lexicons},
	booktitle    = {Proceedings of the 14th Conference of the European Chapter of the Association for Computational Linguistics, Gothenburg, Sweden, 26--30 April 2014},
	year         = {2014},
	pages        = {569--578},
	isbn         = {978-1-937284-78-7},
	abstract     = {We present a semi-supervised approach to the problem of paradigm induction from inflection tables. Our system extracts generalizations from inflection tables, representing the resulting paradigms in an abstract form. The process is intended to be language-independent, and to provide human-readable generalizations of paradigms. The tools we provide can be used by linguists for the rapid creation of lexical resources. We evaluate the system through an inflection table reconstruction task using Wiktionary data for German, Spanish, and Finnish. With no additional corpus information available, the evaluation yields per word form accuracy scores on inflecting unseen base forms in different languages ranging from 87.81\% (German nouns) to 99.52\% (Spanish verbs); with additional unlabeled text corpora available for training the scores range from 91.81\% (German nouns) to 99.58\% (Spanish verbs). We separately evaluate the system in a simulated task of Swedish lexicon creation, and show that on the basis of a small number of inflection tables, the system can accurately collect from a list of noun forms a lexicon with inflection information ranging from 100.0\% correct (collect 100 words), to 96.4\% correct (collect 1000 words).},
}