BibTeX

% Conference paper. Swedish (title) and NODALIDA (booktitle) are brace-protected
% so that sentence-casing bibliography styles keep their capitalisation.
@inproceedings{falkenjack-etal-2013-features-178257,
	author       = {Falkenjack, Johan and Heimann Mühlenbock, Katarina and Jönsson, Arne},
	title        = {Features indicating readability in {Swedish} text},
	booktitle    = {Proceedings of the 19th Nordic Conference of Computational Linguistics ({NODALIDA} 2013)},
	year         = {2013},
	number       = {085},
	pages        = {27--40},
	isbn         = {978-91-7519-589-6},
	abstract     = {Studies have shown that modern methods of readability assessment, using automated linguistic analysis and machine learning (ML), is a viable road forward for readability classification and ranking. In this paper we present a study of different levels of analysis and a large number of features and how they affect an ML-system’s accuracy when it comes to readability assessment. We test a large number of features proposed for different languages (mainly English) and evaluate their usefulness for readability assessment for Swedish as well as comparing their performance to that of established metrics. We find that the best performing features are language models based on part-of-speech and dependency type.},
}

% Monograph entry. NOTE(review): the abstract describes this work as a thesis;
% consider a thesis entry type (with school) if the target style distinguishes
% theses from books. Confirm the degree type before changing.
@book{heimannmuhlenbock-2013-what-177599,
	author       = {Heimann Mühlenbock, Katarina},
	title        = {I see what you mean},
	publisher    = {University of Gothenburg},
	address      = {Göteborg},
	year         = {2013},
	isbn         = {978-91-87850-50-9},
	abstract     = {This thesis aims to identify linguistic factors that affect readability and text comprehension, viewed as a function of text complexity. Features at various linguistic levels suggested in existing literature are evaluated, including the Swedish readability formula LIX. Natural language processing methods and resources are employed to investigate characteristics that go beyond traditional superficial measures. A comparable corpus of easy-to-read and ordinary texts from three genres is investigated, and it is shown how features present at various levels of representation differ quantitatively across text types and genres. The findings are confirmed in significance tests as well as principal component analysis. Three machine learning algorithms are employed and evaluated in order to build a statistical model for text classification. The results demonstrate that a proposed language model for Swedish (SVIT), utilizing a combination of linguistic features, actually predicts text complexity and genre with a higher accuracy than LIX. It is suggested that the SVIT language model should be adopted to assess surface language properties, vocabulary load, sentence structure, idea density levels as well as the personal interests of different texts. Specific target groups of readers may then be provided with materials tailored to their level of proficiency.},
}
% Sidansvarig (page maintainer): sb-webb