Skip to main content


	title        = {Investigating the importance of linguistic complexity features across different datasets related to language learning},
	abstract     = {We present the results of our investigations aiming at identifying the most informative linguistic complexity features for classifying language learning levels in three different datasets. The datasets vary across two dimensions: the size of the instances (texts vs. sentences) and the language learning skill they involve (reading comprehension texts vs. texts written by learners themselves). We present a subset of the most predictive features for each dataset, taking into consideration significant differences in their per-class mean values and show that these subsets lead not only to simpler models, but also to an improved classification performance. Furthermore, we pin-point fourteen central features that are good predictors regardless of the size of the linguistic unit analyzed or the skills involved, which include both morpho-syntactic and lexical dimensions.},
	booktitle    = {Proceedings of the Workshop on Linguistic Complexity and Natural Language Processing, COLING, Santa Fe, New Mexico, USA, August 25, 2018},
	author       = {Pilán, Ildikó and Volodina, Elena},
	year         = {2018},
	publisher    = {Association for Computational Linguistics},
	ISBN         = {978-1-948087-62-9},