Skip to main content


	title        = {Predicting proficiency levels in learner writings by transferring a linguistic complexity model from expert-written coursebooks},
	abstract     = {The lack of a sufficient amount of data tailored for a task is a well-recognized problem for many
statistical NLP methods. In this paper, we explore whether data sparsity can be successfully
tackled when classifying language proficiency levels in the domain of learner-written output
texts. We aim at overcoming data sparsity by incorporating knowledge in the trained model
from another domain consisting of input texts written by teaching professionals for learners. We
compare different domain adaptation techniques and find that a weighted combination of the two
types of data performs best, which can even rival systems based on considerably larger amounts
of in-domain data. Moreover, we show that normalizing errors in learners’ texts can substantially
improve classification when in-domain data with annotated proficiency levels is not available.},
	booktitle    = {Proceedings of the 26th International Conference on Computational Linguistics (COLING), December 13-16, 2016, Osaka},
	author       = {Pilán, Ildikó and Volodina, Elena and Zesch, Torsten},
	year         = {2016},
	ISBN         = {978-4-87974-702-0},