Skip to main content


	title        = {Rule-based and machine learning approaches for second language sentence-level readability},
	abstract     = {We present approaches for the identification
of sentences understandable by second
language learners of Swedish, which
can be used in automatically generated exercises based on corpora. In this work we
merged methods and knowledge from machine
learning-based readability research,
from rule-based studies of Good Dictionary
Examples and from second language
learning syllabuses. The proposed selection
methods have also been implemented
as a module in a free web-based language
learning platform. Users can use
different parameters and linguistic filters
to personalize their sentence search with
or without a machine learning component
assessing readability. The sentences selected
have already found practical use as
multiple-choice exercise items within the
same platform. Out of a number of deep
linguistic indicators explored, we found
mainly lexical-morphological and semantic
features informative for second language
sentence-level readability. We obtained
a readability classification accuracy
result of 71\%, which approaches the performance of other models used in similar
tasks. Furthermore, during an empirical
evaluation with teachers and students,
about seven out of ten sentences selected
were considered understandable, the rule-based approach slightly outperforming the
method incorporating the machine learning},
	booktitle    = {Proceedings of the Ninth Workshop on Innovative Use of NLP for Building Educational Applications, June 26, 2014 Baltimore, Maryland, USA},
	author       = {Pilán, Ildikó and Volodina, Elena and Johansson, Richard},
	year         = {2014},
	ISBN         = {978-1-941643-03-7},
	pages        = {174--184},