% Workshop proceedings volume (NLP for computer assisted language learning, Nodalida 2015).
% NOTE(review): the names in the author field may be the volume's editors; if so,
% a proceedings entry with an editor field would be the better fit -- confirm.
@book{Volodina-Elena2015-226574,
  author    = {Volodina, Elena and Borin, Lars and Pilán, Ildikó},
  title     = {Proceedings of the 4th workshop on {NLP} for computer assisted language learning at {Nodalida} 2015, {Vilnius}, 11th {May}, 2015},
  publisher = {Linköping University Press},
  address   = {Linköping},
  year      = {2015},
  isbn      = {978-91-7519-036-5},
}

% Journal article on predicting linguistic complexity of Swedish L2 learning material.
% NOTE(review): the required journal field is missing and the venue is not recorded
% here -- look it up and add it. The publication status ("Epub ahead of print") is
% kept in the note field because volume must hold only the volume number.
@article{Pilán-Ildikó2015-226565,
  author   = {Pilán, Ildikó and Vajjala, Sowmya and Volodina, Elena},
  title    = {A readable read: Automatic Assessment of Language Learning Materials based on Linguistic Complexity},
  year     = {2015},
  note     = {Epub ahead of print},
  abstract = {Corpora and web texts can become a rich language learning resource if we have a means of assessing whether they are linguistically appropriate for learners at a given proficiency level. In this paper, we aim at addressing this issue by presenting the first approach for predicting linguistic complexity for Swedish second language learning material on a 5-point scale. After showing that the traditional Swedish readability measure, Läsbarhetsindex (LIX), is not suitable for this task, we propose a supervised machine learning model, based on a range of linguistic features, that can reliably classify texts according to their difficulty level. Our model obtained an accuracy of 81.3\% and an F-score of 0.8, which is comparable to the state of the art in English and is considerably higher than previously reported results for other languages. We further studied the utility of our features with single sentences instead of full texts since sentences are a common linguistic unit in language learning exercises. We trained a separate model on sentence-level data with five classes, which yielded 63.4\% accuracy. Although this is lower than the document level performance, we achieved an adjacent accuracy of 92\%. Furthermore, we found that using a combination of different features, compared to using lexical features alone, resulted in 7\% improvement in classification accuracy at the sentence level, whereas at the document level, lexical features were more dominant.
Our models are intended for use in a freely accessible web-based language learning platform for the automatic generation of exercises, and they will be available also in the form of web-services.},
}

% Conference paper in the NODALIDA 2015 proceedings. The volume editor is stored in
% the editor field instead of being embedded in booktitle.
% NOTE(review): number = 109 is presumably the proceedings series number -- confirm.
@inproceedings{Pilán-Ildikó2015-227313,
  author    = {Pilán, Ildikó},
  title     = {Helping {Swedish} words come to their senses: word-sense disambiguation based on sense associations from the {SALDO} lexicon},
  booktitle = {Proceedings of the 20th Nordic Conference of Computational Linguistics ({NODALIDA} 2015). May 11–13, 2015, Vilnius, Lithuania},
  editor    = {Megyesi, Beáta},
  year      = {2015},
  number    = {109},
  pages     = {275--279},
  isbn      = {978-91-7519-098-3},
  abstract  = {This paper describes a knowledge-based approach to word-sense disambiguation using a lexical-semantic resource, SALDO. This hierarchically organized lexicon defining senses in terms of other related senses has not been previously explored for this purpose. The proposed method is based on maximizing the overlap between associated word senses of nouns and verbs co-occuring within a sentence. The results of a small-scale experiment using this method are also reported. Overall, the approach proved more efficient for nouns, since not only was the accuracy score higher for this category (56\%) than for verbs (46\%), but for nouns in 22\% more of the cases was a sense overlap found. As a result of an in-depth analysis of the predictions, we identified a number of ways the system could be modified or extended for an improved performance.},
}