% Bibliography database: 2016 publications by Alfter, Pilán, Volodina and co-authors.
% Text outside entries is ignored by BibTeX; these lines are comments only.
% Citation keys are kept exactly as exported (including the non-ASCII ones)
% so that existing citations keep resolving.

% CEUR volume: series, volume number and editors live in their own fields
% rather than inside booktitle, so the style controls how they are printed.
@inproceedings{Alfter-David2016-241664,
  author    = {Alfter, David},
  title     = {Learning the Learner: User Modeling in Intelligent Computer Assisted Language Learning Systems},
  booktitle = {{UMAP} 2016 Extended Proceedings. Halifax, Canada, July 13-16, 2016},
  editor    = {Cena, Federica and Desmarais, Michel and Dicheva, Darina and Zhang, Jie},
  series    = {CEUR Workshop Proceedings},
  volume    = {1618},
  year      = {2016},
}

% NOTE(review): same title, authors, year and ISBN as the inproceedings entry
% Pilán-Ildikó2016-246349 below. Looks like a duplicate record of one work;
% kept under its own key so any existing citation of it still resolves.
@misc{Pilán-Ildikó2016-247241,
  author = {Pilán, Ildikó and Alfter, David and Volodina, Elena},
  title  = {Coursebook texts as a helping hand for classifying linguistic complexity in language learners' writings},
  year   = {2016},
  isbn   = {978-4-87974-709-9},
}

@inproceedings{Pilán-Ildikó2016-246349,
  author    = {Pilán, Ildikó and Alfter, David and Volodina, Elena},
  title     = {Coursebook texts as a helping hand for classifying linguistic complexity in language learners' writings},
  booktitle = {Proceedings of the workshop on Computational Linguistics for Linguistic Complexity},
  year      = {2016},
  isbn      = {978-4-87974-709-9},
  abstract  = {We bring together knowledge from two different types of language learning data,
               texts learners read and texts they write, to improve linguistic complexity
               classification in the latter. Linguistic complexity in the foreign and second
               language learning context can be expressed in terms of proficiency levels. We show
               that incorporating features capturing lexical complexity information from reading
               passages can boost significantly the machine learning based classification of
               learner-written texts into proficiency levels. With an F1 score of .8 our system
               rivals state-of-the-art results reported for other languages for this task.
               Finally, we present a freely available web-based tool for proficiency level
               classification and lexical complexity visualization for both learner writings and
               reading texts.},
}

% CEUR volume: series, volume number and editors live in their own fields.
% The ampersands in the proceedings title are escaped because a bare one is a
% LaTeX alignment character.
@inproceedings{Alfter-David2016-246348,
  author    = {Alfter, David and Bizzoni, Yuri},
  title     = {Hybrid Language Segmentation for Historical Documents},
  booktitle = {Proceedings of Third Italian Conference on Computational Linguistics \& Evaluation
               Campaign of Natural Language Processing and Speech Tools for Italian Final Workshop
               ({CLiC-it} \& {EVALITA} 2016), Napoli, Italy, December 5-7, 2016},
  editor    = {Basile, Pierpaolo and Corazza, Anna and Cutugno, Franco and Montemagni, Simonetta
               and Nissim, Malvina and Patti, Viviana and Semeraro, Giovanni and Sprugnoli, Rachele},
  series    = {CEUR Workshop Proceedings},
  volume    = {1749},
  year      = {2016},
}

@inproceedings{Alfter-David2016-246347,
  author    = {Alfter, David and Volodina, Elena},
  title     = {Modeling Individual Learner Knowledge in a Computer Assisted Language Learning System},
  booktitle = {Proceedings of the Sixth Swedish Language Technology Conference. Umeå University, 17-18 November, 2016},
  year      = {2016},
}

@inproceedings{Volodina-Elena2016-246346,
  author    = {Volodina, Elena and Pilán, Ildikó and Alfter, David},
  title     = {Classification of {Swedish} learner essays by {CEFR} levels},
  booktitle = {Proceedings of {EuroCALL} 2016. 24-27th August 2016, Cyprus},
  publisher = {Research-publishing.net},
  year      = {2016},
  isbn      = {978-1-908416-44-5},
  abstract  = {The paper describes initial efforts on creating a system for the automatic
               assessment of Swedish second language (L2) learner essays from two points of view:
               holistic evaluation of the reached level according to the Common European Framework
               of Reference (CEFR), and the lexical analysis of texts for receptive and productive
               vocabulary per CEFR level. We describe the data and resources that our experiments
               were based on, provide a short introduction to the algorithm for essay
               classification and experiment results, present the user interface we developed for
               testing new essays and outline future work.},
}

@inproceedings{Alfter-David2016-246345,
  author    = {Alfter, David and Bizzoni, Yuri and Agebjörn, Anders and Volodina, Elena and Pilán, Ildikó},
  title     = {From Distributions to Labels: A Lexical Proficiency Analysis using Learner Corpora},
  booktitle = {Linköping Electronic Conference Proceedings},
  publisher = {Linköping University Electronic Press},
  year      = {2016},
  isbn      = {978-91-7685-633-8},
  abstract  = {In this work we look at how information from second language learner essay corpora
               can be used for the evaluation of unseen learner essays. Using a corpus of learner
               essays which have been graded by well-trained human assessors using the CEFR scale,
               we extract a list of word distributions over CEFR levels. For the analysis of unseen
               essays, we want to map each word to a so-called target CEFR level using this word
               list. However, the task of mapping from a distribution to a single label is not
               trivial. We are also investigating how we can evaluate the mapping from distribution
               to label. We show that the distributional profile of words from the essays, informed
               with the essays’ levels, consistently overlaps with our frequency-based method, in
               the sense that words holding the same level of proficiency as predicted by our
               mapping tend to cluster together in a semantic space. In the absence of a gold
               standard, this information can be useful to see how often a word is associated with
               the same level in two different models. Also, in this case we have a similarity
               measure that can show which words are more central to a given level and which words
               are more peripheral.},
}