% Bibliography entries for 2018 publications (UTF-8 encoded; process with a
% Unicode-aware backend such as Biber, or convert accents to LaTeX escapes
% before using classic 8-bit BibTeX).
% The abstract field is not part of the standard data model and is ignored by
% standard styles; it is kept for reference only.

% Workshop paper: unsupervised correction of Swedish learner errors.
@inproceedings{pilan-volodina-2018-exploring-275366,
  author    = {Pilán, Ildikó and Volodina, Elena},
  title     = {Exploring word embeddings and phonological similarity for the unsupervised correction of language learner errors},
  booktitle = {Proceedings of the Second Joint SIGHUM Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature, COLING, Santa Fe, New Mexico, USA, August 25, 2018},
  year      = {2018},
  publisher = {Association for Computational Linguistics},
  isbn      = {978-1-948087-61-2},
  abstract  = {The presence of misspellings and other errors or non-standard word forms poses a considerable challenge for NLP systems. Although several supervised approaches have been proposed previously to normalize these, annotated training data is scarce for many languages. We investigate, therefore, an unsupervised method where correction candidates for Swedish language learners’ errors are retrieved from word embeddings. Furthermore, we compare the usefulness of combining cosine similarity with orthographic and phonological similarity based on a neural grapheme-to-phoneme conversion system we train for this purpose. Although combinations of similarity measures have been explored for finding correction candidates, it remains unclear how these measures relate to each other and how much they contribute individually to identifying the correct alternative. We experiment with different combinations of these and find that integrating phonological information is especially useful when the majority of learner errors are related to misspellings, but less so when errors are of a variety of types including, e.g. grammatical errors.},
}

% Workshop paper: linguistic complexity features across language learning datasets.
@inproceedings{pilan-volodina-2018-investigating-275367,
  author    = {Pilán, Ildikó and Volodina, Elena},
  title     = {Investigating the importance of linguistic complexity features across different datasets related to language learning},
  booktitle = {Proceedings of the Workshop on Linguistic Complexity and Natural Language Processing, COLING, Santa Fe, New Mexico, USA, August 25, 2018},
  year      = {2018},
  publisher = {Association for Computational Linguistics},
  isbn      = {978-1-948087-62-9},
  abstract  = {We present the results of our investigations aiming at identifying the most informative linguistic complexity features for classifying language learning levels in three different datasets. The datasets vary across two dimensions: the size of the instances (texts vs. sentences) and the language learning skill they involve (reading comprehension texts vs. texts written by learners themselves). We present a subset of the most predictive features for each dataset, taking into consideration significant differences in their per-class mean values and show that these subsets lead not only to simpler models, but also to an improved classification performance. Furthermore, we pin-point fourteen central features that are good predictors regardless of the size of the linguistic unit analyzed or the skills involved, which include both morpho-syntactic and lexical dimensions.},
}

% Whole proceedings volume: the people listed are the volume editors, so the
% entry uses the proceedings type with an editor field. The address field holds
% the publisher's city.
@proceedings{pilan-etal-2018-proceedings-275358,
  editor    = {Pilán, Ildikó and Volodina, Elena and Alfter, David and Borin, Lars},
  title     = {Proceedings of the 7th Workshop on {NLP} for Computer Assisted Language Learning ({NLP4CALL} 2018), {SLTC}, {Stockholm}, 7th {November} 2018},
  year      = {2018},
  publisher = {Linköping University Electronic Press},
  address   = {Linköping},
  isbn      = {978-91-7685-173-9},
  abstract  = {The primary goal of the workshop series on Natural Language Processing for Computer-Assisted Language Learning (NLP4CALL) is to create a meeting place for researchers working on the integration of Natural Language Processing and Speech Technologies in CALL systems and exploring the theoretical and methodological issues arising in this connection. The latter includes, among others, insights from Second Language Acquisition (SLA) research, on the one hand, and promoting the development of “Computational SLA” through setting up Second Language research infrastructure(s), on the other. The intersection of Natural Language Processing (or Language Technology / Computational Linguistics) and Speech Technology with Computer-Assisted Language Learning (CALL) brings “understanding” of language to CALL tools, thus making CALL intelligent. This fact has given the name for this area of research – Intelligent CALL, ICALL. As the definition suggests, apart from having excellent knowledge of Natural Language Processing and/or Speech Technology, ICALL researchers need good insights into second language acquisition theories and practices, as well as knowledge of second language pedagogy and didactics. This workshop invites therefore a wide range of ICALL-relevant research, including studies where NLP-enriched tools are used for testing SLA and pedagogical theories, and vice versa, where SLA theories, pedagogical practices or empirical data are modeled in ICALL tools.
The NLP4CALL workshop series is aimed at bringing together competencies from these areas for sharing experiences and brainstorming around the future of the field.},
}

% Shared-task system description paper; the team name is brace-protected so
% sentence-casing styles keep its capitalisation.
@inproceedings{alfter-pilan-2018-complex-276407,
  author    = {Alfter, David and Pilán, Ildikó},
  title     = {{SB@GU} at the Complex Word Identification 2018 Shared Task},
  booktitle = {Proceedings of the Thirteenth Workshop on Innovative Use of NLP for Building Educational Applications, New Orleans, Louisiana, June 5, 2018},
  year      = {2018},
  publisher = {Association for Computational Linguistics},
  address   = {Stroudsburg, PA, USA},
  isbn      = {978-1-948087-11-7},
}

% Conference paper on the Lärka platform; no publisher or ISBN is recorded.
@inproceedings{alfter-etal-2018-from-275364,
  author    = {Alfter, David and Borin, Lars and Pilán, Ildikó and Lindström Tiedemann, Therese and Volodina, Elena},
  title     = {From Language Learning Platform to Infrastructure for Research on Language Learning},
  booktitle = {Proceedings of CLARIN-2018 conference, Pisa, Italy},
  year      = {2018},
  abstract  = {Lärka is an Intelligent Computer-Assisted Language Learning (ICALL) platform developed at Språkbanken, as a flexible and a valuable source of additional learning material (e.g. via corpus-based exercises) and a support tool for both teachers and L2 learners of Swedish and students of (Swedish) linguistics. Nowadays, Lärka is being adapted into a central building block in an emerging second language research infrastructure within a larger context of the text-based research infrastructure developed by the national Swedish Language bank, Språkbanken, and SWE-CLARIN.},
}