% Conference paper (COLING 2020) on automatic pseudonymization of Swedish learner essays.
% Percent signs in the abstract are escaped so the field is safe if a style prints it.
@inproceedings{volodina-etal-2020-towards-300069,
  author    = {Volodina, Elena and Ali Mohammed, Yousuf and Derbring, Sandra and Matsson, Arild and Megyesi, Beata},
  title     = {Towards Privacy by Design in Learner Corpora Research: A Case of On-the-fly Pseudonymization of {Swedish} Learner Essays},
  booktitle = {Proceedings of the 28th International Conference on Computational Linguistics (COLING), December 8--13, 2020, Barcelona, Spain (Online)},
  year      = {2020},
  publisher = {International Committee on Computational Linguistics},
  isbn      = {978-1-952148-27-9},
  abstract  = {This article reports on an ongoing project aiming at automatization of pseudonymization of learner essays. The process includes three steps: identification of personal information in an unstructured text, labeling for a category, and pseudonymization. We experiment with rule-based methods for detection of 15 categories out of the suggested 19 (Megyesi et al., 2018) that we deem important and/or doable with automatic approaches. For the detection and labeling steps, we use resources covering personal names, geographic names, company and university names and others. For the pseudonymization step, we replace the item using another item of the same type from the above-mentioned resources. Evaluation of the detection and labeling steps are made on a set of manually anonymized essays. The results are promising and show that 89\% of the personal information can be successfully identified in learner data, and annotated correctly with an inter-annotator agreement of 86\% measured as Fleiss kappa and Krippendorff's alpha.},
}

% Whole proceedings volume of the NLP4CALL 2020 workshop.
% Typed as a proceedings entry so that publisher and address are rendered by standard styles.
% NOTE(review): the listed people are assumed to be the volume editors; confirm against the publication.
@proceedings{alfter-etal-2020-proceedings-300071,
  editor    = {Alfter, David and Volodina, Elena and Pilán, Ildikó and Lange, Herbert and Borin, Lars},
  title     = {Proceedings of the 9th Workshop on Natural Language Processing for Computer Assisted Language Learning 2020},
  year      = {2020},
  publisher = {Linköping University Electronic Press},
  address   = {Linköping},
  isbn      = {978-91-7929-732-9},
  abstract  = {The workshop series on Natural Language Processing (NLP) for Computer-Assisted Language Learning (NLP4CALL) is a meeting place for researchers working on the integration of Natural Language Processing and Speech Technologies in CALL systems and exploring the theoretical and methodological issues arising in this connection. The latter includes, among others, the integration of insights from Second Language Acquisition (SLA) research, and the promotion of “Computational SLA” through setting up Second Language research infrastructures. This collection presents four selected papers describing use of Language Technology for language learning.},
}

% Conference paper (SLTC 2020) comparing expert and non-expert rankings of multi-word expressions.
@inproceedings{alfter-etal-2020-expert-300074,
  author    = {Alfter, David and Lindström Tiedemann, Therese and Volodina, Elena},
  title     = {Expert judgments versus crowdsourcing in ordering multi-word expressions},
  booktitle = {Proceedings of the Swedish Language Technology Conference (SLTC), 25–27 November 2020, (Online)},
  year      = {2020},
  abstract  = {In this study we investigate to which degree experts and non-experts agree on questions of linguistic complexity in a crowdsourcing experiment. We ask non-experts (second language learners of Swedish) and two groups of experts (teachers of Swedish as a second/foreign language and CEFR experts) to rank multi-word expressions in a crowdsourcing experiment. We find that the resulting rankings by all the three tested groups correlate to a very high degree, which suggests that judgments produced in a comparative setting are not influenced by professional insights into Swedish as a second language.},
}

% Journal article reporting a cross-European survey of language teachers on crowdsourcing.
% The title carries no trailing period; the bibliography style supplies punctuation.
@article{arharholdt-etal-2020-language-300072,
  author   = {Arhar Holdt, Špela and Zviel-Girshin, Rina and Gajek, Elżbieta and Durán-Muñoz, Isabel and Bago, Petra and Fort, Karën and Hatipoglu, Ciler and Kasperavičienė, Ramunė and Koeva, Svetla and Lazić Konjik, Ivana and Miloshevska, Lina and Ordulj, Antonia and Rodosthenous, Christos and Volodina, Elena and Weber, Tassja and Zanasi, Lorenzo},
  title    = {Language teachers and crowdsourcing: Insights from a {cross-European} survey},
  journal  = {Rasprave: Časopis Instituta za hrvatski jezik i jezikoslovlje},
  year     = {2020},
  volume   = {46},
  number   = {1},
  pages    = {1--28},
  abstract = {The paper presents a cross-European survey on teachers and crowdsourcing. The survey examines how familiar language teachers are with the concept of crowdsourcing and addresses their attitude towards including crowdsourcing into language teaching activities. The survey was administrated via an online questionnaire and collected volunteers’ data on: (a) teachers’ experience with organizing crowdsourcing activities for students/pupils, (b) the development of crowdsourced resources and materials as well as (c) teachers’ motivation for participating in or employing crowdsourcing activities. The questionnaire was disseminated in over 30 European countries. The final sample comprises 1129 language teachers aged 20 to 65, mostly working at institutions of tertiary education. The data indicates that many participants are not familiar with the concept of crowdsourcing resulting in a low rate of crowdsourcing activities in the classroom. However, a high percentage of responding teachers is potentially willing to crowdsource teaching materials for the language(s) they teach. They are particularly willing to collaborate with other teachers in the creation of interactive digital learning materials, and to select, edit, and share language examples for exercises or tests. Since the inclusion of crowdsourcing activities in language teaching is still in its initial stage, steps for further research are highlighted.},
}