% Bibliography database: 2021 publications (UTF-8 encoded; process with Biber
% or another UTF-8 aware tool). One field per line; citation keys are stable
% identifiers and must not be renamed.

% Whole proceedings volume of the NLP4CALL 2021 workshop.
@proceedings{alfter-etal-2021-proceedings-311727,
  editor    = {Alfter, David and Volodina, Elena and Pilán, Ildikó and Graën, Johannes and Borin, Lars},
  title     = {Proceedings of the 10th Workshop on Natural Language Processing for Computer Assisted Language Learning ({NLP4CALL} 2021)},
  year      = {2021},
  series    = {Linköping Electronic Conference Proceedings},
  number    = {177},
  publisher = {Linköping University Electronic Press},
  address   = {Linköping, Sweden},
  isbn      = {978-91-7929-625-4},
  abstract  = {The workshop series on Natural Language Processing (NLP) for Computer-Assisted Language Learning (NLP4CALL) is a meeting place for researchers working on the integration of Natural Language Processing and Speech Technologies in CALL systems and exploring the theoretical and methodological issues arising in this connection. The latter includes, among others, the integration of insights from Second Language Acquisition (SLA) research, and the promotion of “Computational SLA” through setting up Second Language research infrastructures.},
}

% Journal article.
@article{zanetti-etal-2021-automatic-311723,
  author   = {Zanetti, Arianna and Volodina, Elena and Graën, Johannes},
  title    = {Automatic Generation of Exercises for Second Language Learning from Parallel Corpus Data},
  journal  = {International Journal of TESOL Studies},
  year     = {2021},
  volume   = {3},
  number   = {2},
  pages    = {55--71},
  abstract = {Creating language learning exercises is a time-consuming task and made-up sample sentences frequently lack authenticity. Authentic samples can be obtained from corpora, but it is necessary to identify material that is suitable for language learners. Parallel corpora of written text consist of translated material. Comparing the text in one language with its translation into another (known) language makes the structure accessible to the learner. However, the correspondence of words between the two languages is more important. By carefully selecting well-suited parallel sentences, a learner can explore the target language in a guided way. We present an approach to generate a novel type of language learning exercise from a large parallel corpus based on movie subtitles. The size of the corpus allows for defining selective criteria, favoring precision over recall. It is a non-trivial task to give reliable feedback to automatically generated exercises. ICALL literature often deals with fill-in-the-blanks exercises or multiple-choice questions, which allow for very limited answer options. Our proposed exercise is a special case of sentence reconstruction on bilingual sentence pairs. It combines two elements which have proven to be effective for language learning: a gamified approach, to awaken the students’ competitive desire, and the identification of syntactic structures and vocabulary use, to improve language sensitivity. This article presents the methods used to select example pairs and to implement a prototype.},
}

% Journal article (no page range recorded in the source data).
@article{alfter-etal-2021-crowdsourcing-311721,
  author   = {Alfter, David and Lindström Tiedemann, Therese and Volodina, Elena},
  title    = {Crowdsourcing Relative Rankings of Multi-Word Expressions: Experts versus Non-Experts},
  journal  = {Northern European Journal of Language Technology (NEJLT)},
  year     = {2021},
  volume   = {7},
  number   = {1},
  abstract = {In this study we investigate to which degree experts and non-experts agree on questions of difficulty in a crowdsourcing experiment. We ask non-experts (second language learners of Swedish) and two groups of experts (teachers of Swedish as a second/foreign language and CEFR experts) to rank multi-word expressions in a crowdsourcing experiment. We find that the resulting rankings by all the three tested groups correlate to a very high degree, which suggests that judgments produced in a comparative setting are not influenced by professional insights into Swedish as a second language.},
}

% Workshop paper; same volume (same ISBN) as the proceedings entry above.
@inproceedings{volodina-etal-2021-dalaj-311725,
  author    = {Volodina, Elena and Ali Mohammed, Yousuf and Klezl, Julia},
  title     = {{DaLAJ} - a dataset for linguistic acceptability judgments for {Swedish}},
  booktitle = {Proceedings of the 10th Workshop on Natural Language Processing for Computer Assisted Language Learning (NLP4CALL 2021), Online},
  year      = {2021},
  publisher = {Linköping University Electronic Press},
  address   = {Linköping},
  isbn      = {978-91-7929-625-4},
  abstract  = {We present DaLAJ 1.0, a Dataset for Linguistic Acceptability Judgments for Swedish, comprising 9 596 sentences in its first version. DaLAJ is based on the SweLL second language learner data (Volodina et al., 2019), consisting of essays at different levels of proficiency. To make sure the dataset can be freely available despite the GDPR regulations, we have sentence-scrambled learner essays and removed part of the metadata about learners, keeping for each sentence only information about the mother tongue and the level of the course where the essay has been written. We use the normalized version of learner language as the basis for DaLAJ sentences, and keep only one error per sentence. We repeat the same sentence for each individual correction tag used in the sentence. For DaLAJ 1.0 four error categories of 35 available in SweLL are used, all connected to lexical or word-building choices. The dataset is included in the SwedishGlue benchmark. Below, we describe the format of the dataset, our insights and motivation for the chosen approach to data sharing.},
}

% Conference paper; the volume editors were previously embedded in booktitle
% and now live in the editor field.
@inproceedings{volodina-etal-2021-coderoomor-311724,
  author    = {Volodina, Elena and Ali Mohammed, Yousuf and Lindström Tiedemann, Therese},
  title     = {{CoDeRooMor}: A new dataset for non-inflectional morphology studies of {Swedish}},
  booktitle = {23rd Nordic Conference on Computational Linguistics (NoDaLiDa) Proceedings, May 31–2 June, 2021, Reykjavik, Iceland Online},
  editor    = {Dobnik, Simon and Øvrelid, Lilja},
  year      = {2021},
  publisher = {Linköping University Electronic Press},
  address   = {Linköping},
  isbn      = {978-91-7929-614-8},
  abstract  = {The paper introduces a new resource, CoDeRooMor, for studying the morphology of modern Swedish word formation. The approximately 16.000 lexical items in the resource have been manually segmented into word-formation morphemes, and labeled for their categories, such as prefixes, suffixes, roots, etc. Word-formation mechanisms, such as derivation and compounding have been associated with each item on the list. The article describes the selection of items for manual annotation and the principles of annotation, reports on the reliability of the manual annotation, and presents tools, resources and some first statistics. Given the “gold” nature of the resource, it is possible to use it for empirical studies as well as to develop linguistically-aware algorithms for morpheme segmentation and labeling (cf statistical subword approach). The resource is freely available through Språkbanken-Text.},
}

% Technical report; the identifier 1401-5919 has ISSN form (8 digits), so it
% is stored as issn rather than isbn.
@techreport{megyesi-etal-2021-swell-311730,
  author      = {Megyesi, Beáta and Rudebeck, Lisa and Volodina, Elena},
  title       = {{SweLL} pseudonymization guidelines},
  institution = {Institutionen för svenska språket, Göteborgs universitet},
  address     = {Göteborg},
  year        = {2021},
  issn        = {1401-5919},
  abstract    = {The current document is a part of the SweLL guidelines series consisting of four parts which aim to report how we have worked on the material and which decisions we have made. Guidelines are available for each step in the manual annotation process, including: • Transcription guidelines • Pseudonymization guidelines • Normalization guidelines • Correction annotation guidelines We specifically described all processes in English to make sure our principles and experience can be of help to people working on other learner infrastructure projects independent of the language.},
}

% Technical report from the same guidelines series.
@techreport{volodina-megyesi-2021-swell-311729,
  author      = {Volodina, Elena and Megyesi, Beáta},
  title       = {{SweLL} transcription guidelines, {L2} essays},
  institution = {Institutionen för svenska språket, Göteborgs universitet},
  address     = {Göteborg},
  year        = {2021},
  abstract    = {The current document is a part of the SweLL guidelines series consisting of four parts which aim to report how we have worked on the material and which decisions we have made. Guidelines are available for each step in the manual annotation process, including: • Transcription guidelines • Pseudonymization guidelines • Normalization guidelines • Correction annotation guidelines We specifically described all processes in English to make sure our principles and experience can be of help to people working on other learner infrastructure projects independent of the language.},
}

% Chapter in an edited volume; editors use "Last, First" form so the compound
% surname "Friberg Heppin" is parsed as a unit.
@incollection{prentice-etal-2021-language-310517,
  author    = {Prentice, Julia and Håkansson, Camilla and Lindström Tiedemann, Therese and Pilán, Ildikó and Volodina, Elena},
  title     = {Language learning and teaching with {Swedish} {FrameNet++}: Two examples},
  booktitle = {The Swedish FrameNet++. Harmonization, integration, method development and practical language technology applications},
  editor    = {Dannélls, Dana and Borin, Lars and Friberg Heppin, Karin},
  year      = {2021},
  publisher = {John Benjamins Publishing Company},
  address   = {Amsterdam, Philadelphia},
  isbn      = {9789027258489},
  pages     = {304--329},
}