% Bibliography database: 2022 publications by Volodina, Alfter, Lindström Tiedemann et al.
% Encoding: UTF-8 (non-ASCII characters are stored directly, not as LaTeX escapes).
% Conventions: lowercase entry types and field names, one field per line,
% names in "Last, First" form, "--" for page and date ranges.

% Journal article.
@article{volodina-etal-2022-crowdsourcing-336551,
  author   = {Volodina, Elena and Alfter, David and Lindström Tiedemann, Therese},
  title    = {Crowdsourcing ratings for single lexical items: a core vocabulary perspective},
  journal  = {Slovenščina 2.0: Empirical, Applied and Interdisciplinary Research},
  year     = {2022},
  volume   = {10},
  number   = {2},
  pages    = {5--61},
  abstract = {In this study, we investigate theoretical and practical issues connected to differentiating between core and peripheral vocabulary at different levels of linguistic proficiency using statistical approaches combined with crowdsourcing. We also investigate whether crowdsourcing second language learners’ rankings can be used for assigning levels to unseen vocabulary. The study is performed on Swedish single-word items. The four hypotheses we examine are: (1) there is core vocabulary for each proficiency level, but this is only true until CEFR level B2 (upper-intermediate); (2) core vocabulary shows more systematicity in its behavior and usage, whereas peripheral items have more idiosyncratic behavior; (3) given that we have truly core items (aka anchor items) for each level, we can place any new unseen item in relation to the identified core items by using a series of comparative judgment tasks, this way assigning a “target” level for a previously unseen item; and (4) non-experts will perform on par with experts in a comparative judgment setting. The hypotheses have been largely confirmed: In relation to (1) and (2), our results show that there seems to be some systematicity in core vocabulary for early to mid-levels (A1-B1) while we find less systematicity for higher levels (B2-C1). In relation to (3), we suggest crowdsourcing word rankings using comparative judgment with known anchor words as a method to assign a “target” level to unseen words. With regard to (4), we confirm the previous findings that non-experts, in our case language learners, can be effectively used for the linguistic annotation tasks in a comparative judgment setting.},
}

% Chapter in an edited volume of selected conference papers.
% NOTE(review): the publisher value looks truncated compared with the
% "Linköping Electronic Conference Proceedings" used for the NLP4CALL volume
% below, and the given name "Daniala" may be a misspelling -- both are kept
% as found; confirm against the published volume.
@incollection{volodina-etal-2022-reliability-321988,
  author    = {Volodina, Elena and Alfter, David and Lindström Tiedemann, Therese and Lauriala, Maisa and Piipponen, Daniala},
  title     = {Reliability of Automatic Linguistic Annotation: Native vs Non-native Texts},
  booktitle = {Selected Papers from the CLARIN Annual Conference 2021, Virtual Event, 2021, 27–29 September},
  editor    = {Monachini, Monica and Eskevich, Maria},
  year      = {2022},
  publisher = {Linköping Electronic Conference},
  address   = {Linköping, Sweden},
  pages     = {151--167},
  isbn      = {978-91-7929-444-1},
  abstract  = {We present the results of a manual evaluation of the performance of automatic linguistic annotation on three different datasets: (1) texts written by native speakers, (2) essays written by second language (L2) learners of Swedish in the original form and (3) the normalized versions of learner-written essays. The focus of the evaluation is on lemmatization, POS-tagging, word sense disambiguation, multi-word detection and dependency annotation. Two annotators manually went through the automatic annotation on a subset of the datasets and marked up all deviations based on their expert judgments and the guidelines provided. We report Inter-Annotator Agreement between the two annotators and accuracy for the linguistic annotation quality for the three datasets, by levels and linguistic features.},
}

% Whole proceedings volume; the people listed are recorded as volume editors.
% NOTE(review): the source record listed them under "author" -- confirm they
% are the editors of the volume.
@proceedings{alfter-etal-2022-proceedings-321964,
  editor    = {Alfter, David and Volodina, Elena and François, Thomas and Desmet, Piet and Cornillie, Frederik and Jönsson, Arne and Rennes, Evelina},
  title     = {Proceedings of the 11th Workshop on Natural Language Processing for Computer-Assisted Language Learning ({NLP4CALL} 2022)},
  year      = {2022},
  publisher = {Linköping Electronic Conference Proceedings},
  address   = {Linköping, Sweden},
  isbn      = {978-91-7929-460-1},
  abstract  = {The volume contains articles reviewed and presented at NLP4CALL workshop. The workshop series on Natural Language Processing (NLP) for Computer-Assisted Language Learning (NLP4CALL) is a meeting place for researchers working on the integration of Natural Language Processing and Speech Technologies in CALL systems and exploring the theoretical and methodological issues arising in this connection. The latter includes, among others, the integration of insights from Second Language Acquisition (SLA) research, and the promotion of “Computational SLA” through setting up Second Language research infrastructures.},
}

% Conference paper (in Swedish). The volume editors, previously embedded in
% the booktitle after "/ redigerade av", are stored in the editor field.
@inproceedings{lindstromtiedemann-etal-2022-cefr-321899,
  author    = {Lindström Tiedemann, Therese and Alfter, David and Volodina, Elena},
  title     = {{CEFR}-nivåer och svenska flerordsuttryck},
  booktitle = {Svenskan i Finland 19: föredrag vid den nittonde sammankomsten för beskrivningen av svenskan i Finland, Vasa den 6--7 maj 2021},
  editor    = {Björklund, Siv and Haagensen, Bodil and Nordman, Marianne and Westerlund, Anders},
  year      = {2022},
  publisher = {Svensk-Österbottniska Samfundet},
  address   = {Vasa},
  isbn      = {978-952-69650-5-5},
  abstract  = {När vi lär oss ett nytt språk ska vi inte bara lära oss enstaka ord och hur vi använder dessa, utan vi måste också lära oss vilka ordkombinationer som är ”fasta uttryck” till betydelsen (t.ex. hälsa på någon) eller till formen (t.ex. lättare sagt än gjort) eller båda delarna (t.ex. huller om buller). Enligt en del studier kan dessa uttryck utgöra så mycket som 50 \% av vokabulären i ett språk som förstaspråk (L1) eller ännu mer (Jackendoff 1997; Erman 2007, 28). Men det är möjligt att de är vanligare i vardagligt språk och talspråk (Prentice \& Sköldberg 2013). Flerordsenheter kan vara problematiska för andraspråkstalare (Nesselhauf 2003, 223) till och med på avancerad nivå (jfr Pawley \& Syder 1983; Wray \& Perkins 2000; Nesselhauf 2003; Prentice 2010). Samtidigt är de en helt nödvändig del av språket (Nesselhauf 2003, 223) och kan utmärka andraspråkstalarna som icke-modersmålstalare (Pawley \& Syder 1983; Wray 2002). Flerordsuttryck är alltså en värdefull del av andraspråkskompetensen (se även Paquot 2019) och något som är viktigt att studera hur vi på bästa sätt introducerar för L2-talaren och om de kan kopplas till nivåer i bedömning. I den här studien presenterar vi resultat kring förståelsen av flerordsuttryck i svenska som andraspråk i relation till färdighetsnivåerna enligt Gemensam Europeisk Referensram för Språk (GERS eller CEFR, Common European Framework of Reference) (COE 2001; 2018; Skolverket 2009; Utbildningsstyrelsen 2018) genom crowdsourcing experiment.},
}

% Chapter in a Festschrift. The title carries no trailing period: the
% bibliography style supplies final punctuation.
@incollection{volodina-alfter-2022-icall-321984,
  author    = {Volodina, Elena and Alfter, David},
  title     = {{ICALL}: Research versus reality check},
  booktitle = {Live and Learn -- Festschrift in honor of Lars Borin},
  year      = {2022},
  publisher = {Institutionen för svenska, flerspråkighet och språkteknologi, Göteborgs universitet},
  address   = {Göteborg},
  pages     = {145--152},
  isbn      = {978-91-87850-83-7},
  abstract  = {Intelligent Computer-Assisted Language Learning has been one of Lars Borin’s research interests. The work on the Lärka language learning platform has started under his coordination. We see it our mission to make the platform live and prosperous, and through it to stimulate research into Swedish as a second language. Below, we name some weaknesses we have identified in Lärka while working with a course of beginner Swedish and outline our plans for tackling those.},
}