Skip to main content
Språkbanken Text is a department within Språkbanken.

BibTeX

@article{volodina-etal-2022-crowdsourcing-336551,
	title        = {Crowdsourcing ratings for single lexical items: a core vocabulary perspective},
	abstract     = {In this study, we investigate theoretical and practical issues connected to differentiating between core and peripheral vocabulary at different levels of linguistic proficiency using statistical approaches combined with crowdsourcing. We also investigate whether crowdsourcing second language learners’ rankings can be used for assigning levels to unseen vocabulary. The study is performed on Swedish single-word items.
The four hypotheses we examine are: (1) there is core vocabulary for each proficiency level, but this is only true until CEFR level B2 (upper-intermediate); (2) core vocabulary shows more systematicity in its behavior and usage, whereas peripheral items have more idiosyncratic behavior; (3) given that we have truly core items (aka anchor items) for each level, we can place any new unseen item in relation to the identified core items by using a series of comparative judgment tasks, this way assigning a “target” level for a previously unseen item; and (4) non-experts will perform on par with experts in a comparative judgment setting. The hypotheses have been largely confirmed: In relation to (1) and (2), our results show that there seems to be some systematicity in core vocabulary for early to mid-levels (A1-B1) while we find less systematicity for higher levels (B2-C1). In relation to (3), we suggest crowdsourcing word rankings using comparative judgment with known anchor words as a method to assign a “target” level to unseen words. With regard to (4), we confirm the previous findings that non-experts, in our case language learners, can be effectively used for the linguistic annotation tasks in a comparative judgment setting.},
	journal      = {Slovenščina 2.0: Empirical, Applied and Interdisciplinary Research},
	author       = {Volodina, Elena and Alfter, David and Lindström Tiedemann, Therese},
	year         = {2022},
	volume       = {10},
	number       = {2},
	pages        = {5--61},
}

@comment{NOTE(review): the fifth author's given name is recorded as Daniala, which may be a misspelling of Daniela; verify against the published chapter before changing it. The publisher value was completed to match the other Linköping proceedings entry in this file; confirm.}
@incollection{volodina-etal-2022-reliability-321988,
	title        = {Reliability of Automatic Linguistic Annotation: Native vs Non-native Texts},
	abstract     = {We present the results of a manual evaluation of the performance of automatic linguistic annotation on three different datasets: (1) texts written by native speakers, (2) essays written by second language (L2) learners of Swedish in the original form and (3) the normalized versions of learner-written essays. The focus of the evaluation is on lemmatization, POS-tagging, word sense disambiguation, multi-word detection and dependency annotation. Two annotators manually went through the automatic annotation on a subset of the datasets and marked up all deviations based on their expert judgments and the guidelines provided. We report Inter-Annotator Agreement between the two annotators and accuracy for the linguistic annotation quality for the three datasets, by levels and linguistic features.},
	booktitle    = {Selected Papers from the CLARIN Annual Conference 2021, Virtual Event, 2021, 27–29 September},
	editor       = {Monachini, Monica and Eskevich, Maria},
	author       = {Volodina, Elena and Alfter, David and Lindström Tiedemann, Therese and Lauriala, Maisa and Piipponen, Daniala},
	year         = {2022},
	publisher    = {Linköping Electronic Conference Proceedings},
	address      = {Linköping, Sweden},
	ISBN         = {978-91-7929-444-1},
	pages        = {151--167},
}

@comment{This record describes a whole proceedings volume, so it uses the proceedings entry type with an editor list. NOTE(review): the people listed are assumed to be the volume editors; confirm against the publication.}
@proceedings{alfter-etal-2022-proceedings-321964,
	title        = {Proceedings of the 11th Workshop on Natural Language Processing for Computer-Assisted Language Learning ({NLP4CALL} 2022)},
	abstract     = {The volume contains articles reviewed and presented at NLP4CALL workshop. The workshop series on Natural Language Processing (NLP) for Computer-Assisted Language Learning (NLP4CALL) is a meeting place for researchers working on the integration of Natural Language Processing and Speech Technologies in CALL systems and exploring the theoretical and methodological issues arising in this connection. The latter includes, among others, the integration of insights from Second Language Acquisition (SLA) research, and the promotion of “Computational SLA” through setting up Second Language research infrastructures.},
	editor       = {Alfter, David and Volodina, Elena and François, Thomas and Desmet, Piet and Cornillie, Frederik and Jönsson, Arne and Rennes, Evelina},
	year         = {2022},
	publisher    = {Linköping Electronic Conference Proceedings},
	address      = {Linköping, Sweden},
	ISBN         = {978-91-7929-460-1},
}

@inproceedings{lindstromtiedemann-etal-2022-cefr-321899,
	title        = {{CEFR}-nivåer och svenska flerordsuttryck},
	abstract     = {När vi lär oss ett nytt språk ska vi inte bara lära oss enstaka ord och hur vi använder dessa, utan vi måste också lära oss vilka ordkombinationer som är ”fasta uttryck” till betydelsen (t.ex. hälsa på någon) eller till formen (t.ex. lättare sagt än gjort) eller båda delarna (t.ex. huller om buller). Enligt en del studier kan dessa uttryck utgöra så mycket som 50 \% av vokabulären i ett språk som förstaspråk (L1) eller ännu mer (Jackendoff 1997; Erman 2007, 28). Men det är möjligt att de är vanligare i vardagligt språk och talspråk (Prentice \& Sköldberg 2013). Flerordsenheter kan vara problematiska för andraspråkstalare (Nesselhauf 2003, 223) till och med på avancerad nivå (jfr Pawley \& Syder 1983; Wray \& Perkins 2000; Nesselhauf 2003; Prentice 2010). Samtidigt är de en helt nödvändig del av språket (Nesselhauf 2003, 223) och kan utmärka andraspråkstalarna som icke-modersmålstalare (Pawley \& Syder 1983; Wray 2002). Flerordsuttryck är alltså en värdefull del av andraspråkskompetensen (se även Paquot 2019) och något som är viktigt att studera hur vi på bästa sätt introducerar för L2-talaren och om de kan kopplas till nivåer i bedömning.
I den här studien presenterar vi resultat kring förståelsen av flerordsuttryck i svenska som andraspråk i relation till färdighetsnivåerna enligt Gemensam Europeisk Referensram för Språk (GERS eller CEFR, Common European Framework of Reference) (COE 2001; 2018; Skolverket 2009; Utbildningsstyrelsen 2018) genom crowdsourcing experiment.},
	booktitle    = {Svenskan i Finland 19: föredrag vid den nittonde sammankomsten för beskrivningen av svenskan i Finland, Vasa den 6--7 maj 2021},
	editor       = {Björklund, Siv and Haagensen, Bodil and Nordman, Marianne and Westerlund, Anders},
	author       = {Lindström Tiedemann, Therese and Alfter, David and Volodina, Elena},
	year         = {2022},
	publisher    = {Svensk-Österbottniska Samfundet},
	address      = {Vasa},
	ISBN         = {978-952-69650-5-5},
}

@incollection{volodina-alfter-2022-icall-321984,
	title        = {{ICALL}: Research versus reality check},
	abstract     = {Intelligent Computer-Assisted Language Learning has been one of Lars Borin’s research interests. The work on the Lärka language learning platform has started under his coordination. We see it our mission to make the platform live and prosperous, and through it to stimulate research into Swedish as a second language. Below, we name some weaknesses we have identified in Lärka while working with a course of beginner Swedish and outline our plans for tackling those.},
	booktitle    = {Live and Learn -- Festschrift in honor of Lars Borin},
	author       = {Volodina, Elena and Alfter, David},
	year         = {2022},
	publisher    = {Institutionen för svenska, flerspråkighet och språkteknologi, Göteborgs universitet},
	address      = {Göteborg},
	ISBN         = {978-91-87850-83-7},
	pages        = {145--152},
}