Skip to main content
Språkbanken Text is a department within Språkbanken.

BibTeX

@comment{Workshop paper presenting the COCTAILL coursebook corpus. It shares its ISBN with the proceedings volume recorded under key volodina-etal-2014-proceedings-206135, so booktitle carries that volume's title and the NEALT series name/number live in series/volume.}
@inproceedings{volodina-etal-2014-what-206132,
	title        = {You get what you annotate: a pedagogically annotated corpus of coursebooks for {Swedish} as a {Second Language}},
	abstract     = {We present the COCTAILL corpus, containing over 700.000 tokens of Swedish texts from 12 coursebooks aimed at second/foreign language (L2) learning. Each text in the corpus is labelled with a proficiency level according to the CEFR proficiency scale. Genres, topics, associated activities, vocabulary lists and other types of information are annotated in the coursebooks to facilitate Second Language Acquisition (SLA)-aware studies and experiments aimed at Intelligent Computer-Assisted Language Learning (ICALL). Linguistic annotation in the form of parts-of-speech (POS; e.g. nouns, verbs), base forms (lemmas) and syntactic relations (e.g. subject, object) has been also added to the corpus. In the article we describe our annotation scheme and the editor we have developed for the content mark-up of the coursebooks, including the taxonomy of pedagogical activities and linguistic skills. Inter-annotator agreement has been computed and reported on a subset of the corpus. Surprisingly, we have not found any other examples of pedagogically marked-up corpora based on L2 coursebooks to draw on existing experiences. Hence, our work may be viewed as “groping in the darkness” and eventually a starting point for others. The paper also presents our first quantitative exploration of the corpus where we focus on textually and pedagogically annotated features of the coursebooks to exemplify what types of studies can be performed using the presented annotation scheme. We explore trends shown in use of topics and genres over proficiency levels and compare pedagogical focus of exercises across levels. The final section of the paper summarises the potential this corpus holds for research within SLA and various ICALL tasks.},
	booktitle    = {Proceedings of the third workshop on NLP for computer-assisted language learning at SLTC 2014, Uppsala University},
	series       = {NEALT Proceedings Series},
	author       = {Volodina, Elena and Pilán, Ildikó and Rødven-Eide, Stian and Heidarsson, Hannes},
	year         = {2014},
	volume       = {22},
	isbn         = {978-91-7519-175-1},
	pages        = {128--144},
}

@comment{Workshop paper on sentence-level readability for second language learners of Swedish. The percent sign in the abstract is escaped so the field stays LaTeX-safe if a style prints it.}
@inproceedings{pilan-etal-2014-rule-210940,
	title        = {Rule-based and machine learning approaches for second language sentence-level readability},
	abstract     = {We present approaches for the identification of sentences understandable by second language learners of Swedish, which can be used in automatically generated exercises based on corpora. In this work we merged methods and knowledge from machine learning-based readability research, from rule-based studies of Good Dictionary Examples and from second language learning syllabuses. The proposed selection methods have also been implemented as a module in a free web-based language learning platform. Users can use different parameters and linguistic filters to personalize their sentence search with or without a machine learning component assessing readability. The sentences selected have already found practical use as multiple-choice exercise items within the same platform. Out of a number of deep linguistic indicators explored, we found mainly lexical-morphological and semantic features informative for second language sentence-level readability. We obtained a readability classification accuracy result of 71\%, which approaches the performance of other models used in similar tasks. Furthermore, during an empirical evaluation with teachers and students, about seven out of ten sentences selected were considered understandable, the rule-based approach slightly outperforming the method incorporating the machine learning model.},
	booktitle    = {Proceedings of the Ninth Workshop on Innovative Use of NLP for Building Educational Applications, June 26, 2014 Baltimore, Maryland, USA},
	author       = {Pilán, Ildikó and Volodina, Elena and Johansson, Richard},
	year         = {2014},
	isbn         = {978-1-941643-03-7},
	pages        = {174--184},
}

@comment{Whole proceedings volume of the third NLP4CALL workshop. It has editors and no authors, hence the proceedings entry type.}
@proceedings{volodina-etal-2014-proceedings-206135,
	title        = {Proceedings of the third workshop on {NLP} for computer-assisted language learning at {SLTC} 2014, {Uppsala University}},
	abstract     = {The workshop series on NLP for Computer-Assisted Language Learning (NLP4CALL) is a meeting place for researchers working on the integration of Natural Language Processing and Speech technologies in CALL systems and exploring the theoretical and methodological issues arising in this connection. The papers in the proceedings volume from the third NLP4CALL workshop cover three main topic areas: resources for development of ICALL applications (e.g., learner corpora and coursebook corpora), tools and algorithms for the analysis of learner language (e.g., focusing on collocations, reading tasks, cloze items, pronunciation, spelling, level classification of learner production), and the generation of learning materials (e.g., exercise generators).},
	editor       = {Volodina, Elena and Borin, Lars and Pilán, Ildikó},
	year         = {2014},
	publisher    = {Linköping University Press},
	address      = {Linköping},
	isbn         = {978-91-7519-175-1},
}

@comment{LREC 2014 paper describing the Lärka language learning platform. Same proceedings volume (ISBN) as pilan-volodina-2014-reusing-200967, so booktitle uses the same wording as that entry.}
@inproceedings{volodina-etal-2014-flexible-201885,
	title        = {A flexible language learning platform based on language resources and web services},
	abstract     = {We present Lärka, the language learning platform of Språkbanken (the Swedish Language Bank). It consists of an exercise generator which reuses resources available through Språkbanken: mainly Korp, the corpus infrastructure, and Karp, the lexical infrastructure. Through Lärka we reach new user groups – students and teachers of Linguistics as well as second language learners and their teachers – and this way bring Språkbanken's resources in a relevant format to them. Lärka can therefore be viewed as a case of a real-life language resource evaluation with end users. In this article we describe Lärka's architecture, its user interface, and the five exercise types that have been released for users so far. The first user evaluation following in-class usage with students of linguistics, speech therapy and teacher candidates are presented. The outline of future work concludes the paper.},
	booktitle    = {Proceedings of LREC 2014, May 26-31, 2014, Reykjavik, Iceland},
	author       = {Volodina, Elena and Pilán, Ildikó and Borin, Lars and Tiedemann, Therese Lindström},
	year         = {2014},
	isbn         = {978-2-9517408-8-4},
	pages        = {3973--3978},
}

@comment{LREC 2014 paper on a semantic-role exercise generated from Swedish FrameNet. Same proceedings volume (ISBN) as volodina-etal-2014-flexible-201885.}
@inproceedings{pilan-volodina-2014-reusing-200967,
	title        = {Reusing {Swedish} {FrameNet} for training semantic roles},
	abstract     = {In this article we present the first experiences of reusing the Swedish FrameNet (SweFN) as a resource for training semantic roles. We give an account of the procedure we used to adapt SweFN to the needs of students of Linguistics in the form of an automatically generated exercise. During this adaptation, the mapping of the fine-grained distinction of roles from SweFN into learner-friendlier coarse-grained roles presented a major challenge. Besides discussing the details of this mapping, we describe the resulting multiple-choice exercise and its graphical user interface. The exercise was made available through Lärka, an online platform for students of Linguistics and learners of Swedish as a second language. We outline also aspects underlying the selection of the incorrect answer options which include semantic as well as frequency-based criteria. Finally, we present our own observations and initial user feedback about the applicability of such a resource in the pedagogical domain. Students' answers indicated an overall positive experience, the majority found the exercise useful for learning semantic roles.},
	booktitle    = {Proceedings of LREC 2014, May 26-31, 2014, Reykjavik, Iceland},
	author       = {Pilán, Ildikó and Volodina, Elena},
	year         = {2014},
	isbn         = {978-2-9517408-8-4},
	pages        = {1359--1363},
}