% Chapter "SweLL with pride" in the Huminfra handbook (University of Tartu Library, 2025).
% The mixed-case names SweLL and Huminfra are brace-protected so that styles which
% sentence-case titles do not flatten their capitalisation.
% NOTE(review): no editor or pages fields are recorded for this chapter; add them when known.
@incollection{volodina-etal-2025-swell-355697,
  author    = {Volodina, Elena and Masciolini, Arianna and Megyesi, Beáta and Prentice, Julia and Rudebeck, Lisa and Sundberg, Gunlög and Wirén, Mats},
  title     = {{SweLL} with pride: How to put a learner corpus to good use},
  booktitle = {{Huminfra} handbook: Empowering digital and experimental humanities},
  publisher = {University of Tartu Library},
  address   = {Tartu, Estonia},
  year      = {2025},
  isbn      = {9789908536125},
  abstract  = {Second language (L2) learner corpora are collections of language samples that demonstrate learners’ abilities to perform some learning tasks, e.g. an ability to write essays, answer to reading comprehension questions, or talk on a given topic. Such corpora are necessary for both empirical-based research within Second Language Acquisition (SLA), and for development of methods for automatic processing of such data. L2 corpora are notoriously difficult to collect, and their value depends to a greater degree on the representativeness and balance of the sampled data, type of associated metadata and reliability of manual annotations.
In this chapter we thoroughly describe the SweLL-gold corpus of L2 Swedish, its annotation, statistics and metadata, and showcase main types of its use, such as (1) in research on SLA through detailed instructions on how to perform corpus searches given SweLL-specific annotation, combined with guidelines for SVALA usage, a tool for correction annotation; and (2) in NLP research on problems such as grammatical error correction through guidelines on how to use the different available file formats that the SweLL-gold corpus is released in. Both cases are further supported by case studies and, where available, relevant scripts ready for reuse by researchers.},
}