Skip to main content

BibTeX

% Journal article presenting the SweLL learner corpus of Swedish, from collection and pseudonymisation to annotation.
@article{volodina-etal-2019-swell-285609,
	title        = {The {SweLL} Language Learner Corpus: From Design to Annotation},
	abstract     = {The article presents a new language learner corpus for Swedish, SweLL, and the methodology from collection and pseudonymisation to protect personal information of learners to annotation adapted to second language learning. The main aim is to deliver a well-annotated corpus of essays written by second language learners of Swedish and make it available for research through a browsable environment. To that end, a new annotation tool and a new project management tool have been implemented, – both with the main purpose to ensure reliability and quality of the final corpus. In the article we discuss reasoning behind metadata selection, principles of gold corpus compilation and argue for separation of normalization from correction annotation.},
	journal      = {Northern European Journal of Language Technology},
	author       = {Volodina, Elena and Granstedt, Lena and Matsson, Arild and Megyesi, Beáta and Pilán, Ildikó and Prentice, Julia and Rosén, Dan and Rudebeck, Lisa and Schenström, Carl-Johan and Sundberg, Gunlög and Wirén, Mats},
	year         = {2019},
	volume       = {6},
	pages        = {67--104},
}

% Conference abstract (LCR-2019 book of abstracts) on the SVALA annotation tool; no pages, editor, or publisher are recorded for it.
@inproceedings{volodina-etal-2019-svala-285617,
	title        = {{SVALA}: An Annotation Tool for Learner Corpora Generating Parallel Texts},
	abstract     = {Learner corpora are actively used for research on Language Acquisition and in Learner Corpus Research (LCR). The data is, however, very expensive to collect and manually annotate, and includes steps like anonymization, normalization, error annotation, linguistic annotation. In the past, projects often re-used tools from a number of different projects for the above steps. As a result, various input and output formats between the tools needed to be converted, which increased the complexity of the task. In the present project, we are developing a tool that handles all of the above-mentioned steps in one environment maintaining a stable interpretable format between the steps. A distinguishing feature of the tool is that users work in a usual environment (plain text) while the tool visualizes all performed edits via a graph that links an original learner text with an edited one, token by token.},
	booktitle    = {Learner Corpus Research conference (LCR-2019), Warsaw, 12--14 September 2019, Book of abstracts},
	author       = {Volodina, Elena and Matsson, Arild and Rosén, Dan and Wirén, Mats},
	year         = {2019},
}

% CLARIN Annual Conference 2018 selected paper describing the SVALA tool's pseudonymization, correction, and labelling workflow.
@inproceedings{wiren-etal-2018-svala-285624,
	title        = {{SVALA}: Annotation of Second-Language Learner Text Based on Mostly Automatic Alignment of Parallel Corpora},
	abstract     = {Annotation of second-language learner text is a cumbersome manual task which in turn requires interpretation to postulate the intended meaning of the learner’s language. This paper describes SVALA, a tool which separates the logical steps in this process while providing rich visual support for each of them. The first step is to pseudonymize the learner text to fulfil the legal and ethical requirements for a distributable learner corpus. The second step is to correct the text, which is carried out in the simplest possible way by text editing. During the editing, SVALA automatically maintains a parallel corpus with alignments between words in the learner source text and corrected text, while the annotator may repair inconsistent word alignments. Finally, the actual labelling of the corrections (the postulated errors) is performed. We describe the objectives, design and workflow of SVALA, and our plans for further development.},
	booktitle    = {Selected papers from the CLARIN Annual Conference 2018, Pisa, 8--10 October 2018},
	editor       = {Skadina, Inguna and Eskevich, Maria},
	author       = {Wirén, Mats and Matsson, Arild and Rosén, Dan and Volodina, Elena},
	year         = {2018},
	publisher    = {Linköping University Electronic Press, Linköpings universitet},
	address      = {Linköping},
	isbn         = {978-91-7685-034-3},
}

% IWCS 2019 workshop paper on ImageTTR; the volume editors are recorded in the editor field rather than inside booktitle.
@inproceedings{matsson-etal-2019-imagettr-284011,
	title        = {{ImageTTR}: Grounding {Type Theory with Records} in Image Classification for Visual Question Answering},
	abstract     = {We present ImageTTR, an extension to the Python implementation of Type Theory with Records (pyTTR) which connects formal record type representation with image classifiers implemented as deep neural networks. The Type Theory with Records framework serves as a knowledge representation system for natural language the representations of which are grounded in perceptual information of neural networks. We demonstrate the benefits of this symbolic and data-driven hybrid approach on the task of visual question answering.},
	booktitle    = {Proceedings of the IWCS 2019 Workshop on Computing Semantics with Types, Frames and Related Structures, May 24, 2019, Gothenburg, Sweden},
	editor       = {Osswald, Rainer and Retoré, Christian and Sutton, Peter},
	author       = {Matsson, Arild and Dobnik, Simon and Larsson, Staffan},
	year         = {2019},
	publisher    = {Association for Computational Linguistics},
	address      = {Stroudsburg, PA},
	isbn         = {978-1-950737-25-3},
}

% COLING 2020 paper on rule-based, on-the-fly pseudonymization of Swedish learner essays.
@inproceedings{volodina-etal-2020-towards-300069,
	title        = {Towards Privacy by Design in Learner Corpora Research: A Case of On-the-fly Pseudonymization of {Swedish} Learner Essays},
	abstract     = {This article reports on an ongoing project aiming at automatization of pseudonymization of learner essays. The process includes three steps: identification of personal information in an unstructured text, labeling for a category, and pseudonymization. We experiment with rule-based methods for detection of 15 categories out of the suggested 19 (Megyesi et al., 2018) that we deem important and/or doable with automatic approaches. For the detection and labeling steps, we use resources covering personal names, geographic names, company and university names and others. For the pseudonymization step, we replace the item using another item of the same type from the above-mentioned resources. Evaluation of the detection and labeling steps are made on a set of manually anonymized essays. The results are promising and show that 89\% of the personal information can be successfully identified in learner data, and annotated correctly with an inter-annotator agreement of 86\% measured as Fleiss kappa and Krippendorff's alpha.},
	booktitle    = {Proceedings of the 28th International Conference on Computational Linguistics (COLING), December 8--13, 2020, Barcelona, Spain (Online)},
	author       = {Volodina, Elena and Ali Mohammed, Yousuf and Derbring, Sandra and Matsson, Arild and Megyesi, Beáta},
	year         = {2020},
	publisher    = {International Committee on Computational Linguistics},
	isbn         = {978-1-952148-27-9},
}