Skip to main content


	title        = {Reliability of Automatic Linguistic Annotation: Native vs Non-native Texts},
	abstract     = {We present the results of a manual evaluation of the performance of automatic linguistic annotation on three different datasets: (1) texts written by native speakers, (2) essays written by second language (L2) learners of Swedish in the original form and (3) the normalized versions of learner-written essays. The focus of the evaluation is on lemmatization, POS-tagging, word sense disambiguation, multi-word detection and dependency annotation. Two annotators manually went through the automatic annotation on a subset of the datasets and marked up all deviations based on their expert judgments and the guidelines provided. We report Inter-Annotator Agreement between the two annotators and accuracy for the linguistic annotation quality for the three datasets, by levels and linguistic features.},
	booktitle    = {Selected Papers from the CLARIN Annual Conference 2021, Virtual Event, 2021, 27–29 September / edited by Monica Monachini and Maria Eskevich},
	author       = {Volodina, Elena and Alfter, David and Lindström Tiedemann, Therese and Lauriala, Maisa and Piipponen, Daniela},
	year         = {2022},
	publisher    = {Linköping Electronic Conference},
	address      = {Linköping, Sweden},
	ISBN         = {978-91-7929-444-1},
	pages        = {151--167},