Skip to main content

BibTeX

@comment{Workshop paper in the BUCC 2024 proceedings (LREC-COLING 2024). Acronyms in title and booktitle are brace-protected so that sentence-casing styles keep their capitals.}
@inproceedings{masciolini-2024-bootstrapping-338425,
	title        = {Bootstrapping the Annotation of {UD} Learner Treebanks},
	abstract     = {Learner data comes in a variety of formats, making corpora difficult to compare with each other. Universal Dependencies (UD) has therefore been proposed as a replacement for the various ad-hoc annotation schemes. Nowadays, the time-consuming task of building a UD treebank often starts with a round of automatic annotation. The performance of the currently available tools trained on standard language, however, tends to decline substantially upon application to learner text. Grammatical errors play a major role, but a significant performance gap has been observed even between standard test sets and normalized learner essays. In this paper, we investigate how to best bootstrap the annotation of UD learner corpora. In particular, we want to establish whether Target Hypotheses (THs), i.e. grammar-corrected learner sentences, are suitable training data for fine-tuning a parser aimed for original (ungrammatical) L2 material. We perform experiments using English and Italian data from two of the already available UD learner corpora. Our results show manually annotated THs to be highly beneficial and suggest that even automatically parsed sentences of this kind might be helpful, if available in sufficiently large amounts.},
	booktitle    = {Proceedings of the 17th Workshop on Building and Using Comparable Corpora ({BUCC}) @ {LREC-COLING} 2024, 20 May, 2024, Torino, Italia},
	author       = {Masciolini, Arianna},
	year         = {2024},
	publisher    = {ELRA},
	isbn         = {978-2-493814-31-9},
}