Skip to main content
Språkbanken Text is a department within Språkbanken.

BibTeX

@comment{
  Workshop paper in the BEA 2024 proceedings.
  - The citation key is unchanged, so existing citations keep resolving.
  - Acronyms are brace-protected so sentence-casing styles do not lowercase them.
  - booktitle holds only the proceedings title; the event date is stored in
    month/eventdate and the place in address, so neither is printed twice.
  - Accented letters use BibTeX special-character escapes, which work with both
    classic BibTeX and Biber.
  - TODO: pages, DOI and URL are not recorded here; add them from the publisher's record.
}
@inproceedings{munozsanchez-etal-2024-harnessing-342122,
	author       = {Mu{\~n}oz S{\'a}nchez, Ricardo and Dobnik, Simon and Volodina, Elena},
	title        = {Harnessing {GPT} to Study Second Language Learner Essays: Can We Use Perplexity to Determine Linguistic Competence?},
	booktitle    = {Proceedings of the 19th Workshop on Innovative Use of {NLP} for Building Educational Applications ({BEA} 2024)},
	year         = {2024},
	month        = jun,
	eventdate    = {2024-06-20},
	address      = {Mexico City, Mexico},
	publisher    = {Association for Computational Linguistics},
	isbn         = {979-8-89176-100-1},
	abstract     = {Generative language models have been used to study a wide variety of phenomena in NLP. This allows us to better understand the linguistic capabilities of those models and to better analyse the texts that we are working with. However, these studies have mainly focused on text generated by L1 speakers of English. In this paper we study whether linguistic competence of L2 learners of Swedish (through their performance on essay tasks) correlates with the perplexity of a decoder-only model (GPT-SW3). We run two sets of experiments, doing both quantitative and qualitative analyses for each of them. In the first one, we analyse the perplexities of the essays and compare them with the CEFR level of the essays, both from an essay-wide level and from a token level. In our second experiment, we compare the perplexity of an L2 learner essay with a normalised version of it. We find that the perplexity of essays tends to be lower for higher CEFR levels and that normalised essays have a lower perplexity than the original versions. Moreover, we find that different factors can lead to spikes in perplexity, not all of them being related to L2 learner language.},
}