BibTeX

% Workshop paper from CALD-pseudo 2024. "Swedish" in the title is brace-protected so sentence-casing styles keep its capital.
@inproceedings{szawerna-etal-2024-detecting-336385,
	author       = {Szawerna, Maria Irena and Dobnik, Simon and Muñoz Sánchez, Ricardo and Lindström Tiedemann, Therese and Volodina, Elena},
	title        = {Detecting Personal Identifiable Information in {Swedish} Learner Essays},
	booktitle    = {Proceedings of the Workshop on Computational Approaches to Language Data Pseudonymization ({CALD-pseudo} 2024), March 21, 2024, St. Julian’s, Malta},
	year         = {2024},
	publisher    = {Association for Computational Linguistics},
	isbn         = {979-8-89176-085-1},
	abstract     = {Linguistic data can — and often does — contain PII (Personal Identifiable Information). Both from a legal and ethical standpoint, the sharing of such data is not permissible. According to the GDPR, pseudonymization, i.e. the replacement of sensitive information with surrogates, is an acceptable strategy for privacy preservation. While research has been conducted on the detection and replacement of sensitive data in Swedish medical data using Large Language Models (LLMs), it is unclear whether these models handle PII in less structured and more thematically varied texts equally well. In this paper, we present and discuss the performance of an LLM-based PII-detection system for Swedish learner essays.},
}
Sidansvarig: sb-webb