Skip to main content
Språkbanken Text is a part of Språkbanken.

BibTeX

@comment{
	Conference paper from SLTC 2024 on detecting personal information in
	Swedish learner essays. Words that must keep their capitalisation under
	sentence-casing styles are brace-protected, and the conference date range
	in booktitle is written with an en dash (two hyphens).
}
@inproceedings{szawerna-etal-2024-swedish-346227,
	author       = {Szawerna, Maria Irena and Dobnik, Simon and Muñoz Sánchez, Ricardo and Volodina, Elena},
	title        = {{Swedish} Learner Essays Revisited: Further Insights into Detecting Personal Information},
	booktitle    = {The Tenth {Swedish} Language Technology Conference ({SLTC}), 27--29 November, 2024, Linköping, Sweden},
	year         = {2024},
	abstract     = {Personally Identifiable Information (PII) is pervasive in linguistic data, making open sharing thereof complicated from both the legal and ethical perspective. Simply redacting out the PIIs or replacing them with pseudonyms presupposes a detection step, where the personal information is identified. In this study, we expand the existing research on PII detection in unstructured data (learner essays) in Swedish, testing more Large Language Models (LLMs) on a larger amount of data. We compare three different LLMs, two Swedish (KB-BERT and AI Sweden’s RoBERTa) and one multilingual (M-BERT). We found that KB-BERT tends to be better than the other models but that there is some overlap in their performance.},
}