% Workshop paper in the RESOURCEFUL-2025 proceedings.
% The volume editors are stored in `editor`; the event date and venue are kept
% in `eventdate`/`venue` (used by biblatex, ignored by classic BibTeX styles).
% NOTE(review): the surname split for the editors (e.g. "Arhar Holdt") is
% inferred from the original slash-separated list -- confirm against the volume.
@inproceedings{ilinykh-szawerna-2025-i-349004,
  author    = {Ilinykh, Nikolai and Szawerna, Maria Irena},
  title     = {``{I} Need More Context and an {English} Translation'': Analysing How {LLMs} Identify Personal Information in {Komi}, {Polish}, and {English}},
  booktitle = {Proceedings of the Third Workshop on Resources and Representations for Under-Resourced Languages and Domains ({RESOURCEFUL-2025})},
  editor    = {Arhar Holdt, {\v{S}}pela and Ilinykh, Nikolai and Scalvini, Barbara and Bruton, Micaella and Debess, Iben Nyholm and Tudor, Crina Madalina},
  year      = {2025},
  month     = mar,
  eventdate = {2025-03-02},
  venue     = {Tallinn, Estonia},
  publisher = {University of Tartu Library},
  address   = {Tartu, Estonia},
  pages     = {165--178},
  isbn      = {978-9908-53-121-2},
  abstract  = {In this paper we present a pilot study and a qualitative analysis of the errors made by three large language models (LLMs) prompted to identify personal information (PI) in texts written in languages with varying resource availability: Komi (extremely low), Polish (medium), and English (high). Our analysis shows that LLMs perform better in detection of PI when provided with JSON-eliciting prompts. We also conjecture that the rich morphology and inflectionality of languages like Komi and Polish might affect the models' performance. The small-scale parallel dataset of text that we introduce here can be used as a starting point in developing benchmarks for evaluation of PI detection with longer textual contexts and LLMs.},
}