% Corpus resource record; the abstract lists the two parts of the corpus (FWP and DocSouth selections).
@misc{elmerot-etal-2026-voices-360700,
  author   = {Elmerot, Irene and Olsson, Leif-Jöran and Rönnbäck, Klas},
  title    = {Voices of formerly Enslaved Corpus},
  year     = {2026},
  abstract = {A corpus of transcribed and annotated narratives from informants who were formerly enslaved. Texts selected are mainly in vernacular English, but partly also in standard English [eng]. There are two main parts:
Selection of narratives collected in the Federal Writers' Project (FWP) collected in late 1930's and published in 1941.
Selection of narratives collected as part of the Documenting the American South Collection (DocSouth)
The Corpus has been developed in collaboration with Språkbanken Text.},
}
% Conference paper in the LREC 2026 main proceedings.
@inproceedings{szawerna-suchardt-2026-fill-362803,
  author    = {Szawerna, Maria Irena and Suchardt, Jacob Lee},
  title     = {Fill-in-the-Blanks: Automatic Generation and Evaluation of Language Models' Pseudonyms for English and Swedish Texts},
  booktitle = {Proceedings of the Fifteenth Language Resources and Evaluation Conference (LREC 2026)},
  year      = {2026},
  pages     = {1155--1169},
  publisher = {European Language Resources Association (ELRA)},
  isbn      = {978-2-493814-49-4},
  abstract  = {While considerable effort has gone into developing solutions for detecting Personally Identifiable Information (PII) in linguistic data, less research has gone into automating the generation of appropriate pseudonyms and developing evaluation methods, both relevant for the creation of privacy-friendly language resources. We conduct pilot experiments using Masked and Generative Large Language Models to generate predictions for redacted PII-spans in a cloze-like fashion for English legal texts and parallel news articles in Swedish and English. Furthermore, we explore metrics for automatic evaluation of the generated pseudonyms in the legal data, and investigate the effect of part-of-speech constraints on performance. For the parallel, multilingual data, we contribute our manual PII-annotation and conduct a fine-grained error analysis across two of our pseudonym generation methods and a baseline. Our results illustrate the complexity of pseudonym evaluation and the particular challenge of automatic, at-scale evaluation as well as the models’ tendency to predict prototypical and even stereotypical answers.},
}
% Workshop paper in the joint LEGAL2026 and CALD-pseudo 2026 proceedings.
@inproceedings{szawerna-dobnik-2026-birds-362843,
  author    = {Szawerna, Maria Irena and Dobnik, Simon},
  title     = {Birds of a Feather: Do Embedding Representations of Personal Information Flock Together?},
  booktitle = {Proceedings of the Joint Workshop on Legal and Ethical Issues in Human Language Technologies and Computational Approaches to Language Data Pseudonymization, Anonymization, De-identification, and Data Privacy (LEGAL2026 and CALD-pseudo 2026) @ LREC 2026},
  year      = {2026},
  pages     = {62--72},
  publisher = {ELRA},
  isbn      = {978-2-493814-86-9},
  abstract  = {Personally identifiable information (PII or PI) can appear in a wide variety of linguistic data, posing both ethical and legal challenges for conducting research and developing applications involving such texts. In this paper, we investigate the alignment between automatic clustering of FastText and Transformer embedding representations of personal information spans sourced from essays written by adult learners of Swedish as a second language and the general and detailed personal information labels assigned to these spans by expert annotators. Our goals are to assess the extent of overlap between the semantic categories and evaluate the semantic coherence of the human-assigned classes, which may have implications for de-identification procedures. We observe that while contextual embeddings, especially ones from a specialized word-in-context model, produce relatively good clustering results, they only partly map to the human understanding of how to classify personal information.},
}
% Edited dictionary (app edition per the abstract). Typed as a book with an editor list,
% because edited_book is not an entry type that BibTeX styles or biblatex define.
@book{skoldberg-etal-2026-svensk-360324,
  editor    = {Sköldberg, Emma and Blensenius, Kristian and Forsberg, Markus and Holmer, Louise and Landqvist, Hans and Lillieström, Ann and Petersson, Stellan and Smallbone, Nicholas and Uppström, Jonatan and Wang, Petrus},
  title     = {Svensk ordbok utgiven av Svenska Akademien},
  year      = {2026},
  publisher = {Svenska Akademien},
  address   = {Stockholm},
  abstract  = {Denna app är utgiven av Svenska Akademien och innehåller en uppdatering av den andra upplagan av Svensk ordbok utgiven av Svenska Akademien, vilken publicerades 2021. Ordboken utarbetas vid Språkbanken, Institutionen för svenska, flerspråkighet och språkteknologi vid Göteborgs universitet. Appen är utvecklad av Petrus Wang på Wang.se i samarbete med ordboksredaktionen vid Språkbanken, Institutionen för svenska, flerspråkighet och språkteknologi, Göteborgs universitet. Appen släpptes 28 januari 2026 i iOS App Store.},
}
% Conference talk on SAOL 15 given at Mediespråk XXI; no pages or publisher recorded.
@inproceedings{landqvist-2026-saol-359297,
  author    = {Landqvist, Hans},
  title     = {SAOL 15 = tradition och innovation!},
  booktitle = {Mediespråk XXI, 2026, 27–28 januari 2026 i Astra, Åbo Akademi},
  year      = {2026},
  abstract  = {Den första upplagan av Svenska Akademiens ordlista (SAOL) utkom 1874, och den femtonde upplagan utges 2026. SAOL 15 publiceras som tryckt bok, som appar för smarttelefoner och på Svenska Akademiens ordboksportal svenska.se. Efter en kort presentation av SAOL i allmänhet och SAOL 15 i synnerhet kommer Hans Landqvist att fokusera på tre frågor:
(1) Hur hanteras laddade/kontroversiella ord i SAOL 15?
(2) Hur hanteras finlandismer och andra finländska inslag i SAOL 15?
(3) Är SAOL 15 tänkt att vara en deskriptiv, en normativ eller en deskriptiv OCH normativ ordlista?
},
}
% Book chapter in the volume "Korpus třicetiletý".
% The page range uses the BibTeX double hyphen so styles recognise it as a range.
@incollection{elmerot-etal-2026-person-358926,
  author    = {Elmerot, Irene and Hedin, Tora and Thál, Jonáš},
  title     = {A person, a man or something in between — a short study of the gender-neutral personal pronoun “hen” and its translations.},
  booktitle = {Korpus třicetiletý},
  year      = {2026},
  pages     = {113--124},
  publisher = {Nakladatelství Lidové noviny},
  address   = {Prag},
  isbn      = {978-80-7422-137-8},
  abstract  = {Införandet av det svenska könsneutrala pronomenet ’hen’ utgör en betydande språklig förändring inom de indoeuropeiska språken. Detta pronomen skapades som ett alternativ till de könsspecifika pronomina ’hon’ och ’han’ för de fall där personens kön är okänt eller irrelevant. Även om dess användning har ökat, och det till och med ingår i svenska versioner av EU-dokument, är frågan om hur hen ska översättas till språk utan motsvarande neutralt pronomen fortfarande inte tillräckligt undersökt. Syftet med detta kapitel är att analysera befintliga översättningslösningar av pronominet ’hen’ i parallellkorpusen InterCorp v16, särskilt i tjeckiska.},
}
% Single-page article in the bulletin Klarspråk, volume 2026, issue 1.
@article{holmer-2026-cykelgata-359810,
  author  = {Holmer, Louise},
  title   = {Cykelgata och panelist i nya SAOL},
  journal = {Klarspråk. Bulletin från Språkrådet},
  year    = {2026},
  volume  = {2026},
  number  = {1},
  pages   = {1},
}
% Minimal record: only author, title and year are available for this item.
@misc{petersson-2026-digitala-363098,
  author = {Petersson, Stellan},
  title  = {Digitala metoder för ord inom miljö- och hållbarhetsfältet},
  year   = {2026},
}
% Chapter in the Svenskan i Finland 21 volume; the volume editors are named inside booktitle.
% The ampersand in booktitle is escaped so the field typesets under LaTeX, and the page
% range uses the BibTeX double hyphen.
@incollection{landqvist-etal-2026-bygga-363139,
  author    = {Landqvist, Hans and Pilke, Nina and Nissilä, Niina},
  title     = {Att bygga med termer och med tegel: en fallstudie av nordiskt terminologiskt (sam)arbete},
  booktitle = {Elisa Risto, Riitta Kosunen, Kirsi Lepistö, Marjo Ohtamaa \& Paula Rossi (red.), Svenskan i Finland 21. FÖREDRAG VID DEN TJUGOFÖRSTA SAMMANKOMSTEN FÖR BESKRIVNINGEN AV SVENSKAN I FINLAND ULEÅBORG DEN 3–4 OKTOBER 2024},
  year      = {2026},
  pages     = {151--165},
  publisher = {Faculty of Humanities, University of Oulu},
  address   = {Oulu},
  isbn      = {978-952-62-4892-9},
}
% Article in Språkbruk presenting SAOL 15.
% NOTE(review): number holds an ISO-style date rather than an issue number; confirm against the source.
@article{landqvist-2026-saol-360405,
  author   = {Landqvist, Hans},
  title    = {SAOL 15 – en uppdaterad klassiker},
  journal  = {Språkbruk},
  year     = {2026},
  number   = {2026-03-05},
  abstract = {I artikeln presenteras den femtonde upplagan av Svenska Akademiens ordlista, SAOL 15, utgiven 2026.},
}
% Shared-task paper in the EVALITA 2026 final workshop proceedings.
% NOTE(review): address may be the workshop venue rather than the publisher's city; confirm.
@inproceedings{greco-etal-2026-stereobusters-361361,
  author    = {Greco, Salvatore and La Quatra, Moreno and Marchiori Manerba, Marta and Muñoz Sánchez, Ricardo and Cignarella, Alessandra Teresa},
  title     = {StereoBusters at GSI:detect: LLM-Based Detection and Human Qualitative Analysis of Gender Stereotypes in Italian Short Texts},
  booktitle = {Proceedings of the 9th Evaluation Campaign of Natural Language Processing and Speech Tools for Italian. Final Workshop (EVALITA 2026)},
  year      = {2026},
  publisher = {CEUR Workshop Proceedings},
  address   = {Bari, Italy},
}
% Data descriptor article in Scientific Data, volume 13, issue 1; no page range recorded.
@article{elmerot-etal-2026-voices-362492,
  author   = {Elmerot, Irene and Olsson, Leif-Jöran and Rönnbäck, Klas},
  title    = {Voices of formerly enslaved: A new text corpus of narratives by formerly enslaved persons},
  journal  = {Scientific Data},
  year     = {2026},
  volume   = {13},
  number   = {1},
  abstract = {These data consist of newly OCRed and annotated narratives, both autobiographical texts written by, and interviews with, formerly enslaved persons of African descent in the United States of America and the Caribbean, including extensive time-related and geographical metadata. The texts authored by these individuals span from the years 1795 to approximately 1900, while the interviews were conducted in the 1930s. The former are written in standardised English from that time, whereas the latter often are written down in mediated, vernacular form, causing issues in lemmatisation and part-of-speech tagging. The aim is to create openly accessible corpora that can be utilised for the purpose of researching how these formerly enslaved persons described their own lives.},
}
% Record for the whole LEGAL2026 and CALD-pseudo 2026 workshop proceedings volume.
% The ISBN is stored without surrounding whitespace so it matches other records exactly.
% NOTE(review): the listed people may be the volume editors, in which case a proceedings
% entry with an editor field would fit better; confirm before changing the type.
@misc{siegert-etal-2026-proceedings-362841,
  author    = {Siegert, Ingo and Szawerna, Maria Irena and Choukri, Khalid and Dobnik, Simon and Kamocki, Paweł and Lindström Tiedemann, Therese and Lison, Pierre and Muñoz Sánchez, Ricardo and Pilán, Ildikó and Södergård, Lisa and Talmoudi, Kossay and Volodina, Elena and Vu, Xuan-Son},
  title     = {Proceedings of the Joint Workshop on Legal and Ethical Issues in Human Language Technologies and Computational Approaches to Language Data Pseudonymization, Anonymization, De-identification, and Data Privacy (LEGAL2026 and CALD-pseudo 2026) @ LREC 2026},
  year      = {2026},
  publisher = {ELRA},
  address   = {Paris},
  isbn      = {978-2-493814-86-9},
}
% Workshop paper in the RESOURCEFUL 2026 proceedings; booktitle also carries the date, place and volume editors.
% Title and abstract are stored as single lines (no hard line breaks inside the values),
% and the page range uses the BibTeX double hyphen.
@inproceedings{johansson-etal-2026-exploring-363172,
  author    = {Johansson, Martin and Waginder, Selma and Dannélls, Dana},
  title     = {Exploring the similarities and differences between VLM-driven and traditional OCR for Historical Swedish Data},
  booktitle = {Proceedings of The Fourth Workshop on the Role of Resources in the Age of Large Language Models (RESOURCEFUL 2026), May 11, 2026, Palma de Mallorca, Spain / Felix Morger, Nikolai Ilinykh, Barbara Scalvini, Simon Dobnik, Dana Dannélls (eds.)},
  year      = {2026},
  pages     = {193--199},
  publisher = {ELRA Language Resources Association (ELRA)},
  address   = {Paris},
  isbn      = {978-2-493814-94-4},
  abstract  = {Recent Swedish OCR efforts rely primarily on traditional OCR methods, including deep CNN–LSTM hybrid neural networks and transformer-based models. Some approaches have also demonstrated the applicability of VLM-driven OCR to historical material. However, to date, no studies have examined in depth the performance of VLM-based OCR on historical Swedish sources. In this paper, we ask: How do transformers and VLMs differ in character- and word-level recognition performance across typefaces, and what qualitative differences can be observed in their error patterns? We show that fine-tuned versions of the Alibaba Cloud Qwen3-VL-8B-Instruct and Qwen3-VL-2B-Instruct, combined with a simple repetition-trimming step, outperform conventional OCR systems. Remaining errors are primarily attributable to challenges associated with the Blackletter typeface and formatting issues, such as missing or extra line breaks, characters, and spaces. Even when characters are correctly recognized, formatting inconsistencies can substantially increase transcription error rates.},
}
% Edited word list (SAOL, 15th edition). Typed as a book with an editor list,
% because edited_book is not an entry type that BibTeX styles or biblatex define.
@book{holmer-etal-2026-svenska-359503,
  editor    = {Holmer, Louise and Blensenius, Kristian and Borin, Lars and Forsberg, Markus and Landqvist, Hans and Lillieström, Ann and Petersson, Stellan and Sköldberg, Emma and Smallbone, Nicholas and Uppström, Jonatan},
  title     = {Svenska Akademiens ordlista över svenska språket, 15 upplagan},
  year      = {2026},
  publisher = {Norstedts},
  address   = {Stockholm},
  isbn      = {978-91-1-314575-4},
  abstract  = {SAOL är en ordlista över det svenska samtidsspråket. SAOL 15 ger uppgifter om ordens stavning och böjning och i många fall uttal och betydelse. SAOL 15 finns också tillgänglig som app för Android och iOS och på ordboksportalen svenska.se.},
}
% Keynote presentation at the SFÖ-SAT 2026 conference (per the abstract).
% NOTE(review): booktitle names an organisation rather than a proceedings title; confirm.
@inproceedings{elmerot-2026-sprakets-361997,
  author    = {Elmerot, Irene},
  title     = {Språkets makt i praktiken, från pronomen till demokratiskt försvar},
  booktitle = {Center for Open Science},
  year      = {2026},
  abstract  = {En keynote-presentation på konferensen för Sveriges facköversättare och auktoriserade tolkar (SFÖ-SAT) 2026. Fokus ligger på hur språk är makt och vad språkarbetare kan tänka på när de får sådan makt i sina händer eller munnar.},
}
% Talk on SAOL 15 at Språkvårdsdagen/Hugo Bergroth-dagen 2026; no pages or publisher recorded.
@inproceedings{landqvist-2026-saol-360807,
  author    = {Landqvist, Hans},
  title     = {SAOL 15: några nyheter i den senaste upplagan av Svenska Akademiens ordlista},
  booktitle = {Språkvårdsdagen/Hugo Bergroth-dagen 2026},
  year      = {2026},
  abstract  = {Den första upplagan av Svenska Akademiens ordlista (SAOL) utkom 1874, och den femtonde upplagan utgavs 2026. SAOL 15 är publicerad som tryckt bok, som appar för smarttelefoner och på Svenska Akademiens ordboksportal svenska.se. Efter en kort presentation av SAOL i allmänhet och SAOL 15 i synnerhet ligger fokus på tre frågor: (1) Hur hanteras känsliga/laddade/kontroversiella ord i SAOL 15? (2) Hur hanteras finlandismer och andra finländska inslag i SAOL 15? (3) Är SAOL 15 tänkt att vara en deskriptiv, en normativ, en deskriptiv OCH normativ ordlista och/eller en ”och så vidare” ordlista? },
}