Hoppa till huvudinnehåll
Språkbanken Text är en avdelning inom Språkbanken.

BibTeX

@inproceedings{masciolini-etal-2025-multigec-348546,
	author       = {Masciolini, Arianna and Caines, Andrew and De Clercq, Orphée and Kruijsbergen, Joni and Kurfalı, Murathan and Muñoz Sánchez, Ricardo and Volodina, Elena and Östling, Robert},
	title        = {The {MultiGEC-2025} Shared Task on Multilingual Grammatical Error Correction at {NLP4CALL}},
	booktitle    = {Proceedings of the 14th Workshop on Natural Language Processing for Computer Assisted Language Learning},
	year         = {2025},
	publisher    = {University of Tartu Library},
	address      = {Tartu, Estonia},
	venue        = {Tallinn, Estonia},
	isbn         = {978-9908-53-112-0},
	abstract     = {This paper reports on MultiGEC-2025, the first shared task in text-level Multilingual Grammatical Error Correction. The shared task features twelve European languages (Czech, English, Estonian, German, Greek, Icelandic, Italian, Latvian, Russian, Slovene, Swedish and Ukrainian) and is organized into two tracks, one for systems producing minimally corrected texts, thus preserving as much as possible of the original language use, and one dedicated to systems that prioritize fluency and idiomaticity. We introduce the task setup, data, evaluation metrics and baseline; present results obtained by the submitted systems and discuss key takeaways and ideas for future work.},
}

@techreport{masciolini-etal-2025-overview-347102,
	author       = {Masciolini, Arianna and Caines, Andrew and De Clercq, Orphée and Kruijsbergen, Joni and Kurfalı, Murathan and Muñoz Sánchez, Ricardo and Volodina, Elena and Östling, Robert and Allkivi, Kais and Arhar Holdt, Špela and Auziņa, Ilze and Darģis, Roberts and Drakonaki, Elena and Frey, Jennifer-Carmen and Glišic, Isidora and Kikilintza, Pinelopi and Nicolas, Lionel and Romanyshyn, Mariana and Rosen, Alexandr and Rozovskaya, Alla and Suluste, Kristjan and Syvokon, Oleksiy and Tantos, Alexandros and Touriki, Despoina-Ourania and Tsiotskas, Konstantinos and Tsourilla, Eleni and Varsamopoulos, Vassilis and Wisniewski, Katrin and Žagar, Aleš and Zesch, Torsten},
	title        = {An overview of Grammatical Error Correction for the twelve {MultiGEC-2025} languages},
	institution  = {University of Gothenburg},
	address      = {Gothenburg, Sweden},
	year         = {2025},
	abstract     = {This overview is complementary to the comprehensive dataset description article for MultiGEC – a dataset for Multilingual Grammatical Error Correction including data for twelve European languages: Czech, English, Estonian, German, Greek, Icelandic, Italian, Latvian, Russian, Slovene, Swedish and Ukrainian.
It is well-known that in the field of Natural Language Processing (NLP) most publications tend to focus on the English language. While this is due to historical reasons (ease of publication, greater outreach, increased number of citations, etc.), it does leave other languages at a disadvantage across multiple tasks. The MultiGEC dataset was created as an attempt to counteract this effect. This report provides a historical overview of the evolution of GEC for each of the twelve languages in this dataset and provides a context for the work on the dataset and the related MultiGEC-2025 shared task.},
}

@article{cousse-adesam-2025-exploring-346846,
	author       = {Coussé, Evie and Adesam, Yvonne},
	title        = {Exploring the language of {Swedish} social media: A contrastive corpus analysis},
	journal      = {Nordic Journal of Linguistics},
	year         = {2025},
	abstract     = {This article explores the language of social media by analyzing a selection of linguistic features in four corpora of Swedish social media available at Språkbanken Text: Blog mix, Familjeliv, Flashback, and Twitter. Previous research describes the language of these corpora as informal, spoken-like, unedited, non-standard, and innovative. Our corpus analysis confirms the informal and spoken-like nature of social media, while also showing that these traits are unevenly distributed across the various social media corpora and that they are also present in other traditional written corpora, such as novels. Our findings also reveal that the social media corpora show traits of involved and interactional language.},
}

@inproceedings{francis-2025-language-348452,
	author       = {Francis, Emilie},
	title        = {Language of the {Swedish} Manosphere with {Swedish FrameNet}},
	booktitle    = {25th Nordic Conference on Computational Linguistics and 11th Baltic Conference on Human Language Technologies (NoDaLiDa/Baltic-HLT 2025)},
	year         = {2025},
	publisher    = {University of Tartu Library},
	address      = {Tartu, Estonia},
	pages        = {10},
	abstract     = {The manosphere is a loose group of online communities centralised around the themes of anti-feminism, misogyny, racism, and hetero-masculinity. It has gained a reputation for violent extremism, particularly from members of the involuntary celibate (incel) community. Sweden sees one of the highest volumes of online traffic to well-known incel forums in all of Europe. In spite of this, there is little information on manosphere/incel culture in Swedish. This paper uses posts from Flashback’s manosphere subforum automatically annotated with Swedish FrameNet to analyse the language community in a Swedish context. To do so, a lexicon for the Swedish manosphere was created and terms of interest were identified in the Swedish discourse. Analysis of prominent semantic frames linked to these terms of interest presents a detailed look into the language of the Swedish manosphere.},
}

@incollection{borin-lyngfelt-2025-framenets-347629,
	author       = {Borin, Lars and Lyngfelt, Benjamin},
	title        = {Framenets and {ConstructiCons}},
	booktitle    = {The Cambridge Handbook of Construction Grammar},
	editor       = {Fried, Mirjam and Nikiforidou, Kiki},
	year         = {2025},
	publisher    = {Cambridge University Press},
	address      = {Cambridge},
	isbn         = {9781009049139},
	pages        = {71--100},
}

@inproceedings{szawerna-etal-2025-devils-348547,
	author       = {Szawerna, Maria Irena and Dobnik, Simon and Muñoz Sánchez, Ricardo and Vu, Xuan-Son and Volodina, Elena},
	title        = {The Devil’s in the Details: the Detailedness of Classes Influences Personal Information Detection and Labeling},
	booktitle    = {Proceedings of the Joint 25th Nordic Conference on Computational Linguistics and 11th Baltic Conference on Human Language Technologies (NoDaLiDa/Baltic-HLT 2025)},
	editor       = {Johansson, Richard and Stymne, Sara},
	year         = {2025},
	eventdate    = {2025-03-03/2025-03-04},
	venue        = {Tallinn, Estonia},
	publisher    = {University of Tartu Library},
	address      = {Tartu, Estonia},
	isbn         = {978-9908-53-109-0},
	pages        = {697--708},
	abstract     = {In this paper, we experiment with the effect of different levels of detailedness or granularity—understood as i) the number of classes, and ii) the classes’ semantic depth in the sense of hypernym and hyponym relations — of the annotation of Personally Identifiable Information (PII) on automatic detection and labeling of such information. We fine-tune a Swedish BERT model on a corpus of Swedish learner essays annotated with a total of six PII tagsets at varying levels of granularity. We also investigate whether the presence of grammatical and lexical correction annotation in the tokens and class prevalence have an effect on predictions. We observe that the fewer total categories there are, the better the overall results are, but having a more diverse annotation facilitates fewer misclassifications for tokens containing correction annotation. We also note that the classes’ internal diversity has an effect on labeling. We conclude from the results that while labeling based on the detailed annotation is difficult because of the number of classes, it is likely that models trained on such annotation rely more on the semantic content captured by contextual word embeddings rather than just the form of the tokens, making them more robust against nonstandard language.},
}

@proceedings{munozsanchez-etal-2025-proceedings-348545,
	editor       = {Muñoz Sánchez, Ricardo and Alfter, David and Volodina, Elena and Kallas, Jelena},
	title        = {Proceedings of the 14th Workshop on Natural Language Processing for Computer Assisted Language Learning ({NLP4CALL} 2025)},
	year         = {2025},
	publisher    = {University of Tartu Library},
	address      = {Tartu, Estonia},
	isbn         = {978-9908-53-112-0},
	abstract     = {The workshop series on Natural Language Processing (NLP) for Computer-Assisted Language Learning (NLP4CALL) is a meeting place for researchers working on integrating Natural Language Processing and Speech Technologies in CALL systems and exploring the theoretical and methodological issues arising in this connection. The latter includes, among others, the integration of insights from Second Language Acquisition (SLA) research and the promotion of “Computational SLA” through setting up Second Language research infrastructures.
The intersection of Natural Language Processing (or Language Technology / Computational Linguistics) and Speech Technology with Computer-Assisted Language Learning (CALL) brings “understanding” of language to CALL tools, thus making CALL intelligent. This fact has inspired the name for this area of research — Intelligent CALL, ICALL for short. As the definition suggests, apart from having excellent knowledge of Natural Language Processing and/or Speech Technology, ICALL researchers need good insights into second language acquisition theories and practices, as well as knowledge of second language pedagogy and didactics. Therefore, this workshop invites a wide range of ICALL-relevant research, including studies where NLP-enriched tools are used for testing SLA and pedagogical theories, and those where SLA theories (pedagogical practices or empirical data) are modeled using ICALL tools. The NLP4CALL workshop series is aimed at bringing together competences from these areas for sharing experiences and brainstorming around the future of the field.},
}

@incollection{petersson-2025-progressive-346547,
	author       = {Petersson, Stellan},
	title        = {Progressive aspect in {Swedish} and {English}: a case study of 'ing' and 'hålla på att'},
	booktitle    = {Building meanings, building connections. A festschrift in honor of Makoto Kanazawa and Christopher Tancredi},
	editor       = {Sudo, Yasutada and Uegaki, Wataru},
	year         = {2025},
}