BibTeX

% Conference paper in the Huminfra Conference (HiC 2024) proceedings, pp. 41-50.
@inproceedings{borin-holmer-2024-tradita-333774,
  author    = {Borin, Lars and Holmer, Louise},
  title     = {Tradita innovare, innovata tradere. {The} {Gothenburg} approach to computational lexicography},
  booktitle = {Proceedings of the Huminfra Conference (HiC 2024), 10--11 January, 2024, Gothenburg, Sweden},
  editor    = {Volodina, Elena and Bouma, Gerlof and Forsberg, Markus and Kokkinakis, Dimitrios and Alfter, David and Fridlund, Mats and Horn, Christian and Ahrenberg, Lars and Blåder, Anna},
  series    = {Linköping Electronic Conference Proceedings},
  number    = {205},
  pages     = {41--50},
  year      = {2024},
  publisher = {Linköping University Electronic Press},
  address   = {Linköping},
  isbn      = {978-91-8075-512-2},
  abstract  = {Swedish computational lexicography has a long history at the University of Gothenburg, both in its primary role as a central aspect of the scientific study of vocabulary and also as an infrastructural component for conducting research based on language data. Starting in the 1960s, the Språkdata research group pioneered corpus-supported lexicography for Swedish, forming the basis for successive editions of the two main descriptive dictionaries of contemporary Swedish, SAOL and SO. Language technological lexical resources for Swedish have been developed by the research unit/research infrastructure Språkbanken Text since the turn of the millennium, most recently in the framework of the Swedish FrameNet++ initiative. After two decades of separation, these two largely mutually independently developed strands of computational lexicography have now joined forces under the umbrella of Språkbanken’s lexical research infrastructure to advance the field technically, methodologically, and scientifically.},
}

% Whole proceedings volume of the CALD-pseudo 2024 workshop.
@proceedings{volodina-etal-2024-proceedings-336386,
  editor    = {Volodina, Elena and Alfter, David and Dobnik, Simon and Lindström Tiedemann, Therese and Muñoz Sánchez, Ricardo and Szawerna, Maria Irena and Vu, Xuan-Son},
  title     = {Proceedings of the Workshop on Computational Approaches to Language Data Pseudonymization ({CALD-pseudo} 2024)},
  year      = {2024},
  publisher = {Association for Computational Linguistics},
}

% Workshop paper in the CALD-pseudo 2024 proceedings.
@inproceedings{szawerna-etal-2024-detecting-336385,
  author    = {Szawerna, Maria Irena and Dobnik, Simon and Muñoz Sánchez, Ricardo and Lindström Tiedemann, Therese and Volodina, Elena},
  title     = {Detecting Personal Identifiable Information in {Swedish} Learner Essays},
  booktitle = {Proceedings of the Workshop on Computational Approaches to Language Data Pseudonymization (CALD-pseudo 2024)},
  year      = {2024},
  publisher = {Association for Computational Linguistics},
  abstract  = {Linguistic data can — and often does — contain PII (Personal Identifiable Information). Both from a legal and ethical standpoint, the sharing of such data is not permissible. According to the GDPR, pseudonymization, i.e. the replacement of sensitive information with surrogates, is an acceptable strategy for privacy preservation. While research has been conducted on the detection and replacement of sensitive data in Swedish medical data using Large Language Models (LLMs), it is unclear whether these models handle PII in less structured and more thematically varied texts equally well. In this paper, we present and discuss the performance of an LLM-based PII-detection system for Swedish learner essays.},
}

% Article in the magazine Språkbruk. The source record carried the date
% 2024-03-07 in its issue-number field; it is kept here as month + note.
@article{landqvist-2024-finlandssvenska-335636,
  author   = {Landqvist, Hans},
  title    = {Finlandssvenska översättare i {Svenskt översättarlexikon}},
  journal  = {Språkbruk},
  year     = {2024},
  month    = mar,
  volume   = {2024},
  note     = {Published 2024-03-07},
  abstract = {Svenskt översättarlexikon innehåller artiklar om sverigesvenska och finlandssvenska översättare. Vilka översättare i lexikonet kan sägas vara finlandssvenskar? Och finns det några finlandssvenska översättare som inte ingår i lexikonet – men som borde göra det?},
}

% Conference paper in the Huminfra Conference (HiC 2024) proceedings.
@inproceedings{broden-etal-2024-samforfattande-335726,
  author    = {Brodén, Daniel and Fridlund, Mats and Olsson, Leif-Jöran and Ängsal, Magnus Pettersson and Öhberg, Patrik},
  title     = {Samförfattande som datadriven tvärvetenskap: Pragmatiska lärdomar från {SweTerror-projektet}},
  booktitle = {Proceedings of the Huminfra Conference (HiC 2024), 10--11 January, 2024, Gothenburg, Sweden},
  year      = {2024},
  publisher = {Linköping University Electronic Press},
  address   = {Linköping},
  isbn      = {978-91-8075-512-2},
  abstract  = {Terrorism i svensk politik (SweTerror) är ett storskaligt tvärvetenskapligt forskningsprojekt med forskare från såväl human- och samhällsvetenskaperna som datavetenskaperna. Samtidigt använder och utvecklar SweTerror nationell forskningsinfrastruktur för riksdagsdata. Detta paper beskriver användningen av samförfattande som en datadriven tvärvetenskaplig praktik för att integrera olika vetenskapliga perspektiv och skapa samsyn i projektforskningen. Vi tar fasta på betydelsen av valet att koncentrera samarbetsformen kring konferenspapers inom specifikt digital humaniora och diskuterar erfarenheten av att samskrivande försvagar vetenskapligt revirtänkande, liksom ett iterativt förhållningssätt till forskningsdata kopplade till forskningsinfrastrukturer under uppbyggnad. Avslutningsvis betonar vi datadrivet samförfattande som en pragmatisk praktik för att stärka kollaborativt samarbete och kunskapsbryggor inom en tvärvetenskaplig forskargrupp.},
}

% Conference paper in the Huminfra Conference (HiC 2024) proceedings.
@inproceedings{bouma-etal-2024-konsten-333683,
  author    = {Bouma, Gerlof and Forsberg, Markus and Sikora, Justyna and Sköldberg, Emma},
  title     = {Konsten att bedriva svensk ordforskning utan att kränka upphovsrätten},
  booktitle = {Proceedings of the Huminfra Conference (HiC 2024), 10--11 January, 2024, Gothenburg, Sweden},
  editor    = {Volodina, Elena and Bouma, Gerlof and Forsberg, Markus and Kokkinakis, Dimitrios and Alfter, David and Fridlund, Mats and Horn, Christian and Ahrenberg, Lars and Blåder, Anna},
  year      = {2024},
  publisher = {Linköping University Electronic Press},
  address   = {Linköping},
  isbn      = {978-91-8075-512-2},
  abstract  = {Vi beskriver KB-labb och Språkbanken Texts samarbete för att underlätta ordforskning på de upphovsrätts-skyddade korpusar som finns i Kungliga bibliotekets samlingar. Satsningen har hittills lett till två öppna datasamlingar, Kubord 1 och 2, som ger tillgång till ordstatistik och ordsamförekomststatistik. Vi beskriver även Kubord-fastText, en samling vektormodeller som är baserade på samma korpusar, som är under utveckling.},
}

% Workshop paper in the CALD-pseudo 2024 proceedings.
@inproceedings{munozsanchez-etal-2024-names-336384,
  author    = {Muñoz Sánchez, Ricardo and Dobnik, Simon and Szawerna, Maria Irena and Lindström Tiedemann, Therese and Volodina, Elena},
  title     = {Did the Names I Used within My Essay Affect My Score? Diagnosing Name Biases in Automated Essay Scoring},
  booktitle = {Proceedings of the Workshop on Computational Approaches to Language Data Pseudonymization (CALD-pseudo 2024)},
  publisher = {Association for Computational Linguistics},
  year      = {2024},
  abstract  = {Automated essay scoring (AES) of second-language learner essays is a high-stakes task as it can affect the job and educational opportunities a student may have access to. Thus, it becomes imperative to make sure that the essays are graded based on the students’ language proficiency as opposed to other reasons, such as personal names used in the text of the essay. Moreover, most of the research data for AES tends to contain personal identifiable information. Because of that, pseudonymization becomes an important tool to make sure that this data can be freely shared. Thus, our systems should not grade students based on which given names were used in the text of the essay, both for fairness and for privacy reasons. In this paper we explore how given names affect the CEFR level classification of essays of second language learners of Swedish. We use essays containing just one personal name and substitute it for names from lists of given names from four different ethnic origins, namely Swedish, Finnish, Anglo-American, and Arabic. We find that changing the names within the essays has no apparent effect on the classification task, regardless of whether a feature-based or a transformer-based model is used.},
}

% Conference paper in the Huminfra Conference (HiC 2024) proceedings.
@inproceedings{holmer-etal-2024-saol-333679,
  author    = {Holmer, Louise and Lillieström, Ann and Sköldberg, Emma and Uppström, Jonatan},
  title     = {{SAOL} och svensk språkvetenskaplig infrastruktur – nu och i framtiden},
  booktitle = {Proceedings of the Huminfra Conference (HiC 2024), 10--11 January, 2024, Gothenburg, Sweden},
  editor    = {Volodina, Elena and Bouma, Gerlof and Forsberg, Markus and Kokkinakis, Dimitrios and Alfter, David and Fridlund, Mats and Horn, Christian and Ahrenberg, Lars and Blåder, Anna},
  year      = {2024},
  publisher = {Linköping University Electronic Press},
  address   = {Linköping},
  isbn      = {978-91-8075-512-2},
  abstract  = {Svenska Akademiens ordlista (SAOL 14, 2015) spelar en viktig roll inom svensk språkvetenskaplig infrastruktur, något som framkommer i denna artikel. Vidare presenteras preliminära resultat av en undersökning av hur frekventa uppslagsorden i SAOL egentligen är i olika delkorpusar med modern allmänspråklig svenska. För att ordlistan även fortsättningsvis ska kunna användas inom svensk ordforskning, vid språkstudier m.m., men också bli mer central inom språkteknologiska sammanhang, är det avgörande att SAOL:s uppslagsord vilar på vetenskaplig grund, moderna språkteknologiska metoder och uppdaterade korpusmaterial. Fokus i artikeln ligger på de uppslagsord som inte finns belagda i korpusmaterialet, och som därmed kan tänkas mönstras ut inför den kommande femtonde upplagan.},
}

% Book chapter (pp. 157-184) in an edited Routledge volume.
@incollection{hammarlin-etal-2024-fearing-336154,
  author    = {Hammarlin, Mia-Marie and Kokkinakis, Dimitrios and Miegel, Fredrik and Stoencheva, Jullietta},
  title     = {Fearing {mRNA} -- {A} mixed methods study of vaccine rumours},
  booktitle = {Vaccine Hesitancy in the Nordic Countries -- Trust and Distrust During the COVID-19 Pandemic},
  year      = {2024},
  publisher = {Routledge -- Taylor \& Francis Group},
  address   = {New York},
  isbn      = {978-1-032-30599-8},
  pages     = {157--184},
  abstract  = {There are well-spread ideas among vaccine-critical individuals around the world that “new” vaccines might be more dangerous to health than other, “traditional” vaccines, which can lead to vaccine hesitancy; the “delay in acceptance or refusal of vaccination despite availability of vaccination services”. For example, a recurring remark made in social media is that mRNA technology resembles a chip that alters the human DNA, which might permanently and irreparably damage the immune system. These ideas sometimes take the shape of rumours and conspiracy theories. Drawing on rumour theories and social cognitive perspectives, the aim of this chapter is to account for the purpose and the spreading of medical rumours that encircle mRNA COVID-19 vaccines. Our research questions are: How are rumours concerning mRNA expressed and established? In terms of trust and distrust, what function do the rumours have?},
}

% Conference paper in the Huminfra Conference (HiC 2024) proceedings.
@inproceedings{kokkinakis-2024-from-336089,
  author    = {Kokkinakis, Dimitrios},
  title     = {From {Zipf} distribution to {Universal Dependencies} -- Interactive Notebooks for {Swedish} Text Analysis},
  booktitle = {Proceedings of the Huminfra Conference (HiC 2024), 10--11 January, 2024, Gothenburg, Sweden},
  series    = {Linköping Electronic Conference Proceedings},
  number    = {205},
  year      = {2024},
  publisher = {Linköping University Electronic Press},
  address   = {Linköping},
  isbn      = {978-91-8075-512-2},
  abstract  = {Notebook-based environments are powerful (web-based) interactive development resources for conducting exploratory (textual) data analysis (EDA). These environments allow the embedding of code (code snippets in ‘code cells’) which can be easily executed with the results immediately presented into the user’s window. This paper introduces some basic exploratory tools and techniques using JupyterLab notebooks, applied to Swedish using a subcorpus that address various topics related to the COVID-19 pandemic published during January-December 2021.},
}

% Survey article in Language and Linguistics Compass, volume 18, issue 1.
@article{lindahl-borin-2024-annotation-333043,
  author   = {Lindahl, Anna and Borin, Lars},
  title    = {Annotation for computational argumentation analysis: Issues and perspectives},
  journal  = {Language and Linguistics Compass},
  volume   = {18},
  number   = {1},
  year     = {2024},
  abstract = {Argumentation has long been studied in a number of disciplines, including several branches of linguistics. In recent years, computational processing of argumentation has been added to the list, reflecting a general interest from the field of natural language processing (NLP) in building natural language understanding systems for increasingly intricate language phenomena. Computational argumentation analysis – referred to as argumentation mining in the NLP literature – requires large amounts of real-world text with manually analyzed argumentation. This process is known as annotation in the NLP literature and such annotated datasets are used both as “gold standards” for assessing the quality of NLP applications and as training data for the machine learning algorithms underlying most state of the art approaches to NLP. Argumentation annotation turns out to be complex, both because argumentation can be complex in itself and because it does not come across as a unitary phenomenon in the literature. In this survey we review how argumentation has been studied in other fields, how it has been annotated in NLP and what has been achieved so far. We conclude with describing some important current and future issues to be resolved.},
}

% Workshop paper in the LaTeCH-CLfL 2024 proceedings.
@inproceedings{lofgren-dannells-2024-post-336065,
  author    = {Löfgren, Viktoria and Dannélls, Dana},
  title     = {{Post-OCR} Correction of Digitized {Swedish} Newspapers with {ByT5}},
  booktitle = {Proceedings of the 8th Joint SIGHUM Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature (LaTeCH-CLfL 2024)},
  year      = {2024},
  publisher = {Association for Computational Linguistics},
  address   = {East Stroudsburg, Pennsylvania, United States},
  abstract  = {Many collections of digitized newspapers suffer from poor OCR quality, which impacts readability, information retrieval, and analysis of the material. Errors in OCR output can be reduced by applying machine translation models to translate it into a corrected version. Although transformer models show promising results in post-OCR correction and related tasks in other languages, they have not yet been explored for correcting OCR errors in Swedish texts. This paper presents a post-OCR correction model for Swedish 19th to 21th century newspapers based on the pre-trained transformer model ByT5. Three versions of the model were trained on different mixes of training data. The best model, which achieved a 36\% reduction in CER, is made freely available and will be integrated into the automatic processing pipeline of Språkbanken Text, a Swedish language technology infrastructure containing modern and historical written data.},
}

% Conference paper in the Huminfra Conference (HiC 2024) proceedings.
@inproceedings{humlesjo-etal-2024-queerlit-334589,
  author    = {Humlesjö, Siska and Bergenmar, Jenny and Matsson, Arild},
  title     = {{Queerlit} – a bibliography of {Swedish} fiction with {LGBTQI} topics},
  booktitle = {Proceedings of the Huminfra Conference (HiC 2024), 10--11 January, 2024, Gothenburg, Sweden},
  editor    = {Volodina, Elena and Bouma, Gerlof and Forsberg, Markus and Kokkinakis, Dimitrios and Alfter, David and Fridlund, Mats and Horn, Christian and Ahrenberg, Lars and Blåder, Anna},
  year      = {2024},
  publisher = {Linköping University Electronic Press},
  address   = {Linköping},
  isbn      = {978-91-8075-512-2},
  abstract  = {This paper summarizes the project Queerlit: Metadata and Searchability for LGBTQ+ Literary Heritage 2020-2023 and discusses some challenges in the development of this resource. The Queerlit project consist of four parts: 1. Creating a bibliography of Swedish fiction with LGBTQI themes 2. Creating a Swedish thesaurus (QLIT), adapted from the of the linked open data thesaurus Homosaurus 3. Assigning all material in the bibliography with subject headings from QLIT. 4. A web user interface for searching the material All four parts are integrated with the Swedish union catalog, Libris, making the results of the project available for all under a CC0 license. QLIT is the first external thesaurus integrated in the linked open data framework used in the technical platform of Libris, XL. The bibliography spans from rune stones from the 7th century to recently published fiction. When applying subject headings for the material both general aspects of the work and specific LGBTQI topics are described, making this the most comprehensive retrospective indexing project of Swedish literature to date. The underlying knowledge organization is made a prominent method of interacting with the search interface, which is empirically designed around the needs of various user groups.},
}

% Whole proceedings volume of the Huminfra Conference (HiC 2024).
@proceedings{volodina-etal-2024-proceedings-335190,
  editor    = {Volodina, Elena and Bouma, Gerlof and Forsberg, Markus and Kokkinakis, Dimitrios and Alfter, David and Fridlund, Mats and Horn, Christian and Ahrenberg, Lars and Blåder, Anna},
  title     = {Proceedings of the {Huminfra} Conference ({HiC} 2024), 10--11 {January}, 2024, {Gothenburg}, {Sweden}},
  year      = {2024},
  publisher = {Linköping University Electronic Press},
  address   = {Linköping},
  isbn      = {978-91-8075-512-2},
}

% Book chapter (pp. 1-32) in a volume edited by Bech and Pfaff.
@incollection{pfaff-bouma-2024-npegl-335993,
  author    = {Pfaff, Alexander and Bouma, Gerlof},
  title     = {The {NPEGL} noun phrase database: design and construction},
  booktitle = {Noun phrases in early Germanic languages},
  editor    = {Bech, Kristin and Pfaff, Alexander},
  year      = {2024},
  publisher = {Language Science Press},
  address   = {Berlin},
  isbn      = {978-3-96110-467-3},
  pages     = {1--32},
}

% Journal article in Corpus Linguistics and Linguistic Theory 20(1).
@article{berdicevskis-etal-2024-drop-326112,
  author  = {Berdicevskis, Aleksandrs and Coussé, Evie and Koplenig, Alexander and Adesam, Yvonne},
  title   = {To drop or not to drop? Predicting the omission of the infinitival marker in a {Swedish} future construction},
  journal = {Corpus Linguistics and Linguistic Theory},
  year    = {2024},
  volume  = {20},
  number  = {1},
  pages   = {219--261},
}

% Workshop paper in the RaPID-5 proceedings.
@inproceedings{belmonte-etal-2024-automatic-336253,
  author    = {Belmonte, Marica and Gagliardi, Gloria and Kokkinakis, Dimitrios and Tamburini, Fabio},
  title     = {Automatic Detection of Rhythmic Features in Pathological Speech of {MCI} and Dementia Patients},
  booktitle = {RaPID-5: Resources and ProcessIng of linguistic, para-linguistic and extra-linguistic Data from people with various forms of cognitive/psychiatric/developmental impairments},
  year      = {2024},
  publisher = {European Language Resources Association (ELRA)},
  isbn      = {978-2-493814-11-1},
  abstract  = {The presence of linguistic alterations represents one of the prodromal signs of cognitive decline associated with dementia. In recent years, a growing body of work has been devoted to the development of algorithms for the automatic linguistic analysis of both oral and written texts, with diagnostic purposes. The extraction of Digital Linguistic Biomarkers from patients' verbal productions can indeed provide a rapid, ecological, and cost-effective system for large-scale screening of the pathology. This article contributes to the ongoing research in the field by exploring a traditionally less studied aspect of language in dementia, namely the rhythmic characteristics of speech. In particular, the paper focuses on the automatic detection of rhythmic features in Italian connected speech. A landmark-based system was developed and evaluated to segment the speech flow into vocalic and consonantal intervals and to calculate several rhythmic metrics. Additionally, the reliability of these metrics in identifying MCI and dementia patients was tested.},
}

% Conference paper in the Huminfra Conference (HiC 2024) proceedings.
@inproceedings{masciolini-toth-2024-stund-335974,
  author    = {Masciolini, Arianna and Tóth, Márton András},
  title     = {{STUnD}: ett {Sökverktyg} för {Tvåspråkiga} {Universal Dependencies-trädbanker}},
  booktitle = {Proceedings of the Huminfra Conference (HiC 2024), 10--11 January, 2024, Gothenburg, Sweden},
  year      = {2024},
  publisher = {Linköping University Electronic Press},
  address   = {Linköping},
  isbn      = {978-91-8075-512-2},
  abstract  = {Föreliggande artikel introducerar STUND, ett Sökverktyg för Tvåspråkiga Universal Dependencies-trädbanker som möjliggör parallella syntaktiska sökningar. Vi demonstrerar dess praktiska tillämpning i en fallstudie på tempusformen presens perfekt i svenska och engelska. Resultaten visar att presens perfekt används i ungefär lika stor utsträckning i båda språken, men att det förekommer viss variation som verkar bero på språkspecifika konventioner och översättningsstrategier.},
}

% Article in ASLA:s skriftserie / ASLA Studies in Applied Linguistics, volume 31.
@article{landqvist-skoldberg-2024-interjektioner-336473,
  author   = {Landqvist, Hans and Sköldberg, Emma},
  title    = {Interjektioner som lexikografisk utmaning. {En} fallstudie av interjektioner med engelskt ursprung utifrån {Svensk ordbok utgiven av Svenska Akademien}},
  journal  = {ASLA:s skriftserie/ASLA Studies in Applied Linguistics},
  year     = {2024},
  volume   = {31},
  pages    = {26--55},
  abstract = {In this article, a qualitative and, to some extent, comparative metalexicographic case study is reported. The study will answer two research questions: (1) How are the interjections "sorry", "shit" and "wow" described in The Contemporary Dictionary of the Swedish Academy (SO) compared to the corresponding dictionary articles in The Danish Dictionary (DDO) and the Norwegian Academy’s Dictionary (NAOB) and how can the SO descriptions be developed?; (2) How can the interjections "yes", "nice/najs" and "woho/wohoo" be analyzed and then described in new dictionary articles in an updated version of SO? The point of departure for answering both RQs is a number of information categories that are common in dictionary articles. Furthermore, the use of the current interjections in contemporary corpora and text collections for Swedish are crucial for the investigation. The results of the study show that interjections as a category implies several challenges for lexicographers regarding information about their spelling, pronunciation, and inflection, meaning, language examples, usage comments as well as information about their establishment, origin, and kinship. Finally, some suggestions are presented for how the description of interjections in the dictionary can be developed.},
}

% Paper in the EACL 2024 Student Research Workshop proceedings.
@inproceedings{szawerna-2024-stanza-336413,
  author    = {Szawerna, Maria Irena},
  title     = {Can {Stanza} be Used for Part-of-Speech Tagging Historical {Polish}?},
  booktitle = {Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics: Student Research Workshop},
  year      = {2024},
  publisher = {Association for Computational Linguistics},
  isbn      = {979-8-89176-090-5},
  abstract  = {The goal of this paper is to evaluate the performance of Stanza, a part-of-speech (POS) tagger developed for modern Polish, on historical text to assess its possible use for automating the annotation of other historical texts. While the issue of the reliability of utilizing POS taggers on historical data has been previously discussed, most of the research focuses on languages whose grammar differs from Polish, meaning that their results need not be fully applicable in this case. The evaluation of Stanza is conducted on two sets of 10286 and 3270 manually annotated tokens from a piece of historical Polish writing (1899), and the errors are analyzed qualitatively and quantitatively. The results show a good performance of the tagger, especially when it comes to Universal Part-of-Speech (UPOS) tags, which is promising for utilizing the tagger for automatic annotation in larger projects, and pinpoint some common features of misclassified tokens.},
}

% Conference paper in the Huminfra Conference (HiC 2024) proceedings.
@inproceedings{ahlfeldt-matsson-2024-digarv-334595,
  author    = {Åhlfeldt, Johan and Matsson, Arild},
  title     = {The {DIGARV} Platform: A collaborative platform for working with cultural heritage data and research data},
  booktitle = {Proceedings of the Huminfra Conference (HiC 2024), 10--11 January, 2024, Gothenburg, Sweden},
  editor    = {Volodina, Elena and Bouma, Gerlof and Forsberg, Markus and Kokkinakis, Dimitrios and Alfter, David and Fridlund, Mats and Horn, Christian and Ahrenberg, Lars and Blåder, Anna},
  year      = {2024},
  publisher = {Linköping University Electronic Press},
  address   = {Linköping},
  isbn      = {978-91-8075-512-2},
  abstract  = {This article covers an easy-to-use research tool for collaborative work. The tool has been adapted for structured data and high-resolution images within four research projects at GRIDH. The platform is especially designed for working with temporal and spatial data. Furthermore, the platform gives researchers access to a relational database system through input forms and access to external cultural heritage data including high-resolution images. This way the platform also aims to utilize external data published as Linked Open Data (LOD) and, at the same time, prepare its own research data for publishing as LOD. Because of the spatial and temporal nature of the data, it is visualized in time and space through maps and timelines to give overview and context during the data management phase.},
}
Sidansvarig: sb-webb