@incollection{blensenius-holmer-2022-avokado-321648, title = {avokado-r/-er/-s/-sar}, abstract = {The article discusses lexicographic perspectives of the Swedish plural with the suffix -s. Traditionally, plural nouns ending in -s, for example avokados ‘avocados’, are considered colloquial speech; the formal way of writing the plural in question is avokador or avokadoer. However, since the Swedish Academy grammar included a noun declension indicating plurals with the suffix -s, plural with -s seems to have become more accepted, at least among language planners.}, booktitle = {Live and Learn. Festschrift in honor of Lars Borin (red. Elena Volodina, Dana Dannélls, Aleksandrs Berdicevskis, Markus Forsberg \& Shafqat Virk)}, author = {Blensenius, Kristian and Holmer, Louise}, year = {2022}, publisher = {Institutionen för svenska, flerspråkighet och språkteknologi, Göteborgs universitet}, address = {Göteborg}, ISBN = {978-91-87850-83-7}, pages = {13--16}, } @book{blensenius-2022-valency-318684, title = {Valency and constructions. Perspectives on combining words}, abstract = {This volume contains papers on the theme valency and constructions, including papers from an international workshop on the same topic held at the University of Gothenburg. The aim is to cover many aspects of the broad topic of valency and constructions. Different languages are represented, for example Japanese, Brazilian Portuguese, and Swedish. Different perspectives on the topic can be identified: lexicographic, constructionist, event-structure, and frame semantics, to name but a few.}, editor = {Blensenius, Kristian}, year = {2022}, publisher = {Meijerbergs institut för svensk etymologisk forskning, Göteborgs universitet}, address = {Göteborg}, ISBN = {9789198679120}, } @incollection{blensenius-holmer-2022-verbal-318518, title = {How do verbal constructional alternations reflect (sub-)sense distinctions in dictionaries? 
A case study of a Swedish monolingual dictionary}, booktitle = {Blensenius, Kristian (ed.) Valency and constructions. Perspectives on combining words. Meijerbergs arkiv för svensk ordforskning 46}, author = {Blensenius, Kristian and Holmer, Louise}, year = {2022}, address = {Göteborg}, ISBN = {978-91-986791-2-0}, pages = {9--30}, } @incollection{saxena-etal-2022-clues-317929, title = {Clues to Kanashi prehistory 2: Loanword adaptation in verbs}, booktitle = {Synchronic and Diachronic Aspects of Kanashi}, editor = {Anju Saxena and Lars Borin}, author = {Saxena, Anju and Borin, Lars and Comrie, Bernard}, year = {2022}, publisher = {De Gruyter Mouton}, address = {Berlin}, ISBN = {9783110703153}, pages = {215--233}, } @article{borin-2022-that-322872, title = {All that glitters . . . : Interannotator agreement in natural language processing}, abstract = {Evaluation has emerged as a central concern in natural language processing (NLP) over the last few decades. Evaluation is done against a gold standard, a manually linguistically annotated dataset, which is assumed to provide the ground truth against which the accuracy of the NLP system can be assessed automatically. 
In this article, some methodological questions in connection with the creation of gold standard datasets are discussed, in particular (non-)expectations of linguistic expertise in annotators and the interannotator agreement measure standardly but unreflectedly used as a kind of quality index of NLP gold standards.}, journal = {Nordlyd}, author = {Borin, Lars}, year = {2022}, volume = {46}, number = {1}, pages = {19--26}, } @misc{tahmasebi-etal-2022-proceedings-316661, title = {Proceedings of the 3rd Workshop on Computational Approaches to Historical Language Change, May 26-27, 2022, Dublin, Ireland}, author = {Tahmasebi, Nina and Montariol, Syrielle and Kutuzov, Andrey and Hengchen, Simon and Dubossarsky, Haim and Borin, Lars}, year = {2022}, publisher = {Association for Computational Linguistics}, address = {Stroudsburg, PA}, ISBN = {978-1-955917-42-1}, } @incollection{skoldberg-2022-phraseological-318544, title = {Phraseological theory, evidence in corpora and lexicographical practice. On collocations in a monolingual dictionary of Swedish}, abstract = {In this paper, I examine the treatment of collocations in a comprehensive monolingual dictionary of Swedish, namely Svensk ordbok utgiven av Svenska Akademien (‘Contemporary Dictionary of the Swedish Academy’). Based on research on phraseology, studies of the mastery of Swedish collocations among L2 students, and metalexicographic approaches to collocations, I discuss the identification, selection, lemmatisation, and microstructural presentation of collocations in the dictionary. I also examine and assess corpora findings and the advanced tools provided by the Språkbanken Text research unit. These corpora and advanced tools play an essential role in lexicological and phraseological research of Swedish and in the work done on the Swedish Academy’s dictionaries.}, booktitle = {Valency and constructions. Perspectives on combining words. Ed. by Kristian Blensenius. 
(Meijerbergs arkiv för svensk ordforskning 46.)}, author = {Sköldberg, Emma}, year = {2022}, publisher = {Meijerbergs institut för svensk etymologisk forskning, Göteborgs universitet}, address = {Göteborg}, ISBN = {978-91-986791-2-0}, pages = {155--182}, } @incollection{dannells-etal-2022-beyond-321730, title = {Beyond strings of characters: Resources meet NLP – Again}, abstract = {FrameNet (FN) resources have existed for many languages for over a decade but their adoption in real world applications has been limited. To celebrate the 65 anniversary of Lars Borin, the initiator and leader of Swedish FrameNet, among others, we take a standpoint to motivate why language resources are crucial for moving NLP forward. We present our position on (a) the need for language resources to embrace other dimensions of text and language use, and (b) the need for them to relate to other representations through multimodality.}, booktitle = {Live and learn: Festschrift in honor of Lars Borin / Editors: Elena Volodina, Dana Dannélls, Aleksandrs Berdicevskis, Markus Forsberg, Shafqat Virk}, author = {Dannélls, Dana and Torrent, Tiago Timponi and Sigiliano, Natalia Sathler and Dobnik, Simon}, year = {2022}, publisher = {Institutionen för svenska, flerspråkighet och språkteknologi, Göteborgs universitet}, address = {Göteborg}, ISBN = {978-91-87850-83-7}, pages = {29--37}, } @inProceedings{casademontmoner-volodina-2022-swedish-321955, title = {Swedish MuClaGED: A new dataset for Grammatical Error Detection in Swedish}, abstract = {This paper introduces the Swedish MuClaGED dataset, a new dataset specifically built for the task of Multi-Class Grammatical Error Detection (GED). The dataset has been produced as a part of the multilingual Computational SLA shared task initiative. In this paper we elaborate on the generation process and the design choices made to obtain Swedish MuClaGED. 
We also show initial baseline results for the performance on the dataset in a task of Grammatical Error Detection and Classification on the sentence level, which have been obtained through (Bi)LSTM ((Bidirectional) Long-Short Term Memory) methods.}, booktitle = {Proceedings of the 11th Workshop on Natural Language Processing for Computer-Assisted Language Learning (NLP4CALL 2022) }, author = {Casademont Moner, Judit and Volodina, Elena}, year = {2022}, publisher = {Linköping University Electronic Press}, address = {Linköping, Sweden}, ISBN = {978-91-7929-459-5}, } @incollection{zechner-2022-other-322912, title = {The other SAT-Solver: Applying lexicons to SweSAT word questions}, booktitle = {Live and Learn – Festschrift in honor of Lars Borin / editors: Elena Volodina, Dana Dannélls, Aleksandrs Berdicevskis, Markus Forsberg, Shafqat Virk}, author = {Zechner, Niklas}, year = {2022}, publisher = {Institutionen för svenska, flerspråkighet och språkteknologi, Göteborgs universitet}, address = {Göteborg}, ISBN = {978-91-87850-83-7}, pages = {167--169}, } @article{volodina-etal-2022-crowdsourcing-336551, title = {Crowdsourcing ratings for single lexical items: a core vocabulary perspective}, abstract = {In this study, we investigate theoretical and practical issues connected to differentiating between core and peripheral vocabulary at different levels of linguistic proficiency using statistical approaches combined with crowdsourcing. We also investigate whether crowdsourcing second language learners’ rankings can be used for assigning levels to unseen vocabulary. The study is performed on Swedish single-word items. 
The four hypotheses we examine are: (1) there is core vocabulary for each proficiency level, but this is only true until CEFR level B2 (upper-intermediate); (2) core vocabulary shows more systematicity in its behavior and usage, whereas peripheral items have more idiosyncratic behavior; (3) given that we have truly core items (aka anchor items) for each level, we can place any new unseen item in relation to the identified core items by using a series of comparative judgment tasks, this way assigning a “target” level for a previously unseen item; and (4) non-experts will perform on par with experts in a comparative judgment setting. The hypotheses have been largely confirmed: In relation to (1) and (2), our results show that there seems to be some systematicity in core vocabulary for early to mid-levels (A1-B1) while we find less systematicity for higher levels (B2-C1). In relation to (3), we suggest crowdsourcing word rankings using comparative judgment with known anchor words as a method to assign a “target” level to unseen words. With regard to (4), we confirm the previous findings that non-experts, in our case language learners, can be effectively used for the linguistic annotation tasks in a comparative judgment setting.}, journal = {Slovenščina 2.0: Empirical, Applied and Interdisciplinary Research}, author = {Volodina, Elena and Alfter, David and Lindström Tiedemann, Therese}, year = {2022}, volume = {10}, number = {2}, pages = {5--61}, } @inProceedings{kokkinakis-etal-2022-necessity-321865, title = {The necessity of digital health communication in social media to boost COVID-19 vaccine acceptance. }, booktitle = {ICA Post Conference: Digital Health Communication: Issues and Perspectives. University of Burgundy Franche-Comté, Dijon, France.}, author = {Kokkinakis, Dimitrios and Hammarlin, Mia-Marie and Borin, Lars and Miegel, Fredrik}, year = {2022}, } @inProceedings{ingvarsson-etal-2022-order-323627, title = {The New Order of Criticism. 
Explorations of Book Reviews Between the Interpretative and Algorithmic}, booktitle = {The 6th Digital Humanities in the Nordic and Baltic Countries Conference (DHNB 2022), Uppsala, Sweden, March 15-18, 2022}, author = {Ingvarsson, Jonas and Brodén, Daniel and Samuelsson, Lina and Wåhlstrand Skärström, Victor and Zechner, Niklas}, year = {2022}, publisher = {CEUR Workshop Proceedings}, } @incollection{volodina-etal-2022-reliability-321988, title = {Reliability of Automatic Linguistic Annotation: Native vs Non-native Texts }, abstract = {We present the results of a manual evaluation of the performance of automatic linguistic annotation on three different datasets: (1) texts written by native speakers, (2) essays written by second language (L2) learners of Swedish in the original form and (3) the normalized versions of learner-written essays. The focus of the evaluation is on lemmatization, POS-tagging, word sense disambiguation, multi-word detection and dependency annotation. Two annotators manually went through the automatic annotation on a subset of the datasets and marked up all deviations based on their expert judgments and the guidelines provided. We report Inter-Annotator Agreement between the two annotators and accuracy for the linguistic annotation quality for the three datasets, by levels and linguistic features.}, booktitle = {Selected Papers from the CLARIN Annual Conference 2021, Virtual Event, 2021, 27–29 September}, editor = {Monica Monachini and Maria Eskevich}, author = {Volodina, Elena and Alfter, David and Lindström Tiedemann, Therese and Lauriala, Maisa and Piipponen, Daniala}, year = {2022}, publisher = {Linköping Electronic Conference }, address = {Linköping, Sweden}, ISBN = { 978-91-7929-444-1}, pages = {151--167}, } @inProceedings{ingvarsson-etal-2022-order-324051, title = {The New Order of Criticism. 
Explorations of Book Reviews Between the Interpretative and Algorithmic}, abstract = {The New Order of Criticism (2020–2024) is a mixed-methods project combining algorithmic and interpretative approaches to the study of literary criticism. The project expands on a prior study of Swedish book reviews from the years 1906, 1956 and 2006 (‘The Order of Criticism’, Samuelsson 2013), re-examining and re-evaluating the original results through the uses of computational tools, language technology and big data. The aim of the present paper is to discuss early experiences and results from the interdisciplinary approach utilized by the current project, a collaborative process where interpreter and programmer are in dialogue, and where methodologies, and their instantiation in tools, are reflexively discussed from an epistemological point of view. In our analysis we ask: How can insights from working with digital methodologies and tools inform traditional scholarship on literary criticism? How can interpretative approaches and results inform digital methods?}, booktitle = {Digital Humanities in Action: The Sixth Digital Humanities in the Nordic and Baltic Countries conference}, author = {Ingvarsson, Jonas and Brodén, Daniel and Samuelsson, Lina and Wåhlstrand Skärström, Victor and Zechner, Niklas}, year = {2022}, publisher = {CEUR-WS}, address = {Aachen}, } @incollection{pettersson-borin-2022-swedish-323276, title = {Swedish Diachronic Corpus}, abstract = {The recently compiled Swedish Diachronic Corpus offers access to a total of approximately 16 billion words, covering texts from the 13th century onwards. The corpus contains 14 main genres, with a number of subgenres, compiled from a wide range of sources, including corpus providers and libraries as well as individual researchers and private citizens. All texts in the corpus follow a consistent format, are extensively annotated with metadata, and freely available for download. 
We firmly believe that the existence of a Swedish diachronic corpus among the resources offered by CLARIN will open up avenues to new, interesting research questions within humanities research, and be a valuable resource for large-scale studies of the Swedish language throughout history – studies that have previously been impossible to conduct in a thorough and consistent manner. Thanks to its embedding in the CLARIN context it also carries the potential to enable broad historical studies from a comparative European perspective.}, booktitle = {CLARIN: The infrastructure for language resources}, editor = {Darja Fišer and Andreas Witt}, author = {Pettersson, Eva and Borin, Lars}, year = {2022}, publisher = {De Gruyter Mouton}, address = {Berlin}, ISBN = {978-3-11-076734-6}, pages = {561--585}, } @misc{alfter-etal-2022-proceedings-321964, title = {Proceedings of the 11th Workshop on Natural Language Processing for Computer-Assisted Language Learning (NLP4CALL 2022) }, abstract = {The volume contains articles reviewed and presented at NLP4CALL workshop. The workshop series on Natural Language Processing (NLP) for Computer-Assisted Language Learning (NLP4CALL) is a meeting place for researchers working on the integration of Natural Language Processing and Speech Technologies in CALL systems and exploring the theoretical and methodological issues arising in this connection. 
The latter includes, among others, the integration of insights from Second Language Acquisition (SLA) research, and the promotion of “Computational SLA” through setting up Second Language research infrastructures.}, author = {Alfter, David and Volodina, Elena and François, Thomas and Desmet, Piet and Cornillie, Frederik and Jönsson, Arne and Rennes, Evelina}, year = {2022}, publisher = {Linköping Electronic Conference Proceedings }, address = {Linköping, Sweden}, ISBN = {978-91-7929-460-1}, } @inProceedings{ljunglof-smallbone-2022-efficient-325303, title = {Efficient corpus search using unary and binary indexes}, abstract = {We investigate how disk-based inverted indexes can be used for efficient searching in large annotated corpora. We give a formal semantics for simple corpus queries, and show how they can be translated into lookups in unary and binary indexes.}, booktitle = {Swedish Language Technology Conference}, author = {Ljunglöf, Peter and Smallbone, Nicholas}, year = {2022}, } @article{cousse-bouma-2022-semantic-311027, title = {Semantic scope restrictions in complex verb constructions in Dutch}, abstract = {This article addresses the question of how and why verbs combine in complex verb constructions in Dutch. We discuss introspective data reported in reference grammars and add evidence from corpus data to uncover the systematic ways in which Dutch verbs combine. Our analysis shows that verbs expressing meanings such as tense, aspect, modality and evidentiality are organized in a semantic scope hierarchy; that is, some verb meanings systematically have scope over others but not the other way round. 
We argue that this scope hierarchy reflects hierarchies of functional categories, elaborated in both functional and generative frameworks.}, journal = {Linguistics}, author = {Coussé, Evie and Bouma, Gerlof}, year = {2022}, volume = {60}, number = {1}, pages = {123--176}, } @incollection{saxena-borin-2022-then-317927, title = {And then there was one: Kanashi numerals from borrowed superdiversity to borrowed uniformity}, booktitle = {Synchronic and Diachronic Aspects of Kanashi}, editor = {Anju Saxena and Lars Borin}, author = {Saxena, Anju and Borin, Lars}, year = {2022}, publisher = {De Gruyter Mouton}, address = {Berlin}, ISBN = {9783110703153}, pages = {145--170}, } @inProceedings{lindstromtiedemann-etal-2022-cefr-321899, title = {CEFR-nivåer och svenska flerordsuttryck}, abstract = {När vi lär oss ett nytt språk ska vi inte bara lära oss enstaka ord och hur vi använder dessa, utan vi måste också lära oss vilka ordkombinationer som är ”fasta uttryck” till betydelsen (t.ex. hälsa på någon) eller till formen (t.ex. lättare sagt än gjort) eller båda delarna (t.ex. huller om buller). Enligt en del studier kan dessa uttryck utgöra så mycket som 50 \% av vokabulären i ett språk som förstaspråk (L1) eller ännu mer (Jackendoff 1997; Erman 2007, 28). Men det är möjligt att de är vanligare i vardagligt språk och talspråk (Prentice \& Sköldberg 2013). Flerordsenheter kan vara problematiska för andraspråkstalare (Nesselhauf 2003, 223) till och med på avancerad nivå (jfr Pawley \& Syder 1983; Wray \& Perkins 2000; Nesselhauf 2003; Prentice 2010). Samtidigt är de en helt nödvändig del av språket (Nesselhauf 2003, 223) och kan utmärka andraspråkstalarna som icke-modersmålstalare (Pawley \& Syder 1983; Wray 2002). Flerordsuttryck är alltså en värdefull del av andraspråkskompetensen (se även Paquot 2019) och något som är viktigt att studera hur vi på bästa sätt introducerar för L2-talaren och om de kan kopplas till nivåer i bedömning. 
I den här studien presenterar vi resultat kring förståelsen av flerordsuttryck i svenska som andraspråk i relation till färdighetsnivåerna enligt Gemensam Europeisk Referensram för Språk (GERS eller CEFR, Common European Framework of Reference) (COE 2001; 2018; Skolverket 2009; Utbildningsstyrelsen 2018) genom crowdsourcing experiment.}, booktitle = {Svenskan i Finland 19 : föredrag vid den nittonde sammankomsten för beskrivningen av svenskan i Finland, Vasa den 6-7 maj 2021 / redigerade av Siv Björklund, Bodil Haagensen, Marianne Nordman och Anders Westerlund}, author = {Lindström Tiedemann, Therese and Alfter, David and Volodina, Elena}, year = {2022}, publisher = {Svensk-Österbottniska Samfundet}, address = {Vasa}, ISBN = {978-952-69650-5-5}, } @book{holmer-2022-neutrala-330060, title = {Neutrala substantiv på -ande i text och ordbok}, abstract = {Denna utgåva bygger på en doktorsavhandling med samma titel från september 2022. }, author = {Holmer, Louise}, year = {2022}, publisher = {Meijerbergs institut för svensk etymologisk forskning}, address = {Göteborg}, ISBN = {978-91-986791-3-7}, } @incollection{petersson-etal-2022-vagar-321666, title = {Vägar in i en profession – ämneslärarprogrammet i svenska }, booktitle = {Använd rummet Högskolepedagogiska metoder för aktiva lärsalar / Veronica Alfredsson, Noomi Asker, Christel Backman, Sara Uhnoo (red.).}, author = {Petersson, Stellan and Andréasson, Maia and Malmberg, Anja}, year = {2022}, publisher = {Studentlitteratur}, address = {Lund}, ISBN = {9789144157795}, pages = {303--310}, } @book{saxena-borin-2022-synchronic-317920, title = {Synchronic and diachronic aspects of Kanashi}, editor = {Saxena, Anju and Borin, Lars}, year = {2022}, publisher = {De Gruyter Mouton}, address = {Berlin}, ISBN = {9783110703153}, } @incollection{volodina-alfter-2022-icall-321984, title = {ICALL: Research versus reality check.}, abstract = {Intelligent Computer-Assisted Language Learning has been one of Lars Borin’s research 
interests. The work on the Lärka language learning platform has started under his coordination. We see it our mission to make the platform live and prosperous, and through it to stimulate research into Swedish as a second language. Below, we name some weaknesses we have identified in Lärka while working with a course of beginner Swedish and outline our plans for tackling those.}, booktitle = {Live and Learn- Festschrift in honor of Lars Borin}, author = {Volodina, Elena and Alfter, David}, year = {2022}, publisher = {Institutionen för svenska, flerspråkighet och språkteknologi, Göteborgs universitet}, address = {Göteborg}, ISBN = {978-91-87850-83-7}, pages = {145--152}, } @incollection{saxena-etal-2022-linguistic-317924, title = {Linguistic variation: A challenge for describing the phonology of Kanashi}, booktitle = {Synchronic and Diachronic Aspects of Kanashi}, editor = {Anju Saxena and Lars Borin}, author = {Saxena, Anju and Sjöberg, Anna and Sagar, Padam and Borin, Lars}, year = {2022}, publisher = {De Gruyter Mouton}, address = {Berlin}, ISBN = {9783110703153}, pages = {131--144}, } @article{landqvist-etal-2022-organisationer-321827, title = {Organisationer, frågor och (an)svar – Institutionellt terminologiskt samarbete mellan Sverige och Finland 1975–1998}, abstract = {This article describes and analyses the contacts between the Swedish and Finnish national terminology organisations between the 1970s and the 1990s. The Swedish Tekniska nomenklaturcentralen TNC (1941–2018; 2000–2018 Terminologicentrum TNC) was Sweden’s national center for special languages and terminology work for more than 75 years. Since its founding in 1941, the TNC was active not only in Sweden, but also in establishing and maintaining international contacts. The Finnish Centre for Technical Terminology TSK (after 2004 Finnish Terminology Centre TSK) was founded in 1974. 
The research material used is the collection of correspondence in the TNC’s document archive, in particular the section stored with the code “Ufin”, i.e., letters (N=98) documenting written communication between the TNC and the TSK. The article describes the topics covered in the communication, the individuals involved, the objectives and consequences of the communication and the results achieved. In a quantitative analysis, the themes of the letters are categorized in four main categories: publications, communication, information, and language issues. In a qualitative analysis, a specific terminological issue in the field of wood technology is analysed by close reading and content analysis. Finally, further studies, which complete the picture of terminological co-operation on institutional level in the Nordic countries, are proposed.}, journal = {Responsible Communication. VAKKI Publications. Eds H. Katajamäki, M. Enell-Nilsson, H. Kauppinen-Räisänen \& H. Limatius}, author = {Landqvist, Hans and Nissilä, Niina and Pilke, Nina and Sjöberg, Sannina}, year = {2022}, volume = {14}, pages = {89--104}, } @book{holmer-2022-neutrala-318517, title = {Neutrala substantiv på -ande i text och ordbok}, author = {Holmer, Louise}, year = {2022}, publisher = {Institutionen för svenska, flerspråkighet och språkteknologi}, address = {Göteborg}, } @inProceedings{casademontmoner-volodina-2022-generation-321987, title = {Generation of Synthetic Error Data of Verb Order Errors for Swedish}, abstract = {We report on our work-in-progress to generate a synthetic error dataset for Swedish by replicating errors observed in the authentic error annotated dataset. We analyze a small subset of authentic errors, capture regular patterns based on parts of speech, and design a set of rules to corrupt new data. We explore the approach and identify its capabilities, advantages and limitations as a way to enrich the existing collection of error-annotated data. 
This work focuses on word order errors, specifically those involving the placement of finite verbs in a sentence.}, booktitle = {NAACL workshop on Innovative Use of NLP for Building Educational Applications, July 15, 2022, Seattle, Washington}, author = {Casademont Moner, Judit and Volodina, Elena}, year = {2022}, publisher = {Association for Computational Linguistics}, address = {Seattle, Washington}, ISBN = {978-1-955917-83-4}, } @incollection{saxena-etal-2022-linguistic-317923, title = {A linguistic sketch of Kanashi}, booktitle = {Synchronic and Diachronic Aspects of Kanashi}, editor = {Anju Saxena and Lars Borin}, author = {Saxena, Anju and Borin, Lars and Comrie, Bernard and Sagar, Padam}, year = {2022}, publisher = {De Gruyter Mouton}, address = {Berlin}, ISBN = {9783110703153}, pages = {53--127}, } @incollection{saxena-borin-2022-introduction-317921, title = {Introduction: Kanashi, its speakers, its linguistic and extralinguistic context}, booktitle = {Synchronic and diachronic aspects of Kanashi}, editor = {Anju Saxena and Lars Borin}, author = {Saxena, Anju and Borin, Lars}, year = {2022}, publisher = {De Gruyter Mouton}, address = {Berlin}, ISBN = {9783110703153}, pages = {3--11}, } @incollection{saxena-etal-2022-clues-317928, title = {Clues to Kanashi prehistory 1: Loanword adaptation in nouns and adjectives}, booktitle = {Synchronic and Diachronic Aspects of Kanashi}, editor = {Anju Saxena and Lars Borin}, author = {Saxena, Anju and Borin, Lars and Comrie, Bernard}, year = {2022}, publisher = {De Gruyter Mouton}, address = {Berlin}, ISBN = {9783110703153}, pages = {173--213}, } @incollection{forsberg-skoldberg-2022-ordvektorer-320472, title = {Ordvektorer i lexikografiskt arbete}, abstract = {We present a preliminary case study on the use of word vectors in lexicographic practice. The study shows the potential of using vector models in the revision of existing dictionary entries as well as creating new entries.}, booktitle = {Live and learn. 
Festschrift in honor of Lars Borin (eds. Elena Volodina, Dana Dannélls, Aleksandrs Berdicevskis, Markus Forsberg \& Shafqat Virk)}, author = {Forsberg, Markus and Sköldberg, Emma}, year = {2022}, publisher = {Department of Swedish, Multilingualism, Language Technology}, address = {Gothenburg}, ISBN = {978-91-87850-82-0}, pages = {37--41}, } @article{landqvist-2022-telefoner-316926, title = {Telefoner, tidpunkter och termometrar – i verkligheten och i språket}, journal = {GU Journalen}, author = {Landqvist, Hans}, year = {2022}, volume = {2022}, number = {3}, pages = {54--55}, } @incollection{blensenius-anderssonlilja-2022-search-314515, title = {In search of subjective meaning in Swedish pseudocoordination}, abstract = {This study provides a discussion of the development of subjective meaning associated with the motion-verb pseudocoordination gå och V 'go/walk and V' and the posture-verb pseudocoordination sitta och V 'sit and V', using historical and present-day linguistic data. It is claimed that an interpretation in terms of item-based analogy and entrenchment of frequent meaning clusters is the most plausible analysis for the development of subjective (and pejorative) meaning associated with gå och V. 
The study of sitta och V is preliminary, but the results indicate that the subjective meaning of this construction is less entrenched than that of the gå och V construction and that the subjective overtone of subjectivity may be a result of the combination of the social/cultural meaning of the posture and certain intrinsically pejorative verbs, together with certain locatives.}, booktitle = {Pseudo-Coordination and Multiple Agreement Constructions}, editor = {Giuliana Giusti and Vincenzo Nicolò Di Caro and Daniel Ross}, author = {Blensenius, Kristian and Andersson Lilja, Peter}, year = {2022}, publisher = {John Benjamins}, address = {Amsterdam, Philadelphia}, ISBN = {9789027210883}, pages = {213--229}, } @incollection{lindahl-r?dveneide-2022-argumentative-325260, title = {Argumentative Language Resources at Språkbanken Text}, abstract = {Språkbanken Text at the University of Gothenburg is a CLARIN B-centre providing language resources in Swedish, as well as tools to use them, for a wide range of disciplines. In 2017, we began exploring the field of argument mining – the process of automatically identifying and classifying arguments in text – partly aimed at establishing language resources and tools for argument analysis and mining in Swedish.}, booktitle = {CLARIN: The Infrastructure for Language Resources, eds. 
Darja Fišer \& Andreas Witt}, author = {Lindahl, Anna and Rødven-Eide, Stian}, year = {2022}, publisher = {De Gruyter}, address = {Berlin, Boston}, ISBN = {9783110767346}, pages = {667--690}, } @incollection{saxena-etal-2022-kanashi-317930, title = {Kanashi and West Himalayish: Genealogy, language contact, prehistoric migrations}, booktitle = {Synchronic and Diachronic Aspects of Kanashi}, editor = {Anju Saxena and Lars Borin}, author = {Saxena, Anju and Borin, Lars and Comrie, Bernard}, year = {2022}, publisher = {De Gruyter Mouton}, address = {Berlin}, ISBN = {9783110703153}, pages = {237--254}, } @incollection{bouma-adesam-2022-counting-321810, title = {Counting dirty words: The effect of OCR quality on token statistics in historical Swedish corpora}, booktitle = {Live and learn: Festschrift in honor of Lars Borin / Editors: Elena Volodina, Dana Dannélls, Aleksandrs Berdicevskis, Markus Forsberg, Shafqat Virk}, author = {Bouma, Gerlof and Adesam, Yvonne}, year = {2022}, publisher = {University of Gothenburg}, address = {Gothenburg}, ISBN = {978-91-87850-83-7}, pages = {17--24}, } @inProceedings{lindahl-2022-machines-322689, title = {Do machines dream of artificial agreement?}, abstract = {In this paper the (assumed) inconsistency between F1-scores and annotator agreement measures is discussed. This is exemplified in five corpora from the field of argumentation mining. High agreement is important in most annotation tasks and also often deemed important for an annotated dataset to be useful for machine learning. However, depending on the annotation task, achieving high agreement is not always easy. This is especially true in the field of argumentation mining, because argumentation can be complex as well as implicit. There are also many different models of argumentation, which can be seen in the increasing number of argumentation annotated corpora. Many of these reach moderate agreement but are still used in machine learning tasks, reaching high F1-score. 
In this paper we describe five corpora, in particular how they have been created and used, to see how they have handled disagreement. We find that agreement can be raised post-production, but that more discussion regarding evaluating and calculating agreement is needed. We conclude that standardisation of the models and the evaluation methods could help such discussions.}, booktitle = {Proceedings of the 18th Joint ACL - ISO Workshop on Interoperable Semantic Annotation within LREC2022, June 20, 2022, Marseille, France / Harry Bunt (Editor)}, author = {Lindahl, Anna}, year = {2022}, publisher = {European Language Resources Association}, address = {Marseille}, ISBN = {979-10-95546-81-8}, } @inProceedings{landqvist-2022-finlandssvenska-317134, title = {Finlandssvenska översättare i Svenskt översättarlexikon utifrån översättningssociologiska utgångspunkter}, booktitle = {Svenskan i Finland 19. Föredrag vid den nittonde sammankomsten för beskrivningen av svenskan i Finland, Vasa 6–7 maj 2021. Red. Siv Björklund, Bodil Haagensen, Marianne Nordman och Anders Westerlund}, author = {Landqvist, Hans}, year = {2022}, publisher = {Åbo Akademi/Svensk-Österbottniska Samfundet}, address = {Vasa}, ISBN = {978-952-69650-4-8}, } @article{skoldberg-2022-andra-320475, title = {Andra upplagan av Svensk ordbok: förutsättningar och redaktionella val}, abstract = {In the article, the editor-in-chief of the second edition of Svensk ordbok utgiven av Svenska Akademien (SO2, 2021) gives an overall picture of, e.g., the technical conditions, financial framework and agreements with the financier which have guided the work with the edition. Furthermore, examples are provided of some of the lexicographical work initiatives that have taken place prior to the second edition and the motives behind these, as well as the priorities that have been necessary. 
}, journal = {LexicoNordica}, author = {Sköldberg, Emma}, year = {2022}, volume = {29}, pages = {139--152}, } @incollection{alimohammed-etal-2022-annotation-321989, title = {Annotation Management Tool: A Requirement for Corpus Construction}, abstract = {We present an annotation management tool, SweLL portal, that has been developed for the purposes of the SweLL infrastructure project for building a learner corpus of Swedish (Volodina et al., 2019). The SweLL portal has been used for supervised access to the database, data versioning, import and export of data and metadata, statistical overview, administration of annotation tasks, monitoring of annotation tasks and reliability controls. The development of the portal was driven by visions of longitudinal sustainable data storage and was partially shaped by situational needs reported by portal users, including project managers, researchers, and annotators.}, booktitle = {Selected Papers from the CLARIN Annual Conference 2021, Virtual Event, 2021, 27–29 September / Monica Monachini and Maria Eskevich (eds.)}, author = {Ali Mohammed, Yousuf and Matsson, Arild and Volodina, Elena}, year = {2022}, publisher = {Linköping Electronic Conference }, address = {Linköping, Sweden}, ISBN = {978-91-7929-444-1}, pages = {101--108}, } @article{berdicevskis-semenuks-2022-imperfect-313148, title = {Imperfect language learning reduces morphological overspecification: Experimental evidence}, journal = {PLoS ONE}, author = {Berdicevskis, Aleksandrs and Semenuks, Arturs}, year = {2022}, volume = {17}, number = {1}, pages = {1--26}, } @inProceedings{fridlund-etal-2022-codifying-315876, title = {Codifying the Debates of the Riksdag: Towards a Framework for Semi-automatic Annotation of Swedish Parliamentary Discourse}, abstract = {This study provides an exploratory attempt to develop a framework for how to semi-automatically annotate salient topics in Swedish parliamentary debate. 
The discussion is grounded in the ongoing digital humanities project SweTerror that studies the terrorism discourse in the Riksdag 1968–2018 through a mixed-methods approach. The paper presents our tentative framework through its three main categories: metadata, language data and frame data. While the first two categories are mostly generic and their data could mainly be automatically extracted, the third category is contextual and requires manual interpretation. We discuss the design of the latter through the theoretical concept of ‘framing’ and illustrate the framework’s overall principles through a case study of utterances in the debates 1968–1970 concerning terrorism. We conclude by suggesting that it may be more generally applicable for studies of parliamentary debates in HSS research if further modified for the particular research purposes. }, booktitle = {CEUR Workshop Proceedings. Matti La Mela, Fredrik Norén & Eero Hyvönen, eds., Proceedings of Digital Parliamentary Data in Action (DiPaDa 2022). Workshop Co-located with the 6th Digital Humanities in the Nordic and Baltic Countries Conference (DHNB 2022), Uppsala, Sweden, March 15, 2022.}, author = {Fridlund, Mats and Brodén, Daniel and Olsson, Leif-Jöran and Ängsal, Magnus Pettersson}, year = {2022}, publisher = {CEUR-WS}, address = {Aachen}, } @inProceedings{angsal-etal-2022-linguistic-318676, title = {Linguistic Framing of Political Terror: Distant and Close Readings of the Discourse on Terrorism in the Swedish Parliament 1993–2018}, abstract = {This paper provides a study of the discourse on terrorism in Swedish parliamentary debate 1993– 2018. The aim is to explore how terrorism is discursively constructed in parliamentary delibera- tions, drawing on the resources of Swe-Clarin in the form of the corpus tool Korp and the linguis- tic concept of ‘frame’. 
To map meanings attached to terrorism we pursue two research questions: what framing elements are connected to ‘terrorism’ and ‘terrorist’ in parliamentary speeches as 1) simplexes and 2) as part of compounds along the lines of controversies and party affiliations? The latter research question is probed through distant and close readings of the specific compound statsterrorism (‘state terrorism’). Our findings show that terrorism is typically framed as located outside of Sweden and as tied to Islamism, but the question of what countries are associated with state terrorism depends on the political affiliation of the interlocutor. The compound statsterror- ism is most prominently used by the left and green parties and then commonly associated with Israel and Turkey. We conclude by suggesting that a widened inquiry into compounds, in general as well as diachronically, is likely a productive way of expanding the scope of our research.}, booktitle = {CLARIN Annual Conference Proceedings, 10–12 October 2022, Prague, Czechia. Eds. Tomaž Erjavec & Maria Eskevich}, author = {Ängsal, Magnus Pettersson and Brodén, Daniel and Fridlund, Mats and Olsson, Leif-Jöran and Öhberg, Patrik}, year = {2022}, address = {Prag}, } @inProceedings{edlund-etal-2022-multimodal-311480, title = {A Multimodal Digital Humanities Study of Terrorism in Swedish Politics: An Interdisciplinary Mixed Methods Project on the Configuration of Terrorism in Parliamentary Debates, Legislation, and Policy Networks 1968–2018}, abstract = {This paper presents the design of one of Sweden’s largest digital humanities projects, SweTerror, that through an interdisciplinary multi-modal methodological approach develops an extensive speech-to-text digital HSS resource. SweTerror makes a major contribution to the study of terrorism in Sweden through a comprehensive mixed methods study of the political discourse on terrorism since the late 1960s. 
Drawing on artificial intelligence in the form of state-of-the-art language and speech technology, it systematically analyses all forms of relevant parliamentary utterances. It explores and curates an exhaustive but understudied multi-modal collection of primary sources of central relevance to Swedish democracy: the audio recordings of the Swedish Parliament’s debates. The project studies the framing of terrorism both as policy discourse and enacted politics, examining semantic and emotive components of the parliamentary discourse on terrorism as well as major actors and social networks involved. It covers political responses to a range of terrorism-related issues as well as factors influencing policy-makers’ engagement, including political affiliations and gender. SweTerror also develops an online research portal, featuring the complete research material and searchable audio made readily accessible for further exploration. Long-term, the project establishes a model for combining extraction technologies (speech recognition and analysis) for audiovisual parliamentary data with text mining and HSS interpretive methods and the portal is designed to serve as a prototype for other similar projects.}, booktitle = { Intelligent Systems and Applications. Proceedings of the 2021 Intelligent Systems Conference, September 2–3, 2021 / Arai K. (eds) }, author = {Edlund, Jens and Brodén, Daniel and Fridlund, Mats and Lindhé, Cecilia and Olsson, Leif-Jöran and Ängsal, Magnus Pettersson and Öhberg, Patrik}, year = {2022}, publisher = {Springer}, address = {Cham}, ISBN = {978-3-030-82195-1}, } @inProceedings{munozsanchez-etal-2022-first-320225, title = {A First Attempt at Unreliable News Detection in Swedish}, abstract = {Throughout the COVID-19 pandemic, a parallel infodemic has also been going on such that the information has been spreading faster than the virus itself. 
During this time, every individual needs to access accurate news in order to take corresponding protective measures, regardless of their country of origin or the language they speak, as misinformation can cause significant loss to not only individuals but also society. In this paper we train several machine learning models (ranging from traditional machine learning to deep learning) to try to determine whether news articles come from either a reliable or an unreliable source, using just the body of the article. Moreover, we use a previously introduced corpus of news in Swedish related to the COVID-19 pandemic for the classification task. Given that our dataset is both unbalanced and small, we use subsampling and easy data augmentation (EDA) to try to solve these issues. In the end, we realize that, due to the small size of our dataset, using traditional machine learning along with data augmentation yields results that rival those of transformer models such as BERT.}, booktitle = {Proceedings of the Second International Workshop on Resources and Techniques for User Information in Abusive Language Analysis, Marseille, 20-25 June, 2022 / Editors: Johanna Monti, Valerio Basile, Maria Pia Di Buono, Raffaele Manna, Antonio Pascucci, Sara Tonell}, author = {Muñoz Sánchez, Ricardo and Johansson, Eric and Tayefeh, Shakila and Kad, Shreyash}, year = {2022}, publisher = {European Language Resources Association (ELRA)}, address = {Paris}, ISBN = {979-10-95546-99-3}, } @inProceedings{heittola-etal-2022-finland-317133, title = {TNC och Finland. Korrespondens inom terminologiområdet 1940–1999 }, booktitle = {Svenskan i Finland 19. Föredrag vid den nittonde sammankomsten för beskrivningen av svenskan i Finland, Vasa 6–7 maj 2021. Red. 
Siv Björklund, Bodil Haagensen, Marianne Nordman och Anders Westerlund}, author = {Heittola, Sanna and Landqvist, Hans and Nissilä, Niina and Pilke, Nina}, year = {2022}, publisher = {Åbo Akademi/Svensk-Österbottniska Samfundet}, address = {Vasa}, ISBN = {978-952-69650-4-8}, } @edited_book{volodina-etal-2022-live-320415, title = {Live and Learn- Festschrift in honor of Lars Borin}, abstract = {This Festschrift has been compiled to honor Professor Lars Borin on his 65th anniversary. It consists of 30 articles which reflect a fraction of Lars’ scholarly interests within computational linguistics and related fields. They come from his friends and colleagues around the world and deal with topics that have been – in one way or another – inspired by his work. A common theme for the articles is the never-ending need to learn, which is alluded to in the title of the volume, Live and Learn.}, editor = {Volodina, Elena and Dannélls, Dana and Berdicevskis, Aleksandrs and Forsberg, Markus and Virk, Shafqat}, year = {2022}, publisher = {Institutionen för svenska, flerspråkighet och språkteknologi, Göteborgs universitet}, address = {Göteborg}, ISBN = {978-91-87850-83-7}, } @inProceedings{kokkinakis-hammarlin-2022-negative-321864, title = {Negative vaccine voices in Swedish social media }, abstract = {Vaccinations are one of the most significant interventions to public health, but vaccine hesitancy creates concerns for a portion of the population in many countries, including Sweden. Since discussions on vaccine hesitancy are often taken on social networking sites, data from Swedish social media are used to study and quantify the sentiment among the discussants on the vaccination-or-not topic during phases of the COVID-19 pandemic. Out of all the posts analyzed a majority showed a stronger negative sentiment, prevailing throughout the whole of the examined period, with some spikes or jumps due to the occurrence of certain vaccine-related events distinguishable in the results. 
Sentiment analysis can be a valuable tool to track public opinions regarding the use, efficacy, safety, and importance of vaccination. }, booktitle = {Proceedings of the 13th International Conference of Experimental Linguistics}, author = {Kokkinakis, Dimitrios and Hammarlin, Mia-Marie}, year = {2022}, } @inProceedings{volodina-etal-2022-swedish-321985, title = {Swedish L2 profile - a tool for exploring L2 data.}, abstract = {Learner corpus researchers, NLP researchers, as well as Digital Humanities and Social Sciences in general, rely on access to various data sets for empirical analysis, statistical insights, and/or for model building. However, interpretation of data is a non-trivial task and there is a need for data visualization tools. One such attempt is the Swedish L2 profile (SweL2P) – an ongoing project setting up the first digital tool allowing users to explore written Swedish learner language from a linguistic point of view.}, booktitle = {Learner Corpus Research conference, 22-24 September, Padua, Italy}, author = {Volodina, Elena and Lindström Tiedemann, Therese and Ali Mohammed, Yousuf}, year = {2022}, address = {Universitá degli Studi di Padova, Padua, Italy}, } @inProceedings{klezl-etal-2022-exploring-321958, title = {Exploring Linguistic Acceptability in Swedish Learners’ Language }, abstract = {We present our initial experiments on binary classification of sentences into linguistically correct versus incorrect ones in Swedish using the DaLAJ dataset (Volodina et al., 2021a). The nature of the task is bordering on linguistic acceptability judgments, on the one hand, and on grammatical error detection task, on the other. The experiments include models trained with different input features and on different variations of the training, validation, and test splits. We also analyze the results focusing on different error types and errors made on different proficiency levels. 
Apart from insights into which features and approaches work well for this task, we present first benchmark results on this dataset. The implementation is based on a bidirectional LSTM network and pre-trained FastText embeddings, BERT embeddings, own word and character embeddings, as well as part-of-speech tags and dependency labels as input features. The best model used BERT embeddings and a training and validation set enriched with additional correct sentences. It reached an accuracy of 73% on one of three test sets used in the evaluation. These promising results illustrate that the dataand format of DaLAJ make a valuable new resource for research in acceptability judgements in Swedish.}, booktitle = {Proceedings of the 11th Workshop on Natural Language Processing for Computer-Assisted Language Learning (NLP4CALL 2022)}, author = {Klezl, Julia and Ali Mohammed, Yousuf and Volodina, Elena}, year = {2022}, publisher = {Linköping University Electronic Press}, address = {Linköping, Sweden}, ISBN = {978-91-7929-459-5 }, } @incollection{volodina-etal-2022-lyxig-321974, title = {Lyxig språklig födelsedagspresent from the Swedish Word Family.}, abstract = {Morphology and lexical resources are known to be two of Lars Borin’s biggest research passions. We have, therefore, prepared a short description of a new kind of a lexical resource for Swedish, the Swedish Word Family. 
The resource is compiled based on learner corpora, and contains lexical items manually analyzed for derivational morphology.}, booktitle = {Live and Learn- Festschrift in honor of Lars Borin}, author = {Volodina, Elena and Ali Mohammed, Yousuf and Lindström Tiedemann, Therese}, year = {2022}, publisher = {Department of Swedish, Multilingualism, Language Technology}, address = {Gothenburg, Sweden}, ISBN = {978-91-87850-83-7}, } @misc{kokkinakis-etal-2022-proceedings-317658, title = {Proceedings of LREC 2022 Workshop: Resources and ProcessIng of linguistic, para-linguistic and extra-linguistic Data from people with various forms of cognitive/psychiatric/developmental impairments. (RaPID-2012), Saturday 25th of June 2022. }, abstract = {RaPID-4 aims to be an interdisciplinary forum for researchers to share information, findings, methods, models and experience on the collection and processing of data produced by people with various forms of mental, cognitive, neuropsychiatric, or neurodegenerative impairments, such as aphasia, dementia, autism, bipolar disorder, Parkinson's disease or schizophrenia. Particularly, the workshop's focus is on creation, processing and application of data resources from individuals at various stages of these impairments and with varying degrees of severity. Creation of resources includes e.g. the annotation, description, analysis and interpretation of linguistic, paralinguistic and extra-linguistic aspects of such data (i.e. spontaneous spoken language, transcripts, eye tracking, wearable and sensor measurements, digital biomarkers, etc.). Processing of such data can be used to identify, extract, correlate, evaluate and disseminate various linguistic or multimodal phenotypes and measurements, which then can be applied to aid diagnosis, monitor the progression or predict individuals at risk. 
A central aim is to facilitate the study of the relationships among various levels of linguistic, paralinguistic and extra-linguistic observations (e.g., acoustic measures; phonological, syntactic and semantic features; eye tracking, sensors, signs and multimodal signals). Submission of papers are invited in all of the aforementioned areas, particularly emphasizing multidisciplinary aspects of processing such data and the interplay between clinical/nursing/medical sciences, language technology, computational linguistics, natural language processing (NLP) and computer science. The workshop will act as a stimulus for the discussion of several ongoing research questions driving current and future research by bringing together researchers from various research communities. }, author = {Kokkinakis, Dimitrios and Themistocleous, Charalambos K. and Lundholm Fors, Kristina and Tsanas, Athanasios and Fraser, Kathleen C.}, year = {2022}, publisher = {European Language Resources Association}, address = {Paris}, ISBN = {979-10-95546-77-1}, } @incollection{fridlund-etal-2022-trawling-319822, title = {Trawling and Trolling for Terrorists in the Digital Gulf of Bothnia: Cross-lingual Text Mining for the Emergence of Terrorism in Swedish and Finnish Newspapers, 1780–1926}, abstract = {In pursuing the historical emergence of the discourse on terrorism, this study trawls the “digital Gulf of Bothnia” in the form of a corpus of combined Swedish and Finnish digitized newspaper texts. Through a cross-lingual exploration of the uses of the concept of terrorism in historical Swedish and Finnish news, we examine meanings anchored in the two culturally close but still decidedly different national political contexts. The study is an outcome of an integrative interdisciplinary effort by Swe-Clarin, using resources accessible through the CLARIN infrastructure to enrich scholarship in the humanities. 
The capabilities of the corpus tool Korp enable us to affirm prior research on the conceptual history of terrorism, but also to suggest a complex and diverse picture of the connotations of terrorism, both as state and sub-state violence up until the 20th century. At the same time, the study allows us to explore the potentials of cross-lingual text mining for historical analysis of national online newspaper corpora provided by Swe-Clarin and FIN-CLARIN.}, booktitle = {CLARIN: The Infrastructure for Language Resources, eds. Darja Fišer & Andreas Witt}, author = {Fridlund, Mats and Brodén, Daniel and Jauhiainen, Tommi and Malkki, Leena and Olsson, Leif-Jöran and Borin, Lars}, year = {2022}, publisher = {De Gruyter Mouton}, address = {Berlin, Boston}, ISBN = {9783110767346}, pages = {781--802}, } @inProceedings{landqvist-2022-termer-316458, title = {Termer och begrepp – både nytta och nöje! Rapport från ett pågående forskningsprojekt}, abstract = {Termer, begrepp och begreppsdefinitioner är centrala resurser när experter inom fackområden kommunicerar med varandra, experter kommunicerar med icke-experter och icke-experter kommunicerar med andra icke-experter. Exempel på dessa tre situationer, hämtade från juridikens fackområde, är kommunikation mellan jurister i en tingsrätt, kommunikation mellan jurister och icke-juridiskt insatta i en tingsrätt och kommunikation mellan en privatperson som ingår ett köpeavtal med en annan privatperson. Mellan 1941 och 2018 fungerade Tekniska Nomenklaturcentralen TNC (2000–2018: Terminologicentrum TNC) som nationellt centrum för terminologiskt arbete i Sverige. TNC verkade både i den offentliga sektorn och den privata. Efter nedläggningen av TNC vid årsskiftet 2018/2019 har Institutet för språk och folkminnen – Isof börjat bygga upp en verksamhet inriktad på fackspråk och terminologi, med fokus på den offentliga sektorn. 
Isof har också i uppdrag att förvalta och utveckla termdatabasen Rikstermbanken, som initierades och utvecklades av TNC, samt förvalta TNC:s bibliotek. Vid sidan av Isof finns också andra aktörer som arbetar med terminologi i Sverige, bl.a. företag som fordonstillverkaren Scania, och den ideella föreningen Terminologifrämjandet. Mot denna bakgrund arbetar forskare vid Göteborgs universitet och Vasa universitet i Finland sedan 2016 inom projektet Termer i tid – Tidens termer. Terminologi som språklig infrastruktur då, nu och sedan. Den övergripande målsättningen för projektet är att detta ska kartlägga hur ett antal centrala institutionella aktörer agerar och interagerar i terminologiska frågor i Sverige från 1940-talet och framåt samt klarlägga vilka möjligheter och utmaningar som det finns för terminologiskt arbete som en språklig infrastruktur i dagens och framtidens Sverige. Under seminariet presenteras projektet, ett antal resultat från arbetet inom projektet och några tankar om fortsättningen av projektet. }, booktitle = {Högre seminariet för språk och svenska, Linnéuniversitetet, 24 maj 2022}, author = {Landqvist, Hans}, year = {2022}, } @inProceedings{morger-etal-2022-cross-325984, title = {A Cross-lingual Comparison of Human and Model Relative Word Importance}, abstract = {Relative word importance is a key metric for natural language processing. In this work, we compare human and model relative word importance to investigate if pretrained neural language models focus on the same words as humans cross-lingually. We perform an extensive study using several importance metrics (gradient-based saliency and attention-based) in monolingual and multilingual models, including eye-tracking corpora from four languages (German, Dutch, English, and Russian). We find that gradient-based saliency, first-layer attention, and attention flow correlate strongly with human eye-tracking data across all four languages. 
We further analyze the role of word length and word frequency in determining relative importance and find that it strongly correlates with length and frequency, however, the mechanisms behind these non-linear relations remain elusive. We obtain a cross-lingual approximation of the similarity between human and computational language processing and insights into the usability of several importance metrics.}, booktitle = {Proceedings of the 2022 CLASP Conference on (Dis)embodiment, Gothenburg and online 15–16 September 2022 / Simon Dobnik, Julian Grove and Asad Sayeed (eds.)}, author = {Morger, Felix and Brandl, Stephanie and Beinborn, Lisa and Hollenstein, Nora}, year = {2022}, publisher = {Association for Computational Linguistics}, address = {Gothenburg, Sweden}, ISBN = {978-1-955917-67-4}, } @techreport{hammarstedt-etal-2022-sparv-318399, title = {Sparv 5 Developer’s Guide}, abstract = {The Sparv Pipeline developed by Språkbanken Text is a text analysis tool run from the command line. This Developer’s Guide describes its general structure and key concepts and serves as an API documentation. Most importantly, it describes how to write plugins for Sparv 5 so that you can add your own functions to the toolkit.}, author = {Hammarstedt, Martin and Schumacher, Anne and Borin, Lars and Forsberg, Markus}, year = {2022}, } @techreport{hammarstedt-etal-2022-sparv-318405, title = {Sparv 5 User Manual}, abstract = {The Sparv Pipeline developed by Språkbanken Text is a text analysis tool run from the command line. This user manual describes how to get Sparv 5 up and running on your own machine, how to configure it and how to use it for annotating your own corpora.}, author = {Hammarstedt, Martin and Schumacher, Anne and Borin, Lars and Forsberg, Markus}, year = {2022}, publisher = {Institutionen för svenska, flerspråkighet och språkteknologi}, address = {Göteborg}, } @incollection{berdicevskis-etal-2022-actually-320416, title = {We may actually all die tomorrow... 
nevertheless: Predicting short-term frequency changes in Swedish neologisms}, abstract = {Predicting the future is difficult, as Lars Borin likes to point out by saying the phrase which is included in the title of this paper. Nevertheless, we attempt to predict short-term changes in the frequency of new Swedish words based on some measures of their linguistic and social dissemination. We show that it is possible to predict the direction of change with a higher-than-baseline accuracy. Most interestingly, we show that predictions are much less accurate for those words that denote new phenomena than for those who are new signifiers for already existing phenomena.}, booktitle = {Live and learn: Festschrift in honor of Lars Borin / Editors: Elena Volodina, Dana Dannélls, Aleksandrs Berdicevskis, Markus Forsberg, Shafqat Virk}, author = {Berdicevskis, Aleksandrs and Adesam, Yvonne and Coussé, Evie}, year = {2022}, publisher = {Institutionen för svenska, flerspråkighet och språkteknologi, Göteborgs universitet}, address = {Göteborg}, ISBN = {978-91-87850-83-7}, pages = {5--12}, }