@proceedings{alfter-etal-2023-proceedings-331649,
  title = {Proceedings of the 12th Workshop on Natural Language Processing for Computer Assisted Language Learning ({NLP4CALL} 2023)},
  abstract = {The workshop series on Natural Language Processing (NLP) for Computer-Assisted Language Learning (NLP4CALL) is a meeting place for researchers working on the integration of Natural Language Processing and Speech Technologies in CALL systems and exploring the theoretical and methodological issues arising in this connection. The latter includes, among others, the integration of insights from Second Language Acquisition (SLA) research, and the promotion of “Computational SLA” through setting up Second Language research infrastructures.},
  editor = {Alfter, David and Volodina, Elena and François, Thomas and Jönsson, Arne and Rennes, Evelina},
  year = {2023},
  publisher = {Linköping University Press},
  address = {Linköping},
  isbn = {978-91-8075-250-3},
}

@article{cousse-etal-2023-anvands-332468,
  title = {Hur används de, dem och dom i nutida skriftspråk? {En} storskalig korpusundersökning av nyheter och sociala medier},
  abstract = {This study ties in with a longstanding debate on the Swedish spelling variants de, dem and dom for personal pronouns (third person plural) and definite articles (plural). It charts the usage of de, dem and dom in five large corpora with news and social media texts over the past 25 years. The corpora contain more than 1.5 billion tokens, which rules out manual handling of the data. Instead, this study makes use of computational methods (including an AI language model) to automatically identify and classify relevant observations. Analysis of the news corpora shows a relatively stable usage of de, dem and dom over the past 25 years. The forms de and dem are predominantly used according to the norm: de for pronouns in subject position and as a definite article; dem for pronouns in object position. The colloquial form dom is hardly found in news texts.
Analysis of the social media corpora shows more variation and change. The colloquial form dom is used in 5–25\% of all instances instead of de or dem and has decreased after an initial rise. The forms de and dem are sometimes used in a non-standard way: de occurs in object position in 4–10\% of the observations; dem is found in subject position or as a definite article in 1–7\% of the cases. Non-standard dem is potentially on the rise with younger writers. The corpus analysis also provides details on the usage of de and dem in relative clauses, and on the users’ ratings of posts containing de, dem and dom on the social media platform Reddit},
  journal = {Språk \& Stil},
  author = {Coussé, Evie and Adesam, Yvonne and Rekathati, Faton and Berdicevskis, Aleksandrs},
  year = {2023},
  volume = {NF 33},
  pages = {39--70},
}

@comment{NOTE(review): the number field of the next entry holds an ISO date (2023-03-20), presumably the publication date of a newspaper or web piece. Confirm and move it to a date field.}
@misc{cousse-etal-2023-inget-324690,
  title = {Inget stöd i forskningen för att de/dem slås ut},
  author = {Coussé, Evie and Adesam, Yvonne and Berdicevskis, Aleksandrs},
  year = {2023},
  number = {2023-03-20},
}

@inproceedings{volodina-etal-2023-grandma-328176,
  title = {Grandma {Karl} is 27 years old – research agenda for pseudonymization of research data},
  abstract = {Accessibility of research data is critical for advances in many research fields, but textual data often cannot be shared due to the personal and sensitive information which it contains, e.g names or political opinions. General Data Protection Regulation (GDPR) suggests pseudonymization as a solution to secure open access to research data, but we need to learn more about pseudonymization as an approach before adopting it for manipulation of research data. This paper outlines a research agenda within pseudonymization, namely need of studies into the effects of pseudonymization on unstructured data in relation to e.g.
readability and language assessment, as well as the effectiveness of pseudonymization as a way of protecting writer identity, while also exploring different ways of developing context-sensitive algorithms for detection, labelling and replacement of personal information in unstructured data. The recently granted project on pseudonymization ‘Grandma Karl is 27 years old’ addresses exactly those challenges.},
  booktitle = {2023 IEEE Ninth International Conference on Big Data Computing Service and Applications (BigDataService), Athens, Greece, 2023},
  author = {Volodina, Elena and Dobnik, Simon and Lindström Tiedemann, Therese and Vu, Xuan-Son},
  year = {2023},
  publisher = {IEEE Computer Society},
  address = {Los Alamitos},
  isbn = {979-8-3503-3379-4},
}

@proceedings{tahmasebi-etal-2023-proceedings-331093,
  title = {Proceedings of the 4th Workshop on Computational Approaches to Historical Language Change, {LChange'23}, {December} 6th, 2023, {Singapore}},
  abstract = {Welcome to the 4th International Workshop on Computational Approaches to Historical Language Change (LChange’23) co-located with EMNLP 2023. LChange is held on December 6th, 2023, as a hybrid event with participation possible both virtually and on-site in Singapore. Characterizing the time-varying nature of language will have broad implications and applications in multiple fields including linguistics, artificial intelligence, digital humanities, computational cognitive and social sciences. In this workshop, we bring together the world’s pioneers and experts in computational approaches to historical language change with a focus on digital text corpora. In doing so, this workshop carries out the triple goals of disseminating state-of-the-art research on diachronic modeling of language change, fostering cross-disciplinary collaborations, and exploring the fundamental theoretical and methodological challenges in this growing niche of computational linguistic research.},
  editor = {Tahmasebi, Nina and Montariol, Syrielle and Dubossarsky, Haim and Kutuzov, Andrey and Hengchen, Simon and Alfter, David and Periti, Francesco and Cassotti, Pierluigi},
  year = {2023},
  publisher = {Association for Computational Linguistics},
  address = {Stroudsburg, PA},
  isbn = {979-8-89176-043-1},
}

@inproceedings{volodina-etal-2023-dalaj-326817,
  title = {{DaLAJ-GED} – a dataset for Grammatical Error Detection tasks on {Swedish}},
  booktitle = {Proceedings of the 12th Workshop on Natural Language Processing for Computer Assisted Language Learning (NLP4CALL 2023)},
  editor = {Alfter, David and Volodina, Elena and François, Thomas and Jönsson, Arne and Rennes, Evelina},
  author = {Volodina, Elena and Ali Mohammed, Yousuf and Berdicevskis, Aleksandrs and Bouma, Gerlof and Öhman, Joey},
  year = {2023},
  series = {Linköping Electronic Conference Proceedings},
  publisher = {Linköping University Press},
  address = {Linköping},
  isbn = {978-91-8075-250-3},
  pages = {94--101},
}

@inproceedings{bloomstrom-etal-2023-preparing-328710,
  title = {Preparing a corpus of spoken {Xhosa}},
  booktitle = {Proceedings of the 2023 CLASP Conference on Learning with Small Data (LSD), Gothenburg and online 11–12 September 2023},
  author = {Bloom Ström, Eva-Marie and Slater, Onelisa and Zahran, Aron and Berdicevskis, Aleksandrs and Schumacher, Anne},
  year = {2023},
  publisher = {Association for Computational Linguistics},
  address = {Gothenburg, Sweden},
  isbn = {979-8-89176-000-4},
  pages = {62--67},
}

@comment{NOTE(review): the next entry carries volume, number and a page range but no journal field; presumably a journal special issue. Confirm the venue before retyping it.}
@misc{ehret-etal-2023-measuring-326620,
  title = {Measuring Language Complexity},
  author = {Ehret, Katharina and Berdicevskis, Aleksandrs and Bentz, Christian and Blumenthal-Dramé, Alice},
  year = {2023},
  volume = {9},
  number = {s1},
pages = {1--167},
}

@inproceedings{berdicevskis-erbro-2023-tomato-326355,
  title = {You say tomato, {I} say the same: A large-scale study of linguistic accommodation in online communities},
  booktitle = {Proceedings of the 24th Nordic Conference on Computational Linguistics (NoDaLiDa)},
  author = {Berdicevskis, Aleksandrs and Erbro, Viktor},
  year = {2023},
  publisher = {University of Tartu Library},
  isbn = {978-99-1621-999-7},
  pages = {415--424},
}

@misc{themistocleous-etal-2023-assessing-331090,
  title = {Assessing Language Disorders using Artificial Intelligence: a Paradigm Shift},
  abstract = {Speech, language, and communication deficits are present in most neurodegenerative syndromes. They enable the early detection, diagnosis, treatment planning, and monitoring of neurocognitive disease progression as part of traditional neurological assessment. Nevertheless, standard speech and language evaluation is time-consuming and resource-intensive for clinicians. We argue that using machine learning methodologies, natural language processing, and modern artificial intelligence (AI) for Language Assessment is an improvement over conventional manual assessment. Using these methodologies, Computational Language Assessment (CLA) accomplishes three goals: (i) provides a neuro-cognitive evaluation of speech, language, and communication in elderly and high-risk individuals for dementia; (ii) facilitates the diagnosis, prognosis, and therapy efficacy in at-risk and language-impaired populations; and (iii) allows easier extensibility to assess patients from a wide range of languages. By employing AI models, CLA may inform neurocognitive theory on the relationship between language symptoms and their neural bases. Finally, it signals a paradigm shift by significantly advancing our ability to optimize the prevention and treatment of elderly individuals with communication disorders, allowing them to age gracefully with social engagement.
},
  author = {Themistocleous, Charalambos and Tsapkini, Kyrana and Kokkinakis, Dimitrios},
  year = {2023},
  publisher = {arXiv.org},
}

@inproceedings{ohlsson-etal-2023-going-329710,
  title = {Going to the market together. {A} presentation of a mixed methods project},
  booktitle = {TwinTalks Workshop at DH2023, 10 July, Graz, Austria},
  author = {Ohlsson, Claes and Virk, Shafqat and Tahmasebi, Nina},
  year = {2023},
}

@inproceedings{volodina-etal-2023-multiged-331652,
  title = {{MultiGED-2023} shared task at {NLP4CALL}: Multilingual Grammatical Error Detection},
  abstract = {This paper reports on the NLP4CALL shared task on Multilingual Grammatical Error Detection (MultiGED-2023), which included five languages: Czech, English, German, Italian and Swedish. It is the first shared task organized by the Computational SLA working group, whose aim is to promote less represented languages in the fields of Grammatical Error Detection and Correction, and other related fields. The MultiGED datasets have been produced based on second language (L2) learner corpora for each particular language. In this paper we introduce the task as a whole, elaborate on the dataset generation process and the design choices made to obtain MultiGED datasets, provide details of the evaluation metrics and CodaLab setup. We further briefly describe the systems used by participants and report the results.
},
  booktitle = {Proceedings of the 12th Workshop on Natural Language Processing for Computer Assisted Language Learning (NLP4CALL 2023)},
  author = {Volodina, Elena and Bryant, Christopher and Caines, Andrew and De Clercq, Orphée and Frey, Jennifer-Carmen and Ershova, Elizaveta and Rosen, Alexandr and Vinogradova, Olga},
  year = {2023},
  publisher = {Linköping University Press},
}

@incollection{tahmasebi-dubossarsky-2023-computational-325543,
  title = {Computational modeling of semantic change},
  abstract = {In this chapter we provide an overview of computational modeling for semantic change using large and semi-large textual corpora. We aim to provide a key for the interpretation of relevant methods and evaluation techniques, and also provide insights into important aspects of the computational study of semantic change. We discuss the pros and cons of different classes of models with respect to the properties of the data from which one wishes to model semantic change, and which avenues are available to evaluate the results. This chapter is forthcoming as the book has not yet been published.},
  booktitle = {Routledge Handbook of Historical Linguistics},
  edition = {Second},
  author = {Tahmasebi, Nina and Dubossarsky, Haim},
  year = {2023},
  publisher = {Routledge},
}

@inproceedings{masciolini-etal-2023-towards-329384,
  title = {Towards automatically extracting morphosyntactical error patterns from {L1-L2} parallel dependency treebanks},
  abstract = {L1-L2 parallel dependency treebanks are UD-annotated corpora of learner sentences paired with correction hypotheses. Automatic morphosyntactical annotation has the potential to remove the need for explicit manual error tagging and improve interoperability, but makes it more challenging to locate grammatical errors in the resulting datasets.
We therefore propose a novel method for automatically extracting morphosyntactical error patterns and perform a preliminary bilingual evaluation of its first implementation through a similar example retrieval task. The resulting pipeline is also available as a prototype CALL application.},
  booktitle = {Proceedings of the 18th Workshop on Innovative Use of NLP for Building Educational Applications (BEA 2023), July 13, 2023, Toronto, Canada},
  author = {Masciolini, Arianna and Volodina, Elena and Dannélls, Dana},
  year = {2023},
  publisher = {Association for Computational Linguistics},
  address = {Stroudsburg, PA},
  isbn = {978-1-959429-80-7},
}

@inproceedings{beccaria-etal-2023-extraction-334169,
  title = {Extraction and Analysis of Acoustic Features from {Italian-Speaking} Children with Autism Spectrum Disorder},
  abstract = {Background: The persistent difficulties in social interaction and communication that characterize Autism Spectrum Disorder can be accessed by investigating the quality of language. Indeed, these deficits involve the presence of anomalies in speech production and understanding, which find an expression at the acoustic and prosodic levels of linguistic analysis. Objectives: The main aim of this work is to propose a speech pipeline for the extraction of Italian speech biomarkers typical of ASD by conducting an acoustic and phonological analysis. Moreover, we will highlight the strengths and difficulties of this kind of investigation introducing new topics for further research. Methods: The poster will present the analysis of a speech corpus of 14 Italian-speaking children with ASD and 14 controls (C). The corpus is demographically balanced (age 6-10, 8;1 ± 1;3. Sex: 3F, 11 M) and homogeneous at the diatopic level (origin: Prato, Pistoia, Florence). First, we extracted the acoustic features by using eGeMAPS (openSMILE; Eyben et al., 2015), specifically ideated for the study of impaired speech.
Then, we implemented the Mann-Whitney U-test to select the features with the most statistically significant distance in the production of the two groups. Secondly, we conducted a parallel extraction regarding the pitch (F0 mean and standard deviation). We propose this additional analysis because pitch varies according to some demographic traits of the speaker (sex, age, height) and the literature presents opposite trends. For this task, we used Praat to have more flexibility in the manipulation of the extraction. We set the F0 range between 70 and 400 Hz (Patel et al., 2020). Finally, we conducted a comparison between the results of the two methods excluding female participants to verify if the trend of pitch changes when the participants are not mixed. Results: Table 1 shows the features selected between the ones extracted. They are related to prosody, quality of voice, loudness, and spectral distribution. Jitter, shimmer and HNR are usually investigated together to describe the emotional prosody and the quality of voice. The same trend found on our corpus is recorded in previous studies on languages other than Italian (Bone et al. 2015; Kissine \& Geelhand 2019). Moreover, spectral flux is usually investigated together with shimmer and jitter to describe speech impairments (Haider et al., 2019). Nevertheless, if we consider the studies related to autistic speech, there are few that describe this feature because of the different methodologies used during the extraction. Finally, the values of pitch extracted by eGeMAPS and Praat show the same trend. It is higher in ASD than in controls, both if we considered the corpus mixed and the one with only the male speakers. However, the pitch does not show a statistically significant difference between the two groups (Table 2). Conclusions: These results, although preliminary, seem to confirm the presence of phonetic alterations of speech associated with the disorder.
Further studies could improve the accuracy of the pipeline proposed by doing a qualitative analysis of the results and considering other linguistic and paralinguistic domains (e.g., morphological, pragmatic, and gestural analysis).},
  booktitle = {The 22nd International Society for Autism Research (INSAR), May 3-4, Stockholm, Sweden},
  author = {Beccaria, Federica and Gagliardi, Gloria and Kokkinakis, Dimitrios},
  year = {2023},
}

@incollection{borin-etal-2023-language-337444,
  title = {Language Report {Swedish}},
  abstract = {Swedish speech and language technology (LT) research goes back over 70 years. This has paid off: there is a national research infrastructure, as well as significant research projects, and Swedish is well-endowed with language resources (LRs) and tools. However, there are gaps that need to be filled, especially high-quality goldstandard LRs required by the most recent deep-learning methods. In the future, we would like to see closer collaborations and communication between the “traditional” LT research community and the burgeoning AI field, the establishment of dedicated academic LT training programmes, and national funding for LT research.},
  booktitle = {Cognitive Technologies},
  author = {Borin, Lars and Domeij, Rickard and Edlund, Jens and Forsberg, Markus},
  year = {2023},
  pages = {219--222},
}

@incollection{virk-etal-2023-lingfn-337386,
  title = {{LingFN}: A Framenet for the Linguistic Domain},
  abstract = {Frame semantics is a theory of meaning in natural language, which defines the structure of the lexical semantic resources known as framenets. Both framenets and frame semantics have proved useful for a number of natural language processing (NLP) tasks. However, in this connection framenets have often been criticized for their limited coverage.
A proposed reasonable-effort solution to this problem is to develop domain-specific (sublanguage) framenets to complement the corresponding general-language framenets for particular NLP tasks, and in the literature we find such initiatives covering domains such as medicine, soccer, and tourism. In this paper, we report on building a framenet to cover the terms and concepts encountered in descriptive linguistic grammars (written in English) i.e. a framenet for the linguistic domain (LingFN) to complement the general-language BFN.},
  booktitle = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
  author = {Virk, Shafqat and Klang, Per and Borin, Lars and Saxena, Anju},
  year = {2023},
  isbn = {978-3-031-24336-3},
  pages = {367--379},
}

@inproceedings{hammarlin-etal-2023-fearing-327373,
  title = {Fearing {mRNA}: A Mixed Methods Study of Vaccine Rumours},
  abstract = {The first mass-distributed vaccines based on mRNA technology were launched in 2021 to protect against COVID-19, sparking rumours among vaccine critical individuals that these “new” vaccines might be more dangerous to the health than other, “traditional” vaccines. Drawing on rumour theories and social cognitive perspectives, the aim of this chapter is to account for the purpose and the spreading of medical rumours that encircle mRNA COVID-19 vaccines. We ask: How are rumours concerning mRNA expressed and established? In terms of trust and distrust, what function do the rumours have? We take as our empirical case the fast spreading of a medical journal article written by a group of infectious medicine researchers at Lund University, Sweden, that spawned an already established vaccine rumour, and analyse Swedish-language tweets discussing mRNA vaccines posted between February 10, 2022 and November 10, 2022.
Our study follows a mixed methods sequential explanatory design consisting of an initial computational distant reading analysis based on structural topic modeling, followed by a close qualitative reading and thematic analysis of the results. Our analysis shows how mRNA rumours are not primarily based on ignorance, but rather on distrust regarding the officially sanctioned, positive narrative of new vaccine technologies, expressed through what we term counter-scientific argumentation.},
  booktitle = {NordMedia23: ``Technological Takeover? Social and Cultural Implications – Promises and Pitfalls'', 16–18 August 2023, Bergen, Norway},
  author = {Hammarlin, Mia-Marie and Kokkinakis, Dimitrios and Miegel, Fredrik and Stoencheva, Jullietta},
  year = {2023},
  address = {Bergen, Norway},
}

@article{hammarlin-etal-2023-covid-329784,
  title = {{COVID-19} Vaccine Hesitancy: A Mixed Methods Investigation of Matters of Life and Death},
  abstract = {In this article, hesitancy towards COVID-19 vaccinations is investigated as a phenomenon touching upon existential questions. We argue that it encompasses ideas of illness and health, and also of dying and fear of suffering. Building on a specific strand within anti-vaccination studies, we conjecture that vaccine hesitancy is, to some extent, reasonable, and that this scepticism should be studied with compassion. Through a mixed methods approach, vaccine hesitancy, as it is being expressed in a Swedish digital open forum, is investigated and understood as, on the one hand, a perceived need of protecting one’s body from techno-scientific experiments, and thus the risk of becoming a victim of medicine itself. On the other hand, the community members express what we call a tacit belief in modern medicine by demonstrating their own “expert” pandemic knowledge.
The analysis also shows how the COVID-19 pandemic triggers memories of another pandemic, namely the swine flu in 2009–2010, and what we term a medical crisis that occurred then, due to a vaccine that caused a rare but severe side effect in Sweden and elsewhere.},
  journal = {Journal of Digital Social Research (JDSR)},
  author = {Hammarlin, Mia-Marie and Kokkinakis, Dimitrios and Borin, Lars},
  year = {2023},
  volume = {5},
  number = {4},
  pages = {31--61},
}

@inproceedings{kokkinakis-etal-2023-investigating-325628,
  title = {Investigating the Effects of {MWE} Identification in Structural Topic Modelling},
  abstract = {Multiword expressions (MWEs) are common word combinations which exhibit idiosyncrasies in various linguistic levels. For various downstream natural language processing applications and tasks, the identification and discovery of MWEs has been proven to be potentially practical and useful, but still challenging to codify. In this paper we investigate various, relevant to MWE, resources and tools for Swedish, and, within a specific application scenario, we apply structural topic modelling to investigate whether there are any interpretative advantages of identifying MWEs.},
  booktitle = {The 19th Workshop on Multiword Expressions (MWE 2023)},
  author = {Kokkinakis, Dimitrios and Muñoz Sánchez, Ricardo and Bruinsma, Sebastianus C. J. and Hammarlin, Mia-Marie},
  year = {2023},
  publisher = {Association for Computational Linguistics},
  isbn = {978-1-959429-59-3},
}

@proceedings{ilinykh-etal-2023-proceedings-327035,
  title = {Proceedings of the Second Workshop on Resources and Representations for Under-Resourced Languages and Domains ({RESOURCEFUL-2023}), {May} 22, 2023, {Tórshavn}, {Faroe Islands}},
  abstract = {The second workshop on resources and representations for under-resourced language and domains was held in Tórshavn, Faroe Islands on May 22nd, 2023. The workshop was conducted in a physical setting, allowing for potential hybrid participation. Continuing with the aim of the first edition in 2020, RESOURCEFUL explored the role of the kind and the quality of resources that are available to us, as well as the challenges and directions for constructing new resources in light of the latest trends in natural language processing. The workshop has provided a forum for discussions between the two communities involved in building data-driven and annotation-driven resources.},
  editor = {Ilinykh, Nikolai and Morger, Felix and Dannélls, Dana and Dobnik, Simon and Megyesi, Beáta and Nivre, Joakim},
  year = {2023},
  publisher = {Association for Computational Linguistics},
  address = {Stroudsburg, PA},
  isbn = {978-1-959429-73-9},
}

@article{ehret-etal-2023-measuring-326113,
  title = {Measuring language complexity: challenges and opportunities},
  journal = {Linguistics Vanguard},
  author = {Ehret, Katharina and Berdicevskis, Aleksandrs and Bentz, Christian and Blumenthal-Dramé, Alice},
  year = {2023},
  volume = {9},
  pages = {1--8},
}

@article{forsgren-etal-2023-text-330978,
  title = {The use of text-mining software to facilitate screening of literature on centredness in health care},
  abstract = {Research evidence supporting the implementation of centredness in health care is not easily accessible due to the sheer amount of literature available and the diversity in terminology and conceptualisations used. The use of text-mining functions to semi-automate the process of screening and collating citations for a review is a way of tackling the vast amount of research citations available today. There are several programmes that use text-mining functions to facilitate screening and data extraction for systematic reviews. However, the suitability of these programmes for reviews on broad topics of research, as well as the general uptake by researchers, is unclear.
This commentary has a dual aim, which consists in outlining the challenges of screening literature in fields characterised by vague and overlapping conceptualisations, and to exemplify this by exploratory use of text-mining in the context of a scoping review on centredness in health care.},
  journal = {Systematic Reviews},
  author = {Forsgren, Emma and Wallström, Sara and Feldthusen, Caroline and Zechner, Niklas and Sawatzky, Richard and Öhlén, Joakim},
  year = {2023},
  volume = {12},
  number = {1},
  pages = {73},
}

@inproceedings{kokkinakis-etal-2023-analysis-330230,
  title = {Analysis of {mRNA-vaccine} posts on {Swedish} {Twitter} data},
  abstract = {The aim of this study was to use Swedish social media data to capture public perspectives and sentiments regarding the abovementioned study on possible effect of the novel mRNA vaccines that became massively available to the public during late 2021. The intention is to understand the key issues (topics/themes) that have captured public attention in Sweden, as well as the barriers and facilitators to successful or not mRNA vaccines.},
  booktitle = {14th International Conference of Experimental Linguistics, 18-20 October 2023, Athens, Greece},
  author = {Kokkinakis, Dimitrios and Bruinsma, Bastian and Hammarlin, Mia-Marie},
  year = {2023},
}

@inproceedings{morger-2023-there-333596,
  title = {Are There Any Limits to {English-Swedish} Language Transfer? {A} Fine-grained Analysis Using Natural Language Inference},
  abstract = {The developments of deep learning in natural language processing (NLP) in recent years have resulted in an unprecedented amount of computational power and data required to train state-of-the-art NLP models. This makes lower-resource languages, such as Swedish, increasingly more reliant on language transfer effects from English since they do not have enough data to train separate monolingual models.
In this study, we investigate whether there is any potential loss in English-Swedish language transfer by evaluating two types of language transfer on the GLUE/SweDiagnostics datasets and comparing between different linguistic phenomena. The results show that for an approach using machine translation for training there is no considerable loss in overall performance nor by any particular linguistic phenomena, while relying on pre-training of a multilingual model results in considerable loss in performance. This raises questions about the role of machine translation and the use of natural language inference (NLI) as well as parallel corpora for measuring English-Swedish language transfer.},
  booktitle = {Proceedings of the Second Workshop on Resources and Representations for Under-Resourced Languages and Domains (RESOURCEFUL-2023), May 22, 2023, Tórshavn, Faroe Islands},
  editor = {Ilinykh, Nikolai and Morger, Felix and Dannélls, Dana and Dobnik, Simon and Megyesi, Beáta and Nivre, Joakim},
  author = {Morger, Felix},
  year = {2023},
  publisher = {Association for Computational Linguistics},
  address = {Stroudsburg, PA},
  isbn = {978-1-959429-73-9},
}

@incollection{bouma-2023-continental-333445,
  title = {{LFG} and {Continental West-Germanic} languages},
  booktitle = {Handbook of Lexical Functional Grammar},
  editor = {Dalrymple, Mary},
  author = {Bouma, Gerlof},
  year = {2023},
  publisher = {Language Science Press},
  address = {Berlin},
  isbn = {978-3-96110-424-6},
  pages = {1407--1468},
}

@comment{NOTE(review): the number field of the next entry holds an ISO date (2023-08-29), presumably a publication date. Confirm and move it to a date field.}
@misc{forsberg-etal-2023-words-328244,
  title = {Words unboxed: discovering new words with {Kubord}},
  author = {Forsberg, Markus and Sikora, Justyna and Sköldberg, Emma},
  year = {2023},
  publisher = {Kungliga biblioteket},
  number = {2023-08-29},
  address = {Stockholm},
}

@inproceedings{kokkinakis-etal-2023-scaling-326698,
  title = {Scaling-up the Resources for a Freely Available {Swedish} {VADER} ({svVADER})},
  abstract = {With widespread commercial applications in various domains, sentiment analysis has become a success story for Natural
Language Processing (NLP). Still, although sentiment analysis has rapidly progressed during the last years, mainly due to the application of modern AI technologies, many approaches apply knowledge-based strategies, such as lexicon-based, to the task. This is particularly true for analyzing short social media content, e.g., tweets. Moreover, lexicon-based sentiment analysis approaches are usually preferred over learning-based methods when training data is unavailable or insufficient. Therefore, our main goal is to scale-up and apply a lexicon-based approach which can be used as a baseline to Swedish sentiment analysis. All scaled-up resources are made available, while the performance of this enhanced tool is evaluated on two short datasets, achieving adequate results.},
  booktitle = {Proceedings of the 24th Nordic Conference on Computational Linguistics (NoDaLiDa)},
  author = {Kokkinakis, Dimitrios and Muñoz Sánchez, Ricardo and Hammarlin, Mia-Marie},
  year = {2023},
}

@proceedings{holmer-etal-2023-nordiska-334604,
  title = {Nordiska studier i lexikografi 16. {Rapport} från 16:e konferensen om lexikografi i {Norden}, {Lund} 27-29 april 2022},
  abstract = {Nordiska studier i lexikografi 16 rapporterar från den 16:e konferensen i lexikografi, som genomfördes i Lund 27–29 april 2022. Volymen innehåller 30 bidrag som bygger på inlägg från konferensen i form av plenarföreläsningar, sektionsföredrag och posterpresentationer. Artiklarna spänner innehållsligt över ett brett fält, men samtliga anlägger någon form av lexikografiskt perspektiv. Flera av dem anknyter till konferensens tema Lexikografiska utmaningar. Merparten av bidragen är författade på danska, norska eller svenska, men ett mindre antal är skrivna på engelska.},
  editor = {Holmer, Louise and Horn, Greta and Landqvist, Hans and Nilsson, Pär and Nordgren, Eva and Sköldberg, Emma},
  year = {2023},
  address = {Göteborg},
  isbn = {978-91-986791-5-1},
}

@inproceedings{berdicevskis-etal-2023-superlim-331445,
  title = {{Superlim}: A {Swedish} Language Understanding Evaluation Benchmark},
  booktitle = {Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing, December 6-10, 2023, Singapore},
  editor = {Bouamor, Houda and Pino, Juan and Bali, Kalika},
  author = {Berdicevskis, Aleksandrs and Bouma, Gerlof and Kurtz, Robin and Morger, Felix and Öhman, Joey and Adesam, Yvonne and Borin, Lars and Dannélls, Dana and Forsberg, Markus and Isbister, Tim and Lindahl, Anna and Malmsten, Martin and Rekathati, Faton and Sahlgren, Magnus and Volodina, Elena and Börjeson, Love and Hengchen, Simon and Tahmasebi, Nina},
  year = {2023},
  publisher = {Association for Computational Linguistics},
  address = {Stroudsburg, PA},
  isbn = {979-8-89176-060-8},
  pages = {8137--8153},
}

@article{broden-etal-2023-diachrony-330178,
  title = {The diachrony of the new political terrorism: Neologisms as discursive framing in {Swedish} parliamentary data 1971–2018},
  abstract = {This paper begins to unpack the framing of terrorism in the Swedish Parliament through distant reading and by chronologically extracting neologisms in a comprehensive corpus of transcripts of parliamentary debates. Combining language technology and historical contextualization, we find support for the argument that the term ‘terrorism’ gained much of its modern meaning around 1970. Specifically, our study points to a legislative framing of the issue of terrorism in Swedish parliamentary debate from the early 1970s and onwards.
We also find a proliferation in the production of neologisms and compounds after 9/11 2001, reflecting, among other things, the rise of a more distinct counter-terrorism discourse and more ‘specialized’ roles and functions related to terrorism and counter-terrorism activities. The paper concludes by emphasizing the analytical benefits of tracing parliamentary discourse through neologisms as an explorative approach to identify significant patterns for further investigation.}, journal = {Digital Humanities in the Nordic and Baltic Countries Publications}, author = {Brodén, Daniel and Olsson, Leif-Jöran and Fridlund, Mats and Ängsal, Magnus Pettersson and Öhberg, Patrik}, year = {2023}, volume = {5}, number = {1}, pages = {79–89}, } @inProceedings{skoldberg-2023-"varfor-334607, title = {"Varför står det olika i SAOL och i SO?" Om (bearbetning av) skillnader mellan Svenska Akademiens samtidsordböcker}, abstract = {The Swedish Academy’s contemporary dictionaries, the glossary SAOL and the definition dictionary SO, have many features in common but they also show a lot of differences, especially in terms of content. Many of these differences can be explained with reference to the perspectives, traditions and publication year of the dictionaries. However, some differences are difficult to justify. For this reason, the editorial team of SAOL and SO is currently working on 1) identifying and 2) managing differences with respect to the information given in the two lexical resources. In this article, I discuss different types of differences, both motivated and unmotivated, between the dictionaries. The issue of priorities in the editorial work concerning unmotivated differences between SAOL and SO is also addressed.}, booktitle = {Holmer, Louise, Greta Horn, Hans Landqvist, Pär Nilsson, Eva Nordgren & Emma Sköldberg (red.). Nordiska studier i lexikografi 16. Rapport från 16:e konferensen om lexikografi i Norden. Lund 27–29 april 2022. 
(Skrifter utgivna av Nordiska föreningen för lexikografi. Skrift nr 17. Meijerbergs arkiv för svensk ordforskning 48.) }, author = {Sköldberg, Emma}, year = {2023}, publisher = {Nordiska föreningen för lexikografi}, address = {Lund & Göteborg}, ISBN = {978-91-986791-5-1}, } @inProceedings{matsson-kristrom-2023-building-329957, title = {Building and Serving the Queerlit Thesaurus as Linked Open Data}, abstract = {This paper describes the creation of the Queer Literature Indexing Thesaurus (QLIT) as well as the digital infrastructure supporting the workflow for editing and publishing it. The purpose of QLIT is to adequately catalogue Swedish fiction with LGBTQI themes. It is continually edited in plain-text RDF and automatically processed for correctness and storage. Finally, it is published online as Linked Open Data and used with external systems. The technical approach relies on scripts and applications developed ad hoc, rather than existing solutions. Code is available on https://github.com/gu-gridh/queerlit-terms}, booktitle = {DHNB2023 Conference Proceedings. Sustainability: Environment - Community - Data. The 7thDigital Humanities in the Nordic and BalticCountries Conference. Oslo – Stavanger – Bergen, Norway. March 8–10, 2023}, author = {Matsson, Arild and Kriström, Olov}, year = {2023}, publisher = {Universitetet i Oslo}, address = {Oslo}, } @article{landqvist-2023-gender-327264, title = {Gender equality and/or inequality? Female and male translators in a Swedish digital encyclopaedia of translators}, abstract = {This article presents a study of publicly available Svenskt översättarlexikon ‘The Swedish Encyclopaedia of Translators’ (SwET 2009), most probably the first digital encyclopaedia of translators. The study is situated in the fields of the sociology of translators, (literary) translator studies, and translation history, and focuses on how female translators are described, characterized and evaluated in the version of SwET from 2022. 
Three research questions are addressed in the paper: (1) What is the ratio of entries presenting female and male translators in the SwET? (2) What is the quantitative treatment of the partners in the sub-category “Translator Couples”? (3) And what are the descriptions, characterizations and evaluations of the partners in that sub-category? In response to the three RQs, the same three situations emerge: (a) gender equality and inequality, (b) gender equality, and (c) gender inequality. Possible explanations for the results reported are presented and discussed. Finally, proposals for future studies of digital translator encyclopaedias are presented.}, journal = {Stridon. Journal of Studies in Translation and Interpreting}, author = {Landqvist, Hans}, year = {2023}, volume = {3}, number = {1}, pages = {93--114}, } @misc{landqvist-2023-allmansprak-324658, title = {Allmänspråk och fackspråk i en ordbok över allmänspråket}, abstract = {Blogginlägg med anledning av det uppmärksammade "snippa-målet" 2023}, author = {Landqvist, Hans}, year = {2023}, number = {2023-03-20}, } @inProceedings{landqvist-etal-2023-terminologisamarbete-327015, title = {Terminologisamarbete i Norden. Teman, fokus och resultat från 1940-talet till 1970-talet}, abstract = {År 2026 kommer Nordterm att kunna fira sin femtioårsdag (jfr Bucher 2016b). Men det nordiska terminologiarbetet har en längre historia: ”i alla nordiska länder har [det] funnits ett visst centralt ansvar för terminologiarbete för ländernas huvudspråk ända sedan 1930-talet […]” (Bucher 2016a:74-75). Några aktörer som tagit sådant ansvar är danska Terminologicentralen – TC (1940–1960), norska Rådet for Teknisk Terminologi – RTT (1938–2001) och svenska Tekniska Nomenklaturcentralen/Terminologicentrum – TNC (1941–2018) (Selander 1972; Bucher 2016b; Store norske leksikon 2020; Terminologifrämjandet 2023). 
Också i Finland bedrevs terminologi(sam)arbete före Nordterms tid, men Centralen för Teknisk Terminologi rf – TSK/Terminologicentralen TSK rf/Terminologicentralen rf inrättades 1974 och bedriver alltjämt verksamhet (Nissilä et al. 2021; Heittola et al. 2022; Terminologicentralen rf 2023). Inom det pågående projektet Termer i tid – Tidens termer kartlägger vi TNC:s arbete för att trygga tillgången på god terminologi inom olika fackområden och bidra till god terminologisk praxis. För detta utnyttjar vi bl.a. det omfattande TNC-arkivet (Heittola et al. 2022; Landqvist et al. 2022). I vår presentation fokuserar vi på samarbetet mellan de tre nationella terminologiorganisationerna TC, RTT och TNC utifrån material i TNC-arkivet. Våra forskningsfrågor är: 1. Vilka teman är aktuella under olika decennier i kontakterna mellan TC, RTT och TNC? 2. Vilka särskilda fokusområden finns mellan å ena sidan TC och TNC och å andra sidan RTT och TNC? 3. Vilka resultat ger kontakterna i fråga om utvalda terminologifrågor? Som material fungerar två delmängder av TNC-arkivet: Utlandskorrespondens Danmark – Udan och Utlandskorrespondens Norge – Unor. Dessa dokumenterar kontakter i skrift mellan TNC, TC och RTT. Vi klarlägger kontakterna över tid och utifrån tema/n, försöker identifiera fokusområden samt redovisar och diskuterar hur utvalda terminologifrågor hanteras (jfr Heittola et al. 2022; Landqvist et al. 2022). Studien anlägger således både ett makro- och ett mikroperspektiv på kontakterna mellan de tre institutionerna. Referenser Bucher, A-L. (2016a). Nationella terminologicentraler – i allmännyttans intresse. I: N. Pilke & N. Nissilä (Red.). Tänkta termer. Terminologihänsyn i nordiskt perspektiv. VAKKI Publications 5. Vasa universitet, 72–99. Tillgänglig: https://vakki.net/wp-content/uploads/2020/08/tankta_termer_72-99_bucher.pdf (citerad 12.2.2023). Bucher, A-L. (2016b). Nordterm 40 år. Terminfo 2016:3. 
Tillgänglig: http://www.terminfo.fi/sisalto/nordterm-40-ar-359.html (citerad 12.2.2023). Heittola, S., Landqvist, H., Nissilä, N. & Pilke, N. (2022). TNC och Finland. Korrespondens inom terminologiområdet 1941–1999. I: S. Björklund, B. Haagensen, M. Nordman & A. Westerlund (Red.). Svenskan i Finland 19. Föredrag vid den nittonde sammankomsten för beskrivningen av svenskan i Finland. Vasa den 6–7 maj 2021. Skrifter utgivna av Svensk-Österbottniska Samfundet 82. Åbo Akademi & Svensk-Österbottniska Samfundet, 88–103. Tillgänglig: https://www.doria.fi/handle/10024/185549 (citerad 12.2.2023). Landqvist, H., Nissilä, N., Pilke, N. & Sjöberg, S. (2022). Organisationer, frågor och (an)svar – Institutionellt terminologiskt samarbete mellan Sverige och Finland 1975–1998. I: H. Katajamäki, M. Enell-Nilsson, H. Kauppinen-Räisänen & H. Limatius (Red.). Responsible Communication. VAKKI Publications 14. Vaasan yliopisto, 89–104. Tillgänglig: https://vakki.net/index.php/2022/12/15/responsible-communication/ (citerad 12.2.2023). Nissilä, N., Heittola, S., Pilke, N. & Landqvist, H. (2021). ”Av intresse för saken dristar jag mig att till diskussion framlägga ett par spörsmål” – Kaksi suomalaista akateemista uranuurtajaa terminologiaverkoston kirjeenvaihdossa. I: H. Katajamäki, M. Enell-Nilsson, H. Kauppinen-Räisänen, L. Kääntä & H. Salovaara (Red.). Workplace Communication IV. VAKKI Publications 13. Vaasan yliopisto, 153–168. Tillgänglig: https://vakki.net/index.php/2021/12/21/workplace-communication-iv/ (citerad 12.2.2023). Selander, E. (1972). Terminologisamarbete i Norden – erfarenheter i Sverige. I: A. Hamburger, A. Sudmann & B. Molde (Red.). Språk i Norden 1972. Årsskrift för de nordiska språknämnderna. Skrifter utgivna av Nämnden för svensk språkvård 47. Nämnden för svensk språkvård, 95–102. Tillgänglig: http://www.diva-portal.org/smash/get/diva2:1179421/FULLTEXT01.pdf (citerad 12.2.2023). Store norske leksikon (2020). 
Tillgänglig: https://snl.no/R%C3%A5det_for_teknisk_terminologi (citerad 12.2.2023). Termer i tid – Tidens termer (2023). Terms in Time – The Terms of the Time. Tillgänglig: https://sites.uwasa.fi/term/ (citerad 12.2.2023). Terminologicentralen rf (2023). Tillgänglig: https://sanastokeskus.fi/tsk/sv/terminologicentralen_rf-29.html (citerad 12.2.2023). Terminologifrämjandet (2023). Från Tekniska nomenklaturcentralen till Terminologicentrum. Tillgänglig: https://terminologiframjandet.se/h552a9FtZ/sveriges-terminologiska-landskap-2019/fran-tekniska-nomenklatur%d1%81entralen-till-terminologicentrum/ (citerad 12.2.2023).}, booktitle = {Nordterm 2023, 14–15 juni 2023, Stockholm}, author = {Landqvist, Hans and Nissilä, Niina and Sjöberg, Sannina}, year = {2023}, } @article{landqvist-etal-2023-samarbetet-332277, title = {Samarbetet mellan TNC och RTT 1938–1998: terminologiarbete som resurs för meningsfull kommunikation}, abstract = {This article describes and analyses the contacts between the Swedish and Norwegian national terminology organisations between the 1940s and the 1990s. The Swedish Tekniska nomenklaturcentralen TNC (1941–2018; 2000–2018 Terminologicentrum TNC) was Sweden’s national center for special languages and terminology work for more than 75 years. Since its founding in 1941, the TNC was active not only in Sweden, but also in establishing and maintaining international contacts. The Norwegian Rådet for teknisk terminologi RTT was founded in 1938 and closed down in 2001. The research material used is the collection of correspondence in the TNC’s document archive, in particular the section stored with the code “Unor”, i.e., letters (N=374) documenting written communication between the TNC and the RTT. The article describes the topics covered in the communication, the individuals involved, the objectives and consequences of the communication and the results achieved. 
In a quantitative analysis, the themes of the letters are categorised in four main categories: publications, communication, information, and language issues. In a qualitative analysis, a specific terminological issue in the field of welding technology is analysed by close reading and content analysis. Finally, further studies, which complete the picture of terminological cooperation on institutional level in the Nordic countries, are proposed.}, journal = {Communicating with Purpose. VAKKI Publications. Eds. E. Lillqvist, M. Eronen- Valli, V. Manninen, N. Nissilä & E. Salmela}, author = {Landqvist, Hans and Nissilä, Niina and Pilke, Nina and Sjöberg, Sannina}, year = {2023}, volume = {15}, pages = {232–250}, } @misc{blensenius-holmer-2023-saol-324993, title = {SAOL: Dröjer innan de och dem blir som dom}, author = {Blensenius, Kristian and Holmer, Louise}, year = {2023}, number = {2023-04-04 }, } @inProceedings{holmer-blensenius-2023-okynniga-334601, title = {Okynniga pluraler. Normering och bruk av s-plural speglat i SAOL och SO}, booktitle = {Holmer, Louise, Greta Horn, Hans Landqvist, Pär Nilsson, Eva Nordgren, Emma Sköldberg (red.), Nordiska studier i Lexikografi (NSL) 16, NSL 17, Rapport från 16:e konferensen om lexikografi i Norden, Lund 27–29 april 2022, Meijerbergs arkiv för svensk ordforskning 48}, author = {Holmer, Louise and Blensenius, Kristian}, year = {2023}, publisher = {Nordiska föreningen för lexikografi, Meijerbergs institut för svensk etymologisk forskning}, address = {Göteborg}, ISBN = {978-91-986791-5-1}, } @inProceedings{landqvist-2023-svenskt-330560, title = {Svenskt översättarlexikon för forskningsändamål: några utgångspunkter, resultat och funderingar}, abstract = {Föredrag (inbjuden talare) vid Textseminariet, SOL-centrum, Lunds universitet, 24 november 2023}, booktitle = {Textseminariet, SOL-centrum, Lunds universitet, 24 november 2023}, author = {Landqvist, Hans}, year = {2023}, } @article{landqvist-2023-kunskapsorganisering-332279, title 
= {Kunskapsorganisering, sökmöjligheter och läsvägar: en fallstudie av handböcker för hundägare}, abstract = {This paper explores how the anonymous authors of two Swedish handbooks for dog owners, published in 1823 and 1849 respectively, tried to communicate their knowledge about the treatment of dogs so that the readers of the handbooks could, hopefully, apply the information offered in their everyday life. The study clarifies how the authors organize the knowledge that they want their readers to take part of, how they show their readers opportunities to search for the desired knowledge and which reading paths within the handbooks that the readers are offered. The two handbooks are regarded as multimodal, dialogical, final and addressive texts, and the study reported is qualitative and comparative; the handbooks are compared with each other, with other studies of dog owner manuals in Swedish, and with studies of other types of practical handbooks in Swedish. The results show great similarities between the two handbooks regarding the investigated variables, but the handbook from 1849 is judged to be more well planned and easier to use for contemporary knowledge-seeking dog owners. Finally, further studies of the handbooks are suggested, including syntactic-focused studies of them, comparisons with contemporary veterinary medical literature and studies of a larger material of dog owners’ handbooks}, journal = {Communicating with Purpose. VAKKI Publications. Eds. E. Lillqvist, M. Eronen- Valli, V. Manninen, N. Nissilä & E. 
Salmela}, author = {Landqvist, Hans}, year = {2023}, volume = {15}, pages = {207–231}, } @inProceedings{blensenius-2023-harmonisk-334638, title = {Mot en harmonisk lemma-lexemmodell och ordklassuppsättning}, booktitle = {Holmer, Louise, Greta Horn, Hans Landqvist, Pär Nilsson, Eva Nordgren, Emma Sköldberg (red.), Nordiska studier i Lexikografi (NSL) 16, NSL 17, Rapport från 16:e konferensen om lexikografi i Norden, Lund 27–29 april 2022, Meijerbergs arkiv för svensk ordforskning 48}, author = {Blensenius, Kristian}, year = {2023}, publisher = {Nordiska föreningen för lexikografi, Meijerbergs institut för svensk etymologisk forskning}, address = {Göteborg}, ISBN = {978-91-986791-5-1}, } @inProceedings{masciolini-2023-query-329383, title = {A query engine for L1-L2 parallel dependency treebanks}, abstract = {L1-L2 parallel dependency treebanks are learner corpora with interoperability as their main design goal. They consist of sentences produced by learners of a second language (L2) paired with native-like (L1) correction hypotheses. Rather than explicitly labelled for errors, these are annotated following the Universal Dependencies standard. This implies relying on tree queries for error retrieval. Work in this direction is, however, limited. We present a query engine for L1-L2 treebanks and evaluate it on two corpora, one manually validated and one automatically parsed.}, booktitle = {Proceedings of the 24th Nordic Conference on Computational Linguistics (NoDaLiDa), May 22-24, 2023 Tórshavn, Faroe Islands / Editors: Tanel Alumäe and Mark Fishel}, author = {Masciolini, Arianna}, year = {2023}, publisher = {University of Tartu Library}, address = {Tartu, Estonia}, ISBN = {978-99-1621-999-7}, } @incollection{holmer-blensenius-2023-stavning-323528, title = {Stavning och böjning av lånord. De orange blinkrarna}, abstract = {Holmer & Blensenius har bidragit med underlag till kapitlet i fråga. 
Den slutliga utformningen har gjorts av Språkrådet.}, booktitle = {Maria Bylin & Björn Melander (red.). Språkrådet rekommenderar. Perspektiv, metoder och avvägningar i språkriktighetsfrågor}, author = {Holmer, Louise and Blensenius, Kristian}, year = {2023}, publisher = {Språkrådet, Institutet för språk och folkminnen}, address = {Stockholm}, ISBN = {978-91-86959-90-6}, pages = {93--104}, } @article{landqvist-2023-ordbockers-325762, title = {Ordböckers möjligheter och begränsningar}, journal = {GU Journalen}, author = {Landqvist, Hans}, year = {2023}, number = {2}, pages = {47--48}, } @inProceedings{r?dveneide-etal-2023-unsc-338176, title = {The UNSC-Graph: An Extensible Knowledge Graph for the UNSC Corpus}, abstract = {We introduce the UNSC-Graph, a knowledge graph for a corpus of debates of the United Nations Security Council (UNSC) during the period 1995-2020. The graph combines previously disconnected data sources including from the UNSC Repertoire, the UN Library, Wikidata, and from metadata extracted from the speeches themselves. Beyond existing metadata detailing debates’ topics and participants, we also extended the graph to include all country mentions in a speech, geographical neighbours of countries mentioned, as well as sentiment scores. By linking the graph to Wikidata, we are able to include additional geopolitical information and extract various country name aliases to extend the coverage of country mentions beyond existing NER-based approaches. 
Studying mentions of Ukraine after 2014, we present a use case for the graph as a source for continuous analysis of international politics and geopolitical events discussed in the UNSC.}, booktitle = {Computational linguistics - Association for Computational Linguistics}, author = {Rødven-Eide, Stian and Zaczynska, Karolina and Pires, Antonio and Patz, Ronny and Stede, Manfred}, year = {2023}, publisher = {Association for Computational Lingustics}, address = {Ingolstadt, Germany}, } @inProceedings{wilkens-etal-2023-tcfle-337441, title = {TCFLE-8: a Corpus of Learner Written Productions for French as a Foreign Language and its Application to Automated Essay Scoring}, abstract = {Automated Essay Scoring (AES) aims to automatically assess the quality of essays. Automation enables large-scale assessment, improvaements in consistency, reliability, and standardization. Those characteristics are of particular relevance in the context of language certification exams. However, a major bottleneck in the development of AES systems is the availability of corpora, which, unfortunately, are scarce, especially for languages other than English. In this paper, we aim to foster the development of AES for French by providing the TCFLE-8 corpus, a corpus of 6.5k essays collected in the context of the Test de Connaissance du Français (TCF - French Knowledge Test) certification exam. We report the strict quality procedure that led to the scoring of each essay by at least two raters according to the levels of the Common European Framework of Reference for Languages (CEFR) and to the creation of a balanced corpus. In addition, we describe how linguistic properties of the essays relate to the learners' proficiency in TCFLE-8. We also advance the state-of-the-art performance for the AES task in French by experimenting with two strong baselines (i.e., RoBERTa and feature-based). 
Finally, we discuss the challenges of AES using TCFLE-8.}, booktitle = {EMNLP 2023 - 2023 Conference on Empirical Methods in Natural Language Processing, Proceedings}, author = {Wilkens, Rodrigo and Pintard, Alice and Alfter, David and Folny, Vincent and François, Thomas}, year = {2023}, ISBN = {9798891760608}, } @inProceedings{zhou-etal-2023-finer-325541, title = {The Finer They Get: Combining Fine-Tuned Models For Better Semantic Change Detection}, abstract = {In this work we investigate the hypothesis that enriching contextualized models using fine-tuning tasks can improve their capacity to detect lexical semantic change (LSC). We include tasks aimed to capture both low-level linguistic information like part-of-speech tagging, as well as higher level (semantic) information. Through a series of analyses we demonstrate that certain combinations of fine-tuning tasks, like sentiment, syntactic information, and logical inference, bring large improvements to standard LSC models that are based only on standard language modeling. We test on the binary classification and ranking tasks of SemEval-2020 Task 1 and evaluate using both permutation tests and under transfer-learning scenarios.}, booktitle = {24th Nordic Conference on Computational Linguistics (NoDaLiDa)}, author = {Zhou, Wei and Tahmasebi, Nina and Dubossarsky, Haim}, year = {2023}, publisher = {Linköping University Electronic Press}, ISBN = {978-99-1621-999-7}, } @misc{dunabeitia-etal-2023-editorial-333441, title = {Editorial: Digital Linguistic Biomarkers: Beyond Paper and Pencil Tests -Volume II }, abstract = {Our first volume laid the foundation for understanding the potential of digital linguistic biomarkers in assessing various cognitive and psychological aspects. In this second volume, we witness a significant advancement in both the scope and depth of research in this area. 
The featured articles in this volume contribute to our understanding of how linguistic biomarkers can transcend traditional paper-and-pencil tests, offering a more nuanced and comprehensive approach to the assessment of cognitive function and psychological well-being.In the first study of the volume [Gonzalez-Recober et al., 2023], the authors employed automated methods to investigate speech production during category and letter fluency tasks, commonly used neuropsychological assessments for evaluating lexical retrieval abilities. Their analysis encompassed a diverse range of linguistic and acoustic features, providing a more comprehensive perspective on these tasks than previous studies. As expected, participants produced more words during the category fluency task than during the letter fluency task. Moreover, several linguistic and acoustic measures displayed distinctions between the two tasks. The automated techniques employed in this study offer a reproducible and scalable approach for analyzing fluency tasks, with potential applications in clinical settings. By implementing these methods, future research endeavors are expected to expand our knowledge of speech feature differences, not only in terms of total scores but also across various speech measures, particularly among clinical populations.In the second article of the volume [Sánchez-Vincitore et al. 2023], the authors present a longitudinal analysis of linguistic biomarkers to detect cognitive decline. Their study underscores the potential of natural language processing techniques in identifying subtle cognitive changes over time. They examined data from over 3,000 participants aged 45 and older to investigate the relationship between age, gender, and language-mediated working memory processes using commercial cognitive tests (in their case, scientific tests developed by CogniFit Inc.). 
The findings revealed that age negatively predicted working memory performance, highlighting the potential of computerized assessments in predicting cognitive functions during aging and the need for further research on gender effects in cognitive aging. This study contributed to the growing body of evidence supporting the utility of linguistic biomarkers in early cognitive assessment.In the third study of our volume [Kim et al. 2023], the focus shifts to postoperative delirium (POD) in elderly patients following spinal surgery. POD has been linked to adverse outcomes in this demographic, prompting researchers to explore potential biomarkers for degenerative cerebral dysfunctions like mild cognitive impairment and dementia. The authors used electroencephalography (EEG) to measure an EEG biomarker reflecting idle cortical states through intrinsic alpha oscillations in the prefrontal regions. Cognitive follow-ups were performed using the Telephone Interview for Cognitive Status™ (TICS). The study observed that among patients diagnosed with POD, neurocognitive disorders could persist for up to 1 year postsurgery. These findings suggest that EEG has the potential to be a novel and valuable tool for identifying elderly surgical patients at a higher risk of developing postoperative delirium, offering opportunities for early intervention and improved patient outcomes.As the fourth article in our volume, the study by [Saccone et al. 2023] delves into the realm of schizophrenia, examining how it affects speech prosody and pragmatic functions. The study conducted corpus-based research, focusing on real-life spontaneous interactions to shed light on the prosodic features of schizophrenia. Notably, the speech patterns of patients revealed distinct characteristics. Their speech was organized into smaller, less structured information chunks, punctuated by frequent silences and extended pauses during turn-taking. 
Fluency was disrupted by retracing phenomena, particularly in complex information structures. Besides, comparing Topic and Comment-prominences between patients and non-pathological individuals revealed a consistent pattern. Patients exhibited higher values for Topic-prominence across all parameters, while the non-pathological group displayed the opposite trend. These findings provide valuable insights into the prosodic and pragmatic aspects of speech in schizophrenia, emphasizing the importance of understanding these linguistic manifestations in the context of the disorder's impact on communication.In closing, the second volume of "Digital Linguistic Biomarkers: Beyond Paper and Pencil Tests" presents a short yet diverse and comprehensive array of research articles that collectively advance the field. These contributions not only underscore the relevance and timeliness of linguistic biomarkers in the digital age but also highlight their potential to revolutionize the way we assess cognitive function, psychological well-being, and aging across diverse populations, extending to pathological and clinical samples.}, author = {Dunabeitia, Jon Andoni and Kokkinakis, Dimitrios and Gagliardi, Gloria}, year = {2023}, volume = {14}, } @inProceedings{ljunglof-levin-2023-unicodex-331075, title = {UniCoDeX (Universal Construction Dependency Xrammar)}, booktitle = {Dagstuhl Seminar Report. 
Universals of Linguistic Idiosyncrasy in Multilingual Computational Linguistics, May 7–12, 2023, Dagstuhl, Germany}, author = {Ljunglöf, Peter and Levin, Lori}, year = {2023}, publisher = { Leibniz-Zentrum für Informatik}, address = {Dagstuhl}, } @inProceedings{kokkinakis-etal-2023-prevalence-324818, title = {The Prevalence of mRNA Related Discussions during the Post-COVID-19 Era}, abstract = {Vaccinations are one of the most significant interventions to public health, but vaccine hesitancy and skepticism are raising serious concerns for a portion of the population in many countries, including Sweden. In this study, we use Swedish social media data and structural topic modeling to automatically identify mRNA-vaccine related discussion themes and gain deeper insights into how people’s refusal or acceptance of the mRNA technology affects vaccine uptake. Our point of departure is a scientific study published in February 2022, which seems to once again sparked further suspicion and concern and highlight the necessity to focus on issues about the nature and trustworthiness in vaccine safety. Structural topic modelling is a statistical method that facilitates the study of topic prevalence, temporal topic evolution, and topic correlation automatically. Using such a method, our research goal is to identify the current understanding of the mechanisms on how the public perceives the mRNA vaccine in the light of new experimental findings.}, booktitle = { Caring is Sharing – Exploiting the Value in Data for Health and Innovation / M. Hägglund et al. (eds.) Proceedings of the 33rd Medical Informatics Europe Conference (MIE2023), Gothenburg, Sweden, 22-25 May 2023}, author = {Kokkinakis, Dimitrios and Bruinsma, Sebastianus Cornelis Jacobus and Hammarlin, Mia-Marie}, year = {2023}, publisher = {IOS Press}, ISBN = {978-1-64368-388-1}, }