Hoppa till huvudinnehåll
Språkbanken Text är en avdelning inom Språkbanken.

BibTeX

@comment{Compilation thesis by Felix Morger on benchmarking, evaluating and interpreting large language models (the abstract itself calls it a compilation thesis). NOTE(review): the book entry type normally requires a publisher field and this record supplies none -- confirm the publisher or degree-granting school and add it.}
@book{morger-2024-minds-346391,
	author       = {Morger, Felix},
	title        = {In the minds of stochastic parrots: Benchmarking, evaluating and interpreting large language models},
	year         = {2024},
	isbn         = {978-91-8069-944-0},
	abstract     = {The arrival of large language models (LLMs) in recent years has changed the landscape of natural language processing (NLP). Their impressive performance on popular benchmarks, ability to solve a range of different tasks and their human-like linguistic interactional abilities, have prompted a debate into whether these are just "stochastic parrots" who are cleverly repeating what humans say without understanding its meaning or whether they are acquiring essential language capabilities, which would be an important stepping stone towards artificial general intelligence. 

To tackle this question, developing analysis methods to measure and understand the language capabilities of LLMs has become a defining challenge. These include developing benchmarks to reliably measure their performance as well and interpretability methods to gauge their inner-workings. This is especially relevant at a time when these models already are having a considerable impact on our society. An increasing amount users are affected by the technology and calls are made for transparent, regulated and thorough evaluation of AI. In these efforts, it is important to estimate the possibilities and limitations of these analysis methods since they will play an important role in holding technologies in AI accountable.

In this compilation thesis, I expound on the components and processes involved in analyzing LLMs. The articles included in this compilation thesis use different approaches for analyzing LLMs, from introducing a multi-task benchmark Superlim for Swedish NLU to investigating LLMs' ability to predict language variation. To this effort I explore what the possibilities and limitations are of popular analysis methods and what implications these have for developing LLMs. I argue that integrating explanatory approaches from empirical linguistic research is important to understand the role of both the data and the linguistic features used when analyzing LLMs. Doing so does not only help guide the development of LLMs, but also bring insights into linguistics.},
}

@comment{Conference paper introducing the Superlim benchmark. The volume editors live in the editor field (not inside booktitle) so that styles can format them; braces in the title protect proper nouns from sentence-casing.}
@inproceedings{berdicevskis-etal-2023-superlim-331445,
	title        = {{Superlim}: A {Swedish} Language Understanding Evaluation Benchmark},
	booktitle    = {Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing, December 6--10, 2023, Singapore},
	author       = {Berdicevskis, Aleksandrs and Bouma, Gerlof and Kurtz, Robin and Morger, Felix and Öhman, Joey and Adesam, Yvonne and Borin, Lars and Dannélls, Dana and Forsberg, Markus and Isbister, Tim and Lindahl, Anna and Malmsten, Martin and Rekathati, Faton and Sahlgren, Magnus and Volodina, Elena and Börjeson, Love and Hengchen, Simon and Tahmasebi, Nina},
	editor       = {Bouamor, Houda and Pino, Juan and Bali, Kalika},
	year         = {2023},
	publisher    = {Association for Computational Linguistics},
	address      = {Stroudsburg, PA},
	isbn         = {979-8-89176-060-8},
	pages        = {8137--8153},
}

@comment{Technical report introducing the Sparv-Superlim plugin. The spaced-out dots in the title are replaced by a proper ellipsis and proper nouns are brace-protected. NOTE(review): the techreport entry type requires an institution field, which this record does not supply -- confirm the issuing institution and add it.}
@techreport{morger-2024-when-342179,
	title        = {When {Sparv} met {Superlim}{\ldots} {A} {Sparv} Plugin for Natural Language Understanding Analysis of {Swedish}},
	abstract     = {This technical report introduces Sparv-Superlim, a Sparv plugin for natural language understanding analysis of Swedish. It uses the reference models trained on the Superlim multi-task benchmark to add additional analyses to the Sparv Pipeline. I show how to install and configure the tool as well as apply it to analyze Swedish political manifestos to see if the predictions the plugin does align with known political positions of Swedish parties. These use cases shows that the reference models vary in their applicability to predict correct sentiments on novel data and illustrates the importance of integrating reference models trained on a multi-task benchmark like Superlim to evaluate the ecological validity of the benchmark.},
	author       = {Morger, Felix},
	year         = {2024},
}

@comment{Workshop paper presenting the SweDiagnostics NLI dataset. Braces protect the dataset name, the language name and the venue acronyms from style-driven recasing.}
@inproceedings{morger-2024-swediagnostics-341148,
	title        = {{SweDiagnostics}: A Diagnostics Natural Language Inference Dataset for {Swedish}},
	abstract     = {This paper presents SweDiagnostics, a natural language inference dataset for Swedish based on the GLUE Diagnostic dataset. It is the largest, manually corrected NLI dataset in Swedish to date and can be used to evaluate models on NLI in Swedish as well as estimate English-Swedish language transfer capabilities. We present the dataset, the methodology used for translation, compare existing implementations and discuss limitations of the dataset, in particular those related to translationese.},
	booktitle    = {17th Workshop on Building and Using Comparable Corpora, {BUCC} 2024 at {LREC-COLING} 2024 -- Proceedings},
	author       = {Morger, Felix},
	year         = {2024},
	isbn         = {9782493814319},
}

@comment{Conference paper comparing human and model relative word importance across four languages. The volume editors are stored in the editor field instead of being appended to booktitle.}
@inproceedings{morger-etal-2022-cross-325984,
	title        = {A Cross-lingual Comparison of Human and Model Relative Word Importance},
	abstract     = {Relative word importance is a key metric for natural language processing. In this work, we compare human and model relative word importance to investigate if pretrained neural language models focus on the same words as humans cross-lingually. We perform an extensive study using several importance metrics (gradient-based saliency and attention-based) in monolingual and multilingual models, including eye-tracking corpora from four languages (German, Dutch, English, and Russian). We find that gradient-based saliency, first-layer attention, and attention flow correlate strongly with human eye-tracking data across all four languages. We further analyze the role of word length and word frequency in determining relative importance and find that it strongly correlates with length and frequency, however, the mechanisms behind these non-linear relations remain elusive. We obtain a cross-lingual approximation of the similarity between human and computational language processing and insights into the usability of several importance metrics.},
	booktitle    = {Proceedings of the 2022 {CLASP} Conference on (Dis)embodiment, Gothenburg and online 15--16 September 2022},
	author       = {Morger, Felix and Brandl, Stephanie and Beinborn, Lisa and Hollenstein, Nora},
	editor       = {Dobnik, Simon and Grove, Julian and Sayeed, Asad},
	year         = {2022},
	publisher    = {Association for Computational Linguistics},
	address      = {Gothenburg, Sweden},
	isbn         = {978-1-955917-67-4},
}

@comment{Workshop paper on English-Swedish language transfer evaluated with natural language inference. The ISBN hyphenation and the venue spelling follow the record for the proceedings volume in this file; the volume editors are stored in the editor field instead of inside booktitle.}
@inproceedings{morger-2023-there-333596,
	title        = {Are There Any Limits to {English-Swedish} Language Transfer? {A} Fine-grained Analysis Using Natural Language Inference},
	abstract     = {The developments of deep learning in natural language processing (NLP) in recent years have resulted in an unprecedented amount of computational power and data required to train state-of-the-art NLP models. This makes lower-resource languages, such as Swedish, increasingly more reliant on language transfer effects from English since they do not have enough data to train separate monolingual models. In this study, we investigate whether there is any potential loss in English-Swedish language transfer by evaluating two types of language transfer on the GLUE/SweDiagnostics datasets and comparing between different linguistic phenomena. The results show that for an approach using machine translation for training there is no considerable loss in overall performance nor by any particular linguistic phenomena, while relying on pre-training of a multilingual model results in considerable loss in performance. This raises questions about the role of machine translation and the use of natural language inference (NLI) as well as parallel corpora for measuring English-Swedish language transfer.},
	booktitle    = {Proceedings of the Second Workshop on Resources and Representations for Under-Resourced Languages and Domains ({RESOURCEFUL-2023}), May 22, 2023, Tórshavn, Faroe Islands},
	author       = {Morger, Felix},
	editor       = {Ilinykh, Nikolai and Morger, Felix and Dannélls, Dana and Dobnik, Simon and Megyesi, Beáta and Nivre, Joakim},
	year         = {2023},
	publisher    = {Association for Computational Linguistics},
	address      = {Stroudsburg, PA},
	isbn         = {978-1-959429-73-9},
}

@comment{Whole proceedings volume of the RESOURCEFUL-2023 workshop. It is typed as proceedings with the listed people as editors, since the title names it a proceedings and the paper record from this volume lists the same people as its editors. The abstract is rejoined from hard-wrapped lines, which had split the word annotation-driven.}
@proceedings{ilinykh-etal-2023-proceedings-327035,
	title        = {Proceedings of the Second Workshop on Resources and Representations for Under-Resourced Languages and Domains ({RESOURCEFUL-2023}), {May} 22, 2023, {Tórshavn}, {Faroe Islands}},
	abstract     = {The second workshop on resources and representations for under-resourced language and domains was held in Tórshavn, Faroe Islands on May 22nd, 2023. The workshop was conducted in a physical setting, allowing for potential hybrid participation. Continuing with the aim of the first edition in 2020, RESOURCEFUL explored the role of the kind and the quality of resources that are available to us, as well as the challenges and directions for constructing new resources in light of the latest trends in natural language processing. The workshop has provided a forum for discussions between the two communities involved in building data-driven and annotation-driven resources.},
	editor       = {Ilinykh, Nikolai and Morger, Felix and Dannélls, Dana and Dobnik, Simon and Megyesi, Beáta and Nivre, Joakim},
	year         = {2023},
	publisher    = {Association for Computational Linguistics},
	address      = {Stroudsburg, PA},
	isbn         = {978-1-959429-73-9},
}

@comment{Technical report on SwedishGLUE. The issuing body is stored in institution, the field the techreport entry type requires; standard styles ignore publisher for this type.}
@techreport{adesam-etal-2020-swedishglue-299130,
	title        = {{SwedishGLUE} -- Towards a {Swedish} Test Set for Evaluating Natural Language Understanding Models},
	author       = {Adesam, Yvonne and Berdicevskis, Aleksandrs and Morger, Felix},
	year         = {2020},
	institution  = {University of Gothenburg},
}