
BibTeX

@inProceedings{farahani-johansson-2024-deciphering-343041,
	title        = {Deciphering the Interplay of Parametric and Non-parametric Memory in Retrieval-augmented Language Models},
	abstract     = {Generative language models often struggle with specialized or less-discussed knowledge. A potential solution is found in Retrieval-Augmented Generation (RAG) models, which retrieve relevant information before generating responses. In this study, we explore how the Atlas approach, a RAG model, decides between what it already knows (parametric) and what it retrieves (non-parametric). We use causal mediation analysis and controlled experiments to examine how internal representations influence information processing. Our findings disentangle the effects of parametric knowledge and the retrieved context. They indicate that in cases where the model can choose between both types of information (parametric and non-parametric), it relies more on the context than on its parametric knowledge. Furthermore, the analysis investigates the computations involved in how the model uses the information from the context. We find that multiple mechanisms are active within the model and can be detected with mediation analysis: first, the decision of whether the context is relevant, and second, how the encoder computes output representations to support copying when relevant.},
	booktitle    = {Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing},
	pages        = {16966–16977},
	address      = {Miami, USA},
	author       = {Farahani, Mehrdad and Johansson, Richard},
	year         = {2024},
	publisher    = {Association for Computational Linguistics},
}
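
The abstract above turns on comparing what the reader copies from a retrieved passage against what it answers from parametric memory. The following is a minimal sketch of that contrast only, not the authors' causal mediation analysis and not the Atlas system: it uses a plain Flan-T5 model as a stand-in reader, and the model name, prompt format, and QA example are illustrative assumptions.

# Sketch: does the reader follow the retrieved context or its parametric memory?
# Not the paper's setup; Flan-T5 stands in for Atlas's reader.
import torch
from transformers import T5ForConditionalGeneration, T5Tokenizer

tok = T5Tokenizer.from_pretrained("google/flan-t5-small")
model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-small")
model.eval()

def answer_logprob(context: str, question: str, answer: str) -> float:
    """Total log-probability of `answer` given a retrieved `context` and a `question`."""
    inputs = tok(f"context: {context} question: {question}", return_tensors="pt")
    labels = tok(answer, return_tensors="pt").input_ids
    with torch.no_grad():
        out = model(**inputs, labels=labels)
    return -out.loss.item() * labels.shape[1]  # out.loss is mean NLL per label token

question = "What is the capital of France?"
true_ctx = "Paris is the capital of France."
fake_ctx = "Lyon is the capital of France."  # counterfactual passage

print("logP(Paris | true ctx):", answer_logprob(true_ctx, question, "Paris"))
print("logP(Paris | fake ctx):", answer_logprob(fake_ctx, question, "Paris"))
print("logP(Lyon  | fake ctx):", answer_logprob(fake_ctx, question, "Lyon"))

If the counterfactual answer wins under the counterfactual passage, the model is copying from the context rather than from parametric memory, which matches the behavior the abstract reports when both information sources are available.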

@inProceedings{dannells-etal-2024-transformer-338708,
	title        = {Transformer-based Swedish Semantic Role Labeling through Transfer Learning},
	abstract     = {Semantic Role Labeling (SRL) is a task in natural language understanding where the goal is to extract semantic roles for a given sentence. English SRL has achieved state-of-the-art performance using Transformer techniques and supervised learning. However, this technique is not a viable choice for smaller languages like Swedish due to the limited amount of training data. In this paper, we present the first effort in building a Transformer-based SRL system for Swedish by exploring multilingual and cross-lingual transfer learning methods and leveraging the Swedish FrameNet resource. We demonstrate that multilingual transfer learning outperforms two different cross-lingual transfer models. We also find some differences between frames in FrameNet that can either hinder or enhance the model’s performance. The resulting end-to-end model is freely available and will be made accessible through Språkbanken Text’s research infrastructure.},
	booktitle    = {Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)},
	author       = {Dannélls, Dana and Johansson, Richard and Buhr, Lucy Yang},
	year         = {2024},
	publisher    = {ELRA and ICCL},
	address      = {Turin, Italy},
	ISBN         = {978-2-493814-10-4},
}
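
The abstract describes transferring a multilingual encoder to Swedish SRL. As a rough sketch of the zero-shot cross-lingual idea only (not the authors' end-to-end system; the role inventory, model choice, and omitted training step are placeholders), token-level role labeling with a multilingual encoder looks like this:

# Sketch: multilingual encoder for token-level role labeling, applied to Swedish.
# Placeholder label set; fine-tuning on source-language SRL data is omitted.
import torch
from transformers import AutoModelForTokenClassification, AutoTokenizer

labels = ["O", "B-Agent", "I-Agent", "B-Theme", "I-Theme"]  # illustrative roles
tok = AutoTokenizer.from_pretrained("xlm-roberta-base")
model = AutoModelForTokenClassification.from_pretrained(
    "xlm-roberta-base", num_labels=len(labels))

# ... fine-tune on source-language (e.g. English) SRL data here ...

sentence = "Pojken sparkade bollen."  # Swedish: "The boy kicked the ball."
enc = tok(sentence, return_tensors="pt")
with torch.no_grad():
    logits = model(**enc).logits
pred = [labels[i] for i in logits.argmax(-1)[0].tolist()]
print(list(zip(tok.convert_ids_to_tokens(enc.input_ids[0]), pred)))

Without the omitted fine-tuning step the predictions are of course random; the point is only that a single multilingual encoder lets supervision in one language carry over to Swedish input.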

@inProceedings{johansson-2024-what-337926,
	title        = {What Happens to a Dataset Transformed by a Projection-based Concept Removal Method?},
	abstract     = {We investigate the behavior of methods using linear projections to remove information about a concept from a language representation, and we consider the question of what happens to a dataset transformed by such a method. A theoretical analysis and experiments on real-world and synthetic data show that these methods inject strong statistical dependencies into the transformed datasets. After applying such a method, the representation space is highly structured: in the transformed space, an instance tends to be located near instances of the opposite label. As a consequence, the original labeling can in some cases be reconstructed by applying an anti-clustering method.},
	booktitle    = {Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)},
	pages        = {17486–17492},
	address      = {Torino, Italy},
	author       = {Johansson, Richard},
	year         = {2024},
	publisher    = {ELRA and ICCL},
}
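
The abstract's claim is concrete enough to reproduce in miniature: projecting out a linearly encoded concept sharply reduces its linear recoverability, yet leaves strong structure behind. The sketch below is in the spirit of nullspace-projection methods such as INLP, not the paper's exact procedure, and uses synthetic data:

# Sketch: remove a binary concept by projecting onto the nullspace of a
# learned linear direction, then test linear recoverability. Synthetic data.
import numpy as np
from sklearn.linear_model import LogisticRegression

rng = np.random.default_rng(0)
n, d = 1000, 16
y = rng.integers(0, 2, n)                                      # binary concept
X = rng.normal(size=(n, d)) + np.outer(y, rng.normal(size=d))  # encode it linearly

w = LogisticRegression().fit(X, y).coef_
w = w / np.linalg.norm(w)            # concept direction, shape (1, d)
P = np.eye(d) - w.T @ w              # projection onto its nullspace
X_clean = X @ P

print("accuracy before removal:", LogisticRegression().fit(X, y).score(X, y))
print("accuracy after removal :",
      LogisticRegression().fit(X_clean, y).score(X_clean, y))

The paper's observation is that the transformed space is not merely scrubbed: instances tend to end up near instances of the opposite label, which is why an anti-clustering method can partially reconstruct the original labeling.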