% BibTeX record exported from a publication web page.

% Short paper in the ACL 2023 proceedings (Volume 2: Short Papers).
% The page range and conference location are stored in their own fields
% (pages / address) rather than inside booktitle, so bibliography styles
% can format them; the page range uses the ASCII "--" range separator.
@inproceedings{doostmohammadi-etal-2023-surface-327186,
	author       = {Doostmohammadi, Ehsan and Norlund, Tobias and Kuhlmann, Marco and Johansson, Richard},
	title        = {Surface-Based Retrieval Reduces Perplexity of Retrieval-Augmented Language Models},
	booktitle    = {Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)},
	pages        = {521--529},
	address      = {Toronto, Canada},
	publisher    = {Association for Computational Linguistics},
	year         = {2023},
	abstract     = {Augmenting language models with a retrieval mechanism has been shown to significantly improve their performance while keeping the number of parameters low. Retrieval-augmented models commonly rely on a semantic retrieval mechanism based on the similarity between dense representations of the query chunk and potential neighbors. In this paper, we study the state-of-the-art Retro model and observe that its performance gain is better explained by surface-level similarities, such as token overlap. Inspired by this, we replace the semantic retrieval in Retro with a surface-level method based on BM25, obtaining a significant reduction in perplexity. As full BM25 retrieval can be computationally costly for large datasets, we also apply it in a re-ranking scenario, gaining part of the perplexity reduction with minimal computational overhead.},
}