@inproceedings{hagstrom-etal-2023-effect-331015,
    title = {The Effect of Scaling, Retrieval Augmentation and Form on the Factual Consistency of Language Models},
    author = {Hagstr{\"o}m, Lovisa and Saynova, Denitsa and Norlund, Tobias and Johansson, Moa and Johansson, Richard},
    booktitle = {Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing},
    pages = {5457--5476},
    address = {Singapore},
    publisher = {Association for Computational Linguistics},
    year = {2023},
    abstract = {Large Language Models (LLMs) make natural interfaces to factual knowledge, but their usefulness is limited by their tendency to deliver inconsistent answers to semantically equivalent questions. For example, a model might supply the answer ``Edinburgh'' to ``Anne Redpath passed away in X.'' and ``London'' to ``Anne Redpath's life ended in X.'' In this work, we identify potential causes of inconsistency and evaluate the effectiveness of two mitigation strategies: up-scaling and augmenting the LM with a passage retrieval database. Our results on the LLaMA and Atlas models show that both strategies reduce inconsistency but that retrieval augmentation is considerably more efficient. We further consider and disentangle the consistency contributions of different components of Atlas. For all LMs evaluated we find that syntactical form and task artifacts impact consistency. Taken together, our results provide a better understanding of the factors affecting the factual consistency of language models.},
}
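
A minimal LaTeX usage sketch for this entry; the filename refs.bib is an assumption (use whatever .bib file holds the entry above):

\documentclass{article}
\begin{document}
% Cite the entry by its BibTeX key defined above.
Retrieval augmentation reduces factual
inconsistency~\cite{hagstrom-etal-2023-effect-331015}.
\bibliographystyle{plain}  % any installed style works here
\bibliography{refs}        % hypothetical file containing the entry
\end{document}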