Skip to main content
Språkbanken Text is a department within Språkbanken.

BibTeX

@comment{Conference paper record. Non-ASCII letters are written as LaTeX
accent escapes and dashes as ASCII (-- for ranges) so the entry compiles
under both classic BibTeX and biblatex/Biber. Braced words in the title
keep their capitalisation under sentence-casing styles.}
@inproceedings{skelbye-dannells-2021-processing-306957,
	author       = {Skelbye, Molly and Dann{\'e}lls, Dana},
	title        = {{OCR} Processing of {Swedish} Historical Newspapers Using Deep Hybrid {CNN--LSTM} Networks},
	booktitle    = {Proceedings of the International Conference on Recent Advances in Natural Language Processing, 1--3 September, 2021},
	editor       = {Angelova, Galia and Kunilovskaya, Maria and Mitkov, Ruslan and Nikolova-Koleva, Ivelina},
	year         = {2021},
	publisher    = {INCOMA},
	address      = {Shoumen, Bulgaria},
	isbn         = {978-954-452-072-4},
	abstract     = {Deep CNN--LSTM hybrid neural networks have proven to improve the accuracy of Optical Character Recognition (OCR) models for different languages. In this paper we examine to what extent these networks improve the OCR accuracy rates on Swedish historical newspapers. By experimenting with the open source OCR engine Calamari, we are able to show that mixed deep CNN--LSTM hybrid models outperform previous models on the task of character recognition of Swedish historical newspapers spanning 1818--1848. We achieved an average character accuracy rate (CAR) of 97.43\% which is a new state-of-the-art result on 19th century Swedish newspaper text. Our data, code and models are released under CC BY licence.},
}