Skip to main content
Språkbanken Text is a department within Språkbanken.

BibTeX

@comment{Conference paper from the CLARIN Annual Conference 2020 selected-papers volume. The proceedings title lives in booktitle; the Linköping Electronic Conference Proceedings series and its number are stored in series/volume so the bibliography style decides how to print them. Braced words in title and booktitle are protected from style-driven recasing.}
@inproceedings{dannells-etal-2021-engine-305700,
	author       = {Dannélls, Dana and Björk, Lars and Dirdal, Ove and Johansson, Torsten},
	title        = {A Two-{OCR} Engine Method for Digitized {Swedish} Newspapers},
	booktitle    = {Selected Papers from the {CLARIN} Annual Conference 2020},
	series       = {Linköping Electronic Conference Proceedings},
	volume       = {180},
	year         = {2021},
	publisher    = {Linköping University Electronic Press},
	address      = {Linköping},
	isbn         = {978-91-7929-609-4},
	abstract     = {In this paper we present a two-OCR engine method that was developed at Kungliga biblioteket (KB), the National Library of Sweden, for improving the correctness of the OCR for mass digitization of Swedish newspapers. To evaluate the method a reference material spanning the years 1818–2018 was prepared and manually transcribed. A quantitative evaluation was then performed against the material. In this first evaluation we experimented with word lists for different time periods. The results show that even though there was no significant overall improvement of the OCR results, some combinations of word lists are successful for certain periods and should therefore be explored further.},
}