% Ahlberg and Bouma (2012): COLING conference paper on approximate string
% matching against a lexicon using a best-first anagram hashing filter.
@inproceedings{ahlberg-bouma-2012-best-172769,
  author    = {Ahlberg, Malin and Bouma, Gerlof},
  title     = {A best-first anagram hashing filter for approximate string matching with generalized edit distance},
  booktitle = {24th International Conference on Computational Linguistics {COLING}, 8--15 December 2012, Mumbai, India. Proceedings},
  year      = {2012},
  abstract  = {This paper presents an efficient method for approximate string matching against a lexicon. We define a filter that for each source word selects a small set of target lexical entries, from which the best match is then selected using generalized edit distance, where edit operations can be assigned an arbitrary weight. The filter combines a specialized hash function with best-first search. Our work extends and improves upon a previously proposed hash-based filter, developed for matching with uniform-weight edit distance. We evaluate an approximate matching system implemented with the new best-first filter, by conducting several experiments on a historical corpus and a set of weighted rules taken from the literature. We present running times and discuss how performance varies using different stopping criteria and target lexica. The results show that the filter is suitable for large rule sets and million word corpora, and encourage further development.},
}