Skip to main content

BibTeX

@comment{Conference paper by Ahlberg and Bouma in the COLING 2012 proceedings (Mumbai, India). The title is stored in Title Case so styles can recase it; the acronym in booktitle is brace-protected.}
@inproceedings{ahlberg-bouma-2012-best-172769,
	author       = {Ahlberg, Malin and Bouma, Gerlof},
	title        = {A Best-First Anagram Hashing Filter for Approximate String Matching with Generalized Edit Distance},
	booktitle    = {24th International Conference on Computational Linguistics {COLING}, 8--15 December 2012, Mumbai, India. Proceedings},
	year         = {2012},
	abstract     = {This paper presents an efficient method for approximate string matching against a lexicon. We
define a filter that for each source word selects a small set of target lexical entries, from which
the best match is then selected using generalized edit distance, where edit operations can be
assigned an arbitrary weight. The filter combines a specialized hash function with best-first
search. Our work extends and improves upon a previously proposed hash-based filter, developed
for matching with uniform-weight edit distance. We evaluate an approximate matching system
implemented with the new best-first filter, by conducting several experiments on a historical
corpus and a set of weighted rules taken from the literature. We present running times and
discuss how performance varies using different stopping criteria and target lexica. The results
show that the filter is suitable for large rule sets and million word corpora, and encourage
further development.},
}