% Conference paper: best-first anagram hashing filter (COLING 2012).
@inproceedings{ahlberg-bouma-2012-best-172769,
  author    = {Ahlberg, Malin and Bouma, Gerlof},
  title     = {A best-first anagram hashing filter for approximate string matching with generalized edit distance},
  booktitle = {24th International Conference on Computational Linguistics {COLING}, 8--15 December 2012, Mumbai, India. Proceedings},
  year      = {2012},
  abstract  = {This paper presents an efficient method for approximate string matching against a lexicon. We define a filter that for each source word selects a small set of target lexical entries, from which the best match is then selected using generalized edit distance, where edit operations can be assigned an arbitrary weight. The filter combines a specialized hash function with best-first search. Our work extends and improves upon a previously proposed hash-based filter, developed for matching with uniform-weight edit distance. We evaluate an approximate matching system implemented with the new best-first filter, by conducting several experiments on a historical corpus and a set of weighted rules taken from the literature. We present running times and discuss how performance varies using different stopping criteria and target lexica. The results show that the filter is suitable for large rule sets and million word corpora, and encourage further development.},
}

% Conference paper: LFG analysis of sentence-initial object "es" (LFG'12).
% The volume editors are stored in the editor field, not in booktitle.
@inproceedings{theiler-bouma-2012-price-172733,
  author    = {Theiler, Nadine and Bouma, Gerlof},
  title     = {Two for the price of one: an {LFG} treatment of sentence initial object es in {German}},
  booktitle = {Proceedings of {LFG}'12},
  editor    = {Butt, Miriam and King, Tracy Holloway},
  year      = {2012},
  pages     = {603--623},
  abstract  = {We present an analysis of sentence initial object es ‘it’ in German. The weak pronoun es may only realize such an object under specific information structural conditions. We follow recent work suggesting these conditions are exactly those that licence the use of the presentational construction, marked by a sentence initial dummy es. We propose that the initial objects are an example of function amalgamation, show that only objects that may also appear in the clause-internal postverbal domain can participate in this fusion and make this precise in LFG. We end the paper with a contrastive discussion.},
}

% Conference paper: spelling variation in historical text (SLTC 2012).
@inproceedings{adesam-etal-2012-processing-166657,
  author    = {Adesam, Yvonne and Ahlberg, Malin and Bouma, Gerlof},
  title     = {Processing spelling variation in historical text},
  booktitle = {Proceedings of the Fourth Swedish Language Technology Conference ({SLTC})},
  year      = {2012},
}

% Workshop paper: lexical link-up for an Old Swedish corpus (LTHist at Konvens).
@inproceedings{adesam-etal-2012-bokstaffua-163218,
  author    = {Adesam, Yvonne and Ahlberg, Malin and Bouma, Gerlof},
  title     = {bokstaffua, bokstaffwa, bokstafwa, bokstaua, bokstawa... {Towards} lexical link-up for a corpus of {Old Swedish}},
  booktitle = {Proceedings of the {LTHist} workshop at {Konvens}},
  year      = {2012},
}

% Conference paper: persistent queues and deques in Prolog (FLOPS 2012).
@inproceedings{bouma-2012-real-158261,
  author    = {Bouma, Gerlof},
  title     = {Real-Time Persistent Queues and Deques with Logic Variables (Declarative Pearl)},
  booktitle = {Proceedings of the 11th International Symposium on Functional and Logic Programming ({FLOPS} 2012)},
  year      = {2012},
  pages     = {62--73},
  isbn      = {978-3-642-29821-9},
  abstract  = {We present a Prolog implementation of real-time persistent queues and double-ended queues. Our implementation is inspired by Okasaki’s lazy-functional approach, but relies only on standard Prolog, comprising of the pure subset plus if-then-else constructs to efficiently implement guards and meta-calls for convenience. The resulting data structure is a nice demonstration of the fact that the use of logic variables to hold the outcome of an unfinished computation can sometimes give the same kind of elegant and compact solutions as lazy evaluation.},
}