Skip to main content
Språkbanken Text is a department within Språkbanken.

BibTeX

% Dannélls (2006): conference paper on recognizing acronym-definition pairs in Swedish medical texts.
@inproceedings{dannells-2006-automatic-66478,
	author       = {Dannélls, Dana},
	title        = {Automatic Acronym Recognition},
	booktitle    = {Proceedings of the 11th Conference of the European Chapter of the Association for Computational Linguistics ({EACL})},
	year         = {2006},
	isbn         = {1-932432-59-0},
	abstract     = {This paper deals with the problem of recognizing and extracting acronym-definition pairs in Swedish medical texts.
		This project applies a rule-based method to solve the acronym recognition task and compares and evaluates the results of different machine learning algorithms on the same task.
		The method proposed is based on the approach that acronym-definition pairs follow a set of patterns and other regularities that can be usefully applied for the acronym identification task.
		Supervised machine learning was applied to monitor the performance of the rule-based method, using Memory Based Learning (MBL).
		The rule-based algorithm was evaluated on a hand tagged acronym corpus and performance was measured using standard measures recall, precision and f-score.
		The results show that performance could further improve by increasing the training set and modifying the input settings for the machine learning algorithms.
		An analysis of the errors produced indicates that further improvement of the rule-based method requires the use of syntactic information and textual pre-processing.},
}

% Kokkinakis and Dannélls (2006): conference paper on acronym-definition recognition in Swedish (medical) texts.
@inproceedings{kokkinakis-dannells-2006-recognizing-33936,
	author       = {Kokkinakis, Dimitrios and Dannélls, Dana},
	title        = {Recognizing Acronyms and their Definitions in {Swedish} Medical Texts},
	booktitle    = {Proceedings of the 5th International Conference on Language Resources and Evaluation ({LREC})},
	year         = {2006},
	abstract     = {This paper addresses the task of recognizing acronym-definition pairs in Swedish (medical) texts as well as the compilation of a freely available sample of such manually annotated pairs.
		A material suitable not only for supervised learning experiments, but also as a testbed for the evaluation of the quality of future acronym-definition recognition systems.
		There are a number of approaches to the identification described in the literature, particularly within the biomedical domain, but none of those addresses the variation and complexity exhibited in a language other than English.
		This is realized by the fact that we can have a mixture of two languages in the same document and/or sentence, i.e. Swedish and English; that Swedish is a compound language that significantly deteriorates the performance of previous approaches (without adaptations) and, most importantly, the fact that there is a large variation of possible acronym-definition permutations realized in the analysed corpora, a variation that is usually ignored in previous studies.},
}