BibTeX

@comment{Workshop paper. The publication status (Accepted) is recorded in the note field; the entry has no volume number.}
@inproceedings{kokkinakis-2012-journal-155893,
	author       = {Kokkinakis, Dimitrios},
	title        = {The {Journal of the Swedish Medical Association} -- a Corpus Resource for Biomedical Text Mining in {Swedish}},
	booktitle    = {The Third Workshop on Building and Evaluating Resources for Biomedical Text Mining ({BioTxtM}), an {LREC} Workshop, Turkey},
	year         = {2012},
	note         = {Accepted},
	abstract     = {Biomedical text mining applications are largely dependent on high quality knowledge resources. Traditionally, these include lexical databases, terminologies, nomenclatures and ontologies and, during the last decade, also corpora of various sizes, variety and diversity. Some of these corpora are annotated with an expanding range of information types and metadata while others become available with a minimal set of annotations. At the same time, it is of great importance that biomedical corpora for lesser-spoken languages also get developed in order to support and facilitate the implementation of practical applications for such languages and to stimulate the development of language technology research and innovation infrastructures in the domain. This paper provides a detailed description of a Swedish biomedical corpus based on the electronic editions of the Journal of the Swedish Medical Association "Läkartidningen" of the years 1996-2010. The corpus consists of a variety of documents that can be related to different medical domains, developed as a response to the increasing needs for large and reliable medical information for Swedish biomedical NLP. The corpus has been structurally annotated with a minimal set of meta information and automatically indexed with the largest and systematically organised computer processable collection of medical terminology, the Swedish SNOMED CT (Systematized Nomenclature of Medicine -- Clinical Terms). This way topic-focused subcorpora, e.g. with diabetes-related content, can be easily developed.},
}
Sidansvarig: sb-webb