% Publications from 2006 on Swedish medical language processing.
% Citation keys are kept exactly as they were so existing citations keep resolving.

% NOTE(review): the key of the next entry contains a non-ASCII character;
% classic 8-bit BibTeX toolchains may not accept it. Confirm the target toolchain
% before relying on it.
@techreport{Åhlfelt-Hans2006-34047,
  author      = {Åhlfelt, Hans and Borin, Lars and Daumke, Philipp and Grabar, Natalia and Hallett, Catalina and Hardcastle, David and Kokkinakis, Dimitrios and Mancini, Clara and Marko, Kornel and Merkel, Magnus and Pietsch, Christian and Power, Richard and Scott, Donia and Silvervarg, Annika and Toporowska Gronostaj, Maria and Williams, Sandra and Willis, Alistair},
  title       = {Literature Review on Patient-Friendly Documentation Systems},
  institution = {Göteborg University},
  address     = {Göteborg},
  year        = {2006},
}

@inproceedings{Kokkinakis-Dimitrios2006-33936,
  author    = {Kokkinakis, Dimitrios and Dannélls, Dana},
  title     = {Recognizing Acronyms and their Definitions in {Swedish} Medical Texts},
  booktitle = {Proceedings of the 5th Language Resources and Evaluation (LREC)},
  year      = {2006},
  abstract  = {This paper addresses the task of recognizing acronym-definition pairs in Swedish (medical) texts as well as the compilation of a freely available sample of such manually annotated pairs. A material suitable not only for supervised learning experiments, but also as a testbed for the evaluation of the quality of future acronym-definition recognition systems. There are a number of approaches to the identification described in the literature, particularly within the biomedical domain, but none of those addresses the variation and complexity exhibited in a language other than English. This is realized by the fact that we can have a mixture of two languages in the same document and/or sentence, i.e. Swedish and English; that Swedish is a compound language that significantly deteriorates the performance of previous approaches (without adaptations) and, most importantly, the fact that there is a large variation of possible acronym-definition permutations realized in the analysed corpora, a variation that is usually ignored in previous studies.},
}

@inproceedings{Kokkinakis-Dimitrios2006-33937,
  author    = {Kokkinakis, Dimitrios},
  title     = {Collection, Encoding and Linguistic Processing of a {Swedish} Medical Corpus - The {MEDLEX} Experience},
  booktitle = {Proceedings of the 5th Language Resources and Evaluation (LREC)},
  year      = {2006},
  abstract  = {Corpora annotated with structural and linguistic characteristics play a major role in nearly every area of language processing. During recent years a number of corpora and large data sets became known and available to research even in specialized fields such as medicine, but still however, targeted predominantly for the English language. This paper provides a description of the collection, encoding and linguistic processing of an ever growing Swedish medical corpus, the MEDLEX Corpus. MEDLEX consists of a variety of text-documents related to various medical text genres. The MEDLEX Corpus has been structurally annotated using the Corpus Encoding Standard for XML (XCES), lemmatized and automatically annotated with part-of-speech and semantic information (extended named entities and the Medical Subject Headings, MeSH, terminology). The results from the processing stages (part-of-speech, entities and terminology) have been merged into a single representation format and syntactically analysed using a cascaded finite state parser. Finally, the parser’s results are converted into a tree structure that follows the TIGER-XML coding scheme, resulting a suitable for further exploration and fairly large Treebank of Swedish medical texts.},
}

@inproceedings{Kokkinakis-Dimitrios2006-33938,
  author    = {Kokkinakis, Dimitrios and Toporowska Gronostaj, Maria},
  title     = {Lay Language versus Professional Language within the Cardiovascular Subdomain - a Contrastive Study},
  booktitle = {Proceedings of the 2006 WSEAS Int. Conf. on Cellular \& Molecular Biology, Biophysics \& Bioengineering},
  year      = {2006},
  abstract  = {This paper reports on a corpus-based, contrastive study of Swedish medical language. It is focused on the vocabulary used in two types of medical textual material: professional portals and web-based consumer sites within the domain of cardiovascular disorders. Linguistic, statistical and quantitatively based readability studies are considered in order to find the typical language-dependent and, possibly, language independent characteristics of the material examined and suggest concrete measures that might bridge the gap in medical vocabulary as used by laypersons/consumers and professionals.},
}

@inproceedings{Kokkinakis-Dimitrios2006-33925,
  author    = {Kokkinakis, Dimitrios},
  title     = {Developing Resources for {Swedish} Bio-Medical Text Mining},
  booktitle = {Proceedings of the 2nd International Symposium on Semantic Mining in Biomedicine (SMBM)},
  year      = {2006},
  abstract  = {Collection and annotation of corpora in specialized fields, such as medicine, and particularly for lesser-spoken languages, than for instance English, is an important enterprise for the continuous development and growth of language technology research, for resource development and for the implementation of practical applications for these languages. In this paper, we describe our ongoing efforts to build a large Swedish medical corpus, the MEDLEX Corpus, how we combine generic named entity and terminology recognition for the detailed annotation of the corpus, and how these annotations are further utilized by an annotations-aware cascaded finite-state parser.},
}

% NOTE(review): the next article entry has no journal field, which the article
% type requires; the journal name is not present in this file. TODO: add it.
@article{Kokkinakis-Dimitrios2006-34032,
  author   = {Kokkinakis, Dimitrios and Toporowska Gronostaj, Maria},
  title    = {Comparing Lay and Professional Language in Cardiovascular Disorders Corpora},
  year     = {2006},
  volume   = {3},
  number   = {6},
  pages    = {429--437},
  abstract = {This paper reports on a corpus-based, contrastive study of Swedish medical language. It is focused on the vocabulary used in two types of medical textual material: professional portals and web-based consumer sites within the domain of cardiovascular disorders. Linguistic, statistical and quantitatively based readability studies are considered in order to find the typical language-dependent and, possibly, language independent characteristics of the material examined and suggest concrete measures that might bridge the gap in medical vocabulary as used by laypersons/consumers and professionals.},
}

@incollection{Kokkinakis-Dimitrios2006-56225,
  author    = {Kokkinakis, Dimitrios and Toporowska Gronostaj, Maria and Johansson Kokkinakis, Sofie},
  title     = {Att bygga en språkbro mellan allmänhet och vårdpersonal - språket i texter om hjärt-kärlsjukdomar},
  booktitle = {Humanistdag-boken},
  publisher = {Göteborgs universitet},
  address   = {Göteborg},
  year      = {2006},
}

% NOTE(review): the next article entry has no journal field, which the article
% type requires. It also shares its title, author and year with the conference
% entry that follows it; presumably the same work recorded twice. TODO: confirm.
@article{Kokkinakis-Dimitrios2006-45197,
  author   = {Kokkinakis, Dimitrios},
  title    = {Towards a {Swedish} Medical Treebank},
  year     = {2006},
  abstract = {In this paper, we present our current activities towards the compilation and the multi-layered annotation of a domain-dependent corpus for Swedish in the area of medicine. The focus of the paper is based on the description of the constituent structure and functionally oriented annotation of the corpus. Moreover, the annotation scheme adopted, which incorporates three main layers of linguistic processing, lexical analysis, shallow semantic analysis and syntactic processing, will be exemplified. For the syntactic analysis we use a cascaded finite-state parser, aware of the shallow semantic annotations produced. The result of this analysis, including syntactic parsing and shallow semantic analysis, is transformed into the TIGER-XML interchange format. Our goal is to produce a large, rich in annotations, medical treebank suitable for both corpus-based grammar learning systems, for semantic relation extraction and for linguistic exploration of theoretical nature.},
}

@inproceedings{Kokkinakis-Dimitrios2006-34033,
  author    = {Kokkinakis, Dimitrios},
  title     = {Towards a {Swedish} Medical Treebank},
  booktitle = {5th Conference on Treebanks and Linguistic Theories},
  year      = {2006},
}

% NOTE(review): the volume of the next entry equals the publication year, which
% looks like an export artefact. TODO: verify against the publication.
@inproceedings{Marko-Kornel2006-34049,
  author    = {Marko, Kornel and Baud, Robert and Zweigenbaum, Pierre and Merkel, Magnus and Toporowska Gronostaj, Maria and Kokkinakis, Dimitrios and Schulz, Stefan},
  title     = {Cross-Lingual Alignment of Medical Lexicons},
  booktitle = {Language Resources and Evaluation},
  year      = {2006},
  volume    = {2006},
  pages     = {5--8},
  abstract  = {We present an approach for the creation of a multilingual medical dictionary for the biomedical domain. In a first step, available monolingual lexical resources are compiled into a common interchange format. Secondly, according to a linking format decided by the authors, the cross-lingual mappings of lexical entries are added. We show how these mappings can be generated using a morpho-semantic term normalization engine, which captures intra- as well as interlingual synonymy relationships on the level of subwords.},
}