Språkbanken Text är en avdelning inom Språkbanken.
BibTeX

@inProceedings{ghosh-etal-2011-shallow-151356,
	title        = {Shallow Discourse Parsing with Conditional Random Fields},
	abstract     = {Parsing discourse is a challenging natural language processing task. In this paper we take a data driven approach to identify arguments of explicit discourse connectives. In contrast to previous work we do not make any assumptions on the span of arguments and consider parsing as a token-level sequence labeling task. We design the argument segmentation task as a cascade of decisions based on conditional random fields (CRFs). We train the CRFs on lexical, syntactic and semantic features extracted from the Penn Discourse Treebank and evaluate feature combinations on the commonly used test split. We show that the best combination of features includes syntactic and semantic features. The comparative error analysis investigates the performance variability over connective types and argument positions.},
	booktitle    = {Proceedings of 5th International Joint Conference on Natural Language Processing; Chiang Mai, Thailand; November 8-13, 2011},
	editor       = {Wang, Haifeng and Yarowsky, David},
	author       = {Ghosh, Sucheta and Johansson, Richard and Riccardi, Giuseppe and Tonelli, Sara},
	year         = {2011},
	pages        = {1071--1079},
}

@inproceedings{forsberg-2011-green-140694,
	title        = {Green resources in plain sight: opening up the {SweFN++} project},
	abstract     = {SweFN++ is a project focused on the creation and curation of Swedish lexical resources geared towards language technology applications. An important theme of the project is openness and its realization as a lexical infrastructure. We give a short overview of the project, elaborate on what we mean by openness, and present the current state of the lexical infrastructure.},
	booktitle    = {Proceedings of the Nodalida 2011 Workshop on visibility and availability of LT resources},
	author       = {Forsberg, Markus},
	year         = {2011},
}

@incollection{borin-forsberg-2011-diachronic-144291,
	title        = {A diachronic computational lexical resource for 800 years of {Swedish}},
	booktitle    = {Language technology for cultural heritage},
	author       = {Borin, Lars and Forsberg, Markus},
	year         = {2011},
	publisher    = {Springer},
	address      = {Berlin},
	ISBN         = {978-3-642-20226-1},
	pages        = {41--61},
}

@inproceedings{smith-etal-2011-developing-152723,
  author    = {Smith, Frida and Carlsson, Eva and Friberg, Febe and Kokkinakis, Dimitrios and Forsberg, Markus and Öhrn, Matilda and Öhlén, Joakim},
  title     = {Developing a toolkit for written information materials for patients with colorectal cancer undergoing elective surgery},
  booktitle = {Svenska Läkaresällskapets Riksstämman},
  year      = {2011},
  abstract  = {This study examines language complexity, readability and suitability, of written health information materials given to patients undergoing colorectal cancer (CRC) surgery. The overall aim is to investigate whether the implementation of adapted, person-centred information and communication for patients with CRC undergoing elective surgery, can enhance the patients’ self-care beliefs and well-being during recovery in the phase following diagnosis and initial treatment. Several explorative, qualitative studies are planned and will function both as a basis for the proposed interventions and provide explanations for the actual processes leading to the desired outcomes. Patients’ knowledge enablement will be reached by several interrelated intervention strategies and specific activities. One of these strategies deals with means to facilitate patients’ information seeking patterns and the goal is to provide patients with written information materials according to preferences for complex and detailed or legible texts. Thus, the interventions planed aim to enhance a movement from receiving information and instructions to participating in knowing. Written and printed patient information material from 28 Swedish clinics for patients diagnosed with CRC undergoing elective surgery were selected for analysis by means of standard metrics and more elaborate language technology techniques. Various text parameters such as lexical variation, frequency bands and the use of terminology were examined. The material was also analysed using a Suitability Assessment Instrument in order to examine content, literacy demand, graphic illustrations, layout and typography, learning stimulation and finally cultural appropriateness. In addition, five focusgroups were conducted where patients were asked to give their experiences of using written information. 
Results from the language technology analysis showed a variety in materials, where it could be divided in to easy, medium and difficult to read and comprehend. Patients in focusgroups told they would like written materials to be levelled in order to gain information stepwise, but also stressed the importance of information given both orally and in writing, and that they must correspond. Using the SAM-instrument was a good complement for deeper understanding, and taking all three analyses in account, we aim to design a balanced toolkit for how to best design written information materials where a person tailored approach can be offered.
},
}

@inProceedings{borin-etal-2011-semantic-140686,
	title        = {Semantic Search in Literature as an e-Humanities Research Tool: {CONPLISIT} – Consumption Patterns and Life-Style in 19th Century {Swedish} Literature},
	abstract     = {We present our ongoing work on language technology-based e-science in the humanities, with a focus on text-based research in the historical sciences. Currently, we are
working on the adaptation and integration of lexical resources representing different historical stages of Swedish into a lexical
and morphological toolbox that will allow us to develop semantically oriented text search applications for historical research on Swedish text. We describe a semantic search prototype which was built using REST web services from this toolbox
as components, and which has been
evaluated by historians interested in using digitized 19th century novels as primary data for an historical investigation of the emerging consumer society in 19th century
Sweden.},
	booktitle    = {Proceedings of the 18th Nordic Conference of Computational Linguistics NODALIDA 2011},
	series       = {NEALT Proceedings Series},
	author       = {Borin, Lars and Forsberg, Markus and Ahlberger, Christer},
	year         = {2011},
	volume       = {11},
	pages        = {58--65},
}

@inProceedings{ju-etal-2011-towards-151361,
	title        = {Towards Using Reranking in Hierarchical Classification},
	abstract     = {We consider the use of reranking as a way to relax typical independence assumptions often made in hierarchical multilabel classification. Our reranker is based on (i) an algorithm that generates promising k-best classification hypotheses from the output of local binary classifiers that classify nodes of a target tree-shaped hierarchy; and (ii) a tree kernel-based reranker applied to the classification tree associated with the hypotheses above. We carried out a number of experiments with this model on the Reuters corpus: we firstly show the potential of our algorithm by computing the oracle classification accuracy. This demonstrates that there is a significant room for potential improvement of the hierarchical classifier. Then, we measured the accuracy achieved by the reranker, which shows a significant performance improvement over the baseline.},
	booktitle    = {Proceedings of the Joint ECML/PKDD-PASCAL Workshop on Large-Scale Hierarchical Classification; September 5, 2011; Athens, Greece},
	author       = {Ju, Qi and Johansson, Richard and Moschitti, Alessandro},
	year         = {2011},
}

@inproceedings{johanssonkokkinakis-volodina-2011-corpus-148533,
	title        = {Corpus-based approaches for the creation of a frequency based vocabulary list in the {EU} project {KELLY} – issues on reliability, validity and coverage},
	abstract     = {At present there are relatively few vocabulary lists for Swedish describing modern vocabulary as well as being adapted to language learners’ needs. In Europe including Sweden there exist approaches to unify ways of working consistently with language learning, one example worth naming in this respect is the Common European Framework of Reference (CEFR) which provides guidelines for systematic approach to language teaching and assessment of language proficiency. This article describes EU project Kelly (KEywords for Language Learning for Young and adults alike, 2009-2012), the main objective of which was to create vocabulary lists for nine languages (Swedish, English, Norwegian, Greek, Italian, Polish, Arabic, Chinese and Russian) and adapt them to CEFR levels. We describe the process of compiling and validating the Swedish Kelly-list, dwell on benefits and limitations of using a corpus based approach in this project; as well as mention the impact of the methodological approach for compiling vocabulary lists for specific purposes.},
	booktitle    = {eLex, 10--12 November 2011, Slovenia},
	author       = {Johansson Kokkinakis, Sofie and Volodina, Elena},
	year         = {2011},
	volume       = {2011},
}

@article{borin-forsberg-2011-swesaurus-151331,
  author  = {Borin, Lars and Forsberg, Markus},
  title   = {Swesaurus – ett svenskt ordnät med fria tyglar},
  journal = {LexicoNordica},
  year    = {2011},
  volume  = {18},
  pages   = {17--39},
}

@article{hammarstrom-borin-2011-unsupervised-141707,
  author  = {Hammarström, Harald and Borin, Lars},
  title   = {Unsupervised learning of morphology},
  journal = {Computational Linguistics},
  year    = {2011},
  volume  = {37},
  number  = {2},
  pages   = {309--350},
}

@techreport{borin-etal-2011-metadata-142495,
  author   = {Borin, Lars and Lindh, Jonas and Brandt, Martha and Olsson, Leif-Jöran},
  title    = {Metadata descriptions and other interoperability standards},
  year     = {2011},
  abstract = {An important aim of META-NORD is to upgrade and harmonize national language resources and tools in order to make them interoperable, within languages and across languages, with respect to their data formats and as far as possible also as regards their content.
Since resources and to some extent tools will remain in one location – one of a number of META-NORD centers – the preferred way of accessing and utilizing resources and tools will be through metadata and APIs, allowing the assembly of on-the-fly tool-chains made up of standardized component language technology tools, processing distributed – and in many cases interlinked – language resources in standardized formats.},
}

@inproceedings{ghosh-etal-2011-discourse-151350,
  author    = {Ghosh, Sucheta and Tonelli, Sara and Riccardi, Giuseppe and Johansson, Richard},
  title     = {End-to-End Discourse Parser Evaluation},
  booktitle = {Fifth IEEE International Conference on Semantic Computing (ICSC), 2011; September 18-21, 2011; Palo Alto, United States},
  year      = {2011},
  isbn      = {978-1-4577-1648-5},
  abstract  = {We are interested in the problem of discourse parsing of textual documents. We present a novel end-to-end discourse parser that, given a plain text document in input, identifies the discourse relations in the text, assigns them a semantic label and detects discourse arguments spans. The parsing architecture is based on a cascade of decisions supported by Conditional Random Fields (CRF). We train and evaluate three different parsers using the PDTB corpus. The three system versions are compared to evaluate their robustness with respect to deep/shallow and automatically extracted syntactic features.},
}

@inProceedings{wilhelmsson-2011-automatic-259874,
	title        = {Automatic Question Generation from {Swedish} Documents as a Tool for Information Extraction},
	abstract     = {An implementation of automatic question generation (QG) from raw Swedish text is presented. QG is here chosen as an alternative to natural query systems where any query can be posed and no indication is given of whether the current text database includes the information sought for. The program builds on parsing with grammatical functions from which corresponding questions are generated and it incorporates the article database of Swedish Wikipedia. The pilot system is meant to work with a text shown in the GUI and auto-completes user input to help find available questions. The act of question generation is here described together with early test results regarding the current produced questions.},
	booktitle    = {Proceedings of the 18th Nordic Conference of Computational Linguistics NODALIDA 2011},
	series       = {NEALT Proceedings Series},
	volume       = {11},
	author       = {Wilhelmsson, Kenneth},
	year         = {2011},
	publisher    = {Northern European Association for Language Technology (NEALT)},
	address      = {Tartu},
}

@article{lindh-2011-francis-142483,
	title        = {{Francis Nolan}},
	abstract     = {The Encyclopedia of Applied Linguistics is a ground-breaking resource, spanning the entire field. Truly international in scope, it brings together contributions from the world’s most respected scholars in applied linguistics.
Available online or as a 10-volume print set, this comprehensive print and electronic resource provides an overview of all the key areas in applied linguistics, from language learning and language policy, to qualitative methods in applied linguistics, and technology and language. Comprising over 3.5 million words, across 1,200 entries, it spans key developments and ideas in applied linguistics, historic and emerging areas of research, and includes 250 biographies of prominent figures who have helped shaped this diverse, and ever-growing field.},
	journal      = {The Encyclopedia of Applied Linguistics},
	author       = {Lindh, Jonas},
	year         = {2011},
	pages        = {2},
}

@inProceedings{vancoppenolle-etal-2011-german-154315,
	title        = {A {German} Grammar for Generation in {OpenCCG}},
	abstract     = {We present a freely available CCG fragment for German that is being developed for natural language generation tasks in the domain of share price statistics. It is implemented in OpenCCG, an open source Java implementation of the compuationally attractive CCG formalism. Since generation requires lexical categories to have semantic representations, so that posssible realizations can be produced, the underlying grammar needs to define semantics. Hybrid Logic Dependency Semantics, a logic calculus especially suited for encodings linguistic meaning, is used to declare the semantics layer. To our knowledge, related work on German CCG development has not yet focused on the semantics layer. In terms of syntax, we concentrate on aspects of German as a partially free constituent order language. Special attention is payed to scrambling, where we employ CCG's type-changing mechanism in a manner athat is somewhat unusual, but allows us to a) minimize the amount of syntactic categories that are needed to model scrambling, compared to providing categories for all possible argument orders, and b) retain enough control to impose restrictions on scrambling.},
	booktitle    = {Multilingual Resources and Multilingual Applications. Proceedings of the Conference of the German Society for Computational Linguistics and Language Technology (GSCL), Hamburg, 2011},
	editor       = {Hedeland, H. and Schmidt, T. and Wörner, K.},
	series       = {Working Papers in Multilingualism, Series B},
	author       = {Vancoppenolle, Jean and Tabbert, Eric and Bouma, Gerlof and Stede, Manfred},
	year         = {2011},
	number       = {96},
	pages        = {145--150},
}

@inproceedings{kokkinakis-malm-2011-character-143875,
  author    = {Kokkinakis, Dimitrios and Malm, Mats},
  title     = {Character Profiling in 19th Century Fiction},
  booktitle = {Workshop: Language Technologies for Digital Humanities and Cultural Heritage in conjunction with the Recent Advances in Natural Language Processing (RANLP). Hissar, Bulgaria.},
  year      = {2011},
  abstract  = {This paper describes the way in which personal relationships between main characters in 19th century Swedish prose fiction can be identified using information guided by named entities, provided by a entity recognition system adapted to the 19th century Swedish language characteristics. Interpersonal relation extraction is based on the context between two relevant, identified person entities. The extraction process of the relationships also utilize the content of on-line available lexical semantic resources (suitable vocabularies) and fairly standard context matching methods that provide a basic mechanism for identifying a wealth of interpersonal relations that hopefully can aid the reader of a 19th-century Swedish literary work to better understand its content and plot, and get a bird’s eye view on the landscape of the core story.},
}

@inProceedings{kokkinakis-2011-reducing-143877,
	title        = {Reducing Complexity in Parsing Scientific Medical Data, a Diabetes Case Study},
	abstract     = {The aim of this study is to assemble and deploy various NLP components and resources in order to parse scientific medical data and evaluate the degree in which these resources contribute to the overall parsing performance. With parsing we limit our efforts to the identification of unrestricted noun phrases with full phrase structure and investigate the effects of using layers of semantic annotations prior to parsing. Scientific medical texts exhibit complex linguistic structure but also regularities that can be captured by pre-processing the texts with specialized semantically-aware tools. Our results show evidence of improved performance while the complexity of parsing is reduced. Parsed scientific texts and inferred syntactic information can be leveraged to improve the accuracy of higher-level tasks such as information extraction and enhance the acquisition of semantic relations and events.},
	booktitle    = {Workshop: Biomedical Natural Language Processing in conjunction with Recent Advances in Natural Language Processing (RANLP). Hissar, Bulgaria.},
	author       = {Kokkinakis, Dimitrios},
	year         = {2011},
}

@book{malmgren-etal-2011-lexins-174145,
	title        = {Lexins svenska lexikon (4 uppl.)},
	editor       = {Malmgren, Sven-Göran and Berg, Daniel and Berg, Sture and Hult, Ann-Kristin and Holmer, Louise and Sjögreen, Christian and Sköldberg, Emma and Toporowska Gronostaj, Maria},
	year         = {2011},
	publisher    = {Internetpublikation},
	address      = {Stockholm},
}

@inproceedings{kokkinakis-2011-natural-149930,
	title        = {Natural language processing of clinical data with a focus on diffuse symptoms},
	abstract     = {The medical domain is well supported with a wealth of large, rich and
varied controlled vocabularies and terminological resources. This paper
investigates the extent by which the largest available medical nomenclature
for Swedish, the Systematized Nomenclature of Medicine Clinical Terms
(SNOMED CT), can handle a particularly challenging and difficult to
automatically acquire type of terminology, namely (clinical) phenotypes.
The aim of the study is to better understand phenotype contextualization in
order to improve and enhance our knowledge of communicative events in
various healthcare settings. Our approach can be seen as an exploratory one
in which we believe to yield useful insights into the nature of how findings,
symptoms and signs (i.e. clinical phenotypes in general) are expressed in
real data. This study is initiated in the context of the project "Interpretation
and understanding of functional symptoms in primary health care". The
main research goal of which is to study health care interactions with
patients suffering from Functional Somatic Syndromes (FSS). FSS are
characterized by particular constellations of medically unexplained, often
chronic symptoms, such as dizziness, fatigue, dyspepsia, muscle and joint
pain.
We use methods from the natural language processing field in order to
investigate how symptom mentions are expressed and how available
successful automated means are for capturing symptom descriptions both
on collected written (patient records) and transcribed material
(patient/nurse and patient/doctor encounters).
We manually evaluated the content of the resource on the collected data
and our results indicate that a large number of such phenotypes are
expressed using figurative language, or contextualized using a number of
variant expressions. SNOMED CT cannot easily accommodate for such
variation and vagueness expressed in real text data, unless we devise means
to handle such variation, e.g. by the use of near synonym dictionaries,
development and linking of consumer health vocabularies. The presented
research has several implications since accurate identification of
phenotypes can for instance increase the value of available data in decision
making and thus allow automatic systems to dynamically correct
inappropriate domain decisions.
We have evaluated the content of a large controlled vocabulary for Swedish
on symptom descriptions in clinical texts.},
	booktitle    = {Läkaresällskapets Riksstämman},
	author       = {Kokkinakis, Dimitrios},
	year         = {2011},
}

@inproceedings{kokkinakis-2011-medicinska-149931,
	title        = {Medicinska terminologier - officiella standarder och verklighet},
	abstract     = {Officiella medicinska termlistor hinner aldrig bli helt kompletta
eller uppdaterade i tid med de senaste upptäckterna inom det (bio)medicinska
fältet
Växande behov av koppling mellan fack- och allmänspråk för praktiska
(medicinskt orienterade) tillämpningar, t.ex. "din journal på nätet"-projektet
Applikationer med indata som innehåller både fackspråk och allmänspråk - brist
på täckande medicinska (elektroniska) ordböcker/termlistor med
integrerad utförlig språklig och medicinsk information för lekmän finns inte
transkriberade patient-läkarsamtal

Använda existerande medicinska terminologier i språkteknologisk forskning som
stöd för informationsutvinning - skapa strukturerade representationer av
texter (samförekomstanalys; faktaextraktion och syntes; relation- och
händelseextraktion;  t.ex. mellan sjukdom - behandling - utfall få att
kunna få ett bra underlag för att kunna förutsäga hur framtida behandlingar slår)
Använda terminologin som ett medium för att underlätta kommunikationen
mellan hälsotagare och hälsogivare t.ex. underlätta förståelse av
medicinska termer av allmänheten},
	booktitle    = {Terminologiworkshop i Karlstad},
	author       = {Kokkinakis, Dimitrios},
	year         = {2011},
}

@article{lindh-2011-peter-142484,
	title        = {{Peter French}},
	abstract     = {The Encyclopedia of Applied Linguistics is a ground-breaking resource, spanning the entire field. Truly international in scope, it brings together contributions from the world’s most respected scholars in applied linguistics.
Available online or as a 10-volume print set, this comprehensive print and electronic resource provides an overview of all the key areas in applied linguistics, from language learning and language policy, to qualitative methods in applied linguistics, and technology and language. Comprising over 3.5 million words, across 1,200 entries, it spans key developments and ideas in applied linguistics, historic and emerging areas of research, and includes 250 biographies of prominent figures who have helped shaped this diverse, and ever-growing field.},
	journal      = {The Encyclopedia of Applied Linguistics},
	author       = {Lindh, Jonas},
	year         = {2011},
	pages        = {2},
}

@inProceedings{vasljevs-etal-2011-meta-140690,
	title        = {{META-NORD}: {Baltic} and {Nordic} Branch of the {European} Open Linguistic Infrastructure},
	booktitle    = {Proceedings of the Nodalida 2011 Workshop on visibility and availability of LT resources},
	author       = {Vasiljevs, Andrejs and Pedersen, Bolette Sandford and De Smedt, Koenraad and Borin, Lars and Skadina, Inguna},
	year         = {2011},
}

@inProceedings{kokkinakis-2011-health-141311,
	title        = {Health Portals and Clinical Phenotypes - Recognition using {SNOMED CT}},
	abstract     = {The medical domain is particularly well endowed with various sources of terminology. Usually, such sources vary with respect to size, structure, depth and breadth of descriptive power, granularity and applicability. This paper investigates the extent by which the largest available medical nomenclature for Swedish can cope with a particularly challenging and difficult to automatically acquire type of terminology, namely (clinical) phenotypes. We evaluated the content of the resource on extracted reference symptom lists from several popular health portals. The results indicate that a large number of such phenotypes are expressed using figurative language, or contextualized using a number of variant expressions. SNOMED CT cannot easily accommodate for such variation and vagueness expressed in real text data, unless we devise means to handle such variation, e.g. by the use of near synonym dictionaries, development and linking of consumer health vocabularies. The presented research has several implications since accurate identification of phenotypes can for instance increase the value of available data in decision making and thus allow automatic systems to dynamically correct inappropriate domain decisions.},
	booktitle    = {9th Scandinavian Conference on Health Informatics},
	author       = {Kokkinakis, Dimitrios},
	year         = {2011},
}

@inProceedings{lindh-morrison-2011-humans-146100,
	title        = {Humans versus machine: forensic voice comparison on a small database of {Swedish} voice recordings},
	abstract     = {A procedure for comparing the performance of humans and machines on speaker recognition and on forensic voice comparison is proposed and demonstrated. The procedure is consistent with the new paradigm for forensic-comparison science (use of the likelihood-ratio framework and testing of the validity and reliability of the results). The use of the procedure is demonstrated using a small database of Swedish voice recordings.},
	booktitle    = {Proceedings of ICPhS2011},
	author       = {Lindh, Jonas and Morrison, Geoffrey Stewart},
	year         = {2011},
	volume       = {17},
	pages        = {4},
}

@inProceedings{skadina-etal-2011-meta-148648,
	title        = {{META-NORD}: Towards sharing of language resources in {Nordic} and {Baltic} countries},
	abstract     = {This paper introduces the META-NORD project
which develops Nordic and Baltic part of
the European open language resource infrastructure.
META-NORD works on assembling,
linking across languages, and making
widely available the basic language resources
used by developers, professionals and researchers
to build specific products and applications.
The goals of the project, overall
approach and specific action lines on wordnets,
terminology resources and treebanks are
described. Moreover, results achieved in first
five months of the project, i.e. language
whitepapers, metadata specification and IPR
management, are presented.},
	booktitle    = {Proceedings of the Workshop on Language Resources, Technology and Services in the Sharing Paradigm},
	author       = {Skadina, Inguna and Vasiljevs, Andrejs and Borin, Lars and De Smedt, Koenraad and Lindén, Krister and Rögnvaldsson, Eiríkur},
	year         = {2011},
	pages        = {107--114},
}

@inProceedings{rama-borin-2011-estimating-140688,
	title        = {Estimating Language Relationships from a Parallel Corpus. A Study of the {Europarl} Corpus},
	abstract     = {Since the 1950s, linguists have been using short lists (40–200 items) of basic vocabulary as the central component in a methodology which is claimed to make it possible to automatically calculate genetic
relationships among languages. In
the last few years these methods have experienced something of a revival, in that more languages are involved, different distance
measures are systematically compared and evaluated, and methods from computational biology are used for calculating language family trees. In this paper, we explore how this methodology
can be extended in another direction, by using larger word lists automatically extracted from a parallel corpus using word alignment software. We present preliminary
results from using the Europarl parallel corpus in this way for estimating the distances between some languages in the Indo-European language family.},
	booktitle    = {Proceedings of the 18th Nordic Conference of Computational Linguistics NODALIDA 2011},
	series       = {NEALT Proceedings Series},
	author       = {Rama, Taraka and Borin, Lars},
	year         = {2011},
	volume       = {11},
	pages        = {161--167},
}

@inProceedings{saxena-borin-2011-dialect-140689,
	title        = {Dialect Classification in the {Himalayas}: a Computational Approach},
	abstract     = {Linguistic fieldwork data – in the form of basic vocabulary lists – for nine closely related language varieties are compared using an automatic procedure with manual feedback, whose major advantage is its
complete consistency. The results of the vocabulary comparison turn out to be in accord with other linguistic features, making this methodology a promising addition to the toolbox of genetic lingusitics.},
	booktitle    = {Proceedings of the 18th Nordic Conference of Computational Linguistics NODALIDA 2011},
	series       = {NEALT Proceedings Series},
	author       = {Saxena, Anju and Borin, Lars},
	year         = {2011},
	volume       = {11},
	pages        = {307--310},
}

@inProceedings{kokkinakis-2011-evaluating-139977,
	title        = {Evaluating the Coverage of three Controlled Health Vocabularies with Focus on Findings, Signs \& Symptoms},
	abstract     = {The medical domain is blessed with a magnitude of terminological resources of various
characteristics, sizes, structure, depth and breadth of descriptive power, granularity etc.
In this domain a particularly interesting and difficult entity type are signs, symptoms and
findings which to a large extend are expressed in a periphrastic manner, sometimes by the use
of figurative or metaphorical language, or contextualized
using a wealth of vague variant expressions. We hypothesize therefore that no
major official terminology source alone can accommodate for the variation and complexity
present in real text data, such as electronic medical records, notes or health related documents.
In this paper we evaluate the content of the three largest medical control vocabularies available for Swedish on extracted reference symptom lists and initiate a discussion on how
we should proceed in order to accommodate for increased coverage on similar genres.},
	booktitle    = {Workshop on Creation, Harmonization and Application of Terminology Resources Co-located with NODALIDA 2011},
	author       = {Kokkinakis, Dimitrios},
	year         = {2011},
	pages        = {5},
}

@inProceedings{kokkinakis-2011-what-141312,
	title        = {What is the Coverage of {SNOMED CT}® on Scientific Medical Corpora?},
	abstract     = {This paper reports on the results of a large scale mapping of SNOMED CT on scientific medical corpora. The aim is to automatically access the validity, reliability and coverage of the Swedish SNOMED-CT translation, the largest, most extensive available resource of medical terminology. The method described here is based on the generation of predominantly safe harbor term variants which together with simple linguistic processing and the already available SNOMED term content are mapped to large corpora. The results show that term variations are very frequent and this may have implication on technological applications (such as indexing and information retrieval, decision support systems, text mining) using SNOMED CT. Naïve approaches to terminology mapping and indexing would critically affect the performance, success and results of such applications. SNOMED CT appears not well-suited for automatically capturing the enormous variety of concepts in scientific corpora (only 6,3\% of all SNOMED terms could be directly matched to the corpus) unless extensive variant forms are generated and fuzzy and partial matching techniques are applied with the risk of allowing the recognition of a large number of false positives and spurious results.},
	booktitle    = {XXIII International Conference of the European Federation for Medical Informatics},
	series       = {Studies in Health Technology and Informatics},
	author       = {Kokkinakis, Dimitrios},
	year         = {2011},
	volume       = {169},
}
Sidansvarig: sb-webb