% Kokkinakis (2011): SNOMED CT coverage of symptom descriptions in Swedish clinical text.
% NOTE(review): the `journal` value looks like the name of a meeting (Riksstämman) rather
% than a periodical -- confirm whether @inproceedings with `booktitle` would be more accurate.
@article{kokkinakis-2011-natural-149930,
  author   = {Kokkinakis, Dimitrios},
  title    = {Natural language processing of clinical data with a focus on diffuse symptoms},
  journal  = {Läkaresällskapets Riksstämman},
  year     = {2011},
  abstract = {The medical domain is well supported with a wealth of large, rich and varied controlled vocabularies and terminological resources. This paper investigates the extent by which the largest available medical nomenclature for Swedish, the Systematized Nomenclature of Medicine Clinical Terms (SNOMED CT), can handle a particularly challenging and difficult to automatically acquire type of terminology, namely (clinical) phenotypes. The aim of the study is to better understand phenotype contextualization in order to improve and enhance our knowledge of communicative events in various healthcare settings. Our approach can be seen as an exploratory one in which we believe to yield useful insights into the nature of how findings, symptoms and signs (i.e. clinical phenotypes in general) are expressed in real data. This study is initiated in the context of the project "Interpretation and understanding of functional symptoms in primary health care". The main research goal of which is to study health care interactions with patients suffering from Functional Somatic Syndromes (FSS). FSS are characterized by particular constellations of medically unexplained, often chronic symptoms, such as dizziness, fatigue, dyspepsia, muscle and joint pain. We use methods from the natural language processing field in order to investigate how symptom mentions are expressed and how available successful automated means are for capturing symptom descriptions both on collected written (patient records) and transcribed material (patient/nurse and patient/doctor encounters). We manually evaluated the content of the resource on the collected data and our results indicate that a large number of such phenotypes are expressed using figurative language, or contextualized using a number of variant expressions. 
SNOMED CT cannot easily accommodate for such variation and vagueness expressed in real text data, unless we devise means to handle such variation, e.g. by the use of near synonym dictionaries, development and linking of consumer health vocabularies. The presented research has several implications since accurate identification of phenotypes can for instance increase the value of available data in decision making and thus allow automatic systems to dynamically correct inappropriate domain decisions. We have evaluated the content of a large controlled vocabulary for Swedish on symptom descriptions in clinical texts.},
}