% NOTE(review): pages = {5} below may be a page count rather than a page number -- confirm.
@inproceedings{Kokkinakis-Dimitrios2011-139977,
  author    = {Kokkinakis, Dimitrios},
  title     = {Evaluating the Coverage of three Controlled Health Vocabularies with Focus on Findings, Signs \& Symptoms},
  booktitle = {Workshop on Creation, Harmonization and Application of Terminology Resources Co-located with NODALIDA 2011},
  pages     = {5},
  year      = {2011},
  abstract  = {The medical domain is blessed with a magnitude of terminological resources of various characteristics, sizes, structure, depth and breadth of descriptive power, granularity etc. In this domain a particularly interesting and difficult entity type are signs, symptoms and findings which to a large extend are expressed in a periphrastic manner, sometimes by the use of figurative or metaphorical language, or contextualized using a wealth of vague variant expressions. We hypothesize therefore that no major official terminology source alone can accommodate for the variation and complexity present in real text data, such as electronic medical records, notes or health related documents. In this paper we evaluate the content of the three largest medical control vocabularies available for Swedish on extracted reference symptom lists and initiate a discussion on how we should proceed in order to accommodate for increased coverage on similar genres.},
}

@inproceedings{Kokkinakis-Dimitrios2011-141311,
  author    = {Kokkinakis, Dimitrios},
  title     = {Health Portals and Clinical Phenotypes - Recognition using {SNOMED} {CT}},
  booktitle = {9th Scandinavian Conference on Health Informatics},
  year      = {2011},
  abstract  = {The medical domain is particularly well endowed with various sources of terminology. Usually, such sources vary with respect to size, structure, depth and breadth of descriptive power, granularity and applicability. This paper investigates the extent by which the largest available medical nomenclature for Swedish can cope with a particularly challenging and difficult to automatically acquire type of terminology, namely (clinical) phenotypes. We evaluated the content of the resource on extracted reference symptom lists from several popular health portals. The results indicate that a large number of such phenotypes are expressed using figurative language, or contextualized using a number of variant expressions. SNOMED CT cannot easily accommodate for such variation and vagueness expressed in real text data, unless we devise means to handle such variation, e.g. by the use of near synonym dictionaries, development and linking of consumer health vocabularies. The presented research has several implications since accurate identification of phenotypes can for instance increase the value of available data in decision making and thus allow automatic systems to dynamically correct inappropriate domain decisions.},
}

@inproceedings{Kokkinakis-Dimitrios2011-141312,
  author    = {Kokkinakis, Dimitrios},
  title     = {What is the Coverage of {SNOMED} {CT}® on Scientific Medical Corpora?},
  booktitle = {XXIII International Conference of the European Federation for Medical Informatics},
  series    = {Studies in Health Technology and Informatics},
  volume    = {169},
  year      = {2011},
  abstract  = {This paper reports on the results of a large scale mapping of SNOMED CT on scientific medical corpora. The aim is to automatically access the validity, reliability and coverage of the Swedish SNOMED-CT translation, the largest, most extensive available resource of medical terminology. The method described here is based on the generation of predominantly safe harbor term variants which together with simple linguistic processing and the already available SNOMED term content are mapped to large corpora. The results show that term variations are very frequent and this may have implication on technological applications (such as indexing and information retrieval, decision support systems, text mining) using SNOMED CT. Naïve approaches to terminology mapping and indexing would critically affect the performance, success and results of such applications. SNOMED CT appears not well-suited for automatically capturing the enormous variety of concepts in scientific corpora (only 6,3\% of all SNOMED terms could be directly matched to the corpus) unless extensive variant forms are generated and fuzzy and partial matching techniques are applied with the risk of allowing the recognition of a large number of false positives and spurious results.},
}

@inproceedings{Kokkinakis-Dimitrios2011-143877,
  author    = {Kokkinakis, Dimitrios},
  title     = {Reducing Complexity in Parsing Scientific Medical Data, a Diabetes Case Study},
  booktitle = {Workshop: Biomedical Natural Language Processing in conjunction with Recent Advances in Natural Language Processing (RANLP). Hissar, Bulgaria},
  year      = {2011},
  abstract  = {The aim of this study is to assemble and deploy various NLP components and resources in order to parse scientific medical data and evaluate the degree in which these resources contribute to the overall parsing performance. With parsing we limit our efforts to the identification of unrestricted noun phrases with full phrase structure and investigate the effects of using layers of semantic annotations prior to parsing. Scientific medical texts exhibit complex linguistic structure but also regularities that can be captured by pre-processing the texts with specialized semantically-aware tools. Our results show evidence of improved performance while the complexity of parsing is reduced. Parsed scientific texts and inferred syntactic information can be leveraged to improve the accuracy of higher-level tasks such as information extraction and enhance the acquisition of semantic relations and events.},
}

@inproceedings{Kokkinakis-Dimitrios2011-143875,
  author    = {Kokkinakis, Dimitrios and Malm, Mats},
  title     = {Character Profiling in 19th Century Fiction},
  booktitle = {Workshop: Language Technologies for Digital Humanities and Cultural Heritage in conjunction with the Recent Advances in Natural Language Processing (RANLP). Hissar, Bulgaria},
  year      = {2011},
  abstract  = {This paper describes the way in which personal relationships between main characters in 19th century Swedish prose fiction can be identified using information guided by named entities, provided by a entity recognition system adapted to the 19th century Swedish language characteristics. Interpersonal relation extraction is based on the context between two relevant, identified person entities. The extraction process of the relationships also utilize the content of on-line available lexical semantic resources (suitable vocabularies) and fairly standard context matching methods that provide a basic mechanism for identifying a wealth of interpersonal relations that hopefully can aid the reader of a 19th-century Swedish literary work to better understand its content and plot, and get a bird’s eye view on the landscape of the core story.},
}

% NOTE(review): the article entry type requires a journal field, which is absent below -- confirm the venue or the entry type.
@article{Kokkinakis-Dimitrios2011-149930,
  author   = {Kokkinakis, Dimitrios},
  title    = {Natural language processing of clinical data with a focus on diffuse symptoms},
  year     = {2011},
  abstract = {The medical domain is well supported with a wealth of large, rich and varied controlled vocabularies and terminological resources. This paper investigates the extent by which the largest available medical nomenclature for Swedish, the Systematized Nomenclature of Medicine Clinical Terms (SNOMED CT), can handle a particularly challenging and difficult to automatically acquire type of terminology, namely (clinical) phenotypes. The aim of the study is to better understand phenotype contextualization in order to improve and enhance our knowledge of communicative events in various healthcare settings. Our approach can be seen as an exploratory one in which we believe to yield useful insights into the nature of how findings, symptoms and signs (i.e. clinical phenotypes in general) are expressed in real data. This study is initiated in the context of the project "Interpretation and understanding of functional symptoms in primary health care". The main research goal of which is to study health care interactions with patients suffering from Functional Somatic Syndromes (FSS). FSS are characterized by particular constellations of medically unexplained, often chronic symptoms, such as dizziness, fatigue, dyspepsia, muscle and joint pain. We use methods from the natural language processing field in order to investigate how symptom mentions are expressed and how available successful automated means are for capturing symptom descriptions both on collected written (patient records) and transcribed material (patient/nurse and patient/doctor encounters). We manually evaluated the content of the resource on the collected data and our results indicate that a large number of such phenotypes are expressed using figurative language, or contextualized using a number of variant expressions. SNOMED CT cannot easily accommodate for such variation and vagueness expressed in real text data, unless we devise means to handle such variation, e.g. by the use of near synonym dictionaries, development and linking of consumer health vocabularies. The presented research has several implications since accurate identification of phenotypes can for instance increase the value of available data in decision making and thus allow automatic systems to dynamically correct inappropriate domain decisions. We have evaluated the content of a large controlled vocabulary for Swedish on symptom descriptions in clinical texts.},
}

% NOTE(review): the article entry type requires a journal field, which is absent below -- confirm the venue or the entry type.
@article{Kokkinakis-Dimitrios2011-149931,
  author   = {Kokkinakis, Dimitrios},
  title    = {Medicinska terminologier - officiella standarder och verklighet},
  year     = {2011},
  abstract = {Officiella medicinska termlistor hinner aldrig bli helt kompletta eller uppdaterade i tid med de senaste upptäckterna inom det (bio)medicinska fältet Växande behov av koppling mellan fack- och allmänspråk för praktiska (medicinskt orienterade) tillämpningar, t.ex. "din journal på nätet"-projektet Applikationer med indata som innehåller både fackspråk och allmänspråk - brist på täckande medicinska (elektroniska) ordböcker/termlistor med integrerad utförlig språklig och medicinsk information för lekmän finns inte transkriberade patient-läkarsamtal Använda existerande medicinska terminologier i språkteknologisk forskning som stöd för informationsutvinning - skapa strukturerade representationer av texter (samförekomstanalys; faktaextraktion och syntes; relation- och händelseextraktion; t.ex. mellan sjukdom - behandling - utfall få att kunna få ett bra underlag för att kunna förutsäga hur framtida behandlingar slår) Använda terminologin som ett medium för att underlätta kommunikationen mellan hälsotagare och hälsogivare t.ex. underlätta förståelse av medicinska termer av allmänheten},
}

@inproceedings{Smith-Frida2011-152723,
  author    = {Smith, Frida and Carlsson, Eva and Friberg, Febe and Kokkinakis, Dimitrios and Forsberg, Markus and Öhrn, Matilda and Öhlén, Joakim},
  title     = {Developing a toolkit for written information materials for patients with colorectal cancer undergoing elective surgery},
  booktitle = {Svenska Läkaresällskapets Riksstämman},
  year      = {2011},
  abstract  = {This study examines language complexity, readability and suitability, of written health information materials given to patients undergoing colorectal cancer (CRC) surgery. The overall aim is to investigate whether the implementation of adapted, person-centred information and communication for patients with CRC undergoing elective surgery, can enhance the patients’ self-care beliefs and well-being during recovery in the phase following diagnosis and initial treatment. Several explorative, qualitative studies are planned and will function both as a basis for the proposed interventions and provide explanations for the actual processes leading to the desired outcomes. Patients’ knowledge enablement will be reached by several interrelated intervention strategies and specific activities. One of these strategies deals with means to facilitate patients’ information seeking patterns and the goal is to provide patients with written information materials according to preferences for complex and detailed or legible texts. Thus, the interventions planed aim to enhance a movement from receiving information and instructions to participating in knowing. Written and printed patient information material from 28 Swedish clinics for patients diagnosed with CRC undergoing elective surgery were selected for analysis by means of standard metrics and more elaborate language technology techniques. Various text parameters such as lexical variation, frequency bands and the use of terminology were examined. The material was also analysed using a Suitability Assessment Instrument in order to examine content, literacy demand, graphic illustrations, layout and typography, learning stimulation and finally cultural appropriateness. In addition, five focusgroups were conducted where patients were asked to give their experiences of using written information. Results from the language technology analysis showed a variety in materials, where it could be divided in to easy, medium and difficult to read and comprehend. Patients in focusgroups told they would like written materials to be levelled in order to gain information stepwise, but also stressed the importance of information given both orally and in writing, and that they must correspond. Using the SAM-instrument was a good complement for deeper understanding, and taking all three analyses in account, we aim to design a balanced toolkit for how to best design written information materials where a person tailored approach can be offered.},
}