@comment{
  Conference papers and one article, all dated 2012.
  NOTE(review): pages values of 10, 8 and 2 are single numbers, not ranges;
  they may be page counts rather than page references -- TODO confirm.
  NOTE(review): the article entry Smith-Frida2012-170897 has no journal field
  in the source data; article entries require one -- TODO supply.
}

@inproceedings{Oelke-Daniela2012-155493,
  author    = {Oelke, Daniela and Kokkinakis, Dimitrios and Malm, Mats},
  title     = {Advanced Visual Analytics Methods for Literature Analysis},
  booktitle = {Language Technology for Cultural Heritage, Social Sciences, and Humanities (LaTeCH). An EACL 2012 workshop. Avignon, France},
  year      = {2012},
  pages     = {10},
  note      = {Accepted},
  abstract  = {The volumes of digitized literary collections in various languages increase at a rapid pace, which results also in a growing demand for computational support to analyze such linguistic data. This paper combines robust text analysis with advanced visual analytics and bring a new set of tools to literature analysis. Visual analytics techniques can offer new and unexpected insights and knowledge to the literary scholar. We analyzed a small subset of a large literary collection, the Swedish Literature Bank, by focusing on the extraction of persons’ names, their gender and their normalized, linked form, including mentions of theistic beings (e.g., Gods’ names and mythological figures), and examined their appearance over the course of the novel. A case study based on 13 novels, from the aforementioned collection, shows a number of interesting applications of visual analytics methods to literature problems, where named entities can play a prominent role, demonstrating the advantage of visual literature analysis. Our work is inspired by the notion of distant reading or macroanalysis for the analyses of large literature collections.},
}

@inproceedings{Oelke-Daniela2012-155495,
  author    = {Oelke, Daniela and Eklund, Ann-Marie and Marinov, Svetoslav and Kokkinakis, Dimitrios},
  title     = {Visual Analytics and the Language of Web Query Logs -- A Terminology Perspective},
  booktitle = {The 15th EURALEX International Congress (European Association of Lexicography). Oslo, Norway},
  year      = {2012},
  pages     = {8},
  abstract  = {This paper explores means to integrate natural language processing methods for terminology and entity identification in medical web session logs with visual analytics techniques. The aim of the study is to examine whether the vocabulary used in queries posted to a Swedish regional health web site can be assessed in a way that will enable a terminologist or medical data analysts to instantly identify new term candidates and their relations based on significant co-occurrence patterns. We provide an example application in order to illustrate how the visualizations of co-occurrence relationships between medical and general entities occurring in such logs can be visualized, accessed and explored. To enable a visual exploration of the generated co-occurrence graphs, we employ a general purpose social network analysis tool, Visone (http://visone.info), that permits to visualize and analyze various types of graph structures. Our examples show that visual analytics based on co-occurrence analysis provides insights into the use of layman language in relation to established (professional) terminologies, which may help terminologists decide which terms to include in future terminologies. Increased understanding of the used querying language is also of interest in the context of public health web sites. The query results should reflect the intentions of the information seekers, who may express themselves in layman language that differs from the one used on the available web sites provided by medical professionals.},
}

@inproceedings{Kokkinakis-Dimitrios2012-155530,
  author    = {Kokkinakis, Dimitrios and Lidén, Eva and Svensson, Staffan and Björk Brämberg, Elisabeth and Määttä, Sylvia},
  title     = {Contextualisation of functional symptoms in primary health care},
  booktitle = {The 5th GENEVA Conference on Person-Centered Medicine. Geneva, Switzerland},
  year      = {2012},
  abstract  = {Background: a number of patients consulting primary health care have physical symptoms that may be labeled “medically unexplained”, i.e. absence of a demonstrable organic etiology. Common functional somatic symptoms (FSS) are irritable bowel, tension headache and chronic fatigue. FSS-patients are generally frustrated with the inability of health care to alleviate their illness. Health care staff often also feel frustration. The communication between patient and care giver is the key for coming to terms with the problem. Objective: to investigate how complex, vague and long-standing symptoms with no identified organic cause are put into context, interpreted and acted upon in primary health-care interactions. Two types of interventions are envisaged (i) methods for early identification of patients at risk of entering a vicious circle of functional symptoms and (ii) methods for re-interpreting symptoms in alternative and more purposeful ways. Methods: the project studies interactions between patients and nurses giving advice over telephone, consultations between patients and physicians, interviews and study patients' medical case notes. Eligible patients (18-65 y.o.) contact their primary health care centre by telephone, have had at least eight physical consultations with nurses or physicians in the last 12 months and if a majority of the symptoms within this time span had no clear organic or psychiatric cause. The project contains a number of subprojects, according to the type of data collected. Several methods of analysis will be used, mainly critical discourse analysis, phenomenologic-hermeneutic and computation linguistic analyses. (Expected) Results: using the collected data, we describe characteristics of the communication that takes place in these settings and the way symptoms and diseases are represented. This will facilitate the development of future interventions aimed at decreasing the morbidity due to FSS and give further insights into the problem.},
}

@inproceedings{Kokkinakis-Dimitrios2012-155537,
  author    = {Kokkinakis, Dimitrios and Oelke, Daniela},
  title     = {Men, Women and Gods: Distant Reading in Literary Collections -- Combining Visual Analytics with Language Technology},
  booktitle = {Proceedings of the Advances in Visual Methods for Linguistics (AVML)},
  year      = {2012},
  note      = {Accepted},
  abstract  = {The volumes of digitized literary collections in various languages increase at a rapid pace and so increases the need to computationally support the analysis of such data. Literature can be studied in a number of different ways and from many different perspectives and text analysis make up a central component of literature studies. If such analysis can be integrated with advanced visual methods and fed back to the daily work of the literature researcher, then it is likely to reveal the presence of useful and nuanced insights into the complex daily lives, ideas and beliefs of the main characters found in many of the literary works. In this paper we describe the combination of robust text analysis with visual analytics and bring a new set of tools to literary analysis. As a show case, we analyzed a small subset (13 novels of a single author) taken from a large literary collection, the Swedish Literature Bank. The analysis is based upon two levels of inquiry, namely by focusing on mentions of theistic beings (e.g. Gods' names) as well as mentions of persons' names, including their gender and their normalized, linked variant forms, and examining their appearance in sentences, paragraphs and chapters. The case study shows several successful applications of visual analytics methods to various literature problems and demonstrates the advantages of the implementation of visual literature fingerprinting. Our work is inspired by the notion of distant reading or macronalysis for the analyses of literature collections. We start by recognizing all characters in the novels using a mature language technology (named entity recognition) which can be turned into a tool in aid of text analysis in this field. We apply context cues, lists of animacy and gender markers and inspired by the document centered approach and the labelled consistency principle which is a form of on-line learning from documents under processing which looks at unambiguous usages of words or names for assigning annotations in ambiguous words or names. For instance, if in an unambiguous context where there is a strong gender indicator, such as 'Mrs Alexander' the name 'Alexander' is assigned a feminine gender, then subsequent mentions of the same name in the same discourse will be assigned the feminine gender as well unless there is a conflict with another person with the same name. We argue, that the integration of text analysis such as the one briefly outlined and visualization techniques, such as higher resolution pixel-based fingerprinting, could be put to effective use also in literature studies. We also see an opportunity to devise new ways of exploring the large volumes of literary texts being made available through national cultural heritage digitization projects, for instance by exploring the possibility to show several literary texts (novels) at once. We will illustrate some of the applied techniques using several examples from our case study, such as summary plots based on all the characters in these novels as well as fingerprints based on the distribution of characters across the novels.},
}

@inproceedings{Kokkinakis-Dimitrios2012-155893,
  author    = {Kokkinakis, Dimitrios},
  title     = {The {Journal} of the {Swedish} {Medical} {Association} -- a Corpus Resource for Biomedical Text Mining in {Swedish}},
  booktitle = {The Third Workshop on Building and Evaluating Resources for Biomedical Text Mining (BioTxtM), an LREC Workshop. Turkey},
  year      = {2012},
  note      = {Accepted},
  abstract  = {Biomedical text mining applications are largely dependent on high quality knowledge resources. Traditionally, these include lexical databases, terminologies, nomenclatures and ontologies and, during the last decade, also corpora of various sizes, variety and diversity. Some of these corpora are annotated with an expanding range of information types and metadata while others become available with a minimal set of annotations. At the same time, it is of great importance that biomedical corpora for lesser-spoken languages also get developed in order to support and facilitate the implementation of practical applications for such languages and to stimulate the development of language technology research and innovation infrastructures in the domain. This paper provides a detailed description of a Swedish biomedical corpus based on the electronic editions of the Journal of the Swedish Medical Association "Läkartidningen" of the years 1996-2010. The corpus consists of a variety of documents that can be related to different medical domains, developed as a response to the increasing needs for large and reliable medical information for Swedish biomedical NLP. The corpus has been structurally annotated with a minimal set of meta information and automatically indexed with the largest and systematically organised computer processable collection of medical terminology, the Swedish SNOMED CT (Systematized Nomenclature of Medicine -- Clinical Terms). This way topic-focused subcorpora, e.g. with diabetes-related content, can be easily developed.},
}

@inproceedings{Johansson-Richard2012-156400,
  author    = {Johansson, Richard and Friberg Heppin, Karin and Kokkinakis, Dimitrios},
  title     = {Semantic Role Labeling with the {Swedish} {FrameNet}},
  booktitle = {Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC'12); Istanbul, Turkey; May 23--25},
  year      = {2012},
  pages     = {3697--3700},
  isbn      = {978-2-9517408-7-7},
  abstract  = {We present the first results on semantic role labeling using the Swedish FrameNet, which is a lexical resource currently in development. Several aspects of the task are investigated, including the selection of machine learning features, the effect of choice of syntactic parser, and the ability of the system to generalize to new frames and new genres. In addition, we evaluate two methods to make the role label classifier more robust: cross-frame generalization and cluster-based features. Although the small amount of training data limits the performance achievable at the moment, we reach promising results. In particular, the classifier that extracts the boundaries of arguments works well for new frames, which suggests that it already at this stage can be useful in a semi-automatic setting.},
}

@inproceedings{Kokkinakis-Dimitrios2012-164587,
  author    = {Kokkinakis, Dimitrios and Forsberg, Markus and Johansson Kokkinakis, Sofie and Smith, Frida and Öhlén, Joakim},
  title     = {Literacy Demands and Information to Cancer Patients},
  booktitle = {Proceedings of the 15th International Conference on Text, Speech and Dialogue},
  year      = {2012},
  isbn      = {978-3-642-32789-6},
  abstract  = {This study examines language complexity of written health information materials for patients undergoing colorectal cancer surgery. Written and printed patient information from 28 Swedish clinics are automatically analyzed by means of language technology. The analysis reveals different problematic issues that might have impact on readability. The study is a first step, and part of a larger project about patients’ health information seeking behavior in relation to written information material. Our study aims to provide support for producing more individualized, person centered information materials according to preferences for complex and detailed or legible texts and thus enhance a movement from receiving information and instructions to participating in knowing. In the near future the study will continue by integrating focus groups with patients that may provide valuable feedback and enhance our knowledge about patients’ use and preferences of different information material.},
}

@inproceedings{Kokkinakis-Dimitrios2012-164788,
  author    = {Kokkinakis, Dimitrios},
  title     = {Initial Experiments of Medication Event Extraction Using Frame Semantics},
  booktitle = {Scandinavian Conference on Health Informatics (SHI)},
  year      = {2012},
  series    = {Linköping Electronic Conference Proceedings},
  pages     = {41--47},
  isbn      = {978-91-7519-758-6},
  abstract  = {Semantic annotation of text corpora for mining complex relations and events has gained a considerable growing attention in the medical domain. The goal of this paper is to present a snapshot of ongoing work that aims to develop and apply an appropriate infrastructure for automatic event labelling and extraction in the Swedish medical domain. Annotated text samples, appropriate lexical resources (e.g. term lists and the Swedish Frame-Net++) and hybrid techniques are currently developed in order to alleviate some of the difficulties of the task. As a case study this paper presents a pilot approach based on the application of the theory of frame semantics to automatically identify and extract detailed medication information from medical texts. Medication information is often written in narrative form (e.g. in clinical records) and is therefore difficult to be acquired and used in computerized systems (e.g. decision support). Currently our approach uses a combination of generic entity and terminology taggers, specifically designed medical frames and various frame-related patterns. Future work intends to improve and enhance current results by using more annotated samples, more medically-relevant frames and combination of supervised learning techniques with the regular expression patterns.},
}

@inproceedings{Eklund-Ann-Marie2012-165309,
  author    = {Eklund, Ann-Marie and Kokkinakis, Dimitrios},
  title     = {Drug interests revealed by a public health portal},
  booktitle = {Proceedings of the SLTC-Workshop: Exploratory Query-log Analysis. Lund, Sweden},
  year      = {2012},
  pages     = {2},
  abstract  = {Online health information seeking has become an important part of people's everyday lives. However, studies have shown that many of those have problems forming effective queries. In order to develop better support and tools for assisting people in health-related query formation we have to gain a deeper understanding into their information seeking behaviour in relation to key issues, such as medication and drugs. The present study attempts to understand the semantics of the users' information needs with respect to medication-related information. Search log queries from the Swedish 1177.se health portal were automatically annotated and categorized according to relevant background knowledge sources. Understanding the semantics of information needs can enable optimization and tailoring of (official) health related information presented to the online consumer, provide better terminology support and thematic coding of the queries and in the long run better models of consumers’ information needs.},
}

@inproceedings{Smith-Frida2012-170895,
  author    = {Smith, Frida and Öhlén, Joakim and Carlsson, Eva and Forsberg, Markus and Kokkinakis, Dimitrios and Friberg, Febe},
  title     = {Hur kan vi förbättra skriftligt informations- och utbildningsmaterial för patienter som opereras elektivt för kolorektal cancer?},
  booktitle = {Nationella konferensen i Cancervård, 24--25 maj 2012, Stockholm},
  year      = {2012},
  abstract  = {Kolorektal cancer (KRC) är den tredje största cancerdiagnosen i Sverige med drygt 5500 drabbade årligen. Primär behandling är kirurgi kompletterad av pre- och postoperativ onkologisk behandling. Standardiserade koncept för accelererat vårdförlopp med kortare vårdtider lägger mycket fokus på fysisk rehabilitering, men mindre på den psykiska påfrestning det innebär att bli opererad för en cancerdiagnos. Patienter förväntas ta stort ansvar för sin rehabilitering, både på sjukhuset och hemma. För att vara förberedd behövs både skriftlig och muntlig information. Syftet med studien var att kartlägga och karaktärisera det skriftliga informations- och utbildningsmaterial (IOU) som används till patienter som opereras elektivt för KRC. Vidare var syftet att beskriva patienters uppfattning om struktur och innehåll på IOU. IOU från 28 kliniker som opererar patienter med KRC samlades in (totalt 220 st). För att kunna ge ett mått på texternas svårighetsgrad gjordes språkteknologisk analys på samtliga IOU, där bl.a. ordlängd, meningsbyggnad och jämförelse med annan typ av litteratur mättes På 117 st gjordes en suitabilityanalys med instrumentet SAM+CAM där domän som innehåll, läsbarhet, bilder, layout samt stimulans och motivation för lärande bedömdes. Fem fokusgrupper med patienter genomfördes där patienterna uppmanades att berätta om vad de tycker utmärker ett bra respektive dåligt IOU, vad de saknar i innehåll och när och på vilket sätt de vill ha materialet utlämnat. Resultatet av språkteknologiska- och suitabilityanalysen visar att de flesta IOU bedömdes som ”adequate”, men spridningen var stor. Patienterna hade önskemål om mer nivåuppdelat/nivåriktat material, där man själv kan välja hur mycket information man vill ha vid ett visst tillfälle. Flera ämnen saknades, eller var för otydligt beskrivna för att patienterna skulle känna sig trygga vid hemgång. Resultatet av de tre analysmetoderna bör kunna användas för att utveckla en ”verktygslåda” för att i framtiden kunna utforma bättre riktat IOU för patientgruppen.},
}

@article{Smith-Frida2012-170897,
  author   = {Smith, Frida and Öhlén, Joakim and Carlsson, Eva and Friberg, Febe and Forsberg, Markus and Kokkinakis, Dimitrios},
  title    = {Ny studie visar hur information till patienter med kolorektal cancer kan förbättras},
  year     = {2012},
  number   = {5},
  pages    = {18--21},
  abstract = {Skriftligt informationsmaterial är ofta skrivet på för hög nivå och ställer höga krav på den tänkta läsaren (patienten). Förutom läsbarhet finns det fler faktorer att utvärdera för att se om materialet är lämpligt. Innehåll, struktur, layout och typsnitt, illustrationer och lärande och motivation är sådant som bör tas hänsyn till. Ett lämpligare, bättre anpassat material kan hjälpa personer med sjukdom att ställa bättre frågor när de har samtal med vårdpersonal och det kan göra personen mindre osäker och orolig för det okända som väntar. En ny studie som ingår i forskningsprojektet PINCORE (personcentred information and communication in colorectal cancer care) syftar till att förbättra information och kommunikation vid kolorektal cancer.},
}