% Bibliography database; every entry in this section is dated 2009.
% Encoding: UTF-8 (non-ASCII characters are written directly, not as LaTeX escapes).
% Conventions used below: lowercase entry types and field names, one field per
% line, capitals that must survive sentence-casing are brace-protected in titles,
% and a literal ampersand is written as \& so the value is safe to typeset.

% Edited dictionary. The former non-standard type "edited_book" is replaced by
% "book" so that standard styles print the editor list. The editorial role that
% was embedded in the first editor's name is kept in the note field instead.
@book{allen-etal-2009-svensk-99825,
  title  = {Svensk ordbok utgiven av {Svenska Akademien}. 1-2},
  editor = {Allén, Sture and Berg, Daniel and Berg, Sture and Gellerstam, Martin and Holmer, Louise and Hult, Ann-Kristin and Lindstrand, Susanne and Lövfors, Sven and Malmgren, Sven-Göran and Sjögreen, Christian and Sköldberg, Emma and Tegner, Lennart and Toporowska Gronostaj, Maria},
  year   = {2009},
  isbn   = {978-91-1-302267-3},
  note   = {Sture Allén: vetenskaplig rådgivare},
}

@article{malmgren-toporowskagronostaj-2009-valensbeskrivning-109243,
  title   = {Valensbeskrivning i svenska ordböcker — och några andra},
  author  = {Malmgren, Sven-Göran and Toporowska Gronostaj, Maria},
  journal = {LexicoNordica},
  year    = {2009},
  volume  = {2009},
  number  = {16},
  pages   = {181--196},
}

@article{skoldberg-toporowskagronostaj-2009-charmknutte-101453,
  title   = {Charmknutte, viktigpetter och kladdmaja. {Substantiverade} förnamn i sammansättningar ur ett lexikografiskt perspektiv},
  author  = {Sköldberg, Emma and Toporowska Gronostaj, Maria},
  journal = {Studia anthroponymica Scandinavica},
  year    = {2009},
  volume  = {27},
  pages   = {73--96},
}

@inproceedings{lindh-2009-pick-123922,
  title     = {Pick a Voice among Wolves, Goats and Lambs},
  author    = {Lindh, Jonas},
  booktitle = {Proceedings of the 18th Annual Conference of the International Association for Forensic Phonetics and Acoustics, Cambridge, UK},
  year      = {2009},
  number    = {18},
}

@inproceedings{lindh-2009-first-99189,
  title     = {A first step towards a text-independent speaker verification {Praat} plug-in using {Mistral/Alize} tools},
  author    = {Lindh, Jonas},
  booktitle = {The XXIInd Swedish Phonetics Conference, Department of Linguistics, Stockholm University, 2009},
  year      = {2009},
  isbn      = {978-91-633-4892-1},
  pages     = {194--197},
}

@inproceedings{lindh-2009-perception-99180,
  title     = {Perception of voice similarity and the results of a voice line-up},
  author    = {Lindh, Jonas},
  booktitle = {The XXIInd Swedish Phonetics Conference, Department of Linguistics, Stockholm University, 2009},
  year      = {2009},
  isbn      = {978-91-633-4892-1},
  pages     = {186--189},
}

% Conference paper: the venue was previously stored in the journal field of an
% article entry; it now lives in booktitle of an inproceedings entry.
@inproceedings{lindh-eriksson-2009-swedat-118616,
  title     = {The {SweDat} Project and {Swedia} Database for Phonetic and Acoustic Research},
  author    = {Lindh, Jonas and Eriksson, Anders},
  abstract  = {The project described here may be seen as a continuation of an earlier project, SweDia 2000, aimed at transforming the database collected in that project to a full-fledged e-science database. The database consists of recordings of Swedish dialects from 107 locations in Sweden and Swedish speaking parts of Finland. The goal of the present project is to make the material searchable in a flexible and simple way to make it available to a much wider sector of the research community than is the case at present. The database will be accessible over the Internet via user-friendly interfaces specifically designed for this type of data. Other more specialized research interfaces will also be designed to facilitate phonetic acoustic research and orientation of the database.},
  booktitle = {Proceedings of the 2009 Fifth IEEE International Conference on e-Science (E-SCIENCE '09)},
  year      = {2009},
  pages     = {45--49},
}

@inproceedings{ljunglof-2009-dialogue-95890,
  title     = {Dialogue Management as Interactive Tree Building},
  author    = {Ljunglöf, Peter},
  abstract  = {We introduce a new dialogue model and a formalism for limited-domain dialogue systems, which works by interactively building dialogue trees. The model borrows its fundamental ideas from type theoretical grammars and Dynamic Syntax. The resulting dialogue theory is a simple and light-weight formalism, which is still capable of advanced dialogue behaviour.},
  booktitle = {DiaHolmia'09, 13th Workshop on the Semantics and Pragmatics of Dialogue},
  year      = {2009},
}

@inproceedings{ljunglof-olsson-2009-trik-99885,
  title     = {{TRIK}: en talande och ritande robot för barn med kommunikativa funktionshinder},
  author    = {Ljunglöf, Peter and Olsson, Maria},
  booktitle = {Presentation vid 8:e Västsvenska Kommunikationskarnevalen, 1–2 juni 2009},
  year      = {2009},
}

@inproceedings{borin-etal-2009-thinking-110343,
  title     = {Thinking Green: Toward {Swedish} {FrameNet++}},
  author    = {Borin, Lars and Dannélls, Dana and Forsberg, Markus and Toporowska Gronostaj, Maria and Kokkinakis, Dimitrios},
  abstract  = {Access to multi-layered lexical, grammatical and semantic information representing text content is a prerequisite for efficient automatic understanding and generation of natural language. A FrameNet is considered a valuable resource for both linguistics and language technology research that may contribute to the achievement of these goals. Currently, FrameNet-like resources exist for a few languages, including some domain-specific and multilingual initiatives (Dolbey et al., 2006; Boas, 2009; Uematsu et al., 2009; Venturi et al., 2009), but are unavailable for most languages, including Swedish, although there have been some pilot studies exploring the semi-automatic acquisition of Swedish frames (Johansson \& Nugues, 2006; Borin et al., 2007). At the University of Gothenburg, we are now embarking on a project to build a Swedish FrameNet-like resource. A novel feature of this project is that the Swedish FrameNet will be an integral part of a larger many-faceted lexical resource. Hence the name Swedish FrameNet++ (SweFN++).},
  booktitle = {FrameNet Masterclass and Workshop},
  year      = {2009},
}

% Workshop paper: venue moved from the journal field to booktitle.
@inproceedings{kokkinakis-gerdin-2009-issues-105140,
  title     = {Issues on Quality Assessment of {SNOMED CT®} Subsets - Term Validation and Term Extraction},
  author    = {Kokkinakis, Dimitrios and Gerdin, Ulla},
  abstract  = {The aim of this paper is to apply and develop methods based on Natural Language Processing for automatically testing the validity, reliability and coverage of various Swedish SNOMED-CT subsets, the Systematized NOmenclature of MEDicine - Clinical Terms a multiaxial, hierarchical classification system which is currently being translated from English to Swedish. Our work has been developed across two dimensions. Initially a Swedish electronic text collection of scientific medical documents has been collected and processed to a uniform format. Secondly, a term processing activity has been taken place. In the first phase of this activity, various SNOMED CT subsets have been mapped to the text collection for evaluating the validity and reliability of the translated terms. In parallel, a large number of term candidates have been extracted from the corpus in order to examine the coverage of SNOMED CT. Term candidates that are currently not included in the Swedish SNOMED CT can be either parts of compounds, parts of potential multiword terms, terms that are not yet been translated or potentially new candidates. In order to achieve these goals a number of automatic term recognition algorithms have been applied to the corpus. The results of the later process is to be reviewed by domain experts (relevant to the subsets extracted) through a relevant interface who can decide whether a new set of terms can be incorporated in the Swedish translation of SNOMED CT or not.},
  booktitle = {Proceedings of RANLP-2009 Workshop: Biomedical Information Extraction},
  year      = {2009},
}

@inproceedings{kokkinakis-gerdin-2009-kvalitetssakring-105141,
  title     = {Kvalitetssäkring av {SNOMED CT} med hjälp av {Läkartidningens} arkiv},
  author    = {Kokkinakis, Dimitrios and Gerdin, Ulla},
  abstract  = {Inom ramen för regeringens satsning ”Nationell IT-strategi för vård och omsorg” har Socialstyrelsen fått i uppdrag att översätta och anpassa begreppssystemet ’the Systematized Nomenclature of Medicine, Clinical Terms’ (SNOMED CT) till svenska. Arbetet är både omfattande och tidskrävande samtidigt som uppdragstagaren har krav om kvalitetssäkring av översättningen. Hur kan Läkartidningens arkiv bidra till kvalitetssäkringen? Med hjälp av Läkartidningens digitala arkiv, LDA, (årgångarna 1996-2009) har vi utvecklat metoder för att effektivisera kvalitetssäkringen av olika SNOMED CT-urval (t.ex. diabetestermer). Det innebär att vi underlättar för utförandet av empiriska, SNOMED CT-relaterade studier, som t.ex. framtagning av underlag om termernas användning, variation och frekvensdistribution över tid. Arkivets förädling: LDA:t omvandlades till ett enhetligt textbaserat format och textinnehållet normaliserades med avseenden på dokumentformat och teckenkodning för att kunna skapa ett bra underlag för den efterföljande språkteknologiska analysen. Alla artiklar i varje publicerad årgång extraherades och märktes upp dels med olika slags metainformation (t.ex. genretillhörighet) dels med lingvistisk och semantisk information, sammanlagt 27 000 artiklar. Den språkteknologiska bearbetningen innefattade automatiskt tillägg av lingvistisk information som t.ex. ordklasstillhörighet för varje ord i korpusen och automatiskt, semantisk mappning dels till den svenska MeSH-tesaurusen och dels till delar av den svensköversatta SNOMED-hierarkin. LDA i en ny skepnad: LDA utgör sedan länge en värdefull svensk medicinsk resurs för alla som yrkesmässigt jobbar med termer och språk. Vi har dock bidragit med att göra textmaterialet ännu mer välstrukturerat och förädlat, som kan vara till hjälp för explorativa studier där sökningar kan förfinas på ett flertal sätt och därmed ge forskare möjligheter att göra djupare innehållsanalyser av texterna och samla grundläggande kunskaper inom olika ämnesområden. Kombinationen av enstaka termer och ord med lingvistisk och semantisk information ger unika möjligheter till att skaffa information och generera fakta som kan leda till nya hypoteser och eventuellt ny kunskap om olika aspekter som gäller termanvändning och variation och vi kommer att redovisa exempel på sådana analyser.},
  booktitle = {Svenska Läkaresällskapets Riksstämman},
  year      = {2009},
}

% Conference contribution: venue moved from the journal field to booktitle.
@inproceedings{kokkinakis-gerdin-2009-uppbyggandet-105136,
  title     = {Uppbyggandet av en svensk medicinsk korpus för termvalidering och termextrahering - hur bra täcker {SNOMED CT} olika delfackområden?},
  author    = {Kokkinakis, Dimitrios and Gerdin, Ulla},
  abstract  = {Syftet med denna studie är dels att skapa en stor samling svenska medicinska elektroniska texter, en korpus, och dels att validera och kvalitetssäkra existerande termer ur SNOMED CT (the Systematized NOmenclature of MEDicine - Clinical Terms) gentemot korpusinnehållet. På det sättet kan man få en objektiv uppfattning om SNOMED CT:s validitet, täckning och reliabilitet. Man kan även berika terminologin med nya termer eller termvarianter genom att automatiskt extrahera termkandidater inom olika delfackområden från korpusen med hjälp av olika statistiska och lingvistiska metoder. Resultat av de korpusbaserade, empiriska studierna ska kunna användas av terminologer i deras arbete med att göra SNOMED CT mer täckande, pålitlig och enhetlig. Samtidigt, genom användning av autentisk data, kan man försäkra sig om att termvarianterna (existerande eller nya) är vedertagna termer hos fackmän. I fall flera etablerade termvarianter (nya termkandidater) förekommer i korpusen kan dessa införas efter manuell granskning som synonymer till rekommenderade termer (med stöd av ett lämpligt granskningsgränssnitt) och därmed vidare utveckla innehållet i SNOMED CT. Följaktligen kommer vår presentation att innehålla en redovisning som bygger på tre huvudpelare – korpusuppbyggnad – termvalidering – termextrahering. Korpusen samlades in från två källor efter erhållet tillstånd. Texternas ursprung i korpusen kommer dels från Läkartidningens (LT) digitala arkiv <http://ltarkiv.lakartidningen.se> och dels från DiabetologNytts (DN) digitala arkiv <http://diabetolognytt.se/aterkommande/arkivet.html>.},
  booktitle = {2009 års nationella termkonferens Språk och Kommunikation},
  year      = {2009},
}

@incollection{kokkinakis-2009-lexical-73979,
  title     = {Lexical granularity for automatic indexing and means to achieve it - the case of {Swedish MeSH®}},
  author    = {Kokkinakis, Dimitrios},
  abstract  = {The identification and mapping of terminology from large repositories of life science data onto concept hierarchies constitute an important initial step for a deeper semantic exploration of unstructured textual content. Accurate and efficient mapping of this kind is likely to provide better means of enhancing indexing and retrieval of text, uncovering subtle differences, similarities and useful patterns, and hopefully new knowledge, among complex surface realisations, overlooked by shallow techniques based on various forms of lexicon look-up approaches. However, a finer-grained level of mapping between terms as they occur in natural language and domain concepts is a cumbersome enterprise that requires various levels of processing in order to make explicit relevant linguistic structures. This chapter highlights some of the challenges encountered in the process of bridging free to controlled vocabularies and thesauri and vice versa. We investigate how the extensive variability of lexical terms in authentic data can be efficiently projected to hierarchically structured codes, while means to increase the coverage of the underlying lexical resources are also investigated.},
  booktitle = {Information Retrieval in Biomedicine: Natural Language Processing for Knowledge Integration},
  year      = {2009},
  publisher = {IGI Global},
  address   = {Hershey, Pennsylvania},
}

% NOTE(review): same title and abstract as kokkinakis-2009-shallow-105133 further
% down; both keys are kept because either may be cited elsewhere.
% Publisher and series were previously appended to the booktitle text.
@inproceedings{kokkinakis-2009-shallow-94705,
  title     = {Shallow Features for Differentiating Disease-Treatment Relations using Supervised Learning, a pilot study},
  author    = {Kokkinakis, Dimitrios},
  abstract  = {Clinical narratives provide an information rich, nearly unexplored corpus of evidential knowledge that is considered as a challenge for practitioners in the language technology field, particularly because of the nature of the texts (excessive use of terminology, abbreviations, orthographic term variation), the significant opportunities for clinical research that such material can provide and the potentially broad impact that clinical findings may have in every day life. It is therefore recognized that the capability to automatically extract key concepts and their relationships from such data will allow systems to properly understand the content and knowledge embedded in the free text which can be of great value for applications such as information extraction and question \& answering. This paper gives a brief presentation of such textual data and its semantic annotation, and discuss the set of semantic relations that can be observed between diseases and treatments in the sample. The problem is then designed as a machine learning task in which the relations are tried to be learned in a supervised fashion, using pre-annotated data. The challenges designing the problem and empirical results are presented.},
  booktitle = {Proceedings of the 12th International Conference TSD (Text, Speech and Dialogue)},
  series    = {LNCS/LNAI},
  publisher = {Springer Verlag},
  year      = {2009},
}

% Technical report: the issuing body belongs in institution, the field that
% techreport requires; it was previously stored in publisher.
@techreport{borin-2009-bush-102214,
  title       = {One in the bush: Low-density language technology},
  author      = {Borin, Lars},
  year        = {2009},
  institution = {University of Gothenburg},
  address     = {Göteborg},
}

% The series name was previously appended to the booktitle text.
@inproceedings{borin-forsberg-2009-family-102212,
  title     = {All in the family: A comparison of {SALDO} and {WordNet}},
  author    = {Borin, Lars and Forsberg, Markus},
  booktitle = {Proceedings of the Nodalida 2009 Workshop on WordNets and other Lexical Semantic Resources - between Lexical Semantics, Lexicography, Terminology and Formal Ontologies},
  series    = {NEALT Proceedings Series},
  year      = {2009},
  volume    = {7},
}

@inproceedings{ljunglof-etal-2009-trik-91892,
  title     = {{TRIK}: A talking and drawing robot for children with communication disabilities},
  author    = {Ljunglöf, Peter and Larsson, Staffan and Thunberg, Gunilla and Mühlenbock, Katarina},
  abstract  = {This paper describes an ongoing project where we develop and evaluate setup involving a communication board (for manual sign communication) and a drawing robot, which can communicate with each other via spoken language. The purpose is to help children with severe communication disabilities to learn language, language use and cooperation, in a playful and inspiring way. The communication board speaks and the robot is able to understand and talk back. This encourages the child to use the language and learn to cooperate to reach a common goal, which in this case is to get the robot to draw figures on a paper.},
  booktitle = {Proceedings of the 17th Nordic Conference of Computational Linguistics NODALIDA 2009},
  year      = {2009},
  volume    = {4},
}

@inproceedings{derbring-etal-2009-subtts-148340,
  title     = {{SubTTS}: Light-weight automatic reading of subtitles},
  author    = {Derbring, Sandra and Ljunglöf, Peter and Olsson, Maria},
  abstract  = {We present a simple tool that enables the computer to read subtitles of movies and TV shows aloud. The tool works by reading subtitle files, which can be freely downloaded or extracted from a DVD using existing tools, and read the text aloud through a speech synthesizer. The target audience are people who have trouble reading subtitles while watching a movie, for example people with visual impairments and people with reading difficulties, such as dyslexia. The application will be evaluated together with user from these groups to see if this could be an accepted solution to their need.},
  booktitle = {Proceedings of the 17th Nordic Conference of Computational Linguistics NODALIDA 2009},
  year      = {2009},
}

@inproceedings{ljunglof-2009-trik-99886,
  title     = {{TRIK}: en talande och ritande robot för barn med kommunikativa funktionshinder},
  author    = {Ljunglöf, Peter},
  booktitle = {Presentation på ID-dagarna, 7–9 oktober 2009, Stockholm},
  year      = {2009},
}

% NOTE(review): same title and abstract as kokkinakis-2009-shallow-94705 above;
% this record carries the volume and page numbers.
@article{kokkinakis-2009-shallow-105133,
  title    = {Shallow Features for Differentiating Disease-Treatment Relations using Supervised Learning; a pilot study},
  author   = {Kokkinakis, Dimitrios},
  abstract = {Clinical narratives provide an information rich, nearly unexplored corpus of evidential knowledge that is considered as a challenge for practitioners in the language technology field, particularly because of the nature of the texts (excessive use of terminology, abbreviations, orthographic term variation), the significant opportunities for clinical research that such material can provide and the potentially broad impact that clinical findings may have in every day life. It is therefore recognized that the capability to automatically extract key concepts and their relationships from such data will allow systems to properly understand the content and knowledge embedded in the free text which can be of great value for applications such as information extraction and question \& answering. This paper gives a brief presentation of such textual data and its semantic annotation, and discusses the set of semantic relations that can be observed between diseases and treatments in the sample. The problem is then designed as a supervised machine learning task in which the relations are tried to be learned using pre-annotated data. The challenges designing the problem and empirical results are presented.},
  journal  = {Lecture Notes in Computer Science},
  year     = {2009},
  volume   = {5729},
  pages    = {395--402},
}

@inproceedings{ljunglof-2009-trindikit-99883,
  title     = {{trindikit.py}: An open-source {Python} library for developing {ISU}-based dialogue systems},
  author    = {Ljunglöf, Peter},
  abstract  = {TrindiKit is one of the main tools for developing ISU-based dialogue systems, but it is implemented in a non-free dialect of the programming language Prolog. Therefore we have translated the TrindiKit toolkit into an open-source Python package. We have tried to remain close to the original TrindiKit formulation, while making the most of Python classes and objects.},
  booktitle = {IWSDS'09, 1st International Workshop on Spoken Dialogue Systems Technology},
  year      = {2009},
}

% The series name was previously appended to the booktitle text.
@inproceedings{andreasson-etal-2009-swedish-102211,
  title     = {Swedish {CLARIN} activities},
  author    = {Andréasson, Maia and Borin, Lars and Forsberg, Markus and Beskow, Jonas and Carlson, Rolf and Edlund, Jens and Elenius, Kjell and Hellmer, Kahl and House, David and Merkel, Magnus and Forsbom, Eva and Megyesi, Beáta and Eriksson, Anders and Strömqvist, Sven},
  booktitle = {Proceedings of the Nodalida 2009 workshop on CLARIN activities in the Nordic countries},
  series    = {NEALT Proceedings Series},
  year      = {2009},
  volume    = {5},
  pages     = {1--5},
}

% Whole proceedings volume. The former non-standard type "edited_book" is replaced
% by "proceedings"; the ampersand in the workshop acronym is escaped because an
% unescaped one in a printed title stops LaTeX with an alignment-tab error.
@proceedings{lendvai-borin-2009-proceedings-91853,
  title     = {Proceedings of the {EACL} 2009 Workshop on Language Technology and Resources for Cultural Heritage, Social Sciences, Humanities, and Education ({LaTeCH} -- {SHELT\&R} 2009)},
  editor    = {Lendvai, Piroska and Borin, Lars},
  year      = {2009},
  publisher = {ACL},
  address   = {Athens},
  isbn      = {1-932432-21-3},
}

@inproceedings{borin-2009-linguistic-102209,
  title     = {Linguistic diversity in the information society},
  author    = {Borin, Lars},
  booktitle = {Proceedings of the SALTMIL 2009 workshop on Information Retrieval and Information Extraction for Less Resourced Languages},
  year      = {2009},
  isbn      = {978-84-692-4940-6},
  pages     = {1--7},
}