% Publication list, newest entries first (2018 back to 2008 in the normalised part).
% Layout used below: one field per line, lowercase entry types and field names,
% page ranges written with "--", LaTeX specials (ampersand, percent) escaped.
% Citation keys are left exactly as they were so existing \cite commands keep
% working. They contain non-ASCII letters, so a UTF-8 aware backend (e.g. Biber)
% is presumably required -- TODO confirm against the build.
% NOTE(review): the four article entries (Gruzitis-Normunds2017-225789,
% Borin-Lars2014-198286, Dannélls-Dana2010-110876, Borin-Lars2010-129126) have no
% journal field, which that entry type requires. The journal names cannot be
% recovered from this file and must be added from the publication records.

@incollection{Borin-Lars2018-269084,
  author      = {Borin, Lars and Dannélls, Dana and Gruzitis, Normunds},
  title       = {Linguistics vs. language technology in constructicon building and use},
  booktitle   = {Constructicography: Constructicon development across languages},
  editor      = {Lyngfelt, Benjamin and Borin, Lars and Ohara, Kyoko and Torrent, Tiago Timponi},
  year        = {2018},
  pages       = {229--253},
  publisher   = {John Benjamins},
  address     = {Amsterdam},
  isbn        = {9789027263865},
  abstract    = {In this chapter, we describe the close interaction of linguists and language technologists in the Swedish constructicon project. This kind of collaboration is not so common today, because of the way that language technology has developed in recent decades, but in our case the collaboration has been very successful, and constituted a genuine instance of cross-fertilization, where an evolving language technology infrastructure and a computational lexical macroresource described in the chapter has formed an integral part of the Swedish constructicon development environment, while at the same time the structured linguistic knowledge described in the constructicon has informed the language technology making up the infrastructure.},
}

@inproceedings{Dannélls-Dana2018-271181,
  author      = {Dannélls, Dana and Olsson, Leif-Jöran},
  title       = {Integrating language resources in two {OCR} engines to improve processing of historical {Swedish} text},
  booktitle   = {CLARIN Annual Conference},
  year        = {2018},
  abstract    = {We are aiming to address the difficulties that many History and Social Sciences researchers struggle with to bring in non-digitized text into language analysis workflows. In this paper we present the language resources and material we used for training two Optical Character Recognition engines for processing historical Swedish text written in Fraktur (blackletter). The trained models, resources and dictionaries are freely available and accessible through our web service, hosted at Språkbanken, to enable users and developers easy access for extraction of historical Swedish texts that are only available in images for further processing.},
}

@article{Gruzitis-Normunds2017-225789,
  author      = {Gruzitis, Normunds and Dannélls, Dana},
  title       = {A multilingual {FrameNet-based} grammar and lexicon for controlled natural language},
  year        = {2017},
  volume      = {51},
  number      = {1},
  pages       = {37--66},
  abstract    = {Berkeley FrameNet is a lexico-semantic resource for English based on the theory of frame semantics. It has been exploited in a range of natural language processing applications and has inspired the development of framenets for many languages. We present a methodological approach to the extraction and generation of a computational multilingual FrameNet-based grammar and lexicon. The approach leverages FrameNet-annotated corpora to automatically extract a set of cross-lingual semantico-syntactic valence patterns. Based on data from Berkeley FrameNet and Swedish FrameNet, the proposed approach has been implemented in Grammatical Framework (GF), a categorial grammar formalism specialized for multilingual grammars. The implementation of the grammar and lexicon is supported by the design of FrameNet, providing a frame semantic abstraction layer, an interlingual semantic application programming interface (API), over the interlingual syntactic API already provided by GF Resource Grammar Library. The evaluation of the acquired grammar and lexicon shows the feasibility of the approach. Additionally, we illustrate how the FrameNet-based grammar and lexicon are exploited in two distinct multilingual controlled natural language applications. The produced resources are available under an open source license.},
}

@techreport{Borin-Lars2016-233768,
  author      = {Borin, Lars and Bouma, Gerlof and Dannélls, Dana},
  title       = {A free cloud service for {OCR} / En fri molntjänst för {OCR}},
  year        = {2016},
  institution = {University of Gothenburg},
  address     = {Göteborg},
}

@inproceedings{Gruzitis-Normunds2016-233921,
  author      = {Gruzitis, Normunds and Dannélls, Dana and Ranta, Aarne and Tyers, Francis M.},
  title       = {{Grammatical Framework} for implementing multilingual frames and constructions},
  booktitle   = {Book of Abstracts. The 9th International Conference on Construction Grammar (ICCG9) theme session on Computational Semantics with Frames and Constructions. October 05--09, 2016, Juiz de Fora, Brazil},
  year        = {2016},
}

@inproceedings{FribergHeppin-Karin2015-218276,
  author      = {Friberg Heppin, Karin and Dannélls, Dana},
  title       = {Polysemy and questions of lumping or splitting in the construction of {Swedish FrameNet}},
  booktitle   = {Proceedings of the Workshop on Semantic resources and Semantic Annotation for Natural Language Processing and the Digital Humanities at NODALIDA 2015, Vilnius, 11th May, 2015},
  year        = {2015},
  pages       = {12--20},
  abstract    = {When working on a lexical resource, such as Swedish FrameNet (SweFN), assumptions based on linguistic theories are made, and methodological directions based upon them are taken. These directions often need to be revised when not beforehand foreseen problems arise. One assumption that was made already in the early development stages of SweFN was that each lexical entry from the reference lexicon, SALDO, would evoke only one semantic frame in SweFN. If a lexical entry evoked more than one frame, it entailed more than one sense and therefore required a new entry in the lexicon. As work progressed, this inclination towards splitting, in the perpetual lumpers and splitters discussion proved to be progressively untenable. This paper will give an account of the problems which were encountered and suggestions for solutions on polysemy issues forcing a discussion on lumping or splitting.},
}

@inproceedings{Gruzitis-Normunds2015-220419,
  author      = {Gruzitis, Normunds and Dannélls, Dana and Lyngfelt, Benjamin and Ranta, Aarne},
  title       = {Formalising the {Swedish Constructicon} in {Grammatical Framework}},
  booktitle   = {Proceedings of the Grammar Engineering Across Frameworks (GEAF) Workshop, 53rd Annual Meeting of the ACL and 7th IJCNLP, Beijing, China, July 26-31, 2015},
  year        = {2015},
  pages       = {49--56},
  isbn        = {978-1-932432-66-4},
  abstract    = {This paper presents a semi-automatic approach to acquire a computational construction grammar from the semi-formal Swedish Constructicon. The implementation is based on the resource grammar library provided by Grammatical Framework and can be seen as an extension to the existing Swedish resource grammar. An important consequence of this work is that it generates feedback, explicit and implicit, on how to improve the annotation consistency and adequacy of the original construction resource.},
}

@incollection{Damova-Mariana2014-178094,
  author      = {Damova, Mariana and Dannélls, Dana and Mateva, Maria and Enache, Ramona and Ranta, Aarne},
  title       = {Natural Language Interaction with {Semantic Web} Knowledge Bases and {Linked Open Data}},
  booktitle   = {Towards multilingual Semantic Web},
  year        = {2014},
  pages       = {211--226},
  publisher   = {Springer},
  address     = {Berlin},
  isbn        = {978-3-662-43585-4},
  abstract    = {Cultural heritage appears to be a very useful use case for Semantic Web technologies. The domain provides with plenty of circumstances where linkages between different knowledge sources are required to ensure access to rich information and respond to the needs of professionals dealing with cultural heritage content. Semantic Web technologies offer the technological backbone to meet the requirement of integrating heterogeneous data easily, but they are still more adapted to be consumed by computers than by humans, especially non-engineers or developers. This chapter is about a technique which allows interaction in natural language with semantic knowledge bases. The proposed technique offers a method that allows querying a semantic repository in natural language and obtaining results from it as a coherent text. This unique solution includes several steps of transition from natural language to SPARQL and from RDF to coherent multilingual descriptions, using the Grammatical Framework, GF. The approach builds on a semantic knowledge infrastructure in RDF, it is based on OWLIM-SE and the data integration method Reason-able View supplied with an ontological reference layer. The latter is connected via formal rules with abstract representations derived from the syntactic trees of natural language input using the GF resource grammar library.},
}

@article{Borin-Lars2014-198286,
  author      = {Borin, Lars and Dannélls, Dana and Olsson, Leif-Jöran},
  title       = {Geographic visualization of place names in {Swedish} literary texts},
  year        = {2014},
  volume      = {29},
  number      = {3},
  pages       = {400--404},
  abstract    = {This article describes the development of a geographical information system (GIS) at Språkbanken as part of a visualization solution to be used in an archive of historical Swedish literary texts. The research problems we are aiming to address concern orthographic and morphological variation, missing place names, and missing place name coordinates. Some of these problems form a central part in the development of methods and tools for the automatic analysis of historical Swedish literary texts at our research unit. We discuss the advantages and challenges of covering large-scale spelling variation in place names from different sources and in generating maps with focus on different time periods.},
}

@inproceedings{Dannélls-Dana2014-198499,
  author      = {Dannélls, Dana and Gruzitis, Normunds},
  title       = {Extracting a bilingual semantic grammar from {FrameNet-annotated} corpora},
  booktitle   = {Proceedings of the 9th International Conference on Language Resources and Evaluation (LREC)},
  year        = {2014},
  publisher   = {European Language Resources Association},
  isbn        = {978-2-9517408-8-4},
  abstract    = {We present the creation of an English-Swedish FrameNet-based grammar in Grammatical Framework. The aim of this research is to make existing framenets computationally accessible for multilingual natural language applications via a common semantic grammar API, and to facilitate the porting of such grammar to other languages. In this paper, we describe the abstract syntax of the semantic grammar while focusing on its automatic extraction possibilities. We have extracted a shared abstract syntax from {\textasciitilde}58,500 annotated sentences in Berkeley FrameNet (BFN) and {\textasciitilde}3,500 annotated sentences in Swedish FrameNet (SweFN). The abstract syntax defines 769 frame-specific valence patterns that cover 77,8\% examples in BFN and 74,9\% in SweFN belonging to the shared set of 471 frames. As a side result, we provide a unified method for comparing semantic and syntactic valence patterns across framenets.},
}

@inproceedings{Dannélls-Dana2014-201944,
  author      = {Dannélls, Dana and Gruzitis, Normunds},
  title       = {Controlled Natural Language Generation from a Multilingual {FrameNet-based} Grammar},
  booktitle   = {Lecture Notes in Computer Science},
  year        = {2014},
  volume      = {8625},
  pages       = {155--166},
  isbn        = {978-3-319-10222-1},
  abstract    = {This paper presents a currently bilingual but potentially multilingual FrameNet-based grammar library implemented in Grammatical Framework. The contribution of this paper is two-fold. First, it offers a methodological approach to automatically generate the grammar based on semantico-syntactic valence patterns extracted from FrameNet-annotated corpora. Second, it provides a proof of concept for two use cases illustrating how the acquired multilingual grammar can be exploited in different CNL applications in the domains of arts and tourism.},
}

@inproceedings{Dannélls-Dana2014-201951,
  author      = {Dannélls, Dana and Friberg Heppin, Karin and Ehrlemark, Anna},
  title       = {Using language technology resources and tools to construct {Swedish FrameNet}},
  booktitle   = {Proceedings of the Workshop on Lexical and Grammatical Resources for Language Processing, Dublin Ireland, August 24, 2014},
  year        = {2014},
  pages       = {8--17},
  isbn        = {978-1-873769-44-7},
  abstract    = {Having access to large lexical and grammatical resources when creating a new language resource is essential for its enhancement and enrichment. This paper describes the interplay and interactive utilization of different language technology tools and resources, in particular the Swedish lexicon SALDO and Swedish Constructicon, in the creation of Swedish FrameNet. We show how integrating resources in a larger infrastructure is much more than the sum of the parts.},
}

@inproceedings{Borin-Lars2014-204731,
  author      = {Borin, Lars and Dannélls, Dana and Forsberg, Markus and McCrae, John P.},
  title       = {Representing {Swedish} Lexical Resources in {RDF} with lemon},
  booktitle   = {Proceedings of the ISWC 2014 Posters \& Demonstrations Track a track within the 13th International Semantic Web Conference (ISWC 2014)},
  year        = {2014},
  volume      = {1272},
  pages       = {329--332},
  abstract    = {The paper presents an ongoing project which aims to publish Swedish lexical-semantic resources using Semantic Web and Linked Data technologies. In this article, we highlight the practical conversion methods and challenges of converting three of the Swedish language resources in RDF with lemon.},
}

@inproceedings{Dannélls-Dana2014-204733,
  author      = {Dannélls, Dana and Enache, Ramona and Damova, Mariana},
  title       = {A Multilingual {SPARQL-Based} Retrieval Interface for Cultural Heritage Objects},
  booktitle   = {Proceedings of the ISWC 2014 Posters \& Demonstrations Track a track within the 13th International Semantic Web Conference (ISWC 2014)},
  year        = {2014},
  volume      = {1272},
  pages       = {205--208},
}

@inproceedings{Ahlberg-Malin2014-210083,
  author      = {Ahlberg, Malin and Borin, Lars and Dannélls, Dana and Forsberg, Markus and Toporowska Gronostaj, Maria and Friberg Heppin, Karin and Johansson, Richard and Kokkinakis, Dimitrios and Olsson, Leif-Jöran and Uppström, Jonatan},
  title       = {{Swedish FrameNet++} The Beginning of the End and the End of the Beginning},
  booktitle   = {Proceedings of the Fifth Swedish Language Technology Conference, Uppsala, 13-14 November 2014},
  year        = {2014},
}

@inproceedings{Dannélls-Dana2013-178095,
  author      = {Dannélls, Dana and Borin, Lars and Olsson, Leif-Jöran},
  title       = {{MapServer} for {Swedish} Language Technology},
  booktitle   = {Digital Humanities},
  year        = {2013},
  abstract    = {The MapServer application used by the Swedish Language Bank provides new opportunities for visualizing geographical information found in its large repository of written texts, in particular literary texts. The application is capable of performing coordinate search on the basis of recognized place names and rendering both static and dynamic maps that display their geographical locations.},
}

@inproceedings{Dannélls-Dana2013-178096,
  author      = {Dannélls, Dana and Ranta, Aarne and Enache, Ramona and Damova, Mariana and Mateva, Maria},
  title       = {Multilingual access to cultural heritage content on the {Semantic Web}},
  booktitle   = {Language Technology for Cultural Heritage, Social Sciences, and Humanities (LaTeCH)},
  year        = {2013},
  abstract    = {As the amount of cultural data available on the Semantic Web is expanding, the demand of accessing this data in multiple languages is increasing. Previous work on multilingual access to cultural heritage information has shown that mapping from ontologies to natural language requires at least two different steps: (1) mapping multilingual metadata to interoperable knowledge sources; (2) assigning multilingual knowledge to cultural data. This paper presents our work on making cultural heritage content available on the Semantic Web and accessible in 15 languages. The objective of our work is both to form queries and to retrieve semantic content in multiple languages. We describe our experiences with processing museum data extracted from two different sources, harmonizing this data and making its content accessible in natural language.},
}

@misc{Dannélls-Dana2013-189699,
  author      = {Dannélls, Dana and Ranta, Aarne and Enache, Ramona and Listenmaa, Inari and Tolosi, Laura and Mateva, Maria},
  title       = {Grammar-ontology interoperability -- Final Work and Overview},
  year        = {2013},
  publisher   = {University of Gothenburg},
  address     = {Göteborg},
  abstract    = {D4.3A is an annex to the D4.3 deliverable of WP4 of the MOLTO project. It aims to address the reviewers’ remarks and recommendations for D4.3, as well as to present a final overview of the prototypes built in the scope of MOLTO with respect to grammar-ontology interoperability. D4.3A also describes the work after M24 and gives a general overview of the achievements in MOLTO with focus on WP4 - Knowledge Engineering, WP7 - Patents use case, and WP8 - Cultural Heritage use case.},
}

@misc{Dannélls-Dana2013-189698,
  author      = {Dannélls, Dana and Ranta, Aarne and Enache, Ramona and Damova, Mariana and Mateva, Maria},
  title       = {Translation and retrieval system for museum object descriptions},
  year        = {2013},
  publisher   = {University of Gothenburg},
  address     = {Göteborg},
  abstract    = {This is the final report of Workpackage 8: Case Study: Cultural Heritage. The major contributions reported are ontology-based multilingual grammar covering 15 languages and cross-language retrieval system for museum object descriptions using Semantic Web technology. Our groundwork for this deliverable was laid in D8.1: Ontology and corpus study of the cultural heritage domain, and D8.2: Multilingual grammar for museum object descriptions.},
}

@inproceedings{Dannélls-Dana2012-156502,
  author      = {Dannélls, Dana and Borin, Lars},
  title       = {Toward language independent methodology for generating artwork descriptions -- Exploring {FrameNet} information},
  booktitle   = {EACL 2012 workshop on Language Technology for Cultural Heritage, Social Sciences, and Humanities (LaTeCH)},
  year        = {2012},
  pages       = {18--23},
  abstract    = {Today museums and other cultural heritage institutions are increasingly storing object descriptions using semantic web domain ontologies. To make this content accessible in a multilingual world, it will need to be conveyed in many languages, a language generation task which is domain specific and language dependent. This paper describes how semantic and syntactic information such as that provided in a framenet can contribute to solving this task. It is argued that the kind of information offered by such lexical resources enhances the output quality of a multilingual language generation application, in particular when generating domain specific content.},
}

@inproceedings{Dannélls-Dana2012-156501,
  author      = {Dannélls, Dana and Enache, Ramona and Damova, Mariana and Chechev, Milen},
  title       = {Multilingual Online Generation from {Semantic Web} Ontologies},
  booktitle   = {The World Wide Web Conference (WWW2012), 16th-20th April 2012},
  year        = {2012},
  pages       = {239--242},
  abstract    = {In this paper we report on our ongoing work in the EU project Multilingual Online Translation (MOLTO), supported by the European Union Seventh Framework Programme under grant agreement FP7-ICT-247914. More specifically, we present work workpackage 8 (WP8): Case Study: Cultural Heritage. The objective of the work is to build an ontology-based multilingual application for museum information on the Web. Our approach relies on the innovative idea of Reason-able View of the Web of linked data applied to the domain of cultural heritage. We have been developing a Web application that uses Semantic Web ontologies for generating coherent multilingual natural language descriptions about museum objects. We have been experimenting with museum data to test our approach and find that it performs well for the examined languages.},
}

@inproceedings{Dannélls-Dana2012-156504,
  author      = {Dannélls, Dana},
  title       = {On generating coherent multilingual descriptions of museum objects from {Semantic Web} ontologies},
  booktitle   = {The 7th International Conference on Natural Language Generation (INLG 2012)},
  year        = {2012},
  abstract    = {During the last decade, there has been a shift from developing natural language systems to developing generic systems that are capable of producing natural language descriptions directly from Web ontologies. To make these descriptions coherent and accessible in different languages, a methodology is needed for identifying the general principles that would determine the distribution of referential forms. Previous work has proved through crosslinguistic investigations that strategies for building co-reference are language dependent. However, to our knowledge, there is no language generation methodology that makes a distinction between languages about the generation of referential chains. To determine the principles governing referential chains, we gathered data from three languages: English, Swedish and Hebrew, and studied how co-reference is expressed in a discourse. As a result of the study, a set of language specific co-reference strategies were identified. Using these strategies, an ontology based multilingual grammar for generating written natural language descriptions about paintings was implemented in the Grammatical Framework. A preliminary evaluation of our method shows language-dependent coreference strategies lead to better generation results.},
}

@book{Dannélls-Dana2012-178092,
  author      = {Dannélls, Dana},
  title       = {Multilingual text generation from structured formal representations},
  year        = {2012},
  publisher   = {University of Gothenburg},
  address     = {Göteborg},
  isbn        = {978-91-87850-48-6},
  abstract    = {This thesis aims to identify the optimal ways in which natural language generation techniques can be brought to bear upon the problem of processing a structured body of information in order to devise a coherent presentation of text content in multiple languages. We investigate how chains of referential expressions are realized in English, Swedish and Hebrew, and suggest several coreference strategies that can be used to generate coherent descriptions about paintings. The suggested strategies focus on the need to produce paragraph-sized written natural language descriptions from formal structured representations presented in the Semantic Web. We account for principles of coreference by introducing a new modularized approach to automatically generate chains of referential expressions from ontologies. We demonstrate the feasibility of the approach by implementing a system where a Semantic Web domain ontology serves as the background knowledge representation and where the language-specific coreference strategies are incorporated. The system uses both the principles of discourse structures and coreference strategies to guide the generation process. We show how the system successfully generates coherent, well-formed descriptions in multiple languages.},
}

@inproceedings{Caprotti-Olga2012-178183,
  author      = {Caprotti, Olga and Ranta, Aarne and Angelov, Krasimir and Enache, Ramona and Camilleri, John J. and Dannélls, Dana and Détrez, Grégoire and Hallgren, Thomas and Prasad, K. V. S. and Virk, Shafqat},
  title       = {High-quality translation: {Molto} tools and applications},
  booktitle   = {The fourth Swedish Language Technology Conference (SLTC)},
  year        = {2012},
  abstract    = {MOLTO (Multilingual On Line Translation, FP7-ICT-247914, www.molto-project.eu) is a European project focusing on translation on the web. MOLTO targets translation that has production quality, that is, usable for quick and reliable dissemination of information. MOLTO’s main focus is to increase the productivity of such translation systems, building on the technology of GF (Grammatical Framework) and its Resource Grammar Library. But MOLTO also develops hybrid methods which increase the quality of Statistical Machine Translation (SMT) by adding linguistic information, or bootstrap grammatical models from statistical models. This paper gives a brief overview of MOLTO’s latest achievements, many of which are more thoroughly described in separate papers and available as web-based demos and as open-source software.},
}

@inproceedings{Dannélls-Dana2011-145391,
  author      = {Dannélls, Dana and Damova, Mariana},
  title       = {Reason-able View of {Linked Data} for Cultural Heritage},
  booktitle   = {Advances in Intelligent and Soft Computing / The Third International Conference on Software, Services \& Semantic Technologies (S3T)},
  year        = {2011},
  volume      = {101},
  pages       = {17--24},
  abstract    = {This paper presents a novel approach that relies on the innovative idea of Reason-able View of the Web of linked data applied to the domain of cultural heritage. We describe an application of data integration based on Semantic Web technologies and the methods necessary to create an integrated semantic knowledge base composed of real museum data that are interlinked with data from the Linked Open Data (LOD) cloud. Thus, creating an infrastructure to allow for easy extension of the domain specific data, and convenient querying of multiple datasets. Our approach is based on a model of schema level and an instance level alignment. The models use several ontologies, e.g. PROTON and CIDOC-CRM, showing their integration by using real data from the Gothenburg City Museum.},
}

@inproceedings{Dannélls-Dana2011-145395,
  author      = {Dannélls, Dana and Damova, Mariana and Enache, Ramona and Chechev, Milen},
  title       = {A Framework for Improved Access to Museum Databases in the {Semantic Web}},
  booktitle   = {Language Technologies for Digital Humanities and Cultural Heritage (RANLPDigHum 2011)},
  year        = {2011},
  pages       = {3--10},
  isbn        = {978-954-452-019-9},
  abstract    = {Digital museum databases have extremely heterogeneous data structures which require advanced mapping and vocabulary integration for them to benefit from the interoperability enabled by semantic technologies. In addition to establishing ways of extracting and manipulating digitally encoded cultural material, there exists a need to make this material available and accessible to human users in different forms and languages that are available to them. In this paper we describe a method to manage and access museum data by integrating it within a series of interlinked ontological models. The method allows querying and generation of query results in natural language. We report on the results of applying this method from experiments we have been pursuing.},
}

@article{Dannélls-Dana2010-110876,
  author      = {Dannélls, Dana},
  title       = {Discourse Generation from Formal Specifications Using the {Grammatical Framework}, {GF}},
  year        = {2010},
  volume      = {46},
  pages       = {167--178},
  abstract    = {Semantic web ontologies contain structured information that do not have discourse structure embedded in them. Hence, it becomes increasingly hard to devise multilingual texts that humans comprehend. In this paper we show how to generate coherent multilingual texts from formal representations using discourse strategies. We demonstrate how discourse structures are mapped to GF’s abstract grammar specifications from which multilingual descriptions of work of art objects are generated automatically.},
}

@inproceedings{Dannélls-Dana2010-119938,
  author      = {Dannélls, Dana and Camilleri, John J.},
  title       = {Verb Morphology of {Hebrew} and {Maltese} -- Towards an Open Source Type Theoretical Resource Grammar in {GF}},
  booktitle   = {Proceedings of LREC 2010. Workshop on Language Resources (LRs) and Human Language Technologies (HLT) for Semitic Languages Status, Updates, and Prospects},
  year        = {2010},
  abstract    = {One of the first issues that a programmer must tackle when writing a complete computer program that processes natural language is how to design the morphological component. A typical morphological component should cover three main aspects in a given language: (1) the lexicon, i.e. how morphemes are encoded, (2) orthographic changes, and (3) morphotactic variations. This is in particular challenging when dealing with Semitic languages because of their non-concatenative morphology called root-and pattern morphology. In this paper we describe the design of two morphological components for Hebrew and Maltese verbs in the context of the Grammatical Framework (GF). The components are implemented as a part of larger grammars and are currently under development. We found that although Hebrew and Maltese share some common characteristics in their morphology, it seems difficult to generalize morphosyntactic rules across Semitic verbs when the focus is towards computational linguistics motivated lexicons. We describe and compare the verb morphology of Hebrew and Maltese and motivate our implementation efforts towards a complete open source type theoretical resource grammars for Semitic languages. Future work will focus on semantic aspects of morphological processing.},
}

@inproceedings{Dannélls-Dana2010-121404,
  author      = {Dannélls, Dana},
  title       = {Applying semantic frame theory to automate natural language templates generation from ontology statements},
  booktitle   = {The 6th International Natural Language Generation Conference},
  year        = {2010},
  abstract    = {Today there exist a growing number of framenet-like resources offering semantic and syntactic phrase specifications that can be exploited by natural language generation systems. In this paper we present on-going work that provides a starting point for exploiting framenet information for multilingual natural language generation. We describe the kind of information offered by modern computational lexical resources and discuss how template-based generation systems can benefit from them.},
}

@inproceedings{Borin-Lars2010-110368,
  author      = {Borin, Lars and Dannélls, Dana and Forsberg, Markus and Toporowska Gronostaj, Maria and Kokkinakis, Dimitrios},
  title       = {The past meets the present in {Swedish FrameNet++}},
  booktitle   = {14th EURALEX International Congress},
  year        = {2010},
  pages       = {269--281},
  abstract    = {The paper is about a recently initiated project which aims at the development of a Swedish FrameNet as an integral part of a larger lexical resource, hence the name “Swedish FrameNet++” (SweFN++). It focuses on reuse of free electronic resources and their role in the acquisition and population of Swedish frames. After a brief overview of Swedish resources, we reflect on three approaches to recycling the available lexical data in a semi-automatic manner. SweFN++ will be a multi-functional resource supporting research within lexicology and linguistics as well as different applications within computational lexicography and language technology, not to mention e-science.},
}

@article{Borin-Lars2010-129126,
  author      = {Borin, Lars and Dannélls, Dana and Forsberg, Markus and Toporowska Gronostaj, Maria and Kokkinakis, Dimitrios},
  title       = {{Swedish FrameNet++}},
  year        = {2010},
}

@techreport{Dannélls-Dana2010-179443,
  author      = {Dannélls, Dana},
  title       = {{MapServer} at {Språkbanken}},
  year        = {2010},
  institution = {University of Gothenburg},
  address     = {Göteborg},
}

@inproceedings{Borin-Lars2009-110343,
  author      = {Borin, Lars and Dannélls, Dana and Forsberg, Markus and Toporowska Gronostaj, Maria and Kokkinakis, Dimitrios},
  title       = {Thinking Green: Toward {Swedish FrameNet++}},
  booktitle   = {FrameNet Masterclass and Workshop},
  year        = {2009},
  abstract    = {Access to multi-layered lexical, grammatical and semantic information representing text content is a prerequisite for efficient automatic understanding and generation of natural language. A FrameNet is considered a valuable resource for both linguistics and language technology research that may contribute to the achievement of these goals. Currently, FrameNet-like resources exist for a few languages, including some domain-specific and multilingual initiatives (Dolbey et al., 2006; Boas, 2009; Uematsu et al., 2009; Venturi et al., 2009), but are unavailable for most languages, including Swedish, although there have been some pilot studies exploring the semi-automatic acquisition of Swedish frames (Johansson \& Nugues, 2006; Borin et al., 2007). At the University of Gothenburg, we are now embarking on a project to build a Swedish FrameNet-like resource. A novel feature of this project is that the Swedish FrameNet will be an integral part of a larger many-faceted lexical resource. Hence the name Swedish FrameNet++ (SweFN++).},
}

@inproceedings{Dannélls-Dana2009-95195,
  author      = {Dannélls, Dana},
  title       = {The Value of Weights in Automatically Generated Text Structures},
  booktitle   = {Proceedings of the 10th International Conference on Intelligent Text Processing and Computational Linguistics},
  year        = {2009},
  number      = {LNCS 5449},
  pages       = {233--244},
  abstract    = {One question that arises if we want to evolve generation techniques to accommodate Web ontologies is how to capture and expose the relevant ontology content to the user. This paper presents an attempt to answer the question about how to select the ontology statements that are significant for the user and present those statements in a way that helps the user to learn. Our generation approach combines bottom-up and top-down techniques with enhanced comparison methods to tailor descriptions about a concept described in an ontology. A preliminary evaluation indicates that the process of computing preferable property weights in addition to enhanced generation methods has a positive effect on the text structure and its content. Future work aims to assign grammar rules and lexical entries in order to produce coherent texts that follow on from the generated text structures in several languages.},
}

@inproceedings{Dannélls-Dana2009-104122,
  author      = {Dannélls, Dana},
  title       = {Improving Information Access to Cultural Content through Discourse Strategies},
  booktitle   = {Workshop proceedings of the eleventh International Conference of the Italian Association for Artificial Intelligence (AI*IA)},
  year        = {2009},
  isbn        = {978-88-903581-1-1},
  abstract    = {This paper describes a grammar driven approach for generating multilingual cultural heritage information of objects held by museums and galleries. Discourse strategies are utilized to select and organize ontological statements. The discourse structure is translated to abstract grammar specifications that are mapped to natural language.},
}

@inproceedings{Dannélls-Dana2008-73692,
  author      = {Dannélls, Dana},
  title       = {Generating Tailored Texts for Museum Exhibits},
  booktitle   = {Proceedings of the 6th edition of LREC 2008, Workshop on Language Technology for Cultural Heritage Data (LaTeCH), Marrakech, Morocco},
  year        = {2008},
  pages       = {17--20},
  abstract    = {This paper reports work that aims to generate texts in multiple languages from ontologies following the Conceptual Reference Model (CRM) ISO standard for conceptual models of museums. The rationale of this work is to increase users’ knowledge and interest in the cultural heritage domain by allowing the user to select his preferable syntax presentation and influence the order of the generated information using generation techniques and Semantic Web technologies. We chose for study a small amount of logical relations represented in the ontology and wrote a grammar that is capable to describe them in natural language through user editing. We present the multilingual source authoring environment, which is built upon the grammatical framework (GF) formalism and show how it is utilized to generate multiple texts from the CRM domain ontology. The initial results comprise texts, which vary in syntax and content.},
}

@inproceedings{Dannélls-Dana2008-73695,
  author      = {Dannélls, Dana},
  title       = {A System Architecture for Conveying Historical Knowledge to Museum Visitors},
  booktitle   = {Proceedings of ECDL 2008, Århus, Danmark},
  year        = {2008},
  isbn        = {978-90-813489-1-1},
  abstract    = {One of the requirements posed by cultural organizations is how to accommodate cultural content of formal ontology object descriptions to different user needs. This paper introduces a personal museum guide system architecture that is being developed to exploit linguistic aspects of realization of a domain-specific ontology in relation to the user’s interaction with this ontology.},
}

@inproceedings{Dannélls-Dana2008-73693,
  author      = {Dannélls, Dana},
  title       = {The production of documents from ontologies},
  booktitle   = {Proceedings of the 18th European Conference on Artificial Intelligence (ECAI). Workshop on Contexts and Ontologies, Patras, Greece},
  year        = {2008},
  pages       = {36--38},
  abstract    = {The production of documents from an ontology is a challenging task which requires a significant effort from a natural language generator. Addressing this problem involves a careful examination of how the knowledge formalized in an ontology can be verbalized and realized. We have started to exploit the abilities of generating natural language texts from a Web Ontology Language (OWL) and to examine how the content of the ontology can be rendered in natural language texts that support reader and listener preferences. In this paper we present our line of research and exemplify some of the difficulties we encountered while attempting to generate fragments of texts from a domain specific ontology.},
}

% NOTE(review): the entry that starts on the next line has not been normalised yet; its text is unchanged.
@inProceedings{Dannélls-Dana2007-66462, title = {Multilingual generation of medical information}, abstract = {Multilingual generation systems aim to produce understandable texts in multiple languages from one knowledge representation. We adapted an existing prototype multilingual generator that presents simulated breast cancer Electronic Health Records (EHRs) in English to French and Swedish. The purpose of this work was to test how much effort it would require to modify this limited-domain, template-based English generator to enable it to generate in French and Swedish. 
We describe the adaptation to both languages, viewing the grammatical aspects involved and explaining the modifications performed. This work illustrates how the same underlying knowledge representation can be used to generate output texts in multiple languages with only minor linguistic modifications.}, booktitle = {In the 9th Bar-Ilan Symposium on the Foundations of Artificial Intelligence (BISFAI) }, author = {Dannélls, Dana and Deléger, Louise}, year = {2007}, } @inProceedings{Kokkinakis-Dimitrios2006-33936, title = {Recognizing Acronyms and their Definitions in Swedish Medical Texts}, abstract = {This paper addresses the task of recognizing acronym-definition pairs in Swedish (medical) texts as well as the compilation of a freely available sample of such manually annotated pairs. A material suitable not only for supervised learning experiments, but also as a testbed for the evaluation of the quality of future acronym-definition recognition systems. There are a number of approaches to the identification described in the literature, particularly within the biomedical domain, but none of those addresses the variation and complexity exhibited in a language other than English. This is realized by the fact that we can have a mixture of two languages in the same document and/or sentence, i.e. Swedish and English; that Swedish is a compound language that significantly deteriorates the performance of previous approaches (without adaptations) and, most importantly, the fact that there is a large variation of possible acronym-definition permutations realized in the analysed corpora, a variation that is usually ignored in previous studies. }, booktitle = {roceedings of the 5th Languages Resources and Evalutaion (LREC). 
}, author = {Kokkinakis, Dimitrios and Dannélls, Dana}, year = {2006}, } @inProceedings{Dannélls-Dana2006-66478, title = {Automatic Acronym Recognition}, abstract = {This paper deals with the problem of recognizing and extracting acronym- definition pairs in Swedish medical texts. This project applies a rule-based method to solve the acronym recognition task and compares and evaluates the results of different machine learning algorithms on the same task. The method proposed is based on the approach that acronym-definition pairs follow a set of patterns and other regularities that can be usefully applied for the acronym identification task. Supervised machine learning was applied to monitor the performance of the rule-based method, using Memory Based Learning (MBL). The rule-based algorithm was evaluated on a hand tagged acronym corpus and performance was measured using standard measures recall, precision and f-score. The results show that performance could further improve by increasing the training set and modifying the input settings for the machine learning algorithms. An analysis of the errors produced indicates that further improvement of the rule-based method requires the use of syntactic information and textual pre-processing.}, booktitle = {Proceedings of the 11th conference on European chapter of the Association for Computational Linguistics (EACL)}, author = {Dannélls, Dana}, year = {2006}, ISBN = {1-932432-59-0}, }