@comment{
  Publication list, one entry per work, newest first.
  Encoding: UTF-8. The citation keys themselves contain non-ASCII letters
  and are kept unchanged so existing cite commands keep resolving; process
  this file with a UTF-8 aware toolchain.
  Conventions: lowercase entry types and field names, one field per line,
  names in Last, First form, page and date ranges written with a double
  hyphen, and braces around title words whose capitalisation must survive
  sentence-casing styles.
}

@inproceedings{Dannélls-Dana2018-271181,
  author    = {Dannélls, Dana and Olsson, Leif-Jöran},
  title     = {Integrating language resources in two {OCR} engines to improve processing of historical {Swedish} text},
  booktitle = {{CLARIN} Annual Conference},
  year      = {2018},
  abstract  = {We are aiming to address the difficulties that many History and Social Sciences researchers struggle with to bring in non-digitized text into language analysis workflows. In this paper we present the language resources and material we used for training two Optical Character Recognition engines for processing historical Swedish text written in Fraktur (blackletter). The trained models, resources and dictionaries are freely available and accessible through our web service, hosted at Språkbanken, to enable users and developers easy access for extraction of historical Swedish text a that are only available in images for further processing.},
}

@inproceedings{Bergenmar-Jenny2015-228773,
  author    = {Bergenmar, Jenny and Olsson, Leif-Jöran},
  title     = {Tracing Cultural Transfer Through Multiple Translation Analysis. {The} Case of the {Swedish} 19th-Century Bourgeois Novel in {German} and {Czech}},
  booktitle = {Digital Literary Studies. International Conference May 14--15 2015, Coimbra, Portugal},
  year      = {2015},
  abstract  = {In the last decades, Comparative Literature has become more directed towards questions of transculturality. This renders translations of literary texts an important role as a vehicle not just for the transfer of text and language, but also of ideas and cultures. Digital methods for comparing multiple translations within and across languages might prove to be important for exploring how, for example, a Swedish 19th century bourgeois novel is reframed in Czech translations. The chosen example is A Merchant House (1859) by Emilie Flygare–Carlén (1807–1892) who was one of the most popular authors in Czech speaking regions in the late 19th Century. In this paper existing collation tools are used for comparing two different Czech translations (1872 and 1910), by two different translators. This might both reveal how the gender, context and position of the translator colours the literary text and how the translations are adapted to changing literary trends. Furthermore, parallel text alignment is tried as a method for comparing across languages, since the Czech translation is made from a German translation. Are the Czech translations subject to “foreignization” or “domestication”? Or do they retain the same traits as the German translation, which is the source of the first Czech translation? Does the systematic comparison of multiple translations contribute to the understanding of how texts move from certain gendered cultural contexts and ideologies to others?},
}

@comment{
  NOTE(review): the article entry below has no journal field, which is
  required for this entry type. The journal name is not recorded anywhere
  in this file; look it up and add it.
}
@article{Borin-Lars2014-198286,
  author   = {Borin, Lars and Dannélls, Dana and Olsson, Leif-Jöran},
  title    = {Geographic visualization of place names in {Swedish} literary texts},
  volume   = {29},
  number   = {3},
  pages    = {400--404},
  year     = {2014},
  abstract = {This article describes the development of a geographical information system (GIS) at Språkbanken as part of a visualization solution to be used in an archive of historical Swedish literary texts. The research problems we are aiming to address concern orthographic and morphological variation, missing place names, and missing place name coordinates. Some of these problems form a central part in the development of methods and tools for the automatic analysis of historical Swedish literary texts at our research unit. We discuss the advantages and challenges of covering large-scale spelling variation in place names from different sources and in generating maps with focus on different time periods.},
}

@inproceedings{Lyngfelt-Benjamin2014-208457,
  author    = {Lyngfelt, Benjamin and Borin, Lars and Bäckström, Linnéa and Forsberg, Markus and Olsson, Leif-Jöran and Prentice, Julia and Rydstedt, Rudolf and Sköldberg, Emma and Tingsell, Sofia and Uppström, Jonatan},
  title     = {Ett svenskt konstruktikon. {Grammatik} möter lexikon},
  booktitle = {Svenskans beskrivning : Förhandlingar vid Trettiotredje sammankomsten för svenskans beskrivning. Helsingfors den 15–17 maj 2013},
  volume    = {33},
  pages     = {268--279},
  year      = {2014},
  isbn      = {978-951-51-0120-4},
}

@inproceedings{Ahlberg-Malin2014-210083,
  author    = {Ahlberg, Malin and Borin, Lars and Dannélls, Dana and Forsberg, Markus and Toporowska Gronostaj, Maria and Friberg Heppin, Karin and Johansson, Richard and Kokkinakis, Dimitrios and Olsson, Leif-Jöran and Uppström, Jonatan},
  title     = {Swedish {FrameNet++} {The} Beginning of the End and the End of the Beginning},
  booktitle = {Proceedings of the Fifth Swedish Language Technology Conference, Uppsala, 13--14 November 2014},
  year      = {2014},
}

@inproceedings{Dannélls-Dana2013-178095,
  author    = {Dannélls, Dana and Borin, Lars and Olsson, Leif-Jöran},
  title     = {{MapServer} for {Swedish} Language Technology},
  booktitle = {Digital Humanities},
  year      = {2013},
  abstract  = {The MapServer application used by the Swedish Language Bank provides new opportunities for visualizing geographical information found in its large repository of written texts, in particular literary texts. The application is capable of performing coordinate search on the basis of recognized place names and rendering both static and dynamic maps that display their geographical locations.},
}

@inproceedings{Ahlberg-Malin2013-178355,
  author    = {Ahlberg, Malin and Borin, Lars and Forsberg, Markus and Hammarstedt, Martin and Olsson, Leif-Jöran and Olsson, Olof and Roxendal, Johan and Uppström, Jonatan},
  title     = {Korp and {Karp} – a bestiary of language resources: the research infrastructure of {Språkbanken}},
  booktitle = {Proceedings of the 19th Nordic Conference of Computational Linguistics (NODALIDA 2013), May 22–24, 2013, Oslo University, Norway},
  series    = {NEALT Proceedings Series},
  number    = {16},
  pages     = {429--433},
  year      = {2013},
  abstract  = {A central activity in Språkbanken, an R\&D unit at the University of Gothenburg, is the systematic construction of a research infrastructure based on interoperability and widely accepted standards for metadata and data. The two main components of this infrastructure deal with text corpora and with lexical resources. For modularity and flexibility, both components have a backend, or server-side part, accessed through an API made up of a set of well-defined web services. This means that there can be any number of different user interfaces to these components, corresponding, e.g., to different research needs. Here, we will demonstrate the standard corpus and lexicon search interfaces, designed primarily for linguistic searches: Korp and Karp.},
}

@comment{
  NOTE(review): volume = 2013 in the entry below repeats the year and looks
  like an export artefact rather than a real volume number; kept as found,
  please confirm or remove.
}
@inproceedings{Borin-Lars2013-186032,
  author    = {Borin, Lars and Forsberg, Markus and Olsson, Leif-Jöran and Olsson, Olof and Uppström, Jonatan},
  editor    = {Kosem, I. and Kallas, J. and Gantar, P. and Krek, S. and Langemets, M. and Tuulik, M.},
  title     = {The lexical editing system of {Karp}},
  booktitle = {Electronic lexicography in the 21st century: thinking outside the paper. Proceedings of the eLex 2013 conference, 17--19 October 2013, Tallinn, Estonia},
  volume    = {2013},
  pages     = {503--516},
  year      = {2013},
  isbn      = {978-961-93594-0-2},
  abstract  = {Karp is the open lexical infrastructure of Språkbanken (the Swedish Language Bank). The infrastructure has three main functions: (1) to support the work on creating, curating, and integrating our various lexical resources; (2) to publish the resources, making them searchable and downloadable; and (3) to offer advanced editing functionalities. An important feature of the lexical infrastructure is also that we maintain a strong bidirectional connection to our corpus infrastructure. At the heart of the infrastructure is the SweFN++ project with the goal to create free Swedish lexical resources geared towards language technology applications. The infrastructure currently hosts 23 Swedish lexical resources. The resources are integrated through links to a pivot lexical resource, SALDO, a large morphological and lexical-semantic resource for modern Swedish.},
}

@inproceedings{Sköldberg-Emma2013-186041,
  author    = {Sköldberg, Emma and Bäckström, Linnéa and Borin, Lars and Forsberg, Markus and Lyngfelt, Benjamin and Olsson, Leif-Jöran and Prentice, Julia and Rydstedt, Rudolf and Tingsell, Sofia and Uppström, Jonatan},
  editor    = {Kosem, I. and Kallas, J. and Gantar, P. and Krek, S. and Langemets, M. and Tuulik, M.},
  title     = {Between Grammars and Dictionaries: a {Swedish} Constructicon},
  booktitle = {Electronic lexicography in the 21st century: thinking outside the paper. Proceedings of the eLex 2013 conference, 17--19 October 2013, Tallinn, Estonia},
  pages     = {310--327},
  year      = {2013},
  publisher = {Trojina, Institute for Applied Slovene Studies/Eesti Keele Instituut},
  address   = {Ljubljana/Tallinn},
  abstract  = {This paper introduces the Swedish Constructicon (SweCxn), a database of Swedish constructions currently under development. We also present a small study of the treatment of constructions in Swedish (paper) dictionaries, thus illustrating the need for a constructionist approach, and discuss three different methods used to identify potential constructions for inclusion in the constructicon. SweCxn is a freely available electronic resource, with a particular focus on semi-general linguistic patterns of the type that are difficult to account for from a purely lexicographic or a purely grammatical perspective, and which therefore have tended to be neglected in both dictionaries and grammars. Far from being a small set of borderline cases, such constructions are both numerous and common. They are also quite problematic for second language acquisition as well as LT applications. Accordingly, various kinds of multi-word units have received more attention in recent years, not least from a lexicographic perspective. The coverage, however, is only partial, and the productivity of many constructions is hard to capture from a lexical viewpoint. To identify constructions for SweCxn, we use a combination of methods, such as working from existing construction descriptions for Swedish and other languages, applying LT tools to discover recurring patterns in texts, and extrapolating constructional information from dictionaries.},
}

@inproceedings{Borin-Lars2012-156079,
  author    = {Borin, Lars and Forsberg, Markus and Olsson, Leif-Jöran and Uppström, Jonatan},
  editor    = {Calzolari, Nicoletta and others},
  title     = {The open lexical infrastructure of {Språkbanken}},
  booktitle = {Proceedings of the 8th International Conference on Language Resources and Evaluation, May 23--25, 2012},
  pages     = {3598--3602},
  year      = {2012},
  isbn      = {978-2-9517408-7-7},
  abstract  = {We present our ongoing work on Karp, Språkbanken’s (the Swedish Language Bank) open lexical infrastructure, which has two main functions: (1) to support the work on creating, curating, and integrating our various lexical resources; and (2) to publish daily versions of the resources, making them searchable and downloadable. An important requirement on the lexical infrastructure is also that we maintain a strong bidirectional connection to our corpus infrastructure. At the heart of the infrastructure is the SweFN++ project with the goal to create free Swedish lexical resources geared towards language technology applications. The infrastructure currently hosts 15 Swedish lexical resources, including historical ones, some of which have been created from scratch using existing free resources, both external and in-house. The resources are integrated through links to a pivot lexical resource, SALDO, a large morphological and lexical-semantic resource for modern Swedish. SALDO has been selected as the pivot partly because of its size and quality, but also because its form and sense units have been assigned persistent identifiers (PIDs) to which the lexical information in other lexical resources and in corpora are linked.},
}

@inproceedings{Bergenmar-Jenny2012-169845,
  author    = {Bergenmar, Jenny and Olsson, Leif-Jöran},
  title     = {Connecting {European} Women Writers. {The} {Selma} {Lagerlöf} Archive and Women Writers Database},
  booktitle = {Digital Humanities 2012. 16--20 July 2012, Hamburg. Book of Abstracts},
  year      = {2012},
}

@comment{
  NOTE(review): the report entry below has no institution field, which is
  required for this entry type. The issuing body is not recorded in this
  file; look it up and add it.
}
@techreport{Borin-Lars2011-142495,
  author   = {Borin, Lars and Lindh, Jonas and Brandt, Martha and Olsson, Leif-Jöran},
  title    = {Metadata descriptions and other interoperability standards},
  year     = {2011},
  abstract = {An important aim of META-NORD is to upgrade and harmonize national language resources and tools in order to make them interoperable, within languages and across languages, with respect to their data formats and as far as possible also as regards their content. Since resources and to some extent tools will remain in one location – one of a number of META-NORD centers – the preferred way of accessing and utilizing resources and tools will be through metadata and APIs, allowing the assembly of on-the-fly tool-chains made up of standardized component language technology tools, processing distributed – and in many cases interlinked – language resources in standardized formats.},
}

@inproceedings{Olsson-Leif-Jöran2008-116044,
  author    = {Olsson, Leif-Jöran},
  title     = {Välkommen till {eXist}},
  booktitle = {FSCONS (http://fscons.org), 24–26 oktober 2008, Göteborg},
  year      = {2008},
  abstract  = {A walkthrough of the versatility of the native XML database eXist, (http://exist-db.org), combined with overwiews of XPath and XQuery.},
}

@inproceedings{Borin-Lars2007-44954,
  author    = {Borin, Lars and Kokkinakis, Dimitrios and Olsson, Leif-Jöran},
  title     = {Naming the past: Named entity and animacy recognition in 19th century {Swedish} literature},
  booktitle = {ACL 2007 Workshop on Language Technology for Cultural Heritage Data (LaTeCH 2007)},
  pages     = {1--8},
  year      = {2007},
}

@inproceedings{Olsson-Leif-Jöran2007-66850,
  author    = {Olsson, Leif-Jöran},
  title     = {How do you do {eXist}},
  booktitle = {Javaforum 2007-05-23},
  year      = {2007},
}

@inproceedings{Borin-Lars2006-116093,
  author    = {Borin, Lars and Olsson, Leif-Jöran},
  title     = {{ITG}-plattformen som korpusverktyg},
  booktitle = {Fjärde svenska lingvistikkonferensen (Sling 2006), 27–28 april 2006, Stockholm},
  year      = {2006},
  abstract  = {En genomgång och handfast presentation om hur ITG-plattformen kan användas som korpusverktyg.},
}

@inproceedings{Olsson-Leif-Jöran2006-116086,
  author    = {Olsson, Leif-Jöran},
  title     = {Does the {ITG} platform {eXist}?},
  booktitle = {XML Prague 2006 (http://xmlprague.cz), 17–18 juni 2006, Prag},
  year      = {2006},
  abstract  = {An overview of the ITG platform and its backend eXist (http://exist-db.org).},
}