Språkbanken Text är en avdelning inom Språkbanken.
BibTeX

% Conference paper on the DIGARV platform (Huminfra Conference 2024). Braces in the title protect the acronym from style recasing.
@inProceedings{ahlfeldt-matsson-2024-digarv-334595,
	title        = {The {DIGARV} Platform: A collaborative platform for working with cultural heritage data and research data},
	abstract     = {This article covers an easy-to-use research tool for collaborative work. The tool has been adapted for structured data and high-resolution images within four research projects at GRIDH. The platform is especially designed for working with temporal and spatial data. Furthermore, the platform gives researchers access to a relational database system through input forms and access to external cultural heritage data including high-resolution images. This way the platform also aims to utilize external data published as Linked Open Data (LOD) and, at the same time, prepare its own research data for publishing as LOD. Because of the spatial and temporal nature of the data, it is visualized in time and space through maps and timelines to give overview and context during the data management phase.},
	booktitle    = {Proceedings of the Huminfra Conference, 10-11 January, 2024, Gothenburg, Sweden},
	editor       = {Volodina, Elena and Bouma, Gerlof and Forsberg, Markus and Kokkinakis, Dimitrios and Alfter, David and Fridlund, Mats and Horn, Christian and Ahrenberg, Lars and Blåder, Anna},
	author       = {Åhlfeldt, Johan and Matsson, Arild},
	year         = {2024},
	publisher    = {Linköping University Electronic Press},
	address      = {Linköping},
	ISBN         = {978-91-8075-512-2},
}

% Chapter on the SweLL portal annotation management tool (CLARIN Annual Conference 2021 selected papers). Volume editors are in the editor field, not in booktitle.
@incollection{alimohammed-etal-2022-annotation-321989,
	title        = {Annotation Management Tool: A Requirement for Corpus Construction},
	abstract     = {We present an annotation management tool, SweLL portal, that has been developed for the purposes of the SweLL infrastructure project for building a learner corpus of Swedish (Volodina et al., 2019). The SweLL portal has been used for supervised access to the database, data versioning, import and export of data and metadata, statistical overview, administration of annotation tasks, monitoring of annotation tasks and reliability controls. The development of the portal was driven by visions of longitudinal sustainable data storage and was partially shaped by situational needs reported by portal users, including project managers, researchers, and annotators.},
	booktitle    = {Selected Papers from the CLARIN Annual Conference 2021, Virtual Event, 2021, 27–29 September},
	editor       = {Monachini, Monica and Eskevich, Maria},
	author       = {Ali Mohammed, Yousuf and Matsson, Arild and Volodina, Elena},
	year         = {2022},
	publisher    = {Linköping Electronic Conference},
	address      = {Linköping, Sweden},
	ISBN         = {978-91-7929-444-1},
	pages        = {101--108},
}

% Conference paper summarising the Queerlit bibliography project (Huminfra Conference 2024). Proceedings editors are in the editor field, not in booktitle.
@inProceedings{humlesjo-etal-2024-queerlit-334589,
	title        = {{Queerlit} – a bibliography of {Swedish} fiction with {LGBTQI} topics},
	abstract     = {This paper summarizes the project Queerlit: Metadata and Searchability for LGBTQ+ Literary Heritage 2020-2023 and discusses some challenges in the development of this resource. The Queerlit project consist of four parts: 1. Creating a bibliography of Swedish fiction with LGBTQI themes 2. Creating a Swedish thesaurus (QLIT), adapted from the of the linked open data thesaurus Homosaurus 3. Assigning all material in the bibliography with subject headings from QLIT. 4. A web user interface for searching the material All four parts are integrated with the Swedish union catalog, Libris, making the results of the project available for all under a CC0 license. QLIT is the first external thesaurus integrated in the linked open data framework used in the technical platform of Libris, XL. The bibliography spans from rune stones from the 7th century to recently published fiction. When applying subject headings for the material both general aspects of the work and specific LGBTQI topics are described, making this the most comprehensive retrospective indexing project of Swedish literature to date. The underlying knowledge organization is made a prominent method of interacting with the search interface, which is empirically designed around the needs of various user groups.},
	booktitle    = {Proceedings of the Huminfra Conference, 10-11 January, 2024, Gothenburg, Sweden},
	editor       = {Volodina, Elena and Bouma, Gerlof and Forsberg, Markus and Kokkinakis, Dimitrios and Alfter, David and Fridlund, Mats and Horn, Christian and Ahrenberg, Lars and Blåder, Anna},
	author       = {Humlesjö, Siska and Bergenmar, Jenny and Matsson, Arild},
	year         = {2024},
	publisher    = {Linköping University Electronic Press},
	address      = {Linköping},
	ISBN         = {978-91-8075-512-2},
}

% Conference paper on building and publishing the Queerlit thesaurus (QLIT) as Linked Open Data (DHNB2023).
@inProceedings{matsson-kristrom-2023-building-329957,
	title        = {Building and Serving the {Queerlit} Thesaurus as Linked Open Data},
	abstract     = {This paper describes the creation of the Queer Literature Indexing Thesaurus (QLIT) as well as the digital infrastructure supporting the workflow for editing and publishing it. The purpose of QLIT is to adequately catalogue Swedish fiction with LGBTQI themes. It is continually edited in plain-text RDF and automatically processed for correctness and storage. Finally, it is published online as Linked Open Data and used with external systems. The technical approach relies on scripts and applications developed ad hoc, rather than existing solutions. Code is available on https://github.com/gu-gridh/queerlit-terms},
	booktitle    = {DHNB2023 Conference Proceedings. Sustainability: Environment - Community - Data. The 7th Digital Humanities in the Nordic and Baltic Countries Conference. Oslo – Stavanger – Bergen, Norway. March 8–10, 2023},
	author       = {Matsson, Arild and Kriström, Olov},
	year         = {2023},
	publisher    = {Universitetet i Oslo},
	address      = {Oslo},
}

% Conference paper on automatic pseudonymization of Swedish learner essays (COLING 2020). Percent signs in the abstract are escaped so the field is safe when passed through LaTeX.
@inProceedings{volodina-etal-2020-towards-300069,
	title        = {Towards Privacy by Design in Learner Corpora Research: A Case of On-the-fly Pseudonymization of {Swedish} Learner Essays},
	abstract     = {This article reports on an ongoing project aiming at automatization of pseudonymization of learner essays. The process includes three steps: identification of personal information in an unstructured text, labeling for a category, and pseudonymization. We experiment with rule-based methods for detection of 15 categories out of the suggested 19 (Megyesi et al., 2018) that we deem important and/or doable with automatic approaches. For the detection and labeling steps, we use resources covering personal names, geographic names, company and university names and others. For the pseudonymization step, we replace the item using another item of the same type from the above-mentioned resources. Evaluation of the detection and labeling steps are made on a set of manually anonymized essays. The results are promising and show that 89\% of the personal information can be successfully identified in learner data, and annotated correctly with an inter-annotator agreement of 86\% measured as Fleiss kappa and Krippendorff's alpha.},
	booktitle    = {Proceedings of the 28th International Conference on Computational Linguistics (COLING), December 8-13, 2020, Barcelona, Spain (Online)},
	author       = {Volodina, Elena and Ali Mohammed, Yousuf and Derbring, Sandra and Matsson, Arild and Megyesi, Beáta},
	year         = {2020},
	publisher    = {International Committee on Computational Linguistics},
	ISBN         = {978-1-952148-27-9},
}

% Workshop paper presenting ImageTTR, an extension to pyTTR (IWCS 2019 workshop). Workshop editors are in the editor field, not in booktitle.
@inProceedings{matsson-etal-2019-imagettr-284011,
	title        = {{ImageTTR}: Grounding {Type Theory with Records} in Image Classification for Visual Question Answering},
	abstract     = {We present ImageTTR, an extension to the Python implementation of Type Theory with Records (pyTTR) which connects formal record type representation with image classifiers implemented as deep neural networks. The Type Theory with Records framework serves as a knowledge representation system for natural language the representations of which are grounded in perceptual information of neural networks. We demonstrate the benefits of this symbolic and data-driven hybrid approach on the task of visual question answering.},
	booktitle    = {Proceedings of the IWCS 2019 Workshop on Computing Semantics with Types, Frames and Related Structures, May 24, 2019, Gothenburg, Sweden},
	editor       = {Osswald, Rainer and Retoré, Christian and Sutton, Peter},
	author       = {Matsson, Arild and Dobnik, Simon and Larsson, Staffan},
	year         = {2019},
	publisher    = {Association for Computational Linguistics},
	address      = {Stroudsburg, PA},
	ISBN         = {978-1-950737-25-3},
}

% Journal article presenting the SweLL learner corpus of Swedish (Northern European Journal of Language Technology, vol. 6).
@article{volodina-etal-2019-swell-285609,
	title        = {The {SweLL} Language Learner Corpus: From Design to Annotation},
	abstract     = {The article presents a new language learner corpus for Swedish, SweLL, and the methodology from collection and pesudonymisation to protect personal information of learners to annotation adapted to second language learning. The main aim is to deliver a well-annotated corpus of essays written by second language learners of Swedish and make it available for research through a browsable environment. To that end, a new annotation tool and a new project management tool have been implemented, – both with the main purpose to ensure reliability and quality of the final corpus. In the article we discuss reasoning behind metadata selection, principles of gold corpus compilation and argue for separation of normalization from correction annotation.},
	journal      = {Northern European Journal of Language Technology},
	author       = {Volodina, Elena and Granstedt, Lena and Matsson, Arild and Megyesi, Beáta and Pilán, Ildikó and Prentice, Julia and Rosén, Dan and Rudebeck, Lisa and Schenström, Carl-Johan and Sundberg, Gunlög and Wirén, Mats},
	year         = {2019},
	volume       = {6},
	pages        = {67--104},
}

% Conference abstract on the SVALA annotation tool for learner corpora (LCR-2019 book of abstracts).
@inProceedings{volodina-etal-2019-svala-285617,
	title        = {{SVALA}: an Annotation Tool for Learner Corpora generating parallel texts},
	abstract     = {Learner corpora are actively used for research on Language Acquisition and in Learner Corpus Research (LCR). The data is, however, very expensive to collect and manually annotate, and includes steps like anonymization, normalization, error annotation, linguistic annotation. In the past, projects often re-used tools from a number of different projects for the above steps. As a result, various input and output formats between the tools needed to be converted, which increased the complexity of the task. In the present project, we are developing a tool that handles all of the above-mentioned steps in one environment maintaining a stable interpretable format between the steps. A distinguishing feature of the tool is that users work in a usual environment (plain text) while the tool visualizes all performed edits via a graph that links an original learner text with an edited one, token by token.},
	booktitle    = {Learner Corpus Research conference (LCR-2019), Warsaw, 12-14 September 2019, Book of abstracts},
	author       = {Volodina, Elena and Matsson, Arild and Rosén, Dan and Wirén, Mats},
	year         = {2019},
}

% Conference paper describing the SVALA tool for annotating second-language learner text (CLARIN Annual Conference 2018 selected papers). The address field holds the publisher's city.
@inProceedings{wiren-etal-2018-svala-285624,
	title        = {{SVALA}: Annotation of Second-Language Learner Text Based on Mostly Automatic Alignment of Parallel Corpora},
	abstract     = {Annotation of second-language learner text is a cumbersome manual task which in turn requires interpretation to postulate the intended meaning of the learner’s language. This paper describes SVALA, a tool which separates the logical steps in this process while providing rich visual support for each of them. The first step is to pseudonymize the learner text to fulfil the legal and ethical requirements for a distributable learner corpus. The second step is to correct the text, which is carried out in the simplest possible way by text editing. During the editing, SVALA automatically maintains a parallel corpus with alignments between words in the learner source text and corrected text, while the annotator may repair inconsistent word alignments. Finally, the actual labelling of the corrections (the postulated errors) is performed. We describe the objectives, design and workflow of SVALA, and our plans for further development.},
	booktitle    = {Selected papers from the CLARIN Annual Conference 2018, Pisa, 8-10 October 2018},
	editor       = {Skadina, Inguna and Eskevich, Maria},
	author       = {Wirén, Mats and Matsson, Arild and Rosén, Dan and Volodina, Elena},
	year         = {2018},
	publisher    = {Linköping University Electronic Press},
	address      = {Linköping},
	ISBN         = {978-91-7685-034-3},
}
Sidansvarig: sb-webb