Hoppa till huvudinnehåll

BibTeX

% Workshop paper presenting the Swedish PoliGraph, a semantic graph of Members of Parliament.
% Proper nouns in the title are brace-protected so sentence-casing styles keep their capitals;
% the volume editors live in the editor field rather than inside booktitle.
% NOTE(review): the "?" in the citation key looks like a mis-encoded "ø" from the export;
% the key is left unchanged so existing citations keep resolving.
@inProceedings{r?dveneide-2019-swedish-289474,
	title        = {The {Swedish} {PoliGraph}},
	abstract     = {As part of a larger project on argument mining of Swedish parliamentary data, we have created a semantic graph that, together with named entity recognition and resolution (NER), should make it easier to establish connections between arguments in a given debate. The graph is essentially a semantic database that keeps track of Members of Parliament (MPs), in particular their presence in the parliament and activity in debates, but also party affiliation and participation in commissions. The hope is that the Swedish PoliGraph will enable us to perform named entity resolution on debates in the Swedish parliament with a high accuracy, with the aim of determining to whom an argument is directed.},
	booktitle    = {Proceedings of the 6th Workshop on Argument Mining, August 1, 2019, Florence, Italy},
	editor       = {Stein, Benno and Wachsmuth, Henning},
	author       = {Rødven-Eide, Stian},
	year         = {2019},
	publisher    = {Association for Computational Linguistics},
	address      = {Stroudsburg, PA},
	ISBN         = {978-1-950737-33-8},
}

% Conference paper presenting a one-billion-word dataset of contemporary Swedish.
% The corpus name and the NLP acronym are brace-protected against sentence-casing styles;
% the series name is kept in the series field instead of being prefixed to booktitle.
% NOTE(review): the "?" in the citation key looks like a mis-encoded "ø" from the export;
% the key is left unchanged so existing citations keep resolving.
@inProceedings{r?dveneide-etal-2016-swedish-250073,
	title        = {The {Swedish Culturomics Gigaword Corpus}: A One Billion Word {Swedish} Reference Dataset for {NLP}},
	abstract     = {In this paper we present a dataset of contemporary Swedish containing one billion words. The dataset consists of a wide range of sources, all annotated using a state-of-the-art corpus annotation pipeline, and is intended to be a static and clearly versioned dataset. This will facilitate reproducibility of experiments across institutions and make it easier to compare NLP algorithms on contemporary Swedish. The dataset contains sentences from 1950 to 2015 and has been carefully designed to feature a good mix of genres balanced over each included decade. The sources include literary, journalistic, academic and legal texts, as well as blogs and web forum entries.},
	booktitle    = {Digital Humanities 2016. From Digitization to Knowledge 2016: Resources and Methods for Semantic Processing of Digital Works/Texts, July 11, 2016, Krakow, Poland},
	series       = {Linköping Electronic Conference Proceedings},
	author       = {Rødven-Eide, Stian and Tahmasebi, Nina and Borin, Lars},
	year         = {2016},
	publisher    = {Linköping University Electronic Press},
	address      = {Linköping},
	ISBN         = {978-91-7685-733-5},
}

% Conference paper on building a gold standard for sentiment annotation of Swedish terms.
% Series, volume and editors are stored in their own fields instead of being embedded in
% booktitle, so the bibliography style decides how they are rendered.
@inProceedings{rouces-etal-2018-defining-264721,
	title        = {Defining a gold standard for a {Swedish} sentiment lexicon: Towards higher-yield text mining in the digital humanities},
	abstract     = {There is an increasing demand for multilingual sentiment analysis, and most work on sentiment lexicons is still carried out based on English lexicons like WordNet. In addition, many of the non-English sentiment lexicons that do exist have been compiled by (machine) translation from English resources, thereby arguably obscuring possible language-specific characteristics of sentiment-loaded vocabulary. In this paper we describe the creation from scratch of a gold standard for the sentiment annotation of Swedish terms as a first step towards the creation of a full-fledged sentiment lexicon for Swedish.},
	booktitle    = {Proceedings of the Digital Humanities in the Nordic Countries 3rd Conference, Helsinki, Finland, March 7--9, 2018},
	editor       = {Mäkelä, Eetu and Tolonen, Mikko and Tuominen, Jouni},
	series       = {CEUR Workshop Proceedings},
	volume       = {2084},
	author       = {Rouces, Jacobo and Borin, Lars and Tahmasebi, Nina and Rødven-Eide, Stian},
	year         = {2018},
	publisher    = {University of Helsinki, Faculty of Arts},
	address      = {Helsinki},
}

% LREC 2018 paper on creating a gold standard for sentiment annotation of Swedish terms.
% The language name in the title is brace-protected against sentence-casing styles, and the
% day range in booktitle uses an en dash (double hyphen).
% NOTE(review): address holds the conference city as exported; confirm whether the
% publisher's city is wanted here instead.
@inProceedings{rouces-etal-2018-generating-264719,
	title        = {Generating a Gold Standard for a {Swedish} Sentiment Lexicon},
	abstract     = {We create a gold standard for sentiment annotation of Swedish terms, using the freely available SALDO lexicon and the Gigaword corpus. For this purpose, we employ a multi-stage approach combining corpus-based frequency sampling, direct score annotation and Best-Worst Scaling. In addition to obtaining a gold standard, we analyze the data from our process and we draw conclusions about the optimal sentiment model.},
	booktitle    = {LREC 2018, Eleventh International Conference on Language Resources and Evaluation, May 7--12, 2018, Miyazaki (Japan)},
	author       = {Rouces, Jacobo and Tahmasebi, Nina and Borin, Lars and Rødven-Eide, Stian},
	year         = {2018},
	publisher    = {European Language Resources Association (ELRA)},
	address      = {Miyazaki},
	ISBN         = {979-10-95546-00-9},
}

% LREC 2018 paper describing SenSALDO, a sense-disambiguated sentiment lexicon for Swedish.
% The resource name and language name in the title are brace-protected against sentence-casing
% styles; the booktitle date is written month-first with an en dash (double hyphen).
% NOTE(review): address holds the conference city as exported; confirm whether the
% publisher's city is wanted here instead.
@inProceedings{rouces-etal-2018-sensaldo-264720,
	title        = {{SenSALDO}: Creating a Sentiment Lexicon for {Swedish}},
	abstract     = {The natural language processing subfield known as sentiment analysis or opinion mining has seen an explosive expansion over the last decade or so, and sentiment analysis has become a standard item in the NLP toolbox. Still, many theoretical and methodological questions remain unanswered and resource gaps unfilled. Most work on automated sentiment analysis has been done on English and a few other languages; for most written languages of the world, this tool is not available. This paper describes the development of an extensive sentiment lexicon for written (standard) Swedish. We investigate different methods for developing a sentiment lexicon for Swedish. We use an existing gold standard dataset for training and testing. For each word sense from the SALDO Swedish lexicon, we assign a real value sentiment score in the range [-1,1] and produce a sentiment label. We implement and evaluate three methods: a graph-based method that iterates over the SALDO structure, a method based on random paths over the SALDO structure and a corpus-driven method based on word embeddings. The resulting sense-disambiguated sentiment lexicon (SenSALDO) is an open source resource and freely available from Språkbanken, The Swedish Language Bank at the University of Gothenburg.},
	booktitle    = {LREC 2018, Eleventh International Conference on Language Resources and Evaluation, May 7--12, 2018, Miyazaki (Japan)},
	author       = {Rouces, Jacobo and Tahmasebi, Nina and Borin, Lars and Rødven-Eide, Stian},
	year         = {2018},
	publisher    = {European Language Resources Association (ELRA)},
	address      = {Miyazaki},
	ISBN         = {979-10-95546-00-9},
}

% Paper presenting the COCTAILL corpus of coursebooks for Swedish as a second language.
% The title carries no trailing period (the bibliography style adds punctuation) and the
% language name is brace-protected against sentence-casing styles.
% NOTE(review): booktitle holds what looks like a series name (paired with volume 22); the
% actual proceedings title is not present in this record, so the field is left as exported.
% Confirm against the publication before moving it to a series field.
@inProceedings{volodina-etal-2014-what-206132,
	title        = {You get what you annotate: a pedagogically annotated corpus of coursebooks for {Swedish} as a Second Language},
	abstract     = {We present the COCTAILL corpus, containing over 700.000 tokens of Swedish texts from 12 coursebooks aimed at second/foreign language (L2) learning. Each text in the corpus is labelled with a proficiency level according to the CEFR proficiency scale. Genres, topics, associated activities, vocabulary lists and other types of information are annotated in the coursebooks to facilitate Second Language Acquisition (SLA)-aware studies and experiments aimed at Intelligent Computer-Assisted Language Learning (ICALL). Linguistic annotation in the form of parts-of-speech (POS; e.g. nouns, verbs), base forms (lemmas) and syntactic relations (e.g. subject, object) has been also added to the corpus.
In the article we describe our annotation scheme and the editor we have developed for the content mark-up of the coursebooks, including the taxonomy of pedagogical activities and linguistic skills. Inter-annotator agreement has been computed and reported on a subset of the corpus.
Surprisingly, we have not found any other examples of pedagogically marked-up corpora based on L2 coursebooks to draw on existing experiences. Hence, our work may be viewed as “groping in the darkness” and eventually a starting point for others.
The paper also presents our first quantitative exploration of the corpus where we focus on textually and pedagogically annotated features of the coursebooks to exemplify what types of studies can be performed using the presented annotation scheme. We explore trends shown in use of topics and genres over proficiency levels and compare pedagogical focus of exercises across levels.
The final section of the paper summarises the potential this corpus holds for research within SLA and various ICALL tasks.},
	booktitle    = {NEALT Proceedings Series},
	author       = {Volodina, Elena and Pilán, Ildikó and Rødven-Eide, Stian and Heidarsson, Hannes},
	year         = {2014},
	volume       = {22},
	ISBN         = {978-91-7519-175-1},
	pages        = {128--144},
}