Hoppa till huvudinnehåll
Språkbanken Text är en avdelning inom Språkbanken.

BibTeX

% Conference paper. Venue and dates are kept inside booktitle, which therefore
% carries no trailing period (the bibliography style adds its own punctuation).
@inproceedings{rouces-etal-2020-creating-290695,
	title        = {Creating an Annotated Corpus for Aspect-Based Sentiment Analysis in {Swedish}},
	abstract     = {Aspect-Based Sentiment Analysis constitutes a more fine-grained alternative to traditional sentiment analysis at sentence level. In addition to a sentiment value denoting how positive or negative a particular opinion or sentiment expression is, it identifies additional aspects or 'slots' that characterize the opinion. Some typical aspects are target and source, i.e. who holds the opinion and about which entity or aspect is the opinion. We present a large Swedish corpus annotated for Aspect-Based Sentiment Analysis. Each sentiment expression is annotated as a tuple that contains the following fields: one among 5 possible sentiment values, the target, the source, and whether the sentiment expressed is ironic. In addition, the linguistic element that conveys the sentiment is identified too. Sentiment for a particular topic is also annotated at title, paragraph and document level.
The documents are articles obtained from two Swedish media (Svenska Dagbladet and Aftonbladet) and one online forum (Flashback), totalling around 4000 documents. The corpus is freely available and we plan to use it for training and testing an Aspect-Based Sentiment Analysis system.},
	booktitle    = {Proceedings of the 5th conference in Digital Humanities in the Nordic Countries, Riga, Latvia, October 21--23, 2020},
	author       = {Rouces, Jacobo and Borin, Lars and Tahmasebi, Nina},
	year         = {2020},
	publisher    = {CEUR Workshop Proceedings},
}

% Shared-task overview paper. The task name in the title is brace-protected so
% that sentence-casing styles keep its capitalisation.
@inproceedings{schlechtweg-etal-2020-semeval-295463,
	title        = {{SemEval-2020} Task 1: Unsupervised Lexical Semantic Change Detection},
	abstract     = {Lexical Semantic Change detection, i.e., the task of identifying words that change meaning over time, is a very active research area, with applications in NLP, lexicography, and linguistics. Evaluation is currently the most pressing problem in Lexical Semantic Change detection, as no gold standards are available to the community, which hinders progress. We present the results of the first shared task that addresses this gap by providing researchers with an evaluation framework and manually annotated, high-quality datasets for English, German, Latin, and Swedish. 33 teams submitted 186 systems, which were evaluated on two subtasks.},
	booktitle    = {Proceedings of the Fourteenth Workshop on Semantic Evaluation (SemEval2020), Barcelona, Spain (Online), December 12, 2020},
	author       = {Schlechtweg, Dominik and McGillivray, Barbara and Hengchen, Simon and Dubossarsky, Haim and Tahmasebi, Nina},
	year         = {2020},
	publisher    = {ACL},
}

% Data collection released alongside the shared task. howpublished repeats the
% publisher value because the standard styles print only howpublished for misc
% entries; publisher is kept for tools that read it.
@misc{schlechtweg-etal-2020-post-295466,
	title        = {Post-Evaluation Data for {SemEval-2020} Task 1: Unsupervised Lexical Semantic Change Detection},
	abstract     = {This data collection contains the post-evaluation data for SemEval-2020 Task 1: Unsupervised Lexical Semantic Change Detection: (1) the starting kit to download data, and examples for competing in the CodaLab challenge including baselines; (2) the true binary change scores of the targets for Subtask 1, and their true graded change scores for Subtask 2 (test\_data\_truth/); (3) the scoring program used to score submissions against the true test data in the evaluation and post-evaluation phase (scoring\_program/); and (4) the results of the evaluation phase including, for example, analysis plots (plots/) displaying the results:},
	author       = {Schlechtweg, Dominik and McGillivray, Barbara and Hengchen, Simon and Dubossarsky, Haim and Tahmasebi, Nina},
	year         = {2020},
	publisher    = {Zenodo},
	howpublished = {Zenodo},
}

% Swedish test data set for SemEval 2020 Task 1. The record carries no
% publisher or repository information.
% NOTE(review): add howpublished/doi once the hosting repository is confirmed.
@misc{tahmasebi-etal-2020-swedish-295465,
	title        = {{Swedish} Test Data for {SemEval} 2020 Task 1: Unsupervised Lexical Semantic Change Detection},
	abstract     = {This data collection contains the Swedish test data for SemEval 2020 Task 1: Unsupervised Lexical Semantic Change Detection. It consists of a Swedish text corpus pair (corpus1/, corpus2/) and 31 lemmas which have been annotated for their lexical semantic change between the two corpora (targets.txt). We sample from the KubHist2 corpus, digitized by the National Library of Sweden, and available through the Språkbanken corpus infrastructure Korp (Borin et al., 2012). The full corpus is available through a CC BY (attribution) license. Each word for which the lemmatizer in the Korp pipeline has found a lemma is replaced with the lemma. In cases where the lemmatizer cannot find a lemma, we leave the word as is (i.e., unlemmatized, no lower-casing). KubHist contains very frequent OCR errors, especially for the older data. More detail about the properties and quality of the Kubhist corpus can be found in (Adesam et al., 2019).},
	author       = {Tahmasebi, Nina and Hengchen, Simon and Schlechtweg, Dominik and McGillivray, Barbara and Dubossarsky, Haim},
	year         = {2020},
}