% Conference papers from 2018 (Digital Humanities in the Nordic Countries
% 3rd Conference, and LREC 2018). Citation keys are kept exactly as exported
% so that existing \cite commands continue to resolve.
%
% Conventions used below:
%   - series/volume/editor are separate fields rather than being embedded in
%     booktitle, so the bibliography style controls their presentation.
%   - Words that must keep their capitalisation under sentence-casing styles
%     (proper nouns, product names) are wrapped in braces in title.
%   - Non-ASCII characters are stored as UTF-8, as in the original export.

@inproceedings{Tahmasebi-Nina2018-264722,
  author    = {Tahmasebi, Nina},
  title     = {A Study on {Word2Vec} on a Historical {Swedish} Newspaper Corpus},
  booktitle = {Proceedings of the Digital Humanities in the Nordic Countries 3rd Conference, Helsinki, Finland, March 7--9, 2018},
  editor    = {Mäkelä, Eetu and Tolonen, Mikko and Tuominen, Jouni},
  series    = {CEUR Workshop Proceedings},
  volume    = {2084},
  publisher = {University of Helsinki, Faculty of Arts},
  address   = {Helsinki},
  year      = {2018},
  abstract  = {Detecting word sense changes can be of great interest in the field of digital humanities. Thus far, most investigations and automatic methods have been developed and carried out on English text and most recent methods make use of word embeddings. This paper presents a study on using Word2Vec, a neural word embedding method, on a Swedish historical newspaper collection. Our study includes a set of 11 words and our focus is the quality and stability of the word vectors over time. We investigate if a word embedding method like Word2Vec can be effectively used on texts where the volume and quality is limited.},
}

@inproceedings{Rouces-Jacobo2018-264721,
  author    = {Rouces, Jacobo and Borin, Lars and Tahmasebi, Nina and Eide, Stian Rødven},
  title     = {Defining a gold standard for a {Swedish} sentiment lexicon: Towards higher-yield text mining in the digital humanities},
  booktitle = {Proceedings of the Digital Humanities in the Nordic Countries 3rd Conference, Helsinki, Finland, March 7--9, 2018},
  editor    = {Mäkelä, Eetu and Tolonen, Mikko and Tuominen, Jouni},
  series    = {CEUR Workshop Proceedings},
  volume    = {2084},
  publisher = {University of Helsinki, Faculty of Arts},
  address   = {Helsinki},
  year      = {2018},
  abstract  = {There is an increasing demand for multilingual sentiment analysis, and most work on sentiment lexicons is still carried out based on English lexicons like WordNet. In addition, many of the non-English sentiment lexicons that do exist have been compiled by (machine) translation from English resources, thereby arguably obscuring possible language-specific characteristics of sentiment-loaded vocabulary. In this paper we describe the creation from scratch of a gold standard for the sentiment annotation of Swedish terms as a first step towards the creation of a full-fledged sentiment lexicon for Swedish.},
}

% NOTE(review): for the two LREC entries the address value (Miyazaki) looks
% like the conference venue rather than the publisher's city -- confirm which
% one the target style is expected to print.

@inproceedings{Rouces-Jacobo2018-264720,
  author    = {Rouces, Jacobo and Tahmasebi, Nina and Borin, Lars and Eide, Stian Rødven},
  title     = {{SenSALDO}: Creating a Sentiment Lexicon for {Swedish}},
  booktitle = {{LREC} 2018, Eleventh International Conference on Language Resources and Evaluation},
  publisher = {ELRA},
  address   = {Miyazaki},
  year      = {2018},
  isbn      = {979-10-95546-00-9},
  abstract  = {The natural language processing subfield known as sentiment analysis or opinion mining has seen an explosive expansion over the last decade or so, and sentiment analysis has become a standard item in the NLP toolbox. Still, many theoretical and methodological questions remain unanswered and resource gaps unfilled. Most work on automated sentiment analysis has been done on English and a few other languages; for most written languages of the world, this tool is not available. This paper describes the development of an extensive sentiment lexicon for written (standard) Swedish. We investigate different methods for developing a sentiment lexicon for Swedish. We use an existing gold standard dataset for training and testing. For each word sense from the SALDO Swedish lexicon, we assign a real value sentiment score in the range [-1,1] and produce a sentiment label. We implement and evaluate three methods: a graph-based method that iterates over the SALDO structure, a method based on random paths over the SALDO structure and a corpus-driven method based on word embeddings. The resulting sense-disambiguated sentiment lexicon (SenSALDO) is an open source resource and freely available from Språkbanken, The Swedish Language Bank at the University of Gothenburg.},
}

@inproceedings{Rouces-Jacobo2018-264719,
  author    = {Rouces, Jacobo and Tahmasebi, Nina and Borin, Lars and Eide, Stian Rødven},
  title     = {Generating a Gold Standard for a {Swedish} Sentiment Lexicon},
  booktitle = {{LREC} 2018, Eleventh International Conference on Language Resources and Evaluation},
  publisher = {ELRA},
  address   = {Miyazaki},
  year      = {2018},
  isbn      = {979-10-95546-00-9},
  abstract  = {We create a gold standard for sentiment annotation of Swedish terms, using the freely available SALDO lexicon and the Gigaword corpus. For this purpose, we employ a multi-stage approach combining corpus-based frequency sampling, direct score annotation and Best-Worst Scaling. In addition to obtaining a gold standard, we analyze the data from our process and we draw conclusions about the optimal sentiment model.},
}