% Bibliography entries (2017-2020); Jacobo Rouces is an author of every entry.
% Encoding: UTF-8 (names and abstracts contain non-ASCII characters).
% Title words that must keep their capitalisation are brace-protected.

@article{rouces-etal-2017-framebase-256293,
  author   = {Rouces, Jacobo and de Melo, G. and Hose, K.},
  title    = {{FrameBase}: Enabling integration of heterogeneous knowledge},
  journal  = {Semantic Web},
  volume   = {8},
  number   = {6},
  pages    = {817--850},
  year     = {2017},
  abstract = {Large-scale knowledge graphs such as those in the Linked Open Data cloud are typically stored as subject-predicate-object triples. However, many facts about the world involve more than two entities. While n-ary relations can be converted to triples in a number of ways, unfortunately, the structurally different choices made in different knowledge sources significantly impede our ability to connect them. They also increase semantic heterogeneity, making it impossible to query the data concisely and without prior knowledge of each individual source. This article presents FrameBase, a wide-coverage knowledge base schema that uses linguistic frames to represent and query n-ary relations from other knowledge bases, providing multiple levels of granularity connected via logical entailment. Overall, this provides a means for semantic integration from heterogeneous sources under a single schema and opens up possibilities to draw on natural language processing techniques for querying and data mining.},
}

@inproceedings{rouces-etal-2019-tracking-281308,
  author    = {Rouces, Jacobo and Borin, Lars and Tahmasebi, Nina},
  title     = {Tracking Attitudes Towards Immigration in {Swedish} Media},
  booktitle = {Digital Humanities in the Nordic Countries 4th Conference, Copenhagen, Denmark, March 5-8, 2019},
  series    = {CEUR Workshop Proceedings},
  volume    = {2364},
  year      = {2019},
  publisher = {CEUR Workshop Proceedings},
  address   = {Aachen},
  abstract  = {We use a gold standard under construction for sentiment analysis in Swedish to explore how attitudes towards immigration change across time and media. We track the evolution of attitude starting from the year 2000 for three different Swedish media: the national newspapers Aftonbladet and Svenska Dagbladet, representing different halves of the left–right political spectrum, and the online forum Flashback.},
}

@inproceedings{rouces-etal-2019-political-281307,
  author    = {Rouces, Jacobo and Borin, Lars and Tahmasebi, Nina},
  title     = {Political Stance Analysis Using {Swedish} Parliamentary Data},
  booktitle = {Digital Humanities in the Nordic Countries 4th Conference, Copenhagen, Denmark, March 5-8, 2019},
  series    = {CEUR Workshop Proceedings},
  volume    = {2364},
  year      = {2019},
  publisher = {CEUR},
  address   = {Aachen},
  abstract  = {We process and visualize Swedish parliamentary data using methods from statistics and machine learning, which allows us to obtain insight into the political processes behind the data. We produce plots that let us infer the relative stance of political parties and their members on different topics. In addition, we can infer the degree of homogeneity of individual votes within different parties, as well as the degree of multi-dimensionality of Swedish politics.},
}

@inproceedings{rouces-etal-2018-generating-264719,
  author    = {Rouces, Jacobo and Tahmasebi, Nina and Borin, Lars and Rødven-Eide, Stian},
  title     = {Generating a Gold Standard for a {Swedish} Sentiment Lexicon},
  booktitle = {LREC 2018, Eleventh International Conference on Language Resources and Evaluation, May 7-12, 2018, Miyazaki (Japan)},
  year      = {2018},
  publisher = {ELRA},
  address   = {Miyazaki},
  isbn      = {979-10-95546-00-9},
  abstract  = {We create a gold standard for sentiment annotation of Swedish terms, using the freely available SALDO lexicon and the Gigaword corpus. For this purpose, we employ a multi-stage approach combining corpus-based frequency sampling, direct score annotation and Best-Worst Scaling. In addition to obtaining a gold standard, we analyze the data from our process and we draw conclusions about the optimal sentiment model.},
}

@inproceedings{rouces-etal-2018-sensaldo-264720,
  author    = {Rouces, Jacobo and Tahmasebi, Nina and Borin, Lars and Rødven-Eide, Stian},
  title     = {{SenSALDO}: Creating a Sentiment Lexicon for {Swedish}},
  booktitle = {LREC 2018, Eleventh International Conference on Language Resources and Evaluation, May 7-12, 2018, Miyazaki (Japan)},
  year      = {2018},
  publisher = {ELRA},
  address   = {Miyazaki},
  isbn      = {979-10-95546-00-9},
  abstract  = {The natural language processing subfield known as sentiment analysis or opinion mining has seen an explosive expansion over the last decade or so, and sentiment analysis has become a standard item in the NLP toolbox. Still, many theoretical and methodological questions remain unanswered and resource gaps unfilled. Most work on automated sentiment analysis has been done on English and a few other languages; for most written languages of the world, this tool is not available. This paper describes the development of an extensive sentiment lexicon for written (standard) Swedish. We investigate different methods for developing a sentiment lexicon for Swedish. We use an existing gold standard dataset for training and testing. For each word sense from the SALDO Swedish lexicon, we assign a real value sentiment score in the range [-1,1] and produce a sentiment label. We implement and evaluate three methods: a graph-based method that iterates over the SALDO structure, a method based on random paths over the SALDO structure and a corpus-driven method based on word embeddings. The resulting sense-disambiguated sentiment lexicon (SenSALDO) is an open source resource and freely available from Språkbanken, The Swedish Language Bank at the University of Gothenburg.},
}

@inproceedings{rouces-etal-2018-defining-264721,
  author    = {Rouces, Jacobo and Borin, Lars and Tahmasebi, Nina and Rødven-Eide, Stian},
  title     = {Defining a gold standard for a {Swedish} sentiment lexicon: Towards higher-yield text mining in the digital humanities},
  booktitle = {Proceedings of the Digital Humanities in the Nordic Countries 3rd Conference, Helsinki, Finland, March 7-9, 2018},
  editor    = {Mäkelä, Eetu and Tolonen, Mikko and Tuominen, Jouni},
  series    = {CEUR Workshop Proceedings},
  volume    = {2084},
  year      = {2018},
  publisher = {University of Helsinki, Faculty of Arts},
  address   = {Helsinki},
  abstract  = {There is an increasing demand for multilingual sentiment analysis, and most work on sentiment lexicons is still carried out based on English lexicons like WordNet. In addition, many of the non-English sentiment lexicons that do exist have been compiled by (machine) translation from English resources, thereby arguably obscuring possible language-specific characteristics of sentiment-loaded vocabulary. In this paper we describe the creation from scratch of a gold standard for the sentiment annotation of Swedish terms as a first step towards the creation of a full-fledged sentiment lexicon for Swedish.},
}

@inproceedings{lindahl-etal-2019-towards-286588,
  author    = {Lindahl, Anna and Borin, Lars and Rouces, Jacobo},
  title     = {Towards Assessing Argumentation Annotation -- A First Step},
  booktitle = {Proceedings of the 6th Workshop on Argument Mining, August 1, 2019, Florence, Italy},
  editor    = {Stein, Benno and Wachsmuth, Henning},
  year      = {2019},
  publisher = {Association for Computational Linguistics},
  address   = {Stroudsburg, PA},
  isbn      = {978-1-950737-33-8},
  abstract  = {This paper presents a first attempt at using Walton’s argumentation schemes for annotating arguments in Swedish political text and assessing the feasibility of using this particular set of schemes with two linguistically trained annotators. The texts are not pre-annotated with argumentation structure beforehand. The results show that the annotators differ both in number of annotated arguments and selection of the conclusion and premises which make up the arguments. They also differ in their labeling of the schemes, but grouping the schemes increases their agreement. The outcome from this will be used to develop guidelines for future annotations.},
}

@inproceedings{rouces-etal-2020-creating-290695,
  author    = {Rouces, Jacobo and Borin, Lars and Tahmasebi, Nina},
  title     = {Creating an Annotated Corpus for Aspect-Based Sentiment Analysis in {Swedish}},
  booktitle = {Proceedings of the 5th conference in Digital Humanities in the Nordic Countries, Riga, Latvia, October 21-23, 2020},
  year      = {2020},
  publisher = {CEUR Workshop Proceedings},
  abstract  = {Aspect-Based Sentiment Analysis constitutes a more fine-grained alternative to traditional sentiment analysis at sentence level. In addition to a sentiment value denoting how positive or negative a particular opinion or sentiment expression is, it identifies additional aspects or 'slots' that characterize the opinion. Some typical aspects are target and source, i.e. who holds the opinion and about which entity or aspect is the opinion. We present a large Swedish corpus annotated for Aspect-Based Sentiment Analysis. Each sentiment expression is annotated as a tuple that contains the following fields: one among 5 possible sentiment values, the target, the source, and whether the sentiment expressed is ironic. In addition, the linguistic element that conveys the sentiment is identified too. Sentiment for a particular topic is also annotated at title, paragraph and document level. The documents are articles obtained from two Swedish media (Svenska Dagbladet and Aftonbladet) and one online forum (Flashback), totalling around 4000 documents. The corpus is freely available and we plan to use it for training and testing an Aspect-Based Sentiment Analysis system.},
}

@inproceedings{hoang-etal-2019-aspect-284269,
  author    = {Hoang, M. and Bihorac, O. A. and Rouces, Jacobo},
  title     = {Aspect-Based Sentiment Analysis using {BERT}},
  booktitle = {Proceedings of the 22nd Nordic Conference on Computational Linguistics, 30 September–2 October, 2019, Turku, Finland},
  editor    = {Hartmann, Mareike and Plank, Barbara},
  year      = {2019},
  publisher = {Linköping University Electronic Press},
  address   = {Linköping, Sweden},
  isbn      = {978-91-7929-995-8},
}