% Bibliography database: six entries, all dated 2020 and all listing Lars Borin
% among the authors or editors. Text outside entries (like these lines) is
% ignored by BibTeX and serves as commentary only.

% Conference paper in the CLARIN Annual Conference Proceedings 2020.
@inproceedings{fridlund-etal-2020-trawling-299694,
  author    = {Fridlund, Mats and Olsson, Leif-Jöran and Brodén, Daniel and Borin, Lars},
  title     = {Trawling the {Gulf} of {Bothnia} of News: A Big Data Analysis of the Emergence of Terrorism in {Swedish} and {Finnish} Newspapers, 1780–1926},
  booktitle = {CLARIN Annual Conference Proceedings 2020, 05–07 October 2020, Virtual Edition},
  editor    = {Navarretta, Costanza and Eskevich, Maria},
  year      = {2020},
  publisher = {CLARIN},
  abstract  = {This study combines history domain knowledge and language technology expertise to evaluate and expand on research claims regarding the historical meanings associated with terrorism in Swedish and Finnish contexts. Using a cross-border comparative approach and large newspaper corpora made available by the CLARIN research infrastructure, we explore overlapping national discourses on terrorism, the concept’s historical diversity and its relations to different national contexts. We are particularly interested in testing the hypothesis that substate terrorism’s modern meaning was not yet established in the 19th century but primarily restricted to Russian terrorism. We conclude that our comparative study finds both uniquely national and shared meanings of terrorism and that our study strengthen the hypothesis. In extension, the study also serves as an exploration of the potentials of cross-disciplinary evaluative studies based on extensive corpora and of cross-border comparative approaches to Swedish and Finnish newspaper corpora.},
}

% Whole proceedings volume (not a single paper), hence the proceedings type
% with the listed people recorded as editors.
@proceedings{alfter-etal-2020-proceedings-300071,
  editor    = {Alfter, David and Volodina, Elena and Pilán, Ildikó and Lange, Herbert and Borin, Lars},
  title     = {Proceedings of the 9th Workshop on Natural Language Processing for Computer Assisted Language Learning 2020},
  year      = {2020},
  publisher = {Linköping University Electronic Press},
  address   = {Linköping},
  isbn      = {978-91-7929-732-9},
  abstract  = {The workshop series on Natural Language Processing (NLP) for Computer-Assisted Language Learning (NLP4CALL) is a meeting place for researchers working on the integration of Natural Language Processing and Speech Technologies in CALL systems and exploring the theoretical and methodological issues arising in this connection. The latter includes, among others, the integration of insights from Second Language Acquisition (SLA) research, and the promotion of “Computational SLA” through setting up Second Language research infrastructures. This collection presents four selected papers describing use of Language Technology for language learning.},
}

% Conference paper, Digital Humanities in the Nordic Countries 2020 (Riga).
@inproceedings{rouces-etal-2020-creating-290695,
  author    = {Rouces, Jacobo and Borin, Lars and Tahmasebi, Nina},
  title     = {Creating an Annotated Corpus for Aspect-Based Sentiment Analysis in {Swedish}},
  booktitle = {Proceedings of the 5th conference in Digital Humanities in the Nordic Countries, Riga, Latvia, October 21–23, 2020},
  year      = {2020},
  publisher = {CEUR Workshop Proceedings},
  abstract  = {Aspect-Based Sentiment Analysis constitutes a more fine-grained alternative to traditional sentiment analysis at sentence level. In addition to a sentiment value denoting how positive or negative a particular opinion or sentiment expression is, it identifies additional aspects or 'slots' that characterize the opinion. Some typical aspects are target and source, i.e. who holds the opinion and about which entity or aspect is the opinion. We present a large Swedish corpus annotated for Aspect-Based Sentiment Analysis. Each sentiment expression is annotated as a tuple that contains the following fields: one among 5 possible sentiment values, the target, the source, and whether the sentiment expressed is ironic. In addition, the linguistic element that conveys the sentiment is identified too. Sentiment for a particular topic is also annotated at title, paragraph and document level. The documents are articles obtained from two Swedish media (Svenska Dagbladet and Aftonbladet) and one online forum (Flashback), totalling around 4000 documents. The corpus is freely available and we plan to use it for training and testing an Aspect-Based Sentiment Analysis system.},
}

% Workshop paper, Globalex Workshop on Linked Lexicography at LREC 2020.
% NOTE(review): the exported abstract had the fragment
% "lexicon, word sense disambiguation, topic detection" spliced into the
% sentence beginning "From the literature"; presumably stray keywords from
% the export. It was removed from the abstract below. Confirm against the
% published paper.
@inproceedings{zechner-borin-2020-towards-296900,
  author    = {Zechner, Niklas and Borin, Lars},
  title     = {Towards a {Swedish} {Roget}-Style Thesaurus for {NLP}},
  booktitle = {Proceedings of the 2020 Globalex Workshop on Linked Lexicography. Language Resources and Evaluation Conference (LREC 2020), Marseille, 11–16 May 2020},
  year      = {2020},
  publisher = {European Language Resources Association},
  address   = {Paris},
  isbn      = {979-10-95546-46-7},
  abstract  = {Bring’s thesaurus (Bring) is a Swedish counterpart of Roget, and its digitized version could make a valuable language resource for use in many and diverse natural language processing (NLP) applications. From the literature we know that Roget-style thesauruses and wordnets have complementary strengths in this context, so both kinds of lexical-semantic resource are good to have. However, Bring was published in 1930, and its lexical items are in the form of lemma–POS pairings. In order to be useful in our NLP systems, polysemous lexical items need to be disambiguated, and a large amount of modern vocabulary must be added in the proper places in Bring. The work presented here describes experiments aiming at automating these two tasks, at least in part, where we use the structure of an existing Swedish semantic lexicon – Saldo – both for disambiguation of ambiguous Bring entries and for addition of new entries to Bring.},
}

% Conference paper, 12th Language Resources and Evaluation Conference.
% The address field holds the publisher's city, matching the other entries
% from the same publisher; the conference venue stays in booktitle.
@inproceedings{waldispuhl-etal-2020-material-293332,
  author    = {Waldispühl, Michelle and Dannélls, Dana and Borin, Lars},
  title     = {Material Philology Meets Digital Onomastic Lexicography: The {NordiCon} Database of Medieval {Nordic} Personal Names in {Continental} Sources},
  booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference, Marseille, 11–16 May 2020},
  editor    = {Calzolari, Nicoletta and others},
  year      = {2020},
  publisher = {European Language Resources Association},
  address   = {Paris},
  isbn      = {979-10-95546-34-4},
  abstract  = {We present NordiCon, a database containing medieval Nordic personal names attested in Continental sources. The database combines formally interpreted and richly interlinked onomastic data with digitized versions of the medieval manuscripts from which the data originate and information on the tokens' context. The structure of NordiCon is inspired by other online historical given name dictionaries. It takes up challenges reported on in previous works, such as how to cover material properties of a name token and how to define lemmatization principles, and elaborates on possible solutions. The lemmatization principles for NordiCon are further developed in order to facilitate the connection to other name dictionaries and corpuses, and the integration of the database into Språkbanken Text, an infrastructure containing modern and historical written data.},
}

% Workshop paper, 7th Workshop on Linked Data in Linguistics at LREC 2020.
@inproceedings{virk-etal-2020-from-295339,
  author    = {Virk, Shafqat and Hammarström, Harald and Borin, Lars and Forsberg, Markus and Wichmann, Søren},
  title     = {From Linguistic Descriptions to Language Profiles},
  booktitle = {Proceedings of the 7th Workshop on Linked Data in Linguistics (LDL-2020). Language Resources and Evaluation Conference (LREC 2020), Marseille, 11–16 May 2020},
  editor    = {Ionov, Maxim and McCrae, John P. and Chiarcos, Christian and Declerck, Thierry and Bosque-Gil, Julia and Gracia, Jorge},
  year      = {2020},
  publisher = {European Language Resources Association},
  address   = {Paris},
  isbn      = {979-10-95546-36-8},
  abstract  = {Language catalogues and typological databases are two important types of resources containing different types of knowledge about the world’s natural languages. The former provide metadata such as number of speakers, location (in prose descriptions and/or GPS coordinates), language code, literacy, etc., while the latter contain information about a set of structural and functional attributes of languages. Given that both types of resources are developed and later maintained manually, there are practical limits as to the number of languages and the number of features that can be surveyed. We introduce the concept of a language profile, which is intended to be a structured representation of various types of knowledge about a natural language extracted semi-automatically from descriptive documents and stored at a central location. It has three major parts: (1) an introductory; (2) an attributive; and (3) a reference part, each containing different types of knowledge about a given natural language. As a case study, we develop and present a language profile of an example language. At this stage, a language profile is an independent entity, but in the future it is envisioned to become part of a network of language profiles connected to each other via various types of relations. Such a representation is expected to be suitable both for humans and machines to read and process for further deeper linguistic analyses and/or comparisons.},
}