Skip to main content


	title        = {Swe-Clarin: Language resources and technology for Digital Humanities},
	abstract     = {CLARIN is a European Research Infrastructure Consortium (ERIC), which aims at (a) making extensive language-based materials available as primary research data to the humanities and social sciences (HSS); and (b) offering state-of-the-art language technology (LT) as an e-research tool for this purpose, positioning CLARIN centrally in what is often referred to as the digital humanities (DH). The Swedish CLARIN node Swe-Clarin was established in 2015 with funding from the Swedish Research Council.

In this paper, we describe the composition and activities of Swe-Clarin, aiming at meeting the requirements of all HSS and other researchers whose research involves using text and speech as primary research data, and spreading the awareness of what Swe-Clarin can offer these research communities. We focus on one of the central means for doing this: pilot projects conducted in collaboration between HSS researchers and Swe-Clarin, together formulating a research question, the addressing of which requires working with large language-based materials. Four such pilot projects are described in more detail, illustrating research on rhetorical history, second-language acquisition, literature, and political science. A common thread to these projects is an aspiration to meet the challenge of conducting research on the basis of very large amounts of textual data in a consistent way without losing sight of the individual cases making up the mass of data, i.e., to be able to move between Moretti’s “distant” and “close reading” modes. 

While the pilot projects clearly make substantial contributions to DH, they also reveal some needs for more development, and in particular a need for document-level access to the text materials. As a consequence of this, work has now been initiated in Swe-Clarin to meet this need, so that Swe-Clarin together with HSS scholars investigating intricate research questions can take on the methodological challenges of big-data language-based digital humanities.},
	booktitle    = {Digital Humanities 2016. Extended Papers of the International Symposium on Digital Humanities (DH 2016) Växjö, Sweden, November, 7-8, 2016. Edited by Koraljka Golub, Marcelo Milrad. Vol-2021},
	author       = {Borin, Lars and Tahmasebi, Nina and Volodina, Elena and Ekman, Stefan and Jordan, Caspar and Viklund, Jon and Megyesi, Beáta and Näsman, Jesper and Palmér, Anne and Wirén, Mats and Björkenstam, Kristina and Grigonyte, Gintare and Gustafson Capková, Sofia and Kosiński, Tomasz},
	year         = {2017},
	publisher    = {M. Jeusfeld c/o Redaktion Sun SITE, Informatik V, RWTH Aachen.},
	address      = {Aachen},

	title        = {On the Uses of Word Sense Change for Research in the Digital Humanities},
	abstract     = {With advances in technology and culture, our language changes. We invent new words, add or change meanings of existing words and change names of existing things. Unfortunately, our language does not carry a memory; words, expressions and meanings used in the past are forgotten over time. When searching and interpreting content from archives, language changes pose a great challenge. In this paper, we present results of automatic word sense change detection and show the utility for archive users as well as digital humanities’ research. Our method is able to capture changes that relate to the usage and culture of a word that cannot easily be found using dictionaries or other resources.},
	booktitle    = {Research and Advanced Technology for Digital Libraries - 21st International Conference on Theory and Practice of Digital Libraries, TPDL 2017, Thessaloniki, Greece, September 18-21, 2017. Proceedings},
	editor       = {Jaap Kamps and Giannis Tsakonas and Yannis Manolopoulos and Lazaros Iliadis and Ioannis Karydis},
	author       = {Tahmasebi, Nina and Risse, Thomas},
	year         = {2017},
	publisher    = {Springer Verlag},
	address      = {Cham},
	ISBN         = {978-3-319-67007-2},

	title        = {Parameter Transfer across Domains for Word Sense Disambiguation},
	abstract     = {Word sense disambiguation is defined as finding the corresponding sense for a target word in a given context, which comprises a major step in text applications. Recently, it has been addressed as an optimization problem. The idea behind is to find a sequence of senses that corresponds to the words in a given context with a maximum semantic similarity. Metaheuristics like simulated annealing and D-Bees provide approximate good-enough solutions, but are usually influenced by the starting parameters. In this paper, we study the parameter tuning for both algorithms within the word sense disambiguation problem. The experiments are conducted on different datasets to cover different disambiguation scenarios. We show that D-Bees is robust and less sensitive towards the initial parameters compared to simulated annealing, hence, it is sufficient to tune the parameters once and reuse them for different datasets, domains or languages.},
	booktitle    = {Proceedings of Recent Advances in Natural Language Processing Meet Deep Learning, Varna, Bulgaria 2–8 September 2017 / Edited by Galia Angelova, Kalina Bontcheva, Ruslan Mitkov, Ivelina Nikolova, Irina Temnikova},
	author       = {Abualhaija, Sallam and Tahmasebi, Nina and Forin, Diane and Zimmermann, Karl-Heinz},
	year         = {2017},
	ISBN         = {978-954-452-048-9},

	title        = {Proceedings of the 21st Nordic Conference on Computational Linguistics, NODALIDA 2017, Gothenburg, Sweden, May 22-24, 2017},
	author       = {Tiedemann, Jörg and Tahmasebi, Nina},
	year         = {2017},
	publisher    = {Association for Computational Linguistics},
	ISBN         = {978-91-7685-601-7},

	title        = {Finding Individual Word Sense Changes and their Delay in Appearance},
	abstract     = {We present a method for detecting word sense changes by utilizing automatically induced word senses. Our method works on the level of individual senses and allows a word to have e.g. one stable sense and then add a novel sense that later experiences change. Senses are grouped based on polysemy to find linguistic concepts and we can find broadening and narrowing as well as novel (polysemous and homonymic) senses. We evaluate on a testset, present recall and estimates of the time between expected and found change.},
	booktitle    = {Proceedings of Recent Advances in Natural Language Processing 2017. Varna, Bulgaria 2–8 September, 2017},
	editor       = {Galia Angelova and Kalina Bontcheva and Ruslan Mitkov and Ivelina Nikolova and Irina Temnikova},
	author       = {Tahmasebi, Nina and Risse, Thomas},
	year         = {2017},
	ISBN         = {978-954-452-048-9},