Hoppa till huvudinnehåll
Språkbanken Text är en avdelning inom Språkbanken.

BibTeX

@inProceedings{ahlberg-etal-2015-case-217988,
	title        = {A case study on supervised classification of {Swedish} pseudo-coordination},
	abstract     = {We present a case study on supervised classification of Swedish pseudo-coordination (SPC). The classification is attempted on the type-level with data collected from two data sets: a blog corpus and a fiction corpus. Two small experiments were designed to evaluate the feasability of this task. The first experiment explored a classifier’s ability to discriminate pseudo-coordinations from ordinary verb coordinations, given a small labeled data set created during the experiment. The second experiment evaluated how well the classifier performed at detecting and ranking SPCs in a set of unlabeled verb coordinations, to investigate if it could be used as a semi-automatic discovery procedure to find new SPCs.},
	booktitle    = {Proceedings of the 20th Nordic Conference of Computational Linguistics, NODALIDA 2015, May 11-13, 2015, Vilnius, Lithuania},
	author       = {Ahlberg, Malin and Andersson, Peter and Forsberg, Markus and Tahmasebi, Nina},
	year         = {2015},
	publisher    = {Linköping University Electronic Press},
	address      = {Linköping},
	ISBN         = {978-91-7519-098-3},
}

@article{tahmasebi-hengchen-2019-strengths-291189,
	title        = {The Strengths and Pitfalls of Large-Scale Text Mining for Literary Studies},
	abstract     = {This paper is an overview of the opportunities and challenges of using large-scale text mining to answer research questions that stem from the humanities in general and literature specifically.  In  this  paper,  we  will  discuss  a  data-intensive  research  methodology  and  how  different  views of digital text affect answers to research questions. We will discuss results derived from text mining, how these results can be evaluated, and their relation to hypotheses and research questions. Finally, we will discuss some pitfalls of computational literary analysis and give some pointers as to how these can be avoided.},
	journal      = {Samlaren : tidskrift för svensk litteraturvetenskaplig forskning},
	author       = {Tahmasebi, Nina and Hengchen, Simon},
	year         = {2019},
	volume       = {140},
	pages        = {198--227},
}

@misc{schlechtweg-etal-2020-post-295466,
	title        = {Post-Evaluation Data for {SemEval-2020} Task 1: Unsupervised Lexical Semantic Change Detection},
	abstract     = {This data collection contains the post-evaluation data for SemEval-2020 Task 1: Unsupervised Lexical Semantic Change Detection: (1) the starting kit to download data, and examples for competing in the CodaLab challenge including baselines; (2) the true binary change scores of the targets for Subtask 1, and their true graded change scores for Subtask 2 (test_data_truth/); (3)the scoring program used to score submissions against the true test data in the evaluation and post-evaluation phase (scoring_program/); and (4) the results of the evaluation phase including, for example, analysis plots (plots/) displaying the results:},
	author       = {Schlechtweg, Dominik and McGillivray, Barbara and Hengchen, Simon and Dubossarsky, Haim and Tahmasebi, Nina},
	year         = {2020},
	publisher    = {Zenodo},
}

@misc{tahmasebi-etal-2020-swedish-295465,
	title        = {Swedish Test Data for {SemEval} 2020 Task 1: Unsupervised Lexical Semantic Change Detection},
	abstract     = {This data collection contains the Swedish test data for SemEval 2020 Task 1: Unsupervised Lexical Semantic Change Detection. It consists of a Swedish text corpus pair (corpus1/, corpus2/) and 31 lemmas which have been annotated for their lexical semantic change between the two corpora (targets.txt). We sample from the KubHist2 corpus, digitized by the National Library of Sweden, and available through the Språkbanken corpus infrastructure Korp (Borin et al., 2012). The full corpus is available through a CC BY (attribution) license. Each word for which the lemmatizer in the Korp pipeline has found a lemma is replaced with the lemma. In cases where the lemmatizer cannot find a lemma, we leave the word as is (i.e., unlemmatized, no lower-casing). KubHist contains very frequent OCR errors, especially for the older data.More detail about the properties and quality of the Kubhist corpus can be found in (Adesam et al., 2019).},
	author       = {Tahmasebi, Nina and Hengchen, Simon and Schlechtweg, Dominik and McGillivray, Barbara and Dubossarsky, Haim},
	year         = {2020},
}

@inProceedings{dubossarsky-etal-2019-time-281304,
	title        = {{Time-Out}: Temporal Referencing for Robust Modeling of Lexical Semantic Change},
	abstract     = {State-of-the-art models of lexical semantic change detection suffer from noise stemming from vector space alignment. We have empirically tested the Temporal Referencing method for lexical semantic change and show that, by avoiding alignment, it is less affected by this noise. We show that, trained on a diachronic corpus, the skip-gram with negative sampling architecture with temporal referencing outperforms alignment models on a synthetic task as well as a manual testset. We introduce a principled way to simulate lexical semantic change and systematically control for possible biases. },
	booktitle    = {Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics, Florence, Italy, July 28 - August 2, 2019 / Anna Korhonen, David Traum, Lluís Màrquez (Editors)},
	author       = {Dubossarsky, Haim and Hengchen, Simon and Tahmasebi, Nina and Schlechtweg, Dominik},
	year         = {2019},
	publisher    = {Association for Computational Linguistics},
	address      = {Stroudsburg, PA},
	ISBN         = {978-1-950737-48-2},
}

@inProceedings{schlechtweg-etal-2020-semeval-295463,
	title        = {{SemEval-2020} Task 1: Unsupervised Lexical Semantic Change Detection},
	abstract     = {Lexical Semantic Change detection, i.e., the task of identifying words that change meaning over time, is a very active research area, with applications in NLP, lexicography, and linguistics. Evaluation is currently the most pressing problem in Lexical Semantic Change detection, as no gold standards are available to the community, which hinders progress. We present the results of the first shared task that addresses this gap by providing researchers with an evaluation framework and manually annotated, high-quality datasets for English, German, Latin, and Swedish. 33 teams submitted 186 systems, which were evaluated on two subtasks. },
	booktitle    = {Proceedings of the Fourteenth Workshop on Semantic Evaluation (SemEval2020), Barcelona, Spain (Online), December 12, 2020.},
	author       = {Schlechtweg, Dominik and McGillivray, Barbara and Hengchen, Simon and Dubossarsky, Haim and Tahmasebi, Nina},
	year         = {2020},
	publisher    = {Association for Computational Linguistics},
}

@inProceedings{dubossarsky-etal-2019-time-295438,
	author       = {Dubossarsky, Haim and Hengchen, Simon and Tahmasebi, Nina and Schlechtweg, Dominik},
	title        = {Time for change: Evaluating models of semantic change without evaluation tasks},
	booktitle    = {Cambridge Language Sciences Annual Symposium 2019 : Perspectives on Language Change},
	year         = {2019},
}

@inProceedings{rouces-etal-2020-creating-290695,
	title        = {Creating an Annotated Corpus for Aspect-Based Sentiment Analysis in {Swedish}},
	abstract     = {Aspect-Based Sentiment Analysis constitutes a more fine-grained alternative to traditional sentiment analysis at sentence level. In addition to a sentiment value denoting how positive or negative a particular opinion or sentiment expression is, it identifies additional aspects or 'slots' that characterize the opinion. Some typical aspects are target and source, i.e. who holds the opinion and about which entity or aspect is the opinion. We present a large Swedish corpus annotated for Aspect-Based Sentiment Analysis. Each sentiment expression is annotated as a tuple that contains the following fields: one among 5 possible sentiment values, the target, the source, and whether the sentiment expressed is ironic.  In addition, the linguistic element that conveys the sentiment is identified too. Sentiment for a particular topic is also annotated at title, paragraph and document level.
The documents are articles obtained from two Swedish media (Svenska Dagbladet and Aftonbladet) and one online forum (Flashback), totalling around 4000 documents. The corpus is freely available and we plan to use it for training and testing an Aspect-Based Sentiment Analysis system.},
	booktitle    = {Proceedings of the 5th conference in Digital Humanities in the Nordic Countries, Riga, Latvia, October 21-23, 2020.},
	author       = {Rouces, Jacobo and Borin, Lars and Tahmasebi, Nina},
	year         = {2020},
	publisher    = {CEUR Workshop Proceedings},
}

@inProceedings{rouces-etal-2019-tracking-281308,
	title        = {Tracking Attitudes Towards Immigration in {Swedish} Media},
	abstract     = {We use a gold standard under construction for sentiment analysis in Swedish to explore how attitudes towards immigration change across time and media. We track the evolution of attitude starting from the year 2000 for three different Swedish media: the national newspapers Aftonbladet and Svenska Dagbladet, representing different halves of the left–right political spectrum, and the online forum Flashback.},
	booktitle    = {CEUR Workshop Proceedings (Vol. 2364).  Digital Humanities in the Nordic Countries 4th Conference, Copenhagen, Denmark, March 5-8, 2019.},
	author       = {Rouces, Jacobo and Borin, Lars and Tahmasebi, Nina},
	year         = {2019},
	publisher    = {CEUR Workshop Proceedings},
	address      = {Aachen},
}

@inProceedings{rouces-etal-2019-political-281307,
	title        = {Political Stance Analysis Using {Swedish} Parliamentary Data},
	abstract     = {We process and visualize Swedish parliamentary data using methods from statistics and machine learning, which allows us to obtain insight into the political processes behind the data. We produce plots that let us infer the relative stance of political parties and their members on different topics. In addition, we can infer the degree of homogeneity of individual votes within different parties, as well as the degree of multi-dimensionality of Swedish politics.},
	booktitle    = {CEUR Workshop Proceedings (Vol. 2364).  Digital Humanities in the Nordic Countries 4th Conference, Copenhagen, Denmark, March 5-8, 2019.},
	author       = {Rouces, Jacobo and Borin, Lars and Tahmasebi, Nina},
	year         = {2019},
	publisher    = {CEUR Workshop Proceedings},
	address      = {Aachen},
}

@inProceedings{rouces-etal-2018-generating-264719,
	title        = {Generating a Gold Standard for a {Swedish} Sentiment Lexicon},
	abstract     = {We create a gold standard for sentiment annotation of Swedish terms, using the freely available SALDO lexicon and the Gigaword
corpus. For this purpose, we employ a multi-stage approach combining corpus-based frequency sampling, direct score annotation and
Best-Worst Scaling. In addition to obtaining a gold standard, we analyze the data from our process and we draw conclusions about the
optimal sentiment model.},
	booktitle    = {LREC 2018, Eleventh International Conference on Language Resources and Evaluation, May 7-12, 2018, Miyazaki (Japan)},
	author       = {Rouces, Jacobo and Tahmasebi, Nina and Borin, Lars and Rødven-Eide, Stian},
	year         = {2018},
	publisher    = {ELRA},
	address      = {Miyazaki},
	ISBN         = {979-10-95546-00-9},
}

@inProceedings{rouces-etal-2018-sensaldo-264720,
	title        = {{SenSALDO}: Creating a Sentiment Lexicon for {Swedish}},
	abstract     = {The natural language processing subfield known as sentiment analysis or opinion mining has seen an explosive expansion over the
last decade or so, and sentiment analysis has become a standard item in the NLP toolbox. Still, many theoretical and methodological
questions remain unanswered and resource gaps unfilled. Most work on automated sentiment analysis has been done on English and
a few other languages; for most written languages of the world, this tool is not available. This paper describes the development of an
extensive sentiment lexicon for written (standard) Swedish. We investigate different methods for developing a sentiment lexicon for
Swedish. We use an existing gold standard dataset for training and testing. For each word sense from the SALDO Swedish lexicon,
we assign a real value sentiment score in the range [-1,1] and produce a sentiment label. We implement and evaluate three methods:
a graph-based method that iterates over the SALDO structure, a method based on random paths over the SALDO structure and a
corpus-driven method based on word embeddings. The resulting sense-disambiguated sentiment lexicon (SenSALDO) is an open source
resource and freely available from Språkbanken, The Swedish Language Bank at the University of Gothenburg.},
	booktitle    = {LREC 2018, Eleventh International Conference on Language Resources and Evaluation, 7-12 May 2018, Miyazaki (Japan)},
	author       = {Rouces, Jacobo and Tahmasebi, Nina and Borin, Lars and Rødven-Eide, Stian},
	year         = {2018},
	publisher    = {ELRA},
	address      = {Miyazaki},
	ISBN         = {979-10-95546-00-9},
}

@inProceedings{rouces-etal-2018-defining-264721,
	title        = {Defining a gold standard for a {Swedish} sentiment lexicon: Towards higher-yield text mining in the digital humanities},
	abstract     = {There is an increasing demand for multilingual sentiment analysis, and most work on
sentiment lexicons is still carried out based on English lexicons like WordNet. In addition, many
of the non-English sentiment lexicons that do exist have been compiled by (machine) translation
from English resources, thereby arguably obscuring possible language-specific characteristics
of sentiment-loaded vocabulary. In this paper we describe the creation from scratch of a gold
standard for the sentiment annotation of Swedish terms as a first step towards the creation of a
full-fledged sentiment lexicon for Swedish.},
	booktitle    = {CEUR Workshop Proceedings vol. 2084.  Proceedings of the Digital Humanities in the Nordic Countries 3rd Conference Helsinki, Finland, March 7-9, 2018.  Edited by  Eetu Mäkelä Mikko Tolonen Jouni Tuominen},
	author       = {Rouces, Jacobo and Borin, Lars and Tahmasebi, Nina and Rødven-Eide, Stian},
	year         = {2018},
	publisher    = {University of Helsinki, Faculty of Arts},
	address      = {Helsinki},
}

@inProceedings{adesam-etal-2019-exploring-279948,
	title        = {Exploring the Quality of the Digital Historical Newspaper Archive {KubHist}},
	abstract     = {The KubHist Corpus is a massive corpus of Swedish historical newspapers, digitized by the Royal Swedish library, and available through the Språkbanken corpus infrastructure Korp. This paper contains a first overview of the KubHist corpus, exploring some of the difficulties with the data, such as OCR errors and spelling variation, and discussing possible paths for improving the quality and the searchability.},
	booktitle    = {Proceedings of the 4th Conference of The Association Digital Humanities in the Nordic Countries (DHN), Copenhagen, Denmark, March 5-8, 2019},
	editor       = {Costanza Navarretta and Manex Agirrezabal and Bente Maegaard},
	author       = {Adesam, Yvonne and Dannélls, Dana and Tahmasebi, Nina},
	year         = {2019},
	publisher    = {CEUR Workshop Proceedings},
	address      = {Aachen},
}

@inProceedings{adesam-etal-2018-exploring-273835,
	title        = {Exploring the Quality of the Digital Historical Newspaper Archive {KubHist}},
	booktitle    = {Seventh Swedish Language Technology Conference (SLTC), Stockholm, 7-9 November 2018},
	author       = {Adesam, Yvonne and Dannélls, Dana and Tahmasebi, Nina},
	year         = {2018},
}

@article{tahmasebi-etal-2015-visions-212969,
	author       = {Tahmasebi, Nina and Borin, Lars and Capannini, Gabriele and Dubhashi, Devdatt and Exner, Peter and Forsberg, Markus and Gossen, Gerhard and Johansson, Fredrik and Johansson, Richard and Kågebäck, Mikael and Mogren, Olof and Nugues, Pierre and Risse, Thomas},
	title        = {Visions and open challenges for a knowledge-based culturomics},
	journal      = {International Journal on Digital Libraries},
	year         = {2015},
	volume       = {15},
	number       = {2-4},
	pages        = {169--187},
	abstract     = {The concept of culturomics was born out of the availability of massive amounts of textual data and the interest to make sense of cultural and language phenomena over time. Thus far however, culturomics has only made use of, and shown the great potential of, statistical methods. In this paper, we present a vision for a knowledge-based culturomics that complements traditional culturomics. We discuss the possibilities and challenges of combining knowledge-based methods with statistical methods and address major challenges that arise due to the nature of the data; diversity of sources, changes in language over time as well as temporal dynamics of information in general. We address all layers needed for knowledge-based culturomics, from natural language processing and relations to summaries and opinions.},
}

@inProceedings{borin-etal-2017-clarin-261157,
	title        = {{Swe-Clarin}: Language resources and technology for {Digital Humanities}},
	abstract     = {CLARIN is a European Research Infrastructure Consortium (ERIC), which aims at (a) making extensive language-based materials available as primary research data to the humanities and social sciences (HSS); and (b) offering state-of-the-art language technology (LT) as an e-research tool for this purpose, positioning CLARIN centrally in what is often referred to as the digital humanities (DH). The Swedish CLARIN node Swe-Clarin was established in 2015 with funding from the Swedish Research Council.

In this paper, we describe the composition and activities of Swe-Clarin, aiming at meeting the requirements of all HSS and other researchers whose research involves using text and speech as primary research data, and spreading the awareness of what Swe-Clarin can offer these research communities. We focus on one of the central means for doing this: pilot projects conducted in collaboration between HSS researchers and Swe-Clarin, together formulating a research question, the addressing of which requires working with large language-based materials. Four such pilot projects are described in more detail, illustrating research on rhetorical history, second-language acquisition, literature, and political science. A common thread to these projects is an aspiration to meet the challenge of conducting research on the basis of very large amounts of textual data in a consistent way without losing sight of the individual cases making up the mass of data, i.e., to be able to move between Moretti’s “distant” and “close reading” modes. 

While the pilot projects clearly make substantial contributions to DH, they also reveal some needs for more development, and in particular a need for document-level access to the text materials. As a consequence of this, work has now been initiated in Swe-Clarin to meet this need, so that Swe-Clarin together with HSS scholars investigating intricate research questions can take on the methodological challenges of big-data language-based digital humanities.},
	booktitle    = {Digital Humanities 2016. Extended Papers of the International Symposium on Digital Humanities (DH 2016) Växjö, Sweden, November, 7-8, 2016.  Edited by Koraljka Golub, Marcelo Milra.  Vol-2021},
	author       = {Borin, Lars and Tahmasebi, Nina and Volodina, Elena and Ekman, Stefan and Jordan, Caspar and Viklund, Jon and Megyesi, Beáta and Näsman, Jesper and Palmér, Anne and Wirén, Mats and Björkenstam, Kristina and Grigonyte, Gintare and Gustafson Capková, Sofia and Kosiński, Tomasz},
	year         = {2017},
	publisher    = {M. Jeusfeld c/o Redaktion Sun SITE, Informatik V, RWTH Aachen.},
	address      = {Aachen},
}

@inProceedings{tahmasebi-etal-2019-convergence-280684,
	title        = {A Convergence of Methodologies: Notes on Data-Intensive Humanities Research},
	abstract     = {In this paper, we discuss a data-intensive research methodology for the digital humanities. We highlight the differences and commonalities between quantitative and qualitative research methodologies in  relation  to  a  data-intensive  research  process.  We  argue  that  issues of  representativeness  and  reduction  must  be  in  focus  for  all  phases  of the process; from the status of texts as such, over their digitization to pre-processing and methodological exploration.},
	booktitle    = {CEUR workshop proceedings ; 2364. Proceedings of the 4th Conference on Digital Humanities in the Nordic Countries, Copenhagen, Denmark, March 5-8, 2019},
	editor       = {Costanza Navarretta and Manex Agirrezabal and Bente Maegaard},
	author       = {Tahmasebi, Nina and Hagen, Niclas and Brodén, Daniel and Malm, Mats},
	year         = {2019},
	publisher    = {CEUR workshop proceedings},
	address      = {Aachen},
}

@proceedings{tahmasebi-etal-2019-proceedings-285886,
	title        = {Proceedings of the 1st International Workshop on Computational Approaches to Historical Language Change, August 2, 2019, Florence, Italy},
	editor       = {Tahmasebi, Nina and Borin, Lars and Jatowt, Adam and Xu, Yang},
	year         = {2019},
	publisher    = {Association for Computational Linguistics},
	address      = {Stroudsburg, PA},
	ISBN         = {978-1-950737-31-4},
}

@inProceedings{jatowt-etal-2018-every-272054,
	author       = {Jatowt, Adam and Campos, Ricardo and Bhowmick, Sourav S. and Tahmasebi, Nina and Doucet, Antoine},
	title        = {Every Word Has Its History: Interactive Exploration and Visualization of Word Sense Evolution},
	booktitle    = {CIKM '18 Proceedings of the 27th ACM International Conference on Information and Knowledge Management, October 22 - 26, 2018, Torino, Italy},
	publisher    = {ACM},
	address      = {New York, NY, USA},
	year         = {2018},
	ISBN         = {978-1-4503-6014-2},
}

@inProceedings{tahmasebi-risse-2017-uses-256649,
	author       = {Tahmasebi, Nina and Risse, Thomas},
	title        = {On the Uses of Word Sense Change for Research in the Digital Humanities},
	booktitle    = {Research and Advanced Technology for Digital Libraries - 21st International  Conference on Theory and Practice of Digital Libraries, TPDL 2017, Thessaloniki, Greece, September 18-21, 2017. Proceedings},
	editor       = {Jaap Kamps and Giannis Tsakonas and Yannis Manolopoulos and Lazaros Iliadis and Ioannis Karydis},
	publisher    = {Springer Verlag},
	address      = {Cham},
	year         = {2017},
	ISBN         = {978-3-319-67007-2},
	abstract     = {With advances in technology and culture, our language changes. We invent new words, add or change meanings of existing words and change names of existing things. Unfortunately, our language does not carry a memory; words, expressions and meanings used in the past are forgotten over time. When searching and interpreting content from archives, language changes pose a great challenge. In this paper, we present results of automatic word sense change detection and show the utility for archive users as well as digital humanities’ research. Our method is able to capture changes that relate to the usage and culture of a word that cannot easily be found using dictionaries or other resources.},
}

@inProceedings{abualhajia-etal-2017-parameter-256642,
	title        = {Parameter Transfer across Domains for Word Sense Disambiguation},
	abstract     = {Word  sense  disambiguation  is  defined  as finding the corresponding sense for a target word in a given context,  which comprises  a  major  step  in  text  applications. Recently, it has been addressed as an optimization problem.  The idea behind is to find a sequence of senses that corresponds
to the words in a given context with a maximum semantic similarity.  Metaheuristics like simulated annealing and D-Bees provide approximate good-enough solutions, but are usually influenced by the starting parameters. In this paper, we study the parameter tuning for both algorithms within the  word  sense  disambiguation  problem. The experiments are conducted on different datasets to cover different disambiguation scenarios. We show that D-Bees is robust and less sensitive towards the initial parameters compared to simulated annealing,  hence,  it is sufficient to tune the parameters once and reuse them for different datasets, domains or languages.},
	booktitle    = {Proceedings of Recent Advances in Natural Language Processing Meet Deep Learning, Varna, Bulgaria 2–8 September 2017 / Edited by Galia Angelova, Kalina Bontcheva, Ruslan Mitkov, Ivelina  Nikolova, Irina Temnikova},
	author       = {Abualhajia, Sallam and Tahmasebi, Nina and Forin, Diane and Zimmermann, Karl-Heinz},
	year         = {2017},
	ISBN         = {978-954-452-048-9},
}

@inProceedings{tahmasebi-2018-study-264722,
	title        = {A Study on {Word2Vec} on a Historical {Swedish} Newspaper Corpus},
	abstract     = {Detecting word sense changes can be of great interest in
the field of digital humanities. Thus far, most investigations and automatic methods have been developed and carried out on English text and
most recent methods make use of word embeddings. This paper presents
a study on using Word2Vec, a neural word embedding method, on a
Swedish historical newspaper collection. Our study includes a set of 11
words and our focus is the quality and stability of the word vectors over
time. We investigate if a word embedding method like Word2Vec can be
effectively used on texts where the volume and quality is limited.},
	booktitle    = {CEUR Workshop Proceedings. Vol. 2084. Proceedings of the Digital Humanities in the Nordic Countries 3rd Conference, Helsinki Finland, March 7-9, 2018. Edited by  Eetu Mäkelä, Mikko Tolonen, Jouni Tuominen},
	author       = {Tahmasebi, Nina},
	year         = {2018},
	publisher    = {University of Helsinki, Faculty of Arts},
	address      = {Helsinki},
}

@proceedings{tidemann-tahmasebi-2017-proceedings-264302,
	title        = {Proceedings of the 21st Nordic Conference on Computational Linguistics, NODALIDA 2017, Gothenburg, Sweden, May 22-24, 2017},
	editor       = {Tiedemann, Jörg and Tahmasebi, Nina},
	year         = {2017},
	publisher    = {Association for Computational Linguistics},
	ISBN         = {978-91-7685-601-7},
}

@inProceedings{tahmasebi-risse-2017-finding-256637,
	author       = {Tahmasebi, Nina and Risse, Thomas},
	title        = {Finding Individual Word Sense Changes and their Delay in Appearance},
	booktitle    = {Proceedings of Recent Advances in Natural Language Processing 2017. Varna, Bulgaria 2–8 September, 2017},
	editor       = {Galia Angelova and Kalina Bontcheva and Ruslan Mitkov and Ivelina Nikolova and Irina Temnikova},
	year         = {2017},
	ISBN         = {978-954-452-048-9},
	abstract     = {We  present  a  method  for  detecting  word sense  changes  by  utilizing  automatically
induced word senses.  Our method works on  the  level  of  individual  senses  and  allows a word to have  e.g. one stable sense and then add a novel sense that later experiences  change.
Senses  are  grouped based on polysemy to find linguistic concepts and we can find broadening and narrowing as well as novel (polysemous and homonymic)  senses. We  evaluate  on  a testset, present recall and estimates of the time between expected and found change.},
}

@inProceedings{r?dveneide-etal-2016-swedish-250073,
	title        = {The {Swedish Culturomics Gigaword Corpus}: A One Billion Word {Swedish} Reference Dataset for {NLP}},
	abstract     = {In this paper we present a dataset of contemporary Swedish containing one billion words. The dataset consists of a wide range of sources, all annotated using a state-of-the-art corpus annotation pipeline, and is intended to be a static and clearly versioned dataset. This will facilitate reproducibility of experiments across institutions and make it easier to compare NLP algorithms on contemporary Swedish. The dataset contains sentences from 1950 to 2015 and has been carefully designed to feature a good mix of genres balanced over each included decade. The sources include literary, journalistic, academic and legal texts, as well as blogs and web forum entries.},
	booktitle    = {Linköping Electronic Conference Proceedings. Digital Humanities 2016. From Digitization to Knowledge 2016: Resources and Methods for Semantic Processing of Digital Works/Texts, July 11, 2016, Krakow, Poland},
	author       = {Rødven-Eide, Stian and Tahmasebi, Nina and Borin, Lars},
	year         = {2016},
	publisher    = {Linköping University Electronic Press},
	address      = {Linköping},
	ISBN         = {978-91-7685-733-5},
}

@inProceedings{tahmasebi-risse-2013-role-191616,
	title        = {The Role of Language Evolution in Digital Archives},
	abstract     = {With advancements in technology and culture, our language
changes. We invent new words, add or change meanings of existing words and change names of existing things. Left untackled, these changes in language create a gap between the language known by users and the language stored in our digital archives. In particular, they affect our possibility to firstly find content and secondly interpret that content. In this paper we discuss the limitations brought on by language evolution and existing methodology for automatically finding evolution. We discuss measures needed in the near future to ensure semantically accessible digital archives for long-term preservation.},
	booktitle    = {3rd International Workshop on Semantic Digital Archives, SDA 2013 - Co-located with 17th International Conference on Theory and Practice of Digital Libraries, TPDL 2013; Valetta; Malta; 26 September 2013},
	author       = {Tahmasebi, Nina and Risse, Thomas},
	year         = {2013},
	volume       = {1091},
	pages        = {16--27},
}

@inProceedings{holzmann-etal-2013-blogneer-191617,
	title        = {{BlogNEER}: Applying Named Entity Evolution Recognition on the Blogosphere},
	abstract     = {The introduction of Social Media allowed more people to publish texts by removing barriers that are technical but also social such as the editorial controls that exist in traditional media. The resulting language tends to be more like spoken language because people adapt their use to the medium. Since spoken language is more dynamic, more new and short lived terms are introduced also in written format on the Web. In previous work we presented an unsupervised method for Named Entity Evolution Recognition (NEER) to find name changes in newspaper collections. In this paper we present BlogNEER, an extension to apply NEER on blog data. The language used in blogs is often closer to spoken language than to language used in traditional media. BlogNEER introduces a novel semantic filtering method that makes use of Semantic Web resources (i.e., DBpedia) to gain more information about terms. We present the approach of BlogNEER and initial results that show the potentials of the approach. },
	booktitle    = {3rd International Workshop on Semantic Digital Archives, SDA 2013 - Co-located with 17th International Conference on Theory and Practice of Digital Libraries, TPDL 2013; Valetta; Malta; 26 September 2013},
	author       = {Holzmann, Helge and Tahmasebi, Nina and Risse, Thomas},
	year         = {2013},
	volume       = {1091},
	pages        = {28--39},
}

@inProceedings{nusko-etal-2016-building-238135,
	title        = {Building a Sentiment Lexicon for Swedish},
	abstract     = {In this paper we will present our ongoing project to build and evaluate a sentiment lexicon for Swedish. Our main resource is SALDO, a lexical resource of modern Swedish developed at Språkbanken, University of Gothenburg. Using a semi-supervised approach, we expand a manually chosen set of six core words using parent-child relations based on the semantic network structure of SALDO. At its current stage the lexicon consists of 175 seeds, 633 children, and 1319 grandchildren.},
	booktitle    = {Linköping Electronic Conference Proceedings},
	author       = {Nusko, Bianka and Tahmasebi, Nina and Mogren, Olof},
	year         = {2016},
	volume       = {126},
	number       = {006},
	ISBN         = {978-91-7685-733-5},
	pages        = {32--37},
}

@inProceedings{tahmasebi-etal-2016-clarin-233899,
	title        = {SWE-CLARIN – the Swedish CLARIN project – aims and activities},
	booktitle    = {Digital Humanities in the Nordic countries, Oslo, March 15-17 2016},
	author       = {Tahmasebi, Nina and Borin, Lars and Jordan, Caspar and Ekman, Stefan},
	year         = {2016},
	pages        = {122--123},
}

@article{holzmann-etal-2015-named-209780,
	title        = {Named entity evolution recognition on the Blogosphere},
	abstract     = {Advancements in technology and culture lead to changes in our language. These changes create a gap between the language known by users and the language stored in digital archives. It affects user’s possibility to firstly find content and secondly interpret that content. In a previous work, we introduced our approach for named entity evolution recognition (NEER) in newspaper collections. Lately, increasing efforts in Web preservation have led to increased availability of Web archives covering longer time spans. However, language on the Web is more dynamic than in traditional media and many of the basic assumptions from the newspaper domain do not hold for Web data. In this paper we discuss the limitations of existing methodology for NEER. We approach these by adapting an existing NEER method to work on noisy data like the Web and the Blogosphere in particular. We develop novel filters that reduce the noise and make use of Semantic Web resources to obtain more information about terms. Our evaluation shows the potentials of the proposed approach.},
	journal      = {International Journal on Digital Libraries},
	author       = {Holzmann, Helge and Tahmasebi, Nina and Risse, Thomas},
	year         = {2015},
	volume       = {15},
	number       = {2-4},
	pages        = {209--235},
}

@inProceedings{kageback-etal-2014-extractive-210878,
	title        = {Extractive Summarization using Continuous Vector Space Models},
	abstract     = {Automatic summarization can help users extract the most important pieces of information from the vast amount of text digitized into electronic form everyday. Central to automatic summarization is the notion of similarity between sentences in text. In this paper we propose the use of continuous vector representations for semantically aware representations of sentences as a basis for measuring similarity. We evaluate different compositions
for sentence representation on a standard dataset using the ROUGE evaluation measures. Our experiments show that the evaluated methods improve the performance of a state-of-the-art summarization framework and strongly indicate the benefits of continuous word vector representations for automatic summarization.},
	booktitle    = {Proceedings of the 2nd Workshop on Continuous Vector Space Models and their Compositionality (CVSC) EACL, April 26-30, 2014 Gothenburg, Sweden},
	author       = {Kågebäck, Mikael and Mogren, Olof and Tahmasebi, Nina and Dubhashi, Devdatt},
	year         = {2014},
	ISBN         = {978-1-937284-94-7},
	pages        = {31--39},
}

@book{tahmasebi-2013-models-210879,
	title        = {Models and Algorithms for Automatic Detection of Language Evolution},
	author       = {Tahmasebi, Nina},
	year         = {2013},
	publisher    = {Gottfried Wilhelm Leibniz Universität Hannover},
	address      = {Hannover, Tyskland},
}

@inProceedings{demidova-etal-2013-analysing-191624,
	title        = {Analysing Entities, Topics and Events in Community Memories},
	abstract     = {This paper briefly describes the components of the ARCOMEM architecture concerned with the extraction, enrichment, consolidation and dynamics analysis of entities, topics and events, deploying text mining, NLP, and semantic data integration technologies. In particular, we focus on four main areas relevant to support the ARCOMEM requirements and use cases: (a) entity and event extraction from text; (b) entity and event enrichment and consolidation; (c) topic
detection and dynamics; and (d) temporal aspects and dynamics detection in Web language and online social networks.},
	booktitle    = {Proc. of the first International Workshop on Archiving Community Memories},
	author       = {Demidova, Elena and Barbieri, N. and Dietze, Stefan and Funk, Adam and Gossen, Gerhard and Maynard, Diana and Papailiou, N. and Plachouras, V. and Peters, W. and Stavrakas, Y. and Risse, Thomas and Tahmasebi, Nina},
	year         = {2013},
}

@inProceedings{spiliotopoulos-etal-2013-2013-191622,
	title        = {SMS 2013 PC co-chairs message},
	abstract     = {The SMS workshop 2013 on Social Media Semantics was held this year in the context of the OTM ("OnTheMove") federated conferences, covering different aspects of distributed information systems in September 2013 in Graz. The topic of the workshop is about semantics in Social Media. The SocialWeb has become the first and main medium to get and spread information. Everyday news is reported instantly, and social media has become a major source for broadcasters, news reporters and political analysts as well as a place of interaction for everyday people. For a full utilization of this medium, information must be gathered, analyzed and semantically understood. In this workshop we ask the question: how can Semantic Web technologies be used to provide the means for interested people to draw conclusions, assess situations and to preserve their findings for future use? © 2013 Springer-Verlag.},
	booktitle    = {Lecture Notes in Computer Science},
	author       = {Spiliotopoulos, D. and Risse, T. and Tahmasebi, Nina},
	year         = {2013},
	ISBN         = {9783642410321},
}

@article{hengchen-tahmasebi-2021-collection-301262,
	title        = {A Collection of Swedish Diachronic Word Embedding Models Trained on Historical Newspaper Data},
	abstract     = {This paper describes the creation of several word embedding models based on a large collection of diachronic Swedish newspaper material available through Språkbanken Text, the Swedish language bank. This data was produced in the context of Språkbanken Text’s continued mission to collaborate with humanities and natural language processing (NLP) researchers and to provide freely available language resources, for the development of state-of-the-art NLP methods and tools.},
	journal      = {Journal of Open Humanities Data},
	author       = {Hengchen, Simon and Tahmasebi, Nina},
	year         = {2021},
	volume       = {7},
	number       = {2},
	pages        = {1--7},
}

@inProceedings{hengchen-tahmasebi-2021-supersim-305157,
	title        = {SuperSim: a test set for word similarity and relatedness in Swedish},
	abstract     = {Language models are notoriously difficult to evaluate. 
We release SuperSim, a large-scale similarity and relatedness test set for Swedish built with expert human judgments. The test set is composed of 1,360 word-pairs independently judged for both relatedness and similarity by five annotators. We evaluate three different models (Word2Vec, fastText, and GloVe) trained on two separate Swedish datasets, namely the Swedish Gigaword corpus and a Swedish Wikipedia dump, to provide a baseline for future comparison. 
We release the fully annotated test set, code, baseline models, and data.},
	booktitle    = {Proceedings of the 23rd Nordic Conference on Computational Linguistics (NoDaLiDa), May 31-June 2 2021, Reykjavik, Iceland (online)},
	author       = {Hengchen, Simon and Tahmasebi, Nina},
	year         = {2021},
	publisher    = {Linköping Electronic Conference Proceedings},
	address      = {Linköping},
	ISBN         = {978-91-7929-614-8},
}

@edited_book{tahmasebi-etal-2021-computational-306968,
	title        = {Computational approaches to semantic change},
	abstract     = {Semantic change — how the meanings of words change over time — has preoccupied scholars since well before modern linguistics emerged in the late 19th and early 20th century, ushering in a new methodological turn in the study of language change. Compared to changes in sound and grammar, semantic change is the least  understood. Ever since, the study of semantic change has progressed steadily, accumulating a vast store of knowledge for over a century, encompassing many languages and language families.

Historical linguists also early on realized the potential of computers as research tools, with papers at the very first international conferences in computational linguistics in the 1960s. Such computational studies still tended to be small-scale, method-oriented, and qualitative. However, recent years have witnessed a sea-change in this regard. Big-data empirical quantitative investigations are now coming to the forefront, enabled by enormous advances in storage capability and processing power. Diachronic corpora have grown beyond imagination, defying exploration by traditional manual qualitative methods, and language technology has become increasingly data-driven and semantics-oriented. These developments present a golden opportunity for the empirical study of semantic change over both long and short time spans.

A major challenge presently is to integrate the hard-earned  knowledge and expertise of traditional historical linguistics with  cutting-edge methodology explored primarily in computational linguistics.

The idea for the present volume came out of a concrete response to this challenge.  The 1st International Workshop on Computational Approaches to Historical Language Change (LChange'19), at ACL 2019, brought together scholars from both fields.

This volume offers a survey of this exciting new direction in the study of semantic change, a discussion of the many remaining challenges that we face in pursuing it, and considerably updated and extended versions of a selection of the contributions to the LChange'19 workshop, addressing both more theoretical problems —  e.g., discovery of "laws of semantic change" — and practical applications, such as information retrieval in longitudinal text archives.},
	editor       = {Tahmasebi, Nina and Borin, Lars and Jatowt, Adam and Xu, Yang and Hengchen, Simon},
	year         = {2021},
	publisher    = {Language Science Press},
	address      = {Berlin},
	ISBN         = {978-3-98554-008-2},
}

@incollection{hengchen-etal-2021-challenges-306972,
	title        = {Challenges for computational lexical semantic change},
	abstract     = {The computational study of lexical semantic change (LSC) has taken off in the past few years and we are seeing increasing interest in the field, from both computational sciences and linguistics. Most of the research so far has focused on methods for modelling and detecting semantic change using large diachronic textual data, with the majority of the approaches employing neural embeddings. While methods that offer easy modelling of diachronic text are one of the main reasons for the spiking interest in LSC, neural models leave many aspects of the problem unsolved. The field has several open and complex challenges. In this chapter, we aim to describe the most important of these challenges and outline future directions.},
	booktitle    = {Computational approaches to semantic change / Tahmasebi, Nina, Borin, Lars, Jatowt, Adam, Yang, Xu, Hengchen, Simon (eds.)},
	author       = {Hengchen, Simon and Tahmasebi, Nina and Schlechtweg, Dominik and Dubossarsky, Haim},
	year         = {2021},
	publisher    = {Language Science Press},
	address      = {Berlin},
	ISBN         = {978-3-98554-008-2},
	pages        = {341--372},
}

@incollection{jatowt-etal-2021-computational-307061,
	title        = {Computational approaches to lexical semantic change: Visualization systems and novel applications},
	abstract     = {The purpose of this chapter is to survey visualization and user interface solutions for understanding lexical semantic change as well as to survey a number of applications of techniques developed in computational analysis of lexical semantic change. We first overview approaches aiming to develop systems that support understanding semantic change in an interactive and visual way. It is generally accepted that computational techniques developed for analyzing and uncovering semantic change are beneficial to linguists, historians, sociologists, and practitioners in numerous related fields, especially within the humanities. However, quite a few non-professional users are equally interested in the histories of words. Developing interactive, visual, engaging, and easy-to-understand systems can help them to acquire relevant knowledge.

Second, we believe that other fields could benefit from the research outcomes of computational approaches to lexical semantic change. In general, properly representing the meaning of terms used in the past should be important for a range of natural language processing, information retrieval and other tasks that operate on old texts. In the latter part of the chapter, we then focus on current and potential applications related to computer and information science with the underlying question: “How can modeling semantic change benefit wider downstream applications in these disciplines?”},
	booktitle    = {Computational approaches to semantic change},
	author       = {Jatowt, Adam and Tahmasebi, Nina and Borin, Lars},
	year         = {2021},
	publisher    = {Language Science Press},
	address      = {Berlin},
	ISBN         = {978-3-96110-312-6},
	pages        = {311--339},
}

@incollection{tahmasebi-etal-2021-survey-307058,
	title        = {Survey of computational approaches to lexical semantic change detection},
	abstract     = {Our languages are in constant flux driven by external factors such as cultural, societal and technological changes, as well as by only partially understood internal motivations. Words acquire new meanings and lose old senses, new words are coined or borrowed from other languages and obsolete words slide into obscurity. Understanding the characteristics of shifts in the meaning and in the use of words
is useful for those who work with the content of historical texts, the interested general public, but also in and of itself.

The findings from automatic lexical semantic change detection and the models of diachronic conceptual change are also currently being incorporated in approaches for measuring document across-time similarity, information retrieval from long-term document archives, the design of OCR algorithms, and so on. In recent years we have seen a surge in interest in the academic community in computational methods and tools supporting inquiry into diachronic conceptual change and lexical replacement. This article provides a comprehensive survey of recent computational
techniques to tackle both.},
	booktitle    = {Computational approaches to semantic change / Nina Tahmasebi, Lars Borin, Adam Jatowt, Yang Xu, Simon Hengchen (eds.)},
	author       = {Tahmasebi, Nina and Borin, Lars and Jatowt, Adam},
	year         = {2021},
	publisher    = {Language Science Press},
	address      = {Berlin},
	ISBN         = {978-3-96110-312-6},
	pages        = {1--91},
}

@misc{tahmasebi-etal-2022-proceedings-316661,
	title        = {Proceedings of the 3rd Workshop on Computational Approaches to Historical Language Change, May 26-27, 2022, Dublin, Ireland},
	author       = {Tahmasebi, Nina and Montariol, Syrielle and Kutuzov, Andrey and Hengchen, Simon and Dubossarsky, Haim and Borin, Lars},
	year         = {2022},
	publisher    = {Association for Computational Linguistics},
	address      = {Stroudsburg, PA},
	ISBN         = {978-1-955917-42-1},
}

@incollection{tahmasebi-dubossarsky-2023-computational-325543,
	title        = {Computational modeling of semantic change},
	abstract     = {In this chapter we provide an overview of computational modeling for semantic change using large and semi-large textual corpora. We aim to provide a key for the interpretation of relevant methods and evaluation techniques, and also provide insights into important aspects of the computational study of semantic change. We discuss the pros and cons of different classes of models with respect to the properties of the data from which one wishes to model semantic change, and which avenues are available to evaluate the results. This chapter is forthcoming as the book has not yet been published. },
	booktitle    = {Routledge Handbook of Historical Linguistics, 2nd edition},
	author       = {Tahmasebi, Nina and Dubossarsky, Haim},
	year         = {2023},
	publisher    = {Routledge},
}

@inProceedings{zhou-etal-2023-finer-325541,
	title        = {The Finer They Get: Combining Fine-Tuned Models For Better Semantic Change Detection},
	abstract     = {In this work we investigate the hypothesis that enriching contextualized models using fine-tuning tasks can improve their
capacity to detect lexical semantic change (LSC). We include tasks  aimed to capture both low-level linguistic information like part-of-speech tagging, as well as higher level (semantic) information.
 
Through a series of analyses we demonstrate that certain combinations of fine-tuning tasks, like sentiment, syntactic information, and logical inference, bring large improvements to standard LSC models that are based only on standard language modeling. We test on the binary classification and ranking tasks of SemEval-2020 Task 1 and evaluate using both permutation tests and under transfer-learning scenarios.},
	booktitle    = {24th Nordic Conference on Computational Linguistics (NoDaLiDa)},
	author       = {Zhou, Wei and Tahmasebi, Nina and Dubossarsky, Haim},
	year         = {2023},
	publisher    = {Linköping University Electronic Press},
	ISBN         = {978-99-1621-999-7},
}

@inProceedings{ohlsson-etal-2023-going-329710,
	title        = {Going to the market together. A presentation of a
mixed methods project},
	booktitle    = {TwinTalks Workshop at DH2023, 10 July, Graz, Austria},
	author       = {Ohlsson, Claes and Virk, Shafqat and Tahmasebi, Nina},
	year         = {2023},
}

@misc{tahmasebi-etal-2023-proceedings-331093,
	title        = {Proceedings of the 4th Workshop on Computational Approaches to Historical Language Change, LChange'23, December 6th, 2023, Singapore},
	abstract     = {Welcome to the 4th International Workshop on Computational Approaches to Historical Language Change (LChange’23) co-located with EMNLP 2023. LChange is held on December 6th, 2023, as a hybrid
event with participation possible both virtually and on-site in Singapore.

Characterizing the time-varying nature of language will have broad implications and applications in
multiple fields including linguistics, artificial intelligence, digital humanities, computational cognitive
and social sciences. In this workshop, we bring together the world’s pioneers and experts in computational approaches to historical language change with a focus on digital text corpora. In doing so, this workshop carries out the triple goals of disseminating state-of-the-art research on diachronic modeling of language change, fostering cross-disciplinary collaborations, and exploring the fundamental theoretical and methodological challenges in this growing niche of computational linguistic research.},
	author       = {Tahmasebi, Nina and Montariol, Syrielle and Dubossarsky, Haim and Kutuzov, Andrey and Hengchen, Simon and Alfter, David and Periti, Francesco and Cassotti, Pierluigi},
	year         = {2023},
	publisher    = {Association for Computational Linguistics},
	address      = {Stroudsburg, PA},
	ISBN         = {979-8-89176-043-1},
}

@inProceedings{berdicevskis-etal-2023-superlim-331445,
	title        = {Superlim: A Swedish Language Understanding Evaluation Benchmark},
	booktitle    = {Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing, December 6-10, 2023, Singapore  / Houda Bouamor, Juan Pino, Kalika Bali (Editors)},
	author       = {Berdicevskis, Aleksandrs and Bouma, Gerlof and Kurtz, Robin and Morger, Felix and Öhman, Joey and Adesam, Yvonne and Borin, Lars and Dannélls, Dana and Forsberg, Markus and Isbister, Tim and Lindahl, Anna and Malmsten, Martin and Rekathati, Faton and Sahlgren, Magnus and Volodina, Elena and Börjeson, Love and Hengchen, Simon and Tahmasebi, Nina},
	year         = {2023},
	publisher    = {Association for Computational Linguistics},
	address      = {Stroudsburg, PA},
	ISBN         = {979-8-89176-060-8},
	pages        = {8137--8153},
}