Hoppa till huvudinnehåll
Språkbanken Text är en avdelning inom Språkbanken.

BibTeX

@inproceedings{virk-etal-2024-enhancing-343103,
	title        = {Enhancing Swedish Parliamentary Data: Annotation, Accessibility, and Application in Digital Humanities},
	abstract     = {The Swedish bicameral parliament data presents a valuable textual resource that is of interest for many researches and scholars. The parliamentary texts offer many avenues for research including the study of how various affairs were run by governments over time. The Parliament proceedings are available in textual format, but in their original form, they are noisy and unstructured and thus hard to explore and investigate. In this paper, we report the transformation of the raw bicameral parliament data (1867-1970) into a structured lexical resource annotated with various word and document level attributes. The annotated data is then made searchable through two modern corpus infrastructure components which provide a wide array of corpus exploration, visualization, and comparison options. To demonstrate the practical utility of this resource, we present a case study examining the transformation of the concept of ‘market’ over time from a tangible physical entity to an abstract idea.},
	booktitle    = {Association for Computational Linguistics (ACL)},
	author       = {Virk, Shafqat Mumtaz and Ohlsson, Claes and Björck, Henrik and Tahmasebi, Nina and Runefelt, Leif},
	year         = {2024},
}

@inproceedings{schlechtweg-etal-2024-more-343019,
	title        = {More DWUGs: Extending and Evaluating Word Usage Graph Datasets in Multiple Languages},
	abstract     = {Word Usage Graphs (WUGs) represent human semantic proximity judgments for pairs of word uses in a weighted graph, which can be clustered to infer word sense clusters from simple pairwise word use judgments, avoiding the need for word sense definitions. SemEval-2020 Task 1 provided the first and to date largest manually annotated, diachronic WUG dataset. In this paper, we check the robustness and correctness of the annotations by continuing the SemEval annotation algorithm for two more rounds and comparing against an established annotation paradigm. Further, we test the reproducibility by resampling a new, smaller set of word uses from the SemEval source corpora and annotating them. Our work contributes to a better understanding of the problems and opportunities of the WUG annotation paradigm and points to future improvements.},
	booktitle    = {Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing},
	author       = {Schlechtweg, Dominik and Cassotti, Pierluigi and Noble, Bill and Alfter, David and {Schulte im Walde}, Sabine and Tahmasebi, Nina},
	year         = {2024},
	publisher    = {Association for Computational Linguistics},
	address      = {Miami, Florida, USA},
	pages        = {14379--14393},
}

@inproceedings{periti-etal-2024-automatically-343018,
	title        = {Automatically Generated Definitions and their utility for Modeling Word Meaning},
	abstract     = {Modeling lexical semantics is a challenging task, often suffering from interpretability pitfalls. In this paper, we delve into the generation of dictionary-like sense definitions and explore their utility for modeling word meaning. We fine-tuned two Llama models and include an existing T5-based model in our evaluation. Firstly, we evaluate the quality of the generated definitions on existing English benchmarks, setting new state-of-the-art results for the Definition Generation task. Next, we explore the use of definitions generated by our models as intermediate representations subsequently encoded as sentence embeddings. We evaluate this approach on lexical semantics tasks such as the Word-in-Context, Word Sense Induction, and Lexical Semantic Change, setting new state-of-the-art results in all three tasks when compared to unsupervised baselines.},
	booktitle    = {Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing},
	author       = {Periti, Francesco and Alfter, David and Tahmasebi, Nina},
	year         = {2024},
	publisher    = {Association for Computational Linguistics},
	address      = {Miami, Florida, USA},
	pages        = {14008--14026},
}

@inproceedings{periti-etal-2024-trotr-343017,
	title        = {TRoTR: A Framework for Evaluating the Re-contextualization of Text Reuse},
	abstract     = {Current approaches for detecting text reuse do not focus on recontextualization, i.e., how the new context(s) of a reused text differs from its original context(s). In this paper, we propose a novel framework called TRoTR that relies on the notion of topic relatedness for evaluating the diachronic change of context in which text is reused. TRoTR includes two NLP tasks: TRiC and TRaC. TRiC is designed to evaluate the topic relatedness between a pair of recontextualizations. TRaC is designed to evaluate the overall topic variation within a set of recontextualizations. We also provide a curated TRoTR benchmark of biblical text reuse, human-annotated with topic relatedness. The benchmark exhibits an inter-annotator agreement of .811. We evaluate multiple, established SBERT models on the TRoTR tasks and find that they exhibit greater sensitivity to textual similarity than topic relatedness. Our experiments show that fine-tuning these models can mitigate such a kind of sensitivity.},
	booktitle    = {Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing},
	author       = {Periti, Francesco and Cassotti, Pierluigi and Montanelli, Stefano and Tahmasebi, Nina and Schlechtweg, Dominik},
	year         = {2024},
	publisher    = {Association for Computational Linguistics},
	address      = {Miami, Florida, USA},
	pages        = {13972--13990},
}

@inproceedings{berdicevskis-etal-2023-superlim-331445,
	title        = {Superlim: A Swedish Language Understanding Evaluation Benchmark},
	booktitle    = {Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing, December 6-10, 2023, Singapore},
	editor       = {Bouamor, Houda and Pino, Juan and Bali, Kalika},
	author       = {Berdicevskis, Aleksandrs and Bouma, Gerlof and Kurtz, Robin and Morger, Felix and Öhman, Joey and Adesam, Yvonne and Borin, Lars and Dannélls, Dana and Forsberg, Markus and Isbister, Tim and Lindahl, Anna and Malmsten, Martin and Rekathati, Faton and Sahlgren, Magnus and Volodina, Elena and Börjeson, Love and Hengchen, Simon and Tahmasebi, Nina},
	year         = {2023},
	publisher    = {Association for Computational Linguistics},
	address      = {Stroudsburg, PA},
	ISBN         = {979-8-89176-060-8},
	pages        = {8137--8153},
}

@inproceedings{schlechtweg-etal-2024-durel-336715,
	title        = {The DURel Annotation Tool: Human and Computational Measurement of Semantic Proximity, Sense Clusters and Semantic Change},
	abstract     = {We present the DURel tool implementing the annotation of semantic proximity between word uses into an online, open source interface. The tool supports standardized human annotation as well as computational annotation, building on recent advances with Word-in-Context models. Annotator judgments are clustered with automatic graph clustering techniques and visualized for analysis. This allows to measure word senses with simple and intuitive micro-task judgments between use pairs, requiring minimal preparation efforts. The tool offers additional functionalities to compare the agreement between annotators to guarantee the inter-subjectivity of the obtained judgments and to calculate summary statistics over the annotated data giving insights into sense frequency distributions, semantic variation or changes of senses over time.},
	booktitle    = {Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics: System Demonstrations, March 17-22, 2024, St. Julian’s, Malta},
	author       = {Schlechtweg, Dominik and Virk, Shafqat and Sander, Pauline and Sköldberg, Emma and Theuer Linke, Lukas and Zhang, Tuo and Tahmasebi, Nina and {Schulte im Walde}, Sabine},
	year         = {2024},
	publisher    = {Association for Computational Linguistics},
	ISBN         = {979-8-89176-091-2},
}

@inproceedings{periti-etal-2024-(chat)gpt-337358,
	author       = {Periti, Francesco and Dubossarsky, Haim and Tahmasebi, Nina},
	title        = {(Chat)GPT v BERT Dawn of Justice for Semantic Change Detection},
	booktitle    = {Findings of the Association for Computational Linguistics: EACL 2024, March 17-22, 2024, St. Julian’s, Malta},
	year         = {2024},
	publisher    = {Association for Computational Linguistics},
	ISBN         = {979-8-89176-093-6},
	abstract     = {In the universe of Natural Language Processing, Transformer-based language models like BERT and (Chat)GPT have emerged as lexical superheroes with great power to solve open research problems. In this paper, we specifically focus on the temporal problem of semantic change, and evaluate their ability to solve two diachronic extensions of the Word-in-Context (WiC) task: TempoWiC and HistoWiC. In particular, we investigate the potential of a novel, off-the-shelf technology like ChatGPT (and GPT) 3.5 compared to BERT, which represents a family of models that currently stand as the state-of-the-art for modeling semantic change. Our experiments represent the first attempt to assess the use of (Chat)GPT for studying semantic change. Our results indicate that ChatGPT performs significantly worse than the foundational GPT version. Furthermore, our results demonstrate that (Chat)GPT achieves slightly lower performance than BERT in detecting long-term changes but performs significantly worse in detecting short-term changes.},
}

@incollection{tahmasebi-dubossarsky-2023-computational-325543,
	title        = {Computational modeling of semantic change},
	abstract     = {In this chapter we provide an overview of computational modeling for semantic change using large and semi-large textual corpora. We aim to provide a key for the interpretation of relevant methods and evaluation techniques, and also provide insights into important aspects of the computational study of semantic change. We discuss the pros and cons of different classes of models with respect to the properties of the data from which one wishes to model semantic change, and which avenues are available to evaluate the results. This chapter is forthcoming as the book has not yet been published.},
	booktitle    = {Routledge Handbook of Historical Linguistics},
	edition      = {Second},
	author       = {Tahmasebi, Nina and Dubossarsky, Haim},
	year         = {2023},
	publisher    = {Routledge},
}

@article{periti-etal-2024-studying-340876,
	title        = {Studying word meaning evolution through incremental semantic shift detection},
	abstract     = {The study of semantic shift, that is, of how words change meaning as a consequence of social practices, events and political circumstances, is relevant in Natural Language Processing, Linguistics, and Social Sciences. The increasing availability of large diachronic corpora and advance in computational semantics have accelerated the development of computational approaches to detecting such shift. In this paper, we introduce a novel approach to tracing the evolution of word meaning over time. Our analysis focuses on gradual changes in word semantics and relies on an incremental approach to semantic shift detection (SSD) called What is Done is Done (WiDiD). WiDiD leverages scalable and evolutionary clustering of contextualised word embeddings to detect semantic shift and capture temporal transactions in word meanings. Existing approaches to SSD: (a) significantly simplify the semantic shift problem to cover change between two (or a few) time points, and (b) consider the existing corpora as static. We instead treat SSD as an organic process in which word meanings evolve across tens or even hundreds of time periods as the corpus is progressively made available. This results in an extremely demanding task that entails a multitude of intricate decisions. We demonstrate the applicability of this incremental approach on a diachronic corpus of Italian parliamentary speeches spanning eighteen distinct time periods. We also evaluate its performance on seven popular labelled benchmarks for SSD across multiple languages. Empirical results show that our results are comparable to state-of-the-art approaches, while outperforming the state-of-the-art for certain languages.},
	journal      = {Language Resources and Evaluation},
	author       = {Periti, Francesco and Picascia, Sergio and Montanelli, Stefano and Ferrara, Alfio and Tahmasebi, Nina},
	year         = {2024},
	pages        = {37},
}

@inproceedings{cassotti-etal-2024-computational-337360,
	title        = {Computational modeling of semantic change},
	abstract     = {Languages change constantly over time, influenced by social, technological, cultural and political factors that affect how people express themselves. In particular, words can undergo the process of semantic change, which can be subtle and significantly impact the interpretation of texts. For example, the word terrific used to mean ‘causing terror’ and was as such synonymous to terrifying. Nowadays, speakers use the word in the sense of ‘excessive’ and even ‘amazing’. In Historical Linguistics, tools and methods have been developed to analyse this phenomenon, including systematic categorisations of the types of change, the causes and the mechanisms underlying the different types of change. However, traditional linguistic methods, while informative, are often based on small, carefully curated samples. Thanks to the availability of both large diachronic corpora, the computational means to model word meaning unsupervised, and evaluation benchmarks, we are seeing an increasing interest in the computational modelling of semantic change. This is evidenced by the increasing number of publications in this new domain as well as the organisation of initiatives and events related to this topic, such as four editions of the International Workshop on Computational Approaches to Historical Language Change LChange1, and several evaluation campaigns (Schlechtweg et al., 2020a; Basile et al., 2020b; Kutuzov et al.; Zamora-Reina et al., 2022).},
	booktitle    = {Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics: Tutorial Abstracts},
	author       = {Cassotti, Pierluigi and Periti, Francesco and De Pascale, Stefano and Dubossarsky, Haim and Tahmasebi, Nina},
	year         = {2024},
	publisher    = {Association for Computational Linguistics},
}

@inproceedings{periti-tahmasebi-2024-towards-339990,
	title        = {Towards a Complete Solution to Lexical Semantic Change: an Extension to Multiple Time Periods and Diachronic Word Sense Induction},
	abstract     = {Thus far, the research community has focused on a simplified computational modeling of semantic change between two time periods. This simplified view has served as a foundational block but is not a complete solution to the complex modeling of semantic change. Acknowledging the power of recent language models, we believe that now is the right time to extend the current modeling to multiple time periods and diachronic word sense induction. In this position paper, we outline several extensions of the current modeling and discuss issues related to the extensions.},
	booktitle    = {Proceedings of the 5th Workshop on Computational Approaches to Historical Language Change, August 15, 2024, Bangkok, Thailand},
	author       = {Periti, Francesco and Tahmasebi, Nina},
	year         = {2024},
	publisher    = {Association for Computational Linguistics},
	address      = {Stroudsburg, PA},
	ISBN         = {979-8-89176-138-4},
}

@inproceedings{noble-etal-2024-improving-339991,
	title        = {Improving Word Usage Graphs with Edge Induction},
	abstract     = {This paper investigates edge induction as a method for augmenting Word Usage Graphs, in which word usages (nodes) are connected through scores (edges) representing semantic relatedness. Clustering (densely) annotated WUGs can be used as a way to find senses of a word without relying on traditional word sense annotation. However, annotating all or a majority of pairs of usages is typically infeasible, resulting in sparse graphs and, likely, lower quality senses. In this paper, we ask if filling out WUGs with edges predicted from the human annotated edges improves the eventual clusters. We experiment with edge induction models that use structural features of the existing sparse graph, as well as those that exploit textual (distributional) features of the usages. We find that in both cases, inducing edges prior to clustering improves correlation with human sense-usage annotation across three different clustering algorithms and languages.},
	booktitle    = {Proceedings of the 5th Workshop on Computational Approaches to Historical Language Change, August 15, 2024, Bangkok, Thailand},
	author       = {Noble, Bill and Periti, Francesco and Tahmasebi, Nina},
	year         = {2024},
	publisher    = {Association for Computational Linguistics},
	address      = {Stroudsburg, PA},
	ISBN         = {979-8-89176-138-4},
}

@misc{tahmasebi-etal-2004-proceedings-339992,
	title        = {Proceedings of the 5th Workshop on Computational Approaches to Historical Language Change},
	abstract     = {Welcome to the 5th International Workshop on Computational Approaches to Historical Language Change (LChange’24) co-located with ACL 2024. LChange is held on August 15th, 2024, as a hybrid event with participation possible both virtually and on-site in Thailand.},
	author       = {Tahmasebi, Nina and Montariol, Syrielle and Kutuzov, Andrey and Alfter, David and Cassotti, Pierluigi and Huebscher, Netta},
	year         = {2024},
	publisher    = {Association for Computational Linguistics},
}

@inproceedings{periti-etal-2024-analyzing-338831,
	title        = {Analyzing Semantic Change through Lexical Replacements},
	abstract     = {Modern language models are capable of contextualizing words based on their surrounding context. However, this capability is often compromised due to semantic change that leads to words being used in new, unexpected contexts not encountered during pre-training. In this paper, we model semantic change by studying the effect of unexpected contexts introduced by lexical replacements. We propose a replacement schema where a target word is substituted with lexical replacements of varying relatedness, thus simulating different kinds of semantic change. Furthermore, we leverage the replacement schema as a basis for a novel interpretable model for semantic change. We are also the first to evaluate the use of LLaMa for semantic change detection.},
	booktitle    = {Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
	author       = {Periti, Francesco and Cassotti, Pierluigi and Dubossarsky, Haim and Tahmasebi, Nina},
	year         = {2024},
	publisher    = {Association for Computational Linguistics},
}

@inproceedings{cassotti-etal-2024-using-338833,
	author       = {Cassotti, Pierluigi and De Pascale, Stefano and Tahmasebi, Nina},
	title        = {Using Synchronic Definitions and Semantic Relations to Classify Semantic Change Types},
	booktitle    = {Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
	year         = {2024},
	publisher    = {Association for Computational Linguistics},
	abstract     = {There is abundant evidence of the fact that the way words change their meaning can be classified in different types of change, highlighting the relationship between the old and new meanings (among which generalization, specialization and co-hyponymy transfer). In this paper, we present a way of detecting these types of change by constructing a model that leverages information both from synchronic lexical relations and definitions of word meanings. Specifically, we use synset definitions and hierarchy information from WordNet and test it on a digitized version of Blank's (1997) dataset of semantic change types. Finally, we show how the sense relationships can improve models for both approximation of human judgments of semantic relatedness as well as binary Lexical Semantic Change Detection. },
}

@inproceedings{periti-tahmasebi-2024-systematic-337365,
	author       = {Periti, Francesco and Tahmasebi, Nina},
	title        = {A Systematic Comparison of Contextualized Word Embeddings for Lexical Semantic Change},
	booktitle    = {Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers), June 16-21, 2024, Mexico City, Mexico},
	year         = {2024},
	publisher    = {Association for Computational Linguistics},
	ISBN         = {979-8-89176-114-8},
	abstract     = {Contextualized embeddings are the preferred tool for modeling Lexical Semantic Change (LSC). Current evaluations typically focus on a specific task known as Graded Change Detection (GCD). However, performance comparison across work are often misleading due to their reliance on diverse settings. In this paper, we evaluate state-of-the-art models and approaches for GCD under equal conditions. We further break the LSC problem into Word-in-Context (WiC) and Word Sense Induction (WSI) tasks, and compare models across these different levels. Our evaluation is performed across different languages on eight available benchmarks for LSC, and shows that (i) APD outperforms other approaches for GCD; (ii) XL-LEXEME outperforms other contextualized models for WiC, WSI, and GCD, while being comparable to GPT-4; (iii) there is a clear need for improving the modeling of word meanings, as well as focus on how, when, and why these meanings change, rather than solely focusing on the extent of semantic change.},
}

@article{nielbo-etal-2024-quantitative-337356,
	author       = {Nielbo, Kristoffer L. and Karsdorp, Folgert and Wevers, Melvin and Lassche, Alie and Baglini, Rebekah B. and Kestemont, Mike and Tahmasebi, Nina},
	title        = {Quantitative text analysis},
	journal      = {Nature Reviews Methods Primers},
	year         = {2024},
	volume       = {4},
	number       = {1},
	abstract     = {Text analysis has undergone substantial evolution since its inception, moving from manual qualitative assessments to sophisticated quantitative and computational methods. Beginning in the late twentieth century, a surge in the utilization of computational techniques reshaped the landscape of text analysis, catalysed by advances in computational power and database technologies. Researchers in various fields, from history to medicine, are now using quantitative methodologies, particularly machine learning, to extract insights from massive textual data sets. This transformation can be described in three discernible methodological stages: feature-based models, representation learning models and generative models. Although sequential, these stages are complementary, each addressing analytical challenges in the text analysis. The progression from feature-based models that require manual feature engineering to contemporary generative models, such as GPT-4 and Llama2, signifies a change in the workflow, scale and computational infrastructure of the quantitative text analysis. This Primer presents a detailed introduction of some of these developments, offering insights into the methods, principles and applications pertinent to researchers embarking on the quantitative text analysis, especially within the field of machine learning.},
}

@inproceedings{zhou-etal-2023-finer-325541,
	title        = {The Finer They Get: Combining Fine-Tuned Models For Better Semantic Change Detection},
	abstract     = {In this work we investigate the hypothesis that enriching contextualized models using fine-tuning tasks can improve their capacity to detect lexical semantic change (LSC). We include tasks aimed to capture both low-level linguistic information like part-of-speech tagging, as well as higher level (semantic) information.

Through a series of analyses we demonstrate that certain combinations of fine-tuning tasks, like sentiment, syntactic information, and logical inference, bring large improvements to standard LSC models that are based only on standard language modeling. We test on the binary classification and ranking tasks of SemEval-2020 Task 1 and evaluate using both permutation tests and under transfer-learning scenarios.},
	booktitle    = {24th Nordic Conference on Computational Linguistics (NoDaLiDa)},
	author       = {Zhou, Wei and Tahmasebi, Nina and Dubossarsky, Haim},
	year         = {2023},
	publisher    = {Linköping University Electronic Press},
	ISBN         = {978-99-1621-999-7},
}

@inproceedings{ohlsson-etal-2023-going-329710,
	title        = {Going to the market together. A presentation of a mixed methods project},
	booktitle    = {TwinTalks Workshop at DH2023, 10 July, Graz, Austria},
	author       = {Ohlsson, Claes and Virk, Shafqat and Tahmasebi, Nina},
	year         = {2023},
}

@misc{tahmasebi-etal-2023-proceedings-331093,
	title        = {Proceedings of the 4th Workshop on Computational Approaches to Historical Language Change, LChange'23, December 6th, 2023, Singapore},
	abstract     = {Welcome to the 4th International Workshop on Computational Approaches to Historical Language Change (LChange’23) co-located with EMNLP 2023. LChange is held on December 6th, 2023, as a hybrid event with participation possible both virtually and on-site in Singapore.

Characterizing the time-varying nature of language will have broad implications and applications in multiple fields including linguistics, artificial intelligence, digital humanities, computational cognitive and social sciences. In this workshop, we bring together the world’s pioneers and experts in computational approaches to historical language change with a focus on digital text corpora. In doing so, this workshop carries out the triple goals of disseminating state-of-the-art research on diachronic modeling of language change, fostering cross-disciplinary collaborations, and exploring the fundamental theoretical and methodological challenges in this growing niche of computational linguistic research.},
	author       = {Tahmasebi, Nina and Montariol, Syrielle and Dubossarsky, Haim and Kutuzov, Andrey and Hengchen, Simon and Alfter, David and Periti, Francesco and Cassotti, Pierluigi},
	year         = {2023},
	publisher    = {Association for Computational Linguistics},
	address      = {Stroudsburg, PA},
	ISBN         = {979-8-89176-043-1},
}

@inproceedings{jatowt-etal-2018-every-272054,
	title        = {Every Word Has Its History: Interactive Exploration and Visualization of Word Sense Evolution},
	booktitle    = {CIKM '18 Proceedings of the 27th ACM International Conference on Information and Knowledge Management, October 22 - 26, 2018, Torino, Italy},
	author       = {Jatowt, Adam and Campos, Ricardo and Bhowmick, Sourav S. and Tahmasebi, Nina and Doucet, Antoine},
	year         = {2018},
	publisher    = {ACM},
	address      = {New York, NY, USA},
	ISBN         = {978-1-4503-6014-2},
}

@inproceedings{tahmasebi-risse-2017-uses-256649,
	title        = {On the Uses of Word Sense Change for Research in the Digital Humanities},
	abstract     = {With advances in technology and culture, our language changes. We invent new words, add or change meanings of existing words and change names of existing things. Unfortunately, our language does not carry a memory; words, expressions and meanings used in the past are forgotten over time. When searching and interpreting content from archives, language changes pose a great challenge. In this paper, we present results of automatic word sense change detection and show the utility for archive users as well as digital humanities’ research. Our method is able to capture changes that relate to the usage and culture of a word that cannot easily be found using dictionaries or other resources.},
	booktitle    = {Research and Advanced Technology for Digital Libraries - 21st International Conference on Theory and Practice of Digital Libraries, TPDL 2017, Thessaloniki, Greece, September 18-21, 2017. Proceedings},
	editor       = {Jaap Kamps and Giannis Tsakonas and Yannis Manolopoulos and Lazaros Iliadis and Ioannis Karydis},
	author       = {Tahmasebi, Nina and Risse, Thomas},
	year         = {2017},
	publisher    = {Springer Verlag},
	address      = {Cham},
	ISBN         = {978-3-319-67007-2},
}

@inproceedings{adesam-etal-2018-exploring-273835,
	author       = {Adesam, Yvonne and Dannélls, Dana and Tahmasebi, Nina},
	title        = {Exploring the Quality of the Digital Historical Newspaper Archive KubHist},
	booktitle    = {Seventh Swedish Language Technology Conference (SLTC), Stockholm, 7-9 November 2018},
	year         = {2018},
}

@misc{tahmasebi-etal-2022-proceedings-316661,
	title        = {Proceedings of the 3rd Workshop on Computational Approaches to Historical Language Change, May 26-27, 2022, Dublin, Ireland},
	author       = {Tahmasebi, Nina and Montariol, Syrielle and Kutuzov, Andrey and Hengchen, Simon and Dubossarsky, Haim and Borin, Lars},
	year         = {2022},
	publisher    = {Association for Computational Linguistics},
	address      = {Stroudsburg, PA},
	ISBN         = {978-1-955917-42-1},
}

@inproceedings{tahmasebi-etal-2019-convergence-280684,
	title        = {A Convergence of Methodologies: Notes on Data-Intensive Humanities Research},
	abstract     = {In this paper, we discuss a data-intensive research methodology for the digital humanities. We highlight the differences and commonalities between quantitative and qualitative research methodologies in relation to a data-intensive research process. We argue that issues of representativeness and reduction must be in focus for all phases of the process; from the status of texts as such, over their digitization to pre-processing and methodological exploration.},
	booktitle    = {CEUR workshop proceedings ; 2364. Proceedings of the 4th Conference on Digital Humanities in the Nordic Countries, Copenhagen, Denmark, March 5-8, 2019},
	editor       = {Costanza Navarretta and Manex Agirrezabal and Bente Maegaard},
	author       = {Tahmasebi, Nina and Hagen, Niclas and Brodén, Daniel and Malm, Mats},
	year         = {2019},
	publisher    = {CEUR Workshop Proceedings},
	address      = {Aachen},
}

@inproceedings{adesam-etal-2019-exploring-279948,
	author       = {Adesam, Yvonne and Dannélls, Dana and Tahmasebi, Nina},
	title        = {Exploring the Quality of the Digital Historical Newspaper Archive KubHist},
	booktitle    = {Proceedings of the 4th Conference of The Association Digital Humanities in the Nordic Countries (DHN), Copenhagen, Denmark, March 5-8, 2019},
	editor       = {Costanza Navarretta and Manex Agirrezabal and Bente Maegaard},
	year         = {2019},
	publisher    = {CEUR Workshop Proceedings},
	address      = {Aachen},
	abstract     = {The KubHist Corpus is a massive corpus of Swedish historical newspapers, digitized by the Royal Swedish library, and available through the Språkbanken corpus infrastructure Korp. This paper contains a first overview of the KubHist corpus, exploring some of the difficulties with the data, such as OCR errors and spelling variation, and discussing possible paths for improving the quality and the searchability.},
}

@inProceedings{rouces-etal-2019-tracking-281308,
	title        = {Tracking Attitudes Towards Immigration in {Swedish} Media},
	abstract     = {We use a gold standard under construction for sentiment analysis in Swedish to explore how attitudes towards immigration change across time and media. We track the evolution of attitude starting from the year 2000 for three different Swedish media: the national newspapers Aftonbladet and Svenska Dagbladet, representing different halves of the left–right political spectrum, and the online forum Flashback.},
	booktitle    = {Proceedings of the 4th Conference of The Association Digital Humanities in the Nordic Countries (DHN), Copenhagen, Denmark, March 5-8, 2019},
	editor       = {Navarretta, Costanza and Agirrezabal, Manex and Maegaard, Bente},
	author       = {Rouces, Jacobo and Borin, Lars and Tahmasebi, Nina},
	year         = {2019},
	series       = {CEUR Workshop Proceedings},
	volume       = {2364},
	publisher    = {CEUR Workshop Proceedings},
	address      = {Aachen},
}

@inProceedings{rouces-etal-2019-political-281307,
	title        = {Political Stance Analysis Using {Swedish} Parliamentary Data},
	abstract     = {We process and visualize Swedish parliamentary data using methods from statistics and machine learning, which allows us to obtain insight into the political processes behind the data. We produce plots that let us infer the relative stance of political parties and their members on different topics. In addition, we can infer the degree of homogeneity of individual votes within different parties, as well as the degree of multi-dimensionality of Swedish politics.},
	booktitle    = {Proceedings of the 4th Conference of The Association Digital Humanities in the Nordic Countries (DHN), Copenhagen, Denmark, March 5-8, 2019},
	editor       = {Navarretta, Costanza and Agirrezabal, Manex and Maegaard, Bente},
	author       = {Rouces, Jacobo and Borin, Lars and Tahmasebi, Nina},
	year         = {2019},
	series       = {CEUR Workshop Proceedings},
	volume       = {2364},
	publisher    = {CEUR Workshop Proceedings},
	address      = {Aachen},
}

@inProceedings{rouces-etal-2018-generating-264719,
	title        = {Generating a Gold Standard for a {Swedish} Sentiment Lexicon},
	abstract     = {We create a gold standard for sentiment annotation of Swedish terms, using the freely available SALDO lexicon and the Gigaword corpus. For this purpose, we employ a multi-stage approach combining corpus-based frequency sampling, direct score annotation and Best-Worst Scaling. In addition to obtaining a gold standard, we analyze the data from our process and we draw conclusions about the optimal sentiment model.},
	booktitle    = {LREC 2018, Eleventh International Conference on Language Resources and Evaluation, May 7-12, 2018, Miyazaki (Japan)},
	author       = {Rouces, Jacobo and Tahmasebi, Nina and Borin, Lars and Rødven-Eide, Stian},
	year         = {2018},
	publisher    = {ELRA},
	address      = {Miyazaki},
	ISBN         = {979-10-95546-00-9},
}

@inProceedings{rouces-etal-2018-sensaldo-264720,
	title        = {{SenSALDO}: Creating a Sentiment Lexicon for {Swedish}},
	abstract     = {The natural language processing subfield known as sentiment analysis or opinion mining has seen an explosive expansion over the last decade or so, and sentiment analysis has become a standard item in the NLP toolbox. Still, many theoretical and methodological questions remain unanswered and resource gaps unfilled. Most work on automated sentiment analysis has been done on English and a few other languages; for most written languages of the world, this tool is not available. This paper describes the development of an extensive sentiment lexicon for written (standard) Swedish. We investigate different methods for developing a sentiment lexicon for Swedish. We use an existing gold standard dataset for training and testing. For each word sense from the SALDO Swedish lexicon, we assign a real value sentiment score in the range [-1,1] and produce a sentiment label. We implement and evaluate three methods: a graph-based method that iterates over the SALDO structure, a method based on random paths over the SALDO structure and a corpus-driven method based on word embeddings. The resulting sense-disambiguated sentiment lexicon (SenSALDO) is an open source resource and freely available from Språkbanken, The Swedish Language Bank at the University of Gothenburg.},
	booktitle    = {LREC 2018, Eleventh International Conference on Language Resources and Evaluation, May 7-12, 2018, Miyazaki (Japan)},
	author       = {Rouces, Jacobo and Tahmasebi, Nina and Borin, Lars and Rødven-Eide, Stian},
	year         = {2018},
	publisher    = {ELRA},
	address      = {Miyazaki},
	ISBN         = {979-10-95546-00-9},
}

@inProceedings{tahmasebi-2018-study-264722,
	title        = {A Study on {Word2Vec} on a Historical {Swedish} Newspaper Corpus},
	abstract     = {Detecting word sense changes can be of great interest in the field of digital humanities. Thus far, most investigations and automatic methods have been developed and carried out on English text and most recent methods make use of word embeddings. This paper presents a study on using Word2Vec, a neural word embedding method, on a Swedish historical newspaper collection. Our study includes a set of 11 words and our focus is the quality and stability of the word vectors over time. We investigate if a word embedding method like Word2Vec can be effectively used on texts where the volume and quality is limited.},
	booktitle    = {Proceedings of the Digital Humanities in the Nordic Countries 3rd Conference, Helsinki, Finland, March 7-9, 2018},
	editor       = {Mäkelä, Eetu and Tolonen, Mikko and Tuominen, Jouni},
	author       = {Tahmasebi, Nina},
	year         = {2018},
	series       = {CEUR Workshop Proceedings},
	volume       = {2084},
	publisher    = {University of Helsinki, Faculty of Arts},
	address      = {Helsinki},
}

@inProceedings{rouces-etal-2018-defining-264721,
	title        = {Defining a gold standard for a {Swedish} sentiment lexicon: Towards higher-yield text mining in the digital humanities},
	abstract     = {There is an increasing demand for multilingual sentiment analysis, and most work on sentiment lexicons is still carried out based on English lexicons like WordNet. In addition, many of the non-English sentiment lexicons that do exist have been compiled by (machine) translation from English resources, thereby arguably obscuring possible language-specific characteristics of sentiment-loaded vocabulary. In this paper we describe the creation from scratch of a gold standard for the sentiment annotation of Swedish terms as a first step towards the creation of a full-fledged sentiment lexicon for Swedish.},
	booktitle    = {Proceedings of the Digital Humanities in the Nordic Countries 3rd Conference, Helsinki, Finland, March 7-9, 2018},
	editor       = {Mäkelä, Eetu and Tolonen, Mikko and Tuominen, Jouni},
	author       = {Rouces, Jacobo and Borin, Lars and Tahmasebi, Nina and Rødven-Eide, Stian},
	year         = {2018},
	series       = {CEUR Workshop Proceedings},
	volume       = {2084},
	publisher    = {University of Helsinki, Faculty of Arts},
	address      = {Helsinki},
}

@inProceedings{dubossarsky-etal-2019-time-295438,
	title        = {Time for change: Evaluating models of semantic change without evaluation tasks},
	booktitle    = {Cambridge Language Sciences Annual Symposium 2019: Perspectives on Language Change},
	author       = {Dubossarsky, Haim and Hengchen, Simon and Tahmasebi, Nina and Schlechtweg, Dominik},
	year         = {2019},
}

@proceedings{tahmasebi-etal-2019-proceedings-285886,
	title        = {Proceedings of the 1st International Workshop on Computational Approaches to Historical Language Change, August 2, 2019, Florence, Italy},
	editor       = {Tahmasebi, Nina and Borin, Lars and Jatowt, Adam and Xu, Yang},
	year         = {2019},
	publisher    = {Association for Computational Linguistics},
	address      = {Stroudsburg, PA},
	ISBN         = {978-1-950737-31-4},
}

@inProceedings{dubossarsky-etal-2019-time-281304,
	title        = {{Time-Out}: Temporal Referencing for Robust Modeling of Lexical Semantic Change},
	abstract     = {State-of-the-art models of lexical semantic change detection suffer from noise stemming from vector space alignment. We have empirically tested the Temporal Referencing method for lexical semantic change and show that, by avoiding alignment, it is less affected by this noise. We show that, trained on a diachronic corpus, the skip-gram with negative sampling architecture with temporal referencing outperforms alignment models on a synthetic task as well as a manual testset. We introduce a principled way to simulate lexical semantic change and systematically control for possible biases.},
	booktitle    = {Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics, Florence, Italy, July 28 - August 2, 2019},
	editor       = {Korhonen, Anna and Traum, David and Màrquez, Lluís},
	author       = {Dubossarsky, Haim and Hengchen, Simon and Tahmasebi, Nina and Schlechtweg, Dominik},
	year         = {2019},
	publisher    = {Association for Computational Linguistics},
	address      = {Stroudsburg, PA},
	ISBN         = {978-1-950737-48-2},
}

@article{tahmasebi-hengchen-2019-strengths-291189,
	title        = {The Strengths and Pitfalls of Large-Scale Text Mining for Literary Studies},
	abstract     = {This paper is an overview of the opportunities and challenges of using large-scale text mining to answer research questions that stem from the humanities in general and literature specifically. In this paper, we will discuss a data-intensive research methodology and how different views of digital text affect answers to research questions. We will discuss results derived from text mining, how these results can be evaluated, and their relation to hypotheses and research questions. Finally, we will discuss some pitfalls of computational literary analysis and give some pointers as to how these can be avoided.},
	journal      = {Samlaren: tidskrift för svensk litteraturvetenskaplig forskning},
	author       = {Tahmasebi, Nina and Hengchen, Simon},
	year         = {2019},
	volume       = {140},
	pages        = {198--227},
}

@incollection{tahmasebi-etal-2021-survey-307058,
	title        = {Survey of computational approaches to lexical semantic change detection},
	abstract     = {Our languages are in constant flux driven by external factors such as cultural, societal and technological changes, as well as by only partially understood internal motivations. Words acquire new meanings and lose old senses, new words are coined or borrowed from other languages and obsolete words slide into obscurity. Understanding the characteristics of shifts in the meaning and in the use of words is useful for those who work with the content of historical texts, the interested general public, but also in and of itself.

The findings from automatic lexical semantic change detection and the models of diachronic conceptual change are also currently being incorporated in approaches for measuring document across-time similarity, information retrieval from long-term document archives, the design of OCR algorithms, and so on. In recent years we have seen a surge in interest in the academic community in computational methods and tools supporting inquiry into diachronic conceptual change and lexical replacement. This article provides a comprehensive survey of recent computational techniques to tackle both.},
	booktitle    = {Computational approaches to semantic change},
	editor       = {Tahmasebi, Nina and Borin, Lars and Jatowt, Adam and Xu, Yang and Hengchen, Simon},
	author       = {Tahmasebi, Nina and Borin, Lars and Jatowt, Adam},
	year         = {2021},
	publisher    = {Language Science Press},
	address      = {Berlin},
	ISBN         = {978-3-96110-312-6},
	pages        = {1--91},
}

@incollection{hengchen-etal-2021-challenges-306972,
	title        = {Challenges for computational lexical semantic change},
	abstract     = {The computational study of lexical semantic change (LSC) has taken off in the past few years and we are seeing increasing interest in the field, from both computational sciences and linguistics. Most of the research so far has focused on methods for modelling and detecting semantic change using large diachronic textual data, with the majority of the approaches employing neural embeddings. While methods that offer easy modelling of diachronic text are one of the main reasons for the spiking interest in LSC, neural models leave many aspects of the problem unsolved. The field has several open and complex challenges. In this chapter, we aim to describe the most important of these challenges and outline future directions.},
	booktitle    = {Computational approaches to semantic change},
	editor       = {Tahmasebi, Nina and Borin, Lars and Jatowt, Adam and Xu, Yang and Hengchen, Simon},
	author       = {Hengchen, Simon and Tahmasebi, Nina and Schlechtweg, Dominik and Dubossarsky, Haim},
	year         = {2021},
	publisher    = {Language Science Press},
	address      = {Berlin},
	ISBN         = {978-3-98554-008-2},
	pages        = {341--372},
}

@incollection{jatowt-etal-2021-computational-307061,
	title        = {Computational approaches to lexical semantic change: Visualization systems and novel applications},
	abstract     = {The purpose of this chapter is to survey visualization and user interface solutions for understanding lexical semantic change as well as to survey a number of applications of techniques developed in computational analysis of lexical semantic change. We first overview approaches aiming to develop systems that support understanding semantic change in an interactive and visual way. It is generally accepted that computational techniques developed for analyzing and uncovering semantic change are beneficial to linguists, historians, sociologists, and practitioners in numerous related fields, especially within the humanities. However, quite a few non-professional users are equally interested in the histories of words. Developing interactive, visual, engaging, and easy-to-understand systems can help them to acquire relevant knowledge.

Second, we believe that other fields could benefit from the research outcomes of computational approaches to lexical semantic change. In general, properly representing the meaning of terms used in the past should be important for a range of natural language processing, information retrieval and other tasks that operate on old texts. In the latter part of the chapter, we then focus on current and potential applications related to computer and information science with the underlying question: “How can modeling semantic change benefit wider downstream applications in these disciplines?”},
	booktitle    = {Computational approaches to semantic change},
	editor       = {Tahmasebi, Nina and Borin, Lars and Jatowt, Adam and Xu, Yang and Hengchen, Simon},
	author       = {Jatowt, Adam and Tahmasebi, Nina and Borin, Lars},
	year         = {2021},
	publisher    = {Language Science Press},
	address      = {Berlin},
	ISBN         = {978-3-96110-312-6},
	pages        = {311--339},
}

@book{tahmasebi-etal-2021-computational-306968,
	title        = {Computational approaches to semantic change},
	abstract     = {Semantic change — how the meanings of words change over time — has preoccupied scholars since well before modern linguistics emerged in the late 19th and early 20th century, ushering in a new methodological turn in the study of language change. Compared to changes in sound and grammar, semantic change is the least understood. Ever since, the study of semantic change has progressed steadily, accumulating a vast store of knowledge for over a century, encompassing many languages and language families.

Historical linguists also early on realized the potential of computers as research tools, with papers at the very first international conferences in computational linguistics in the 1960s. Such computational studies still tended to be small-scale, method-oriented, and qualitative. However, recent years have witnessed a sea-change in this regard. Big-data empirical quantitative investigations are now coming to the forefront, enabled by enormous advances in storage capability and processing power. Diachronic corpora have grown beyond imagination, defying exploration by traditional manual qualitative methods, and language technology has become increasingly data-driven and semantics-oriented. These developments present a golden opportunity for the empirical study of semantic change over both long and short time spans.

A major challenge presently is to integrate the hard-earned knowledge and expertise of traditional historical linguistics with cutting-edge methodology explored primarily in computational linguistics.

The idea for the present volume came out of a concrete response to this challenge. The 1st International Workshop on Computational Approaches to Historical Language Change (LChange'19), at ACL 2019, brought together scholars from both fields.

This volume offers a survey of this exciting new direction in the study of semantic change, a discussion of the many remaining challenges that we face in pursuing it, and considerably updated and extended versions of a selection of the contributions to the LChange'19 workshop, addressing both more theoretical problems — e.g., discovery of "laws of semantic change" — and practical applications, such as information retrieval in longitudinal text archives.},
	editor       = {Tahmasebi, Nina and Borin, Lars and Jatowt, Adam and Xu, Yang and Hengchen, Simon},
	year         = {2021},
	publisher    = {Language Science Press},
	address      = {Berlin},
	ISBN         = {978-3-98554-008-2},
}

@inProceedings{hengchen-tahmasebi-2021-supersim-305157,
	title        = {{SuperSim}: a test set for word similarity and relatedness in {Swedish}},
	abstract     = {Language models are notoriously difficult to evaluate. We release SuperSim, a large-scale similarity and relatedness test set for Swedish built with expert human judgments. The test set is composed of 1,360 word-pairs independently judged for both relatedness and similarity by five annotators. We evaluate three different models (Word2Vec, fastText, and GloVe) trained on two separate Swedish datasets, namely the Swedish Gigaword corpus and a Swedish Wikipedia dump, to provide a baseline for future comparison. We release the fully annotated test set, code, baseline models, and data.},
	booktitle    = {Proceedings of the 23rd Nordic Conference on Computational Linguistics (NoDaLiDa), May 31-June 2 2021, Reykjavik, Iceland (online)},
	author       = {Hengchen, Simon and Tahmasebi, Nina},
	year         = {2021},
	publisher    = {Linköping Electronic Conference Proceedings},
	address      = {Linköping},
	ISBN         = {978-91-7929-614-8},
}

@article{hengchen-tahmasebi-2021-collection-301262,
	title        = {A Collection of {Swedish} Diachronic Word Embedding Models Trained on Historical Newspaper Data},
	abstract     = {This paper describes the creation of several word embedding models based on a large collection of diachronic Swedish newspaper material available through Språkbanken Text, the Swedish language bank. This data was produced in the context of Språkbanken Text’s continued mission to collaborate with humanities and natural language processing (NLP) researchers and to provide freely available language resources, for the development of state-of-the-art NLP methods and tools.},
	journal      = {Journal of Open Humanities Data},
	author       = {Hengchen, Simon and Tahmasebi, Nina},
	year         = {2021},
	volume       = {7},
	number       = {2},
	pages        = {1--7},
}

@inProceedings{rouces-etal-2020-creating-290695,
	title        = {Creating an Annotated Corpus for Aspect-Based Sentiment Analysis in {Swedish}},
	abstract     = {Aspect-Based Sentiment Analysis constitutes a more fine-grained alternative to traditional sentiment analysis at sentence level. In addition to a sentiment value denoting how positive or negative a particular opinion or sentiment expression is, it identifies additional aspects or 'slots' that characterize the opinion. Some typical aspects are target and source, i.e. who holds the opinion and about which entity or aspect is the opinion. We present a large Swedish corpus annotated for Aspect-Based Sentiment Analysis. Each sentiment expression is annotated as a tuple that contains the following fields: one among 5 possible sentiment values, the target, the source, and whether the sentiment expressed is ironic. In addition, the linguistic element that conveys the sentiment is identified too. Sentiment for a particular topic is also annotated at title, paragraph and document level. The documents are articles obtained from two Swedish media (Svenska Dagbladet and Aftonbladet) and one online forum (Flashback), totalling around 4000 documents. The corpus is freely available and we plan to use it for training and testing an Aspect-Based Sentiment Analysis system.},
	booktitle    = {Proceedings of the 5th conference in Digital Humanities in the Nordic Countries, Riga, Latvia, October 21-23, 2020},
	author       = {Rouces, Jacobo and Borin, Lars and Tahmasebi, Nina},
	year         = {2020},
	publisher    = {CEUR Workshop Proceedings},
}

@inProceedings{schlechtweg-etal-2020-semeval-295463,
	title        = {{SemEval-2020} Task 1: Unsupervised Lexical Semantic Change Detection},
	abstract     = {Lexical Semantic Change detection, i.e., the task of identifying words that change meaning over time, is a very active research area, with applications in NLP, lexicography, and linguistics. Evaluation is currently the most pressing problem in Lexical Semantic Change detection, as no gold standards are available to the community, which hinders progress. We present the results of the first shared task that addresses this gap by providing researchers with an evaluation framework and manually annotated, high-quality datasets for English, German, Latin, and Swedish. 33 teams submitted 186 systems, which were evaluated on two subtasks.},
	booktitle    = {Proceedings of the Fourteenth Workshop on Semantic Evaluation (SemEval2020), Barcelona, Spain (Online), December 12, 2020},
	author       = {Schlechtweg, Dominik and McGillivray, Barbara and Hengchen, Simon and Dubossarsky, Haim and Tahmasebi, Nina},
	year         = {2020},
	publisher    = {ACL},
}

@misc{schlechtweg-etal-2020-post-295466,
	title        = {Post-Evaluation Data for {SemEval-2020} Task 1: Unsupervised Lexical Semantic Change Detection},
	abstract     = {This data collection contains the post-evaluation data for SemEval-2020 Task 1: Unsupervised Lexical Semantic Change Detection: (1) the starting kit to download data, and examples for competing in the CodaLab challenge including baselines; (2) the true binary change scores of the targets for Subtask 1, and their true graded change scores for Subtask 2 (test_data_truth/); (3) the scoring program used to score submissions against the true test data in the evaluation and post-evaluation phase (scoring_program/); and (4) the results of the evaluation phase including, for example, analysis plots (plots/) displaying the results:},
	author       = {Schlechtweg, Dominik and McGillivray, Barbara and Hengchen, Simon and Dubossarsky, Haim and Tahmasebi, Nina},
	year         = {2020},
	publisher    = {Zenodo},
}

@misc{tahmasebi-etal-2020-swedish-295465,
	title        = {{Swedish} Test Data for {SemEval} 2020 Task 1: Unsupervised Lexical Semantic Change Detection},
	abstract     = {This data collection contains the Swedish test data for SemEval 2020 Task 1: Unsupervised Lexical Semantic Change Detection. It consists of a Swedish text corpus pair (corpus1/, corpus2/) and 31 lemmas which have been annotated for their lexical semantic change between the two corpora (targets.txt). We sample from the KubHist2 corpus, digitized by the National Library of Sweden, and available through the Språkbanken corpus infrastructure Korp (Borin et al., 2012). The full corpus is available through a CC BY (attribution) license. Each word for which the lemmatizer in the Korp pipeline has found a lemma is replaced with the lemma. In cases where the lemmatizer cannot find a lemma, we leave the word as is (i.e., unlemmatized, no lower-casing). KubHist contains very frequent OCR errors, especially for the older data. More detail about the properties and quality of the KubHist corpus can be found in (Adesam et al., 2019).},
	author       = {Tahmasebi, Nina and Hengchen, Simon and Schlechtweg, Dominik and McGillivray, Barbara and Dubossarsky, Haim},
	year         = {2020},
}

@inProceedings{abualhajia-etal-2017-parameter-256642,
	title        = {Parameter Transfer across Domains for Word Sense Disambiguation},
	abstract     = {Word sense disambiguation is defined as finding the corresponding sense for a target word in a given context, which comprises a major step in text applications. Recently, it has been addressed as an optimization problem. The idea behind is to find a sequence of senses that corresponds to the words in a given context with a maximum semantic similarity. Metaheuristics like simulated annealing and D-Bees provide approximate good-enough solutions, but are usually influenced by the starting parameters. In this paper, we study the parameter tuning for both algorithms within the word sense disambiguation problem. The experiments are conducted on different datasets to cover different disambiguation scenarios. We show that D-Bees is robust and less sensitive towards the initial parameters compared to simulated annealing, hence, it is sufficient to tune the parameters once and reuse them for different datasets, domains or languages.},
	booktitle    = {Proceedings of Recent Advances in Natural Language Processing Meet Deep Learning, Varna, Bulgaria 2–8 September 2017},
	editor       = {Angelova, Galia and Bontcheva, Kalina and Mitkov, Ruslan and Nikolova, Ivelina and Temnikova, Irina},
	author       = {Abualhaija, Sallam and Tahmasebi, Nina and Forin, Diane and Zimmermann, Karl-Heinz},
	year         = {2017},
	ISBN         = {978-954-452-048-9},
}

@inProceedings{borin-etal-2017-clarin-261157,
	title        = {{Swe-Clarin}: Language resources and technology for {Digital Humanities}},
	abstract     = {CLARIN is a European Research Infrastructure Consortium (ERIC), which aims at (a) making extensive language-based materials available as primary research data to the humanities and social sciences (HSS); and (b) offering state-of-the-art language technology (LT) as an e-research tool for this purpose, positioning CLARIN centrally in what is often referred to as the digital humanities (DH). The Swedish CLARIN node Swe-Clarin was established in 2015 with funding from the Swedish Research Council.

In this paper, we describe the composition and activities of Swe-Clarin, aiming at meeting the requirements of all HSS and other researchers whose research involves using text and speech as primary research data, and spreading the awareness of what Swe-Clarin can offer these research communities. We focus on one of the central means for doing this: pilot projects conducted in collaboration between HSS researchers and Swe-Clarin, together formulating a research question, the addressing of which requires working with large language-based materials. Four such pilot projects are described in more detail, illustrating research on rhetorical history, second-language acquisition, literature, and political science. A common thread to these projects is an aspiration to meet the challenge of conducting research on the basis of very large amounts of textual data in a consistent way without losing sight of the individual cases making up the mass of data, i.e., to be able to move between Moretti’s “distant” and “close reading” modes.

While the pilot projects clearly make substantial contributions to DH, they also reveal some needs for more development, and in particular a need for document-level access to the text materials. As a consequence of this, work has now been initiated in Swe-Clarin to meet this need, so that Swe-Clarin together with HSS scholars investigating intricate research questions can take on the methodological challenges of big-data language-based digital humanities.},
	booktitle    = {Digital Humanities 2016. Extended Papers of the International Symposium on Digital Humanities (DH 2016), Växjö, Sweden, November 7-8, 2016},
	editor       = {Golub, Koraljka and Milrad, Marcelo},
	author       = {Borin, Lars and Tahmasebi, Nina and Volodina, Elena and Ekman, Stefan and Jordan, Caspar and Viklund, Jon and Megyesi, Beáta and Näsman, Jesper and Palmér, Anne and Wirén, Mats and Björkenstam, Kristina and Grigonyte, Gintare and Gustafson Capková, Sofia and Kosiński, Tomasz},
	year         = {2017},
	series       = {CEUR Workshop Proceedings},
	volume       = {2021},
	publisher    = {M. Jeusfeld c/o Redaktion Sun SITE, Informatik V, RWTH Aachen.},
	address      = {Aachen},
}

@proceedings{tidemann-tahmasebi-2017-proceedings-264302,
	title        = {Proceedings of the 21st Nordic Conference on Computational Linguistics, NODALIDA 2017, Gothenburg, Sweden, May 22-24, 2017},
	editor       = {Tiedemann, Jörg and Tahmasebi, Nina},
	year         = {2017},
	publisher    = {Association for Computational Linguistics},
	ISBN         = {978-91-7685-601-7},
}

@inProceedings{tahmasebi-risse-2017-finding-256637,
	title        = {Finding Individual Word Sense Changes and their Delay in Appearance},
	abstract     = {We present a method for detecting word sense changes by utilizing automatically induced word senses. Our method works on the level of individual senses and allows a word to have e.g. one stable sense and then add a novel sense that later experiences change. Senses are grouped based on polysemy to find linguistic concepts and we can find broadening and narrowing as well as novel (polysemous and homonymic) senses. We evaluate on a testset, present recall and estimates of the time between expected and found change.},
	booktitle    = {Proceedings of Recent Advances in Natural Language Processing 2017. Varna, Bulgaria 2–8 September, 2017},
	editor       = {Angelova, Galia and Bontcheva, Kalina and Mitkov, Ruslan and Nikolova, Ivelina and Temnikova, Irina},
	author       = {Tahmasebi, Nina and Risse, Thomas},
	year         = {2017},
	ISBN         = {978-954-452-048-9},
}

@inProceedings{ahlberg-etal-2015-case-217988,
	title        = {A case study on supervised classification of {Swedish} pseudo-coordination},
	abstract     = {We present a case study on supervised classification of Swedish pseudo-coordination (SPC). The classification is attempted on the type-level with data collected from two data sets: a blog corpus and a fiction corpus. Two small experiments were designed to evaluate the feasibility of this task. The first experiment explored a classifier’s ability to discriminate pseudo-coordinations from ordinary verb coordinations, given a small labeled data set created during the experiment. The second experiment evaluated how well the classifier performed at detecting and ranking SPCs in a set of unlabeled verb coordinations, to investigate if it could be used as a semi-automatic discovery procedure to find new SPCs.},
	booktitle    = {Proceedings of the 20th Nordic Conference of Computational Linguistics, NODALIDA 2015, May 11-13, 2015, Vilnius, Lithuania},
	author       = {Ahlberg, Malin and Andersson, Peter and Forsberg, Markus and Tahmasebi, Nina},
	year         = {2015},
	publisher    = {Linköping University Electronic Press},
	address      = {Linköping},
	ISBN         = {978-91-7519-098-3},
}

% Conference paper: the Swedish Culturomics Gigaword Corpus (Digital Humanities 2016).
% NOTE(review): the "?" in the citation key looks like a mis-encoded "ø" (cf. the first author's surname);
% the key is left unchanged so that existing citations keep resolving.
@inProceedings{r?dveneide-etal-2016-swedish-250073,
	title        = {The {Swedish} {Culturomics} {Gigaword} {Corpus}: A One Billion Word {Swedish} Reference Dataset for {NLP}},
	abstract     = {In this paper we present a dataset of contemporary Swedish containing one billion words. The dataset consists of a wide range of sources, all annotated using a state-of-the-art corpus annotation pipeline, and is intended to be a static and clearly versioned dataset. This will facilitate reproducibility of experiments across institutions and make it easier to compare NLP algorithms on contemporary Swedish. The dataset contains sentences from 1950 to 2015 and has been carefully designed to feature a good mix of genres balanced over each included decade. The sources include literary, journalistic, academic and legal texts, as well as blogs and web forum entries.},
	booktitle    = {Linköping Electronic Conference Proceedings. Digital Humanities 2016. From Digitization to Knowledge 2016: Resources and Methods for Semantic Processing of Digital Works/Texts, July 11, 2016, Krakow, Poland},
	author       = {Rødven-Eide, Stian and Tahmasebi, Nina and Borin, Lars},
	year         = {2016},
	publisher    = {Linköping University Electronic Press},
	address      = {Linköping},
	ISBN         = {978-91-7685-733-5},
}

% Workshop paper (SDA 2013, co-located with TPDL 2013) on language evolution in digital archives.
% The abstract is kept on a single line; extraction typos are corrected.
@inProceedings{tahmasebi-risse-2013-role-191616,
	title        = {The Role of Language Evolution in Digital Archives},
	abstract     = {With advancements in technology and culture, our language changes. We invent new words, add or change meanings of existing words and change names of existing things. Left untackled, these changes in language create a gap between the language known by users and the language stored in our digital archives. In particular, they affect our possibility to firstly find content and secondly interpret that content. In this paper we discuss the limitations brought on by language evolution and existing methodology for automatically finding evolution. We discuss measures needed in the near future to ensure semantically accessible digital archives for long-term preservation.},
	booktitle    = {3rd International Workshop on Semantic Digital Archives, SDA 2013 - Co-located with 17th International Conference on Theory and Practice of Digital Libraries, TPDL 2013; Valletta; Malta; 26 September 2013},
	author       = {Tahmasebi, Nina and Risse, Thomas},
	year         = {2013},
	pages        = {16--27},
}

% Workshop paper (SDA 2013, co-located with TPDL 2013) presenting BlogNEER.
% NOTE(review): the abstract contained the residue of a stripped LaTeX citation ("teTahmasebi2012");
% it is rendered here as a plain-text reference - confirm the author list of the cited 2012 paper.
@inProceedings{holzmann-etal-2013-blogneer-191617,
	title        = {{BlogNEER}: Applying Named Entity Evolution Recognition on the {Blogosphere}},
	abstract     = {The introduction of Social Media allowed more people to publish texts by removing barriers that are technical but also social such as the editorial controls that exist in traditional media. The resulting language tends to be more like spoken language because people adapt their use to the medium. Since spoken language is more dynamic, more new and short lived terms are introduced also in written format on the Web. In Tahmasebi et al. (2012) we presented an unsupervised method for Named Entity Evolution Recognition (NEER) to find name changes in newspaper collections. In this paper we present BlogNEER, an extension to apply NEER on blog data. The language used in blogs is often closer to spoken language than to language used in traditional media. BlogNEER introduces a novel semantic filtering method that makes use of Semantic Web resources (i.e., DBpedia) to gain more information about terms. We present the approach of BlogNEER and initial results that show the potentials of the approach.},
	booktitle    = {3rd International Workshop on Semantic Digital Archives, SDA 2013 - Co-located with 17th International Conference on Theory and Practice of Digital Libraries, TPDL 2013; Valletta; Malta; 26 September 2013},
	author       = {Holzmann, Helge and Tahmasebi, Nina and Risse, Thomas},
	year         = {2013},
	volume       = {1091},
	pages        = {28--39},
}

% Journal article: vision paper on knowledge-based culturomics
% (International Journal on Digital Libraries, volume 15, 2015).
@article{tahmasebi-etal-2015-visions-212969,
	author       = {Tahmasebi, Nina and Borin, Lars and Capannini, Gabriele and Dubhashi, Devdatt and Exner, Peter and Forsberg, Markus and Gossen, Gerhard and Johansson, Fredrik and Johansson, Richard and Kågebäck, Mikael and Mogren, Olof and Nugues, Pierre and Risse, Thomas},
	title        = {Visions and open challenges for a knowledge-based culturomics},
	journal      = {International Journal on Digital Libraries},
	year         = {2015},
	volume       = {15},
	number       = {2-4},
	pages        = {169--187},
	abstract     = {The concept of culturomics was born out of the availability of massive amounts of textual data and the interest to make sense of cultural and language phenomena over time. Thus far however, culturomics has only made use of, and shown the great potential of, statistical methods. In this paper, we present a vision for a knowledge-based culturomics that complements traditional culturomics. We discuss the possibilities and challenges of combining knowledge-based methods with statistical methods and address major challenges that arise due to the nature of the data; diversity of sources, changes in language over time as well as temporal dynamics of information in general. We address all layers needed for knowledge-based culturomics, from natural language processing and relations to summaries and opinions.},
}

% Conference paper: building a SALDO-based sentiment lexicon for Swedish.
% The page range uses the standard double-hyphen separator.
@inProceedings{nusko-etal-2016-building-238135,
	title        = {Building a Sentiment Lexicon for {Swedish}},
	abstract     = {In this paper we will present our ongoing project to build and evaluate a sentiment lexicon for Swedish. Our main resource is SALDO, a lexical resource of modern Swedish developed at Språkbanken, University of Gothenburg. Using a semi-supervised approach, we expand a manually chosen set of six core words using parent-child relations based on the semantic network structure of SALDO. At its current stage the lexicon consists of 175 seeds, 633 children, and 1319 grandchildren.},
	booktitle    = {Linköping Electronic Conference Proceedings},
	author       = {Nusko, Bianka and Tahmasebi, Nina and Mogren, Olof},
	year         = {2016},
	volume       = {126},
	number       = {006},
	ISBN         = {978-91-7685-733-5},
	pages        = {32--37},
}

% Conference contribution on the SWE-CLARIN project (Digital Humanities in the Nordic countries, 2016).
% Acronyms and the language name are braced so sentence-casing styles keep their capitals.
@inProceedings{tahmasebi-etal-2016-clarin-233899,
	title        = {{SWE-CLARIN} – the {Swedish} {CLARIN} project – aims and activities},
	booktitle    = {Digital Humanities in the Nordic countries, Oslo, March 15-17 2016},
	author       = {Tahmasebi, Nina and Borin, Lars and Jordan, Caspar and Ekman, Stefan},
	year         = {2016},
	pages        = {122--123},
}

% Journal article: adapting named entity evolution recognition (NEER) to Web and blog data.
% "Blogosphere" is braced because the record capitalises it as a proper noun.
@article{holzmann-etal-2015-named-209780,
	title        = {Named entity evolution recognition on the {Blogosphere}},
	abstract     = {Advancements in technology and culture lead to changes in our language. These changes create a gap between the language known by users and the language stored in digital archives. It affects user’s possibility to firstly find content and secondly interpret that content. In a previous work, we introduced our approach for named entity evolution recognition (NEER) in newspaper collections. Lately, increasing efforts in Web preservation have led to increased availability of Web archives covering longer time spans. However, language on the Web is more dynamic than in traditional media and many of the basic assumptions from the newspaper domain do not hold for Web data. In this paper we discuss the limitations of existing methodology for NEER. We approach these by adapting an existing NEER method to work on noisy data like the Web and the Blogosphere in particular. We develop novel filters that reduce the noise and make use of Semantic Web resources to obtain more information about terms. Our evaluation shows the potentials of the proposed approach.},
	journal      = {International Journal on Digital Libraries},
	author       = {Holzmann, Helge and Tahmasebi, Nina and Risse, Thomas},
	year         = {2015},
	volume       = {15},
	number       = {2-4},
	pages        = {209--235},
}

% Workshop paper (CVSC at EACL 2014): extractive summarization with continuous vector representations.
@inProceedings{kageback-etal-2014-extractive-210878,
	author       = {Kågebäck, Mikael and Mogren, Olof and Tahmasebi, Nina and Dubhashi, Devdatt},
	title        = {Extractive Summarization using Continuous Vector Space Models},
	booktitle    = {Proceedings of the 2nd Workshop on Continuous Vector Space Models and their Compositionality (CVSC) EACL, April 26-30, 2014 Gothenburg, Sweden},
	year         = {2014},
	pages        = {31--39},
	ISBN         = {978-1-937284-94-7},
	abstract     = {Automatic summarization can help users extract the most important pieces of information from the vast amount of text digitized into electronic form everyday. Central to automatic summarization is the notion of similarity between sentences in text. In this paper we propose the use of continuous vector representations for semantically aware representations of sentences as a basis for measuring similarity. We evaluate different compositions
for sentence representation on a standard dataset using the ROUGE evaluation measures. Our experiments show that the evaluated methods improve the performance of a state-of-the-art summarization framework and strongly indicate the benefits of continuous word vector representations for automatic summarization.},
}

% Monograph on automatic detection of language evolution.
% NOTE(review): the publisher is a university, so this may be a thesis - confirm before changing the entry type.
% The country name in the address is given in English to match the rest of the record.
@book{tahmasebi-2013-models-210879,
	title        = {Models and Algorithms for Automatic Detection of Language Evolution},
	author       = {Tahmasebi, Nina},
	year         = {2013},
	publisher    = {Gottfried Wilhelm Leibniz Universität Hannover},
	address      = {Hannover, Germany},
}

% Workshop paper describing the entity, topic and event analysis components of the ARCOMEM architecture.
% The title carries no terminal punctuation (styles add it); the abstract's truncated first word is restored.
% NOTE(review): several authors are stored with initials only - expand to full given names if known.
@inProceedings{demidova-etal-2013-analysing-191624,
	title        = {Analysing Entities, Topics and Events in Community Memories},
	abstract     = {This paper briefly describes the components of the ARCOMEM architecture concerned with the extraction, enrichment, consolidation and dynamics analysis of entities, topics and events, deploying text mining, NLP, and semantic data integration technologies. In particular, we focus on four main areas relevant to support the ARCOMEM requirements and use cases: (a) entity and event extraction from text; (b) entity and event enrichment and consolidation; (c) topic detection and dynamics; and (d) temporal aspects and dynamics detection in Web language and online social networks.},
	booktitle    = {Proceedings of the First International Workshop on Archiving Community Memories},
	author       = {Demidova, Elena and Barbieri, N. and Dietze, Stefan and Funk, Adam and Gossen, Gerhard and Maynard, Diana and Papailiou, N. and Plachouras, V. and Peters, W. and Stavrakas, Y. and Risse, Thomas and Tahmasebi, Nina},
	year         = {2013},
}

% PC co-chairs' message for the SMS 2013 workshop on Social Media Semantics.
% NOTE(review): booktitle holds a series name rather than a proceedings title - confirm the volume title
% and move the series name to a series field once it is known.
% Acronyms in the title are braced; the abstract uses LaTeX quotes, which are safe under babel shorthands.
@inProceedings{spiliotopoulos-etal-2013-2013-191622,
	title        = {{SMS} 2013 {PC} co-chairs message},
	abstract     = {The SMS workshop 2013 on Social Media Semantics was held this year in the context of the OTM (``OnTheMove'') federated conferences, covering different aspects of distributed information systems in September 2013 in Graz. The topic of the workshop is about semantics in Social Media. The SocialWeb has become the first and main medium to get and spread information. Everyday news is reported instantly, and social media has become a major source for broadcasters, news reporters and political analysts as well as a place of interaction for everyday people. For a full utilization of this medium, information must be gathered, analyzed and semantically understood. In this workshop we ask the question: how can Semantic Web technologies be used to provide the means for interested people to draw conclusions, assess situations and to preserve their findings for future use? © 2013 Springer-Verlag.},
	booktitle    = {Lecture Notes in Computer Science},
	author       = {Spiliotopoulos, D. and Risse, T. and Tahmasebi, Nina},
	year         = {2013},
	ISBN         = {978-3-642-41032-1},
}