% Publications from 2019 (conference papers, one edited proceedings volume,
% one journal article).
%
% Conventions used in this file:
%   * Encoding is UTF-8; non-ASCII letters are written directly.
%   * One field per line; names are given as "Last, First".
%   * Words in titles that must keep their capitalisation are brace-protected.
%   * Number and date ranges use a double hyphen.
%   * Citation keys are stable identifiers and must not be changed.

% Shared strings, so that every paper from the same volume cites it identically.
@string{ceurwp  = {CEUR Workshop Proceedings}}
@string{dhn2019 = {Proceedings of the 4th Conference on Digital Humanities in the Nordic Countries ({DHN}), Copenhagen, Denmark, March 5--8, 2019}}

% ---------------------------------------------------------------------------
% DHN 2019 papers (all four appear in the same proceedings, series volume 2364)
% NOTE(review): publisher name CEUR-WS.org normalised from the three spellings
% previously used here -- confirm against the volume's index page.
% ---------------------------------------------------------------------------

@inproceedings{tahmasebi-etal-2019-convergence-280684,
  author    = {Tahmasebi, Nina and Hagen, Niclas and Brodén, Daniel and Malm, Mats},
  editor    = {Navarretta, Costanza and Agirrezabal, Manex and Maegaard, Bente},
  title     = {A Convergence of Methodologies: Notes on Data-Intensive Humanities Research},
  booktitle = dhn2019,
  series    = ceurwp,
  volume    = {2364},
  publisher = {CEUR-WS.org},
  address   = {Aachen},
  year      = {2019},
  abstract  = {In this paper, we discuss a data-intensive research methodology for the digital humanities. We highlight the differences and commonalities between quantitative and qualitative research methodologies in relation to a data-intensive research process. We argue that issues of representativeness and reduction must be in focus for all phases of the process; from the status of texts as such, over their digitization to pre-processing and methodological exploration.},
}

@inproceedings{adesam-etal-2019-exploring-279948,
  author    = {Adesam, Yvonne and Dannélls, Dana and Tahmasebi, Nina},
  editor    = {Navarretta, Costanza and Agirrezabal, Manex and Maegaard, Bente},
  title     = {Exploring the Quality of the Digital Historical Newspaper Archive {KubHist}},
  booktitle = dhn2019,
  series    = ceurwp,
  volume    = {2364},
  publisher = {CEUR-WS.org},
  address   = {Aachen},
  year      = {2019},
  abstract  = {The KubHist Corpus is a massive corpus of Swedish historical newspapers, digitized by the Royal Swedish library, and available through the Språkbanken corpus infrastructure Korp. This paper contains a first overview of the KubHist corpus, exploring some of the difficulties with the data, such as OCR errors and spelling variation, and discussing possible paths for improving the quality and the searchability.},
}

@inproceedings{rouces-etal-2019-tracking-281308,
  author    = {Rouces, Jacobo and Borin, Lars and Tahmasebi, Nina},
  editor    = {Navarretta, Costanza and Agirrezabal, Manex and Maegaard, Bente},
  title     = {Tracking Attitudes Towards Immigration in {Swedish} Media},
  booktitle = dhn2019,
  series    = ceurwp,
  volume    = {2364},
  publisher = {CEUR-WS.org},
  address   = {Aachen},
  year      = {2019},
  abstract  = {We use a gold standard under construction for sentiment analysis in Swedish to explore how attitudes towards immigration change across time and media. We track the evolution of attitude starting from the year 2000 for three different Swedish media: the national newspapers Aftonbladet and Svenska Dagbladet, representing different halves of the left–right political spectrum, and the online forum Flashback.},
}

@inproceedings{rouces-etal-2019-political-281307,
  author    = {Rouces, Jacobo and Borin, Lars and Tahmasebi, Nina},
  editor    = {Navarretta, Costanza and Agirrezabal, Manex and Maegaard, Bente},
  title     = {Political Stance Analysis Using {Swedish} Parliamentary Data},
  booktitle = dhn2019,
  series    = ceurwp,
  volume    = {2364},
  publisher = {CEUR-WS.org},
  address   = {Aachen},
  year      = {2019},
  abstract  = {We process and visualize Swedish parliamentary data using methods from statistics and machine learning, which allows us to obtain insight into the political processes behind the data. We produce plots that let us infer the relative stance of political parties and their members on different topics. In addition, we can infer the degree of homogeneity of individual votes within different parties, as well as the degree of multi-dimensionality of Swedish politics.},
}

% ---------------------------------------------------------------------------
% Other 2019 publications
% ---------------------------------------------------------------------------

% Symposium contribution; no publisher or page information is recorded for it.
@inproceedings{dubossarsky-etal-2019-time-295438,
  author    = {Dubossarsky, Haim and Hengchen, Simon and Tahmasebi, Nina and Schlechtweg, Dominik},
  title     = {Time for change: Evaluating models of semantic change without evaluation tasks},
  booktitle = {Cambridge Language Sciences Annual Symposium 2019: Perspectives on Language Change},
  year      = {2019},
}

% Whole proceedings volume: the listed people are recorded as editors.
% NOTE(review): previously typed as a generic entry with the same names in the
% author field -- confirm that no consumer of this file relies on that field.
@proceedings{tahmasebi-etal-2019-proceedings-285886,
  editor    = {Tahmasebi, Nina and Borin, Lars and Jatowt, Adam and Xu, Yang},
  title     = {Proceedings of the 1st International Workshop on Computational Approaches to Historical Language Change, August 2, 2019, Florence, Italy},
  publisher = {Association for Computational Linguistics},
  address   = {Stroudsburg, PA},
  year      = {2019},
  isbn      = {978-1-950737-31-4},
}

% The volume editors used to be embedded in the booktitle text; they now live
% in the editor field so that the bibliography style can format them.
@inproceedings{dubossarsky-etal-2019-time-281304,
  author    = {Dubossarsky, Haim and Hengchen, Simon and Tahmasebi, Nina and Schlechtweg, Dominik},
  editor    = {Korhonen, Anna and Traum, David and Màrquez, Lluís},
  title     = {{Time-Out}: Temporal Referencing for Robust Modeling of Lexical Semantic Change},
  booktitle = {Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics, Florence, Italy, July 28--August 2, 2019},
  publisher = {Association for Computational Linguistics},
  address   = {Stroudsburg, PA},
  year      = {2019},
  isbn      = {978-1-950737-48-2},
  abstract  = {State-of-the-art models of lexical semantic change detection suffer from noise stemming from vector space alignment. We have empirically tested the Temporal Referencing method for lexical semantic change and show that, by avoiding alignment, it is less affected by this noise. We show that, trained on a diachronic corpus, the skip-gram with negative sampling architecture with temporal referencing outperforms alignment models on a synthetic task as well as a manual testset. We introduce a principled way to simulate lexical semantic change and systematically control for possible biases.},
}

@article{tahmasebi-hengchen-2019-strengths-291189,
  author   = {Tahmasebi, Nina and Hengchen, Simon},
  title    = {The Strengths and Pitfalls of Large-Scale Text Mining for Literary Studies},
  journal  = {Samlaren: tidskrift för svensk litteraturvetenskaplig forskning},
  volume   = {140},
  pages    = {198--227},
  year     = {2019},
  abstract = {This paper is an overview of the opportunities and challenges of using large-scale text mining to answer research questions that stem from the humanities in general and literature specifically. In this paper, we will discuss a data-intensive research methodology and how different views of digital text affect answers to research questions. We will discuss results derived from text mining, how these results can be evaluated, and their relation to hypotheses and research questions. Finally, we will discuss some pitfalls of computational literary analysis and give some pointers as to how these can be avoided.},
}