% BibTeX export

% Conference paper in the NoDaLiDa 2021 proceedings. The volume editors are stored
% in the editor field (not inside booktitle) so that styles can format them.
@inproceedings{adesam-berdicevskis-2021-part-304973,
	title        = {Part-of-speech tagging of {Swedish} texts in the neural era},
	booktitle    = {Proceedings of the 23rd Nordic Conference on Computational Linguistics, NoDaLiDa, May 31 – June 2, 2021, Reykjavik, Iceland (online)},
	author       = {Adesam, Yvonne and Berdicevskis, Aleksandrs},
	editor       = {Dobnik, Simon and Øvrelid, Lilja},
	year         = {2021},
	publisher    = {Linköping University Electronic Press},
	address      = {Linköping},
	isbn         = {978-91-7929-614-8},
}

% Thesis (the abstract describes the work as a thesis), so it is typed as a thesis
% entry with the required school field; publisher and ISBN are kept as extra data.
% NOTE(review): degree level assumed to be doctoral - confirm (PhD vs. licentiate).
@phdthesis{alfter-2021-exploring-304548,
	title        = {Exploring natural language processing for single-word and multi-word lexical complexity from a second language learner perspective},
	abstract     = {In this thesis, we investigate how natural language processing (NLP) tools and techniques can be applied to vocabulary aimed at second language learners of Swedish in order to classify vocabulary items into different proficiency levels suitable for learners of different levels.

In the first part, we use feature-engineering to represent words as vectors and feed these vectors into machine learning algorithms in order to (1) learn CEFR labels from the input data and (2) predict the CEFR level of unseen words.
Our experiments corroborate the finding that feature-based classification models using 'traditional' machine learning still outperform deep learning architectures in the task of deciding how complex a word is.

In the second part, we use crowdsourcing as a technique to generate ranked lists of multi-word expressions using both experts and non-experts (i.e. language learners). Our experiment shows that non-expert and expert rankings are highly correlated, suggesting that non-expert intuition can be seen as on-par with expert knowledge, at least in the chosen experimental configuration.

The main practical output of this research comes in two forms: prototypes and resources. We have implemented various prototype applications for (1) the automatic prediction of words based on the feature-engineering machine learning method, (2) language learning applications using graded word lists, and (3) an annotation tool for the manual annotation of expressions across a variety of linguistic factors.},
	author       = {Alfter, David},
	school       = {Göteborgs universitet},
	year         = {2021},
	publisher    = {Göteborgs universitet},
	isbn         = {978-91-87850-79-0},
}

% Journal article. The title carries no trailing period (styles add their own
% punctuation) and the journal name is stored in its full, capitalised form.
@article{antonsson-etal-2021-using-301490,
	title        = {Using a Discourse Task to Explore Semantic Ability in Persons With Cognitive Impairment},
	abstract     = {This paper uses a discourse task to explore aspects of semantic production in persons with various degree of cognitive impairment and healthy controls. The purpose of the study was to test if an in-depth semantic analysis of a cognitive-linguistic challenging discourse task could differentiate persons with a cognitive decline from those with a stable cognitive impairment. Both quantitative measures of semantic ability, using tests of oral lexical retrieval, and qualitative analysis of a narrative were used to detect semantic difficulties. Besides group comparisons a classification experiment was performed to investigate if the discourse features could be used to improve classification of the participants who had a stable cognitive impairment from those who had cognitively declined. In sum, both types of assessment methods captured difficulties between the groups, but tests of oral lexical retrieval most successfully differentiated between the cognitively stable and the cognitively declined group. Discourse features improved classification accuracy and the best combination of features discriminated between participants with a stable cognitive impairment and those who had cognitively declined with an area under the curve (AUC) of 0.93.},
	journal      = {Frontiers in Aging Neuroscience},
	author       = {Antonsson, Malin and Lundholm Fors, Kristina and Eckerström, Marie and Kokkinakis, Dimitrios},
	year         = {2021},
	volume       = {12},
}

% Journal article; fields ordered author, title, venue data, then abstract.
@article{basirat-etal-2021-empirical-302492,
	author       = {Basirat, Ali and Allassonnière-Tang, Marc and Berdicevskis, Aleksandrs},
	title        = {An empirical study on the contribution of formal and semantic features to the grammatical gender of nouns},
	journal      = {Linguistics Vanguard},
	year         = {2021},
	volume       = {7},
	number       = {1},
	abstract     = {This study conducts an experimental evaluation of two hypotheses about the contributions of formal and semantic features to the grammatical gender assignment of nouns. One of the hypotheses (Corbett and Fraser 2000) claims that semantic features dominate formal ones. The other hypothesis, formulated within the optimal gender assignment theory (Rice 2006), states that form and semantics contribute equally. Both hypotheses claim that the combination of formal and semantic features yields the most accurate gender identification. In this paper, we operationalize and test these hypotheses by trying to predict grammatical gender using only character-based embeddings (that capture only formal features), only context-based embeddings (that capture only semantic features) and the combination of both. We performed the experiment using data from three languages with different gender systems (French, German and Russian). Formal features are a significantly better predictor of gender than semantic ones, and the difference in prediction accuracy is very large. Overall, formal features are also significantly better than the combination of form and semantics, but the difference is very small and the results for this comparison are not entirely consistent across languages.},
}

% Journal article; the journal name is stored in its full, capitalised form.
@article{ehret-etal-2021-meaning-304914,
	title        = {Meaning and Measures: Interpreting and Evaluating Complexity Metrics},
	journal      = {Frontiers in Communication},
	author       = {Ehret, Katharina and Blumenthal-Dramé, Alice and Bentz, Christian and Berdicevskis, Aleksandrs},
	year         = {2021},
	volume       = {6},
}

% Journal article. The language name in the title is brace-protected so that
% sentence-casing styles do not lowercase it.
@article{hengchen-tahmasebi-2021-collection-301262,
	title        = {A Collection of {Swedish} Diachronic Word Embedding Models Trained on Historical Newspaper Data},
	abstract     = {This paper describes the creation of several word embedding models based on a large collection of diachronic Swedish newspaper material available through Språkbanken Text, the Swedish language bank. This data was produced in the context of Språkbanken Text’s continued mission to collaborate with humanities and natural language processing (NLP) researchers and to provide freely available language resources, for the development of state-of-the-art NLP methods and tools.},
	journal      = {Journal of Open Humanities Data},
	author       = {Hengchen, Simon and Tahmasebi, Nina},
	year         = {2021},
	volume       = {7},
	number       = {2},
	pages        = {1--7},
}

% Conference talk. The title is kept on one line; the first word of the subtitle
% and the proper name of the lexicon are brace-protected against sentence casing.
% NOTE(review): the citation key contains double-quote characters, which are outside
% the safe key character set; it is left unchanged so existing citations still resolve.
@inproceedings{landqvist-2021-"finlandssvenska"-304467,
	title        = {”finlandssvenska” + ”betydelsefulla” + ”översättare till svenska språket” = ? {Upplysningar} och urval i {Svenskt} översättarlexikon},
	booktitle    = {Sektionsföredrag vid Svenskan i Finland 19, 6–7 maj 2021, Åbo Akademi i Vasa},
	author       = {Landqvist, Hans},
	year         = {2021},
}

% Conference paper (DHN 2020 post-proceedings). The series name is held in the
% series field rather than fused into booktitle; field values carry no trailing
% whitespace or line breaks.
@inproceedings{marjanen-etal-2021-topic-304736,
	title        = {Topic Modelling Discourse Dynamics in Historical Newspapers},
	abstract     = {This paper addresses methodological issues in diachronic data analysis for historical research. We apply two families of topic models (LDA and DTM) on a relatively large set of historical newspapers, with the aim of capturing and understanding discourse dynamics. Our case study focuses on newspapers and periodicals published in Finland between 1854 and 1917, but our method can easily be transposed to any diachronic data. Our main contributions are a) a combined sampling, training and inference procedure for applying topic models to huge and imbalanced diachronic text collections; b) a discussion on the differences between two topic models for this type of data; c) quantifying topic prominence for a period and thus a generalization of document-wise topic assignment to a discourse level; and d) a discussion of the role of humanistic interpretation with regard to analysing discourse dynamics through topic models.},
	booktitle    = {Post-Proceedings of the 5th Conference Digital Humanities in the Nordic Countries (DHN 2020), Riga, Latvia, October 21--23, 2020},
	series       = {CEUR Workshop Proceedings},
	author       = {Marjanen, Jani and Zosa, Elaine and Hengchen, Simon and Pivovarova, Lidia and Tolonen, Mikko},
	year         = {2021},
	publisher    = {M. Jeusfeld c/o Redaktion Sun SITE, Informatik},
	address      = {Aachen},
}

% Tutorial lesson on the Passim text-reuse tool. The tool name in the title is
% brace-protected against sentence casing.
% NOTE(review): standard styles ignore the volume field on a misc entry, and no venue
% is recorded here; if this appeared in a journal, add it and retype as an article.
@misc{romanello-hengchen-2021-detecting-304990,
	title        = {Detecting Text Reuse with {Passim}},
	abstract     = {In this lesson you will learn about text reuse detection – the automatic identification of reused passages in texts – and why you might want to use it in your research. Through a detailed installation guide and two case studies, this lesson will teach you the ropes of Passim, an open source and scalable tool for text reuse detection.},
	author       = {Romanello, Matteo and Hengchen, Simon},
	year         = {2021},
	volume       = {10},
}