% Skip to main content
% Språkbanken Text is a part of Språkbanken.
%
% BibTeX

@inproceedings{periti-etal-2024-automatically-343018,
	author       = {Periti, Francesco and Alfter, David and Tahmasebi, Nina},
	title        = {Automatically Generated Definitions and their utility for Modeling Word Meaning},
	booktitle    = {Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing, November 12-16, 2024, Miami, Florida, USA},
	editor       = {Al-Onaizan, Yaser and Bansal, Mohit and Chen, Yun-Nung},
	year         = {2024},
	publisher    = {Association for Computational Linguistics},
	address      = {Miami, Florida, USA},
	pages        = {14008--14026},
	isbn         = {979-8-89176-164-3},
	abstract     = {Modeling lexical semantics is a challenging task, often suffering from interpretability pitfalls. In this paper, we delve into the generation of dictionary-like sense definitions and explore their utility for modeling word meaning. We fine-tuned two Llama models and include an existing T5-based model in our evaluation. Firstly, we evaluate the quality of the generated definitions on existing English benchmarks, setting new state-of-the-art results for the Definition Generation task. Next, we explore the use of definitions generated by our models as intermediate representations subsequently encoded as sentence embeddings. We evaluate this approach on lexical semantics tasks such as the Word-in-Context, Word Sense Induction, and Lexical Semantic Change, setting new state-of-the-art results in all three tasks when compared to unsupervised baselines.},
}

@inproceedings{alfter-2024-graded-344267,
	author       = {Alfter, David},
	title        = {Out-of-the-Box Graded Vocabulary Lists with Generative Language Models: Fact or Fiction?},
	booktitle    = {Proceedings of the 13th Workshop on Natural Language Processing for Computer Assisted Language Learning (NLP4CALL 2024)},
	editor       = {Gaillat, Thomas and Mallart, Cyriel and Moreau, Fabienne and Li, Jen-Yu and Drouet, Griselda and Alfter, David and Volodina, Elena and Jönsson, Arne},
	year         = {2024},
	publisher    = {Linköping University Electronic Press},
	address      = {Linköping},
	pages        = {1--19},
	isbn         = {978-91-8075-774-4},
}

@inproceedings{schlechtweg-etal-2024-more-343019,
	author       = {Schlechtweg, Dominik and Cassotti, Pierluigi and Noble, Bill and Alfter, David and {Schulte im Walde}, Sabine and Tahmasebi, Nina},
	title        = {More {DWUGs}: Extending and Evaluating Word Usage Graph Datasets in Multiple Languages},
	booktitle    = {Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing, November 12-16, 2024, Miami, Florida, USA},
	year         = {2024},
	publisher    = {Association for Computational Linguistics},
	address      = {Miami, Florida, USA},
	pages        = {14379--14393},
	isbn         = {979-8-89176-164-3},
	abstract     = {Word Usage Graphs (WUGs) represent human semantic proximity judgments for pairs of word uses in a weighted graph, which can be clustered to infer word sense clusters from simple pairwise word use judgments, avoiding the need for word sense definitions. SemEval-2020 Task 1 provided the first and to date largest manually annotated, diachronic WUG dataset. In this paper, we check the robustness and correctness of the annotations by continuing the SemEval annotation algorithm for two more rounds and comparing against an established annotation paradigm. Further, we test the reproducibility by resampling a new, smaller set of word uses from the SemEval source corpora and annotating them. Our work contributes to a better understanding of the problems and opportunities of the WUG annotation paradigm and points to future improvements.},
}

@inproceedings{volodina-etal-2024-profiles-345602,
	author       = {Volodina, Elena and Alfter, David and Lindström Tiedemann, Therese},
	title        = {Profiles for {Swedish} as a Second Language: Lexis, Grammar, Morphology},
	booktitle    = {Proceedings of the Huminfra Conference (HiC 2024), 10-11 January, 2024, Gothenburg, Sweden},
	editor       = {Volodina, Elena and Bouma, Gerlof and Forsberg, Markus and Kokkinakis, Dimitrios and Alfter, David and Fridlund, Mats and Horn, Christian and Ahrenberg, Lars and Blåder, Anna},
	year         = {2024},
	publisher    = {Linköping University Electronic Press},
	address      = {Linköping},
	pages        = {10--19},
	isbn         = {978-91-8075-512-2},
	abstract     = {This article gives a short introduction to the Swedish Second Language Profile, a tool that visualizes language in Swedish learner corpora from different angles, such as vocabulary, grammar and morphology. The tool is aimed at research on Second Language Acquisition, development of NLP models, teaching of Swedish as a second language, automatic approaches for second language teaching and learning, and at a number of other fields.},
}

@proceedings{gaillat-etal-2024-proceedings-345595,
	editor       = {Gaillat, Thomas and Mallart, Cyriel and Moreau, Fabienne and Li, Jen-Yu and Drouet, Griselda and Alfter, David and Volodina, Elena and Jönsson, Arne},
	title        = {Proceedings of the 13th Workshop on Natural Language Processing for Computer Assisted Language Learning ({NLP4CALL} 2024)},
	year         = {2024},
	publisher    = {Linköping University Electronic Press},
	address      = {Linköping},
	isbn         = {978-91-8075-774-4},
	abstract     = {The workshop series on Natural Language Processing (NLP) for Computer-Assisted Language Learning (NLP4CALL) is a meeting place for researchers working on the integration of Natural Language Processing and Speech Technologies in CALL systems and exploring the theoretical and methodological issues arising in this connection. The latter includes, among others, the integration of insights from Second Language Acquisition (SLA) research, and the promotion of “Computational SLA” through setting up Second Language research infrastructures.},
}

@incollection{tiedemann-etal-2024-multiword-343530,
	author       = {Lindström Tiedemann, Therese and Alfter, David and Ali Mohammed, Yousuf and Piipponen, Daniela and Silén, Beatrice and Volodina, Elena},
	title        = {Multiword expressions in {Swedish} as a second language: Taxonomy, annotation, and initial results},
	booktitle    = {Multiword Expressions in Lexical Resources: Linguistic, Lexicographic, and Computational Perspectives},
	editor       = {Giouli, Voula and Barbu Mititelu, Verginica},
	year         = {2024},
	publisher    = {Language Science Press},
	address      = {Berlin},
	pages        = {309--348},
	isbn         = {978-3-98554-099-0},
	abstract     = {This chapter introduces part of the Swedish L2 profiles, a new resource for Swedish as a second language. Multiword expressions (MWEs) in this resource are based on knowledge-based automatic annotation of MWEs, which we show works quite well for Swedish. In contrast, manual annotation of the compositionality of each MWE proved difficult, probably due to different interpretations of ``compositionality'' by the two annotators. We show that experts and non-experts can rank MWEs very similarly according to relative receptive difficulty, with particularly high agreement for the easiest items. A qualitative comparison of the proficiency levels associated with the MWEs based on coursebook occurrences and the results from crowdsourcing and direct ranking indicate that MWEs which appear in few books of the same level are more likely to be difficult to associate with an appropriate level based on coursebook corpus data. Furthermore, results show that compositionality and/or transparency might influence the relative ranking. Finally, there is a clear increase in MWE lemmas at higher proficiency levels at the group level, and at the highest level receptive and productive data include the same percentage of MWEs.},
}

@inproceedings{alfter-2024-complexity-341312,
	author       = {Alfter, David},
	title        = {Complexity and Indecision: A Proof-of-Concept Exploration of Lexical Complexity and Lexical Semantic Change},
	booktitle    = {Proceedings of the 5th Workshop on Computational Approaches to Historical Language Change, August 15, 2024, Bangkok, Thailand},
	publisher    = {Association for Computational Linguistics},
	year         = {2024},
	isbn         = {979-8-89176-138-4},
}

@inproceedings{munozsanchez-etal-2024-jingle-342259,
	author       = {Muñoz Sánchez, Ricardo and Alfter, David and Dobnik, Simon and Szawerna, Maria Irena and Volodina, Elena},
	title        = {Jingle {BERT}, Jingle {BERT}, Frozen All the Way: Freezing Layers to Identify {CEFR} Levels of Second Language Learners Using {BERT}},
	booktitle    = {Proceedings of the 13th Workshop on Natural Language Processing for Computer Assisted Language Learning (NLP4CALL 2024)},
	series       = {Linköping Electronic Conference Proceedings},
	year         = {2024},
	publisher    = {Linköping University Electronic Press},
	address      = {Linköping},
	isbn         = {978-91-8075-774-4},
	abstract     = {In this paper, we investigate the question of how much domain adaptation is needed for the task of automatic essay assessment by freezing layers in BERT models. We test our methodology on three different graded language corpora (English, French and Swedish) and find that partially fine-tuning base models improves performance over fully fine-tuning base models, although the number of layers to freeze differs by language. We also look at the effect of freezing layers on different grades in the corpora and find that different layers are important for different grade levels. Finally, our results represent a new state-of-the-art in automatic essay classification for the three languages under investigation.},
}

@proceedings{volodina-etal-2024-proceedings-336386,
	editor       = {Volodina, Elena and Alfter, David and Dobnik, Simon and Lindström Tiedemann, Therese and Muñoz Sánchez, Ricardo and Szawerna, Maria Irena and Vu, Xuan-Son},
	title        = {Proceedings of the Workshop on Computational Approaches to Language Data Pseudonymization ({CALD-pseudo} 2024), March 21, 2024, Malta},
	year         = {2024},
	publisher    = {Association for Computational Linguistics},
	address      = {Stroudsburg, PA},
	isbn         = {979-8-89176-085-1},
}

@proceedings{volodina-etal-2024-proceedings-335190,
	editor       = {Volodina, Elena and Bouma, Gerlof and Forsberg, Markus and Kokkinakis, Dimitrios and Alfter, David and Fridlund, Mats and Horn, Christian and Ahrenberg, Lars and Blåder, Anna},
	title        = {Proceedings of the Huminfra Conference ({HiC} 2024), 10-11 January, 2024, Gothenburg, Sweden},
	year         = {2024},
	publisher    = {Linköping University Electronic Press},
	address      = {Linköping},
	isbn         = {978-91-8075-512-2},
}

@inproceedings{fridlund-etal-2024-humanistic-335724,
	author       = {Fridlund, Mats and Alfter, David and Brodén, Daniel and Green, Ashely and Karimi, Aram and Lindhé, Cecilia},
	title        = {Humanistic {AI}: Towards a new field of interdisciplinary expertise and research},
	booktitle    = {Proceedings of the Huminfra Conference (HiC 2024), 10-11 January, 2024, Gothenburg, Sweden},
	year         = {2024},
	publisher    = {Linköping University Electronic Press},
	address      = {Linköping},
	isbn         = {978-91-8075-512-2},
	abstract     = {The Gothenburg Research Infrastructure in Digital Humanities (GRIDH) have participated in projects within various humanities fields that utilise as well as develop research tools and infrastructural resources that incorporate applications of ‘artificial intelligence’ (AI). These applications can include natural language processing, machine learning, computer vision, large language models, image recognition algorithms, classification, clustering, and deep learning. This paper advances the term ‘humanistic AI’ to describe an emergent form of interdisciplinary practice that uses and develops AI-based research applications to answer humanities research questions together with its entangled humanistic reflection. We coin this term to make implicit and visible the epistemological and material particularities of its practice and the new forms of knowledge its affordances make possible. The paper presents GRIDH projects within ‘humanistic AI’ together with its developed AI resources and applications.},
}