Språkbanken Text is a department within Språkbanken.
BibTeX

@incollection{berdicevskis-etal-2022-actually-320416,
	title        = {We may actually all die tomorrow... nevertheless: Predicting short-term frequency changes in {Swedish} neologisms},
	abstract     = {Predicting the future is difficult, as Lars Borin likes to point out by saying the phrase which is included in the title of this paper. Nevertheless, we attempt to predict short-term changes in the frequency of new Swedish words based on some measures of their linguistic and social dissemination. We show that it is possible to predict the direction of change with a higher-than-baseline accuracy. Most interestingly, we show that predictions are much less accurate for those words that denote new phenomena than for those who are new signifiers for already existing phenomena.},
	booktitle    = {Live and learn: Festschrift in honor of Lars Borin},
	editor       = {Volodina, Elena and Dannélls, Dana and Berdicevskis, Aleksandrs and Forsberg, Markus and Virk, Shafqat},
	author       = {Berdicevskis, Aleksandrs and Adesam, Yvonne and Coussé, Evie},
	year         = {2022},
	publisher    = {Institutionen för svenska, flerspråkighet och språkteknologi, Göteborgs universitet},
	address      = {Göteborg},
	ISBN         = {978-91-87850-83-7},
	pages        = {5--12},
}

% NOTE(review): the original entry stored the date 2023-03-20 in `number`, which
% misc entries never print. It is presumably the publication date - confirm, and
% add the outlet (howpublished/url) if known.
@misc{cousse-etal-2023-inget-324690,
	title        = {Inget stöd i forskningen för att de/dem slås ut},
	author       = {Coussé, Evie and Adesam, Yvonne and Berdicevskis, Aleksandrs},
	year         = {2023},
	month        = mar,
	note         = {Dated 2023-03-20},
}

@incollection{tahmasebi-dubossarsky-2023-computational-325543,
	title        = {Computational modeling of semantic change},
	abstract     = {In this chapter we provide an overview of computational modeling for semantic change using large and semi-large textual corpora. We aim to provide a key for the interpretation of relevant methods and evaluation techniques, and also provide insights into important aspects of the computational study of semantic change. We discuss the pros and cons of different classes of models with respect to the properties of the data from which one wishes to model semantic change, and which avenues are available to evaluate the results. This chapter is forthcoming as the book has not yet been published.},
	booktitle    = {Routledge Handbook of Historical Linguistics},
	edition      = {Second},
	author       = {Tahmasebi, Nina and Dubossarsky, Haim},
	year         = {2023},
	publisher    = {Routledge},
	note         = {Forthcoming},
}

@article{berdicevskis-etal-2024-drop-326112,
	title        = {To drop or not to drop? {Predicting} the omission of the infinitival marker in a {Swedish} future construction},
	abstract     = {We investigate the optional omission of the infinitival marker in a Swedish future tense construction. During the last two decades the frequency of omission has been rapidly increasing, and this process has received considerable attention in the literature. We test whether the knowledge which has been accumulated can yield accurate predictions of language variation and change. We extracted all occurrences of the construction from a very large collection of corpora. The dataset was automatically annotated with language-internal predictors which have previously been shown or hypothesized to affect the variation. We trained several models in order to make two kinds of predictions: whether the marker will be omitted in a specific utterance and how large the proportion of omissions will be for a given time period. For most of the approaches we tried, we were not able to achieve a better-than-baseline performance. The only exception was predicting the proportion of omissions using autoregressive integrated moving average models for one-step-ahead forecast, and in this case time was the only predictor that mattered. Our data suggest that most of the language-internal predictors do have some effect on the variation, but the effect is not strong enough to yield reliable predictions.},
	journal      = {Corpus Linguistics and Linguistic Theory},
	author       = {Berdicevskis, Aleksandrs and Coussé, Evie and Koplenig, Alexander and Adesam, Yvonne},
	year         = {2024},
	volume       = {20},
	number       = {1},
	pages        = {219--261},
}

@article{ehret-etal-2023-measuring-326113,
	author       = {Ehret, Katharina and Berdicevskis, Aleksandrs and Bentz, Christian and Blumenthal-Dramé, Alice},
	title        = {Measuring language complexity: challenges and opportunities},
	journal      = {Linguistics Vanguard},
	volume       = {9},
	pages        = {1--8},
	year         = {2023},
}

@inproceedings{berdicevskis-erbro-2023-tomato-326355,
	title        = {You say tomato, {I} say the same: A large-scale study of linguistic accommodation in online communities},
	booktitle    = {Proceedings of the 24th Nordic Conference on Computational Linguistics (NoDaLiDa)},
	author       = {Berdicevskis, Aleksandrs and Erbro, Viktor},
	year         = {2023},
	publisher    = {University of Tartu Library},
	ISBN         = {978-99-1621-999-7},
	pages        = {415--424},
}

@article{cousse-etal-2023-anvands-332468,
	title        = {Hur används de, dem och dom i nutida skriftspråk? {En} storskalig korpusundersökning av nyheter och sociala medier},
	abstract     = {This study ties in with a longstanding debate on the Swedish spelling variants de, dem and dom for personal pronouns (third person plural) and definite articles (plural). It charts the usage of de, dem and dom in five large corpora with news and social media texts over the past 25 years. The corpora contain more than 1.5 billion tokens, which rules out manual handling of the data. Instead, this study makes use of computational methods (including an AI language model) to automatically identify and classify relevant observations. Analysis of the news corpora shows a relatively stable usage of de, dem and dom over the past 25 years. The forms de and dem are predominantly used according to the norm: de for pronouns in subject position and as a definite article; dem for pronouns in object position. The colloquial form dom is hardly found in news texts. Analysis of the social media corpora shows more variation and change. The colloquial form dom is used in 5–25% of all instances instead of de or dem and has decreased after an initial rise. The forms de and dem are sometimes used in a non-standard way: de occurs in object position in 4–10% of the observations; dem is found in subject position or as a definite article in 1–7% of the cases. Non-standard dem is potentially on the rise with younger writers. The corpus analysis also provides details on the usage of de and dem in relative clauses, and on the users’ ratings of posts containing de, dem and dom on the social media platform Reddit.},
	journal      = {Språk \& Stil},
	author       = {Coussé, Evie and Adesam, Yvonne and Rekathati, Faton and Berdicevskis, Aleksandrs},
	year         = {2023},
	volume       = {NF 33},
	pages        = {39--70},
}

@inproceedings{adesam-etal-2024-sprakforandring-337166,
	title        = {Språkförändring på bar gärning. {En} mikrodiakron korpusstudie av pågående förändringar i stavning, lexikon och grammatik},
	booktitle    = {Svenskans beskrivning 38: Förhandlingar vid trettioåttonde sammankomsten. Örebro 4–6 maj 2022, Del I},
	author       = {Adesam, Yvonne and Berdicevskis, Aleksandrs and Coussé, Evie},
	year         = {2024},
	publisher    = {Örebro universitet},
	address      = {Örebro},
	ISBN         = {978-91-87789-89-2},
	pages        = {234--251},
}
Page manager: sb-webb