@comment{
  Publication list, one entry per work.
  Conventions used in this file:
  - one field per line, brace-delimited values, UTF-8 text;
  - page ranges are written with a double hyphen;
  - page numbers, volume numbers and editor lists live in their own fields
    instead of being appended to booktitle;
  - LaTeX special characters (ampersand, percent sign) are escaped in values;
  - proper nouns and acronyms in titles are brace-protected so that
    sentence-casing styles do not lowercase them.
  Citation keys are kept exactly as they were so existing citations resolve.
}

@comment{
  NOTE(review): ehret-etal-2023-measuring-326620 (misc) and
  ehret-etal-2023-measuring-326113 (article) share title, authors, year,
  volume and pages. They may describe the same work; confirm before merging.
}

@misc{ehret-etal-2023-measuring-326620,
  title = {Measuring Language Complexity: challenges and opportunities},
  author = {Ehret, Katharina and Berdicevskis, Aleksandrs and Bentz, Christian and Blumenthal-Dramé, Alice},
  year = {2023},
  volume = {9},
  number = {s1},
  pages = {1--8},
}

@article{cousse-etal-2023-anvands-332468,
  title = {Hur används de, dem och dom i nutida skriftspråk? {En} storskalig korpusundersökning av nyheter och sociala medier},
  abstract = {This study ties in with a longstanding debate on the Swedish spelling variants de, dem and dom for personal pronouns (third person plural) and definite articles (plural). It charts the usage of de, dem and dom in five large corpora with news and social media texts over the past 25 years. The corpora contain more than 1.5 billion tokens, which rules out manual handling of the data. Instead, this study makes use of computational methods (including an AI language model) to automatically identify and classify relevant observations. Analysis of the news corpora shows a relatively stable usage of de, dem and dom over the past 25 years. The forms de and dem are predominantly used according to the norm: de for pronouns in subject position and as a definite article; dem for pronouns in object position. The colloquial form dom is hardly found in news texts. Analysis of the social media corpora shows more variation and change. The colloquial form dom is used in 5–25\% of all instances instead of de or dem and has decreased after an initial rise. The forms de and dem are sometimes used in a non-standard way: de occurs in object position in 4–10\% of the observations; dem is found in subject position or as a definite article in 1–7\% of the cases. Non-standard dem is potentially on the rise with younger writers. The corpus analysis also provides details on the usage of de and dem in relative clauses, and on the users’ ratings of posts containing de, dem and dom on the social media platform Reddit},
  journal = {Språk \& Stil},
  author = {Coussé, Evie and Adesam, Yvonne and Rekathati, Faton and Berdicevskis, Aleksandrs},
  year = {2023},
  volume = {NF 33},
  pages = {39--70},
}

@inproceedings{berdicevskis-2020-older-290636,
  title = {Older {English} Words Are More Polysemous},
  booktitle = {The Evolution of Language: Proceedings of the 13th International Conference (EvoLang13)},
  author = {Berdicevskis, Aleksandrs},
  year = {2020},
  publisher = {The Evolution of Language Conferences},
  address = {Nijmegen},
  pages = {14--21},
}

@inproceedings{berdicevskis-etal-2020-subjects-297403,
  title = {Subjects tend to be coded only once: Corpus-based and grammar-based evidence for an efficiency-driven trade-off},
  booktitle = {Proceedings of the 19th International Workshop on Treebanks and Linguistic Theories, TLT 2020, 27–28 October 2020, Düsseldorf, Germany},
  author = {Berdicevskis, Aleksandrs and Schmidtke-Bode, Karsten and Seržant, Ilja},
  year = {2020},
  publisher = {Association for Computational Linguistics},
  address = {Stroudsburg, PA},
  isbn = {978-1-952148-01-9},
  pages = {79--92},
}

@inproceedings{berdicevskis-piperski-2020-corpus-298524,
  title = {Corpus evidence for word order freezing in {Russian} and {German}},
  booktitle = {Proceedings of the Fourth Workshop on Universal Dependencies (UDW 2020), December 13, 2020, Barcelona, Spain (Online)},
  editor = {de Marneffe, Marie-Catherine and de Lhoneux, Miryam and Nivre, Joakim and Schuster, Sebastian},
  author = {Berdicevskis, Aleksandrs and Piperski, Alexander},
  year = {2020},
  publisher = {Association for Computational Linguistics},
  isbn = {978-1-952148-48-4},
  pages = {26--33},
}

@inproceedings{berdicevskis-2020-foreigner-297766,
  title = {Foreigner-directed speech is simpler than native-directed: Evidence from social media},
  booktitle = {Proceedings of the Fourth Workshop on Natural Language Processing and Computational Social Science, NLP+CSS 2020, November 20, 2020, Online},
  author = {Berdicevskis, Aleksandrs},
  year = {2020},
  publisher = {Association for Computational Linguistics},
  isbn = {978-1-952148-80-4},
  pages = {163--172},
}

@inproceedings{veeman-etal-2020-cross-297782,
  title = {Cross-lingual Embeddings Reveal Universal and Lineage-Specific Patterns in Grammatical Gender Assignment},
  booktitle = {Proceedings of the 24th Conference on Computational Natural Language Learning, Online, November 19-20, 2020},
  author = {Veeman, Hartger and Allassonnière-Tang, Marc and Berdicevskis, Aleksandrs and Basirat, Ali},
  year = {2020},
  publisher = {Association for Computational Linguistics},
  isbn = {978-1-952148-63-7},
  pages = {265--275},
}

@inproceedings{berdicevskis-eckhoff-2020-diachronic-293349,
  title = {A Diachronic Treebank of {Russian} Spanning More Than a Thousand Years},
  booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020), May 11-16, 2020, Marseille, France / ed. Nicoletta Calzolari (Conference chair)},
  author = {Berdicevskis, Aleksandrs and Eckhoff, Hanne},
  year = {2020},
  publisher = {European Language Resources Association},
  address = {Paris},
  isbn = {979-10-95546-34-4},
  pages = {5251--5256},
}

@inproceedings{adesam-berdicevskis-2021-part-304973,
  title = {Part-of-speech tagging of {Swedish} texts in the neural era},
  booktitle = {Proceedings of the 23rd Nordic Conference on Computational Linguistics, NoDaLiDa, May 31–2 June, 2021, Reykjavik, Iceland (online)},
  editor = {Dobnik, Simon and Øvrelid, Lilja},
  author = {Adesam, Yvonne and Berdicevskis, Aleksandrs},
  year = {2021},
  publisher = {Linköping University Electronic Press},
  address = {Linköping},
  isbn = {978-91-7929-614-8},
  pages = {200--209},
}

@inproceedings{bloomstrom-etal-2023-preparing-328710,
  title = {Preparing a corpus of spoken {Xhosa}},
  booktitle = {Proceedings of the 2023 CLASP Conference on Learning with Small Data (LSD), Gothenburg and online 11–12 September 2023},
  author = {Bloom Ström, Eva-Marie and Slater, Onelisa and Zahran, Aron and Berdicevskis, Aleksandrs and Schumacher, Anne},
  year = {2023},
  publisher = {Association for Computational Linguistics},
  address = {Gothenburg, Sweden},
  isbn = {979-8-89176-000-4},
  pages = {62--67},
}

@inproceedings{volodina-etal-2023-dalaj-326817,
  title = {{DaLAJ-GED} – a dataset for Grammatical Error Detection tasks on {Swedish}},
  booktitle = {Proceedings of the 12th Workshop on Natural Language Processing for Computer Assisted Language Learning (NLP4CALL 2023)},
  editor = {Alfter, David and Volodina, Elena and François, Thomas and Jönsson, Arne and Rennes, Evelina},
  author = {Volodina, Elena and Ali Mohammed, Yousuf and Berdicevskis, Aleksandrs and Bouma, Gerlof and Öhman, Joey},
  year = {2023},
  publisher = {Linköping Electronic Conference Proceedings},
  address = {Linköping},
  isbn = {978-91-8075-250-3},
  pages = {94--101},
}

@inproceedings{berdicevskis-etal-2023-superlim-331445,
  title = {{Superlim}: A {Swedish} Language Understanding Evaluation Benchmark},
  booktitle = {Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing, December 6-10, 2023, Singapore},
  editor = {Bouamor, Houda and Pino, Juan and Bali, Kalika},
  author = {Berdicevskis, Aleksandrs and Bouma, Gerlof and Kurtz, Robin and Morger, Felix and Öhman, Joey and Adesam, Yvonne and Borin, Lars and Dannélls, Dana and Forsberg, Markus and Isbister, Tim and Lindahl, Anna and Malmsten, Martin and Rekathati, Faton and Sahlgren, Magnus and Volodina, Elena and Börjeson, Love and Hengchen, Simon and Tahmasebi, Nina},
  year = {2023},
  publisher = {Association for Computational Linguistics},
  address = {Stroudsburg, PA},
  isbn = {979-8-89176-060-8},
  pages = {8137--8153},
}

@inproceedings{adesam-etal-2024-sprakforandring-337166,
  title = {Språkförändring på bar gärning. {En} mikrodiakron korpusstudie av pågående förändringar i stavning, lexikon och grammatik},
  booktitle = {Svenskans beskrivning 38: Förhandlingar vid trettioåttonde sammankomsten. Örebro 4–6 maj 2022, Del I},
  author = {Adesam, Yvonne and Berdicevskis, Aleksandrs and Coussé, Evie},
  year = {2024},
  publisher = {Örebro universitet},
  address = {Örebro},
  isbn = {978-91-87789-89-2},
  pages = {234--251},
}

@inproceedings{berdicevskis-erbro-2023-tomato-326355,
  title = {You say tomato, {I} say the same: A large-scale study of linguistic accommodation in online communities},
  booktitle = {Proceedings of the 24th Nordic Conference on Computational Linguistics (NoDaLiDa)},
  author = {Berdicevskis, Aleksandrs and Erbro, Viktor},
  year = {2023},
  publisher = {University of Tartu Library},
  isbn = {978-99-1621-999-7},
  pages = {415--424},
}

@incollection{berdicevskis-etal-2022-actually-320416,
  title = {We may actually all die tomorrow... nevertheless: Predicting short-term frequency changes in {Swedish} neologisms},
  abstract = {Predicting the future is difficult, as Lars Borin likes to point out by saying the phrase which is included in the title of this paper. Nevertheless, we attempt to predict short-term changes in the frequency of new Swedish words based on some measures of their linguistic and social dissemination. We show that it is possible to predict the direction of change with a higher-than-baseline accuracy. Most interestingly, we show that predictions are much less accurate for those words that denote new phenomena than for those who are new signifiers for already existing phenomena.},
  booktitle = {Live and learn: Festschrift in honor of Lars Borin},
  editor = {Volodina, Elena and Dannélls, Dana and Berdicevskis, Aleksandrs and Forsberg, Markus and Virk, Shafqat},
  author = {Berdicevskis, Aleksandrs and Adesam, Yvonne and Coussé, Evie},
  year = {2022},
  publisher = {Institutionen för svenska, flerspråkighet och språkteknologi, Göteborgs universitet},
  address = {Göteborg},
  isbn = {978-91-87850-83-7},
  pages = {5--12},
}

@article{berdicevskis-etal-2024-drop-326112,
  title = {To drop or not to drop? {Predicting} the omission of the infinitival marker in a {Swedish} future construction},
  abstract = {We investigate the optional omission of the infinitival marker in a Swedish future tense construction. During the last two decades the frequency of omission has been rapidly increasing, and this process has received considerable attention in the literature. We test whether the knowledge which has been accumulated can yield accurate predictions of language variation and change. We extracted all occurrences of the construction from a very large collection of corpora. The dataset was automatically annotated with language-internal predictors which have previously been shown or hypothesized to affect the variation. We trained several models in order to make two kinds of predictions: whether the marker will be omitted in a specific utterance and how large the proportion of omissions will be for a given time period. For most of the approaches we tried, we were not able to achieve a better-than-baseline performance. The only exception was predicting the proportion of omissions using autoregressive integrated moving average models for one-step-ahead forecast, and in this case time was the only predictor that mattered. Our data suggest that most of the language-internal predictors do have some effect on the variation, but the effect is not strong enough to yield reliable predictions.},
  journal = {Corpus Linguistics and Linguistic Theory},
  author = {Berdicevskis, Aleksandrs and Coussé, Evie and Koplenig, Alexander and Adesam, Yvonne},
  year = {2024},
  volume = {20},
  number = {1},
  pages = {219--261},
}

@misc{cousse-etal-2023-inget-324690,
  title = {Inget stöd i forskningen för att de/dem slås ut},
  author = {Coussé, Evie and Adesam, Yvonne and Berdicevskis, Aleksandrs},
  year = {2023},
  number = {2023-03-20},
}

@article{ehret-etal-2023-measuring-326113,
  title = {Measuring language complexity: challenges and opportunities},
  journal = {Linguistics Vanguard},
  author = {Ehret, Katharina and Berdicevskis, Aleksandrs and Bentz, Christian and Blumenthal-Dramé, Alice},
  year = {2023},
  volume = {9},
  pages = {1--8},
}

@article{roberts-etal-2020-chield-292421,
  title = {{CHIELD}: the causal hypotheses in evolutionary linguistics database},
  journal = {Journal of Language Evolution},
  author = {Roberts, Sean and Killin, Anton and Deb, Angarika and Sheard, Catherine and Greenhill, Simon and Sinnemäki, Kaius and Segovia-Martin, José and Nölle, Jonas and Berdicevskis, Aleksandrs and Humphreys-Balkwill, Archie and Little, Hannah and Opie, Cristopher and Jacques, Guillaume and Bromham, Lindell and Tinits, Peeter and Ross, Robert and Lee, Sean and Gasser, Emily and Calladine, Jasmine and Spike, Matthew and Mann, Stephen and Shcherbakova, Olena and Singer, Ruth and Zhang, Shuya and Benítez-Burraco, Antonio and Kliesch, Christian and Thomas-Colquhoun, Ewan and Skirgård, Hedvig and Tamariz, Monica and Passmore, Sam and Pellard, Thomas and Jordan, Fiona},
  year = {2020},
  volume = {5},
  number = {2},
  pages = {101--120},
}

@book{volodina-etal-2022-live-320415,
  title = {Live and Learn: Festschrift in honor of {Lars Borin}},
  abstract = {This Festschrift has been compiled to honor Professor Lars Borin on his 65th anniversary. It consists of 30 articles which reflect a fraction of Lars’ scholarly interests within computational linguistics and related fields. They come from his friends and colleagues around the world and deal with topics that have been – in one way or another – inspired by his work. A common theme for the articles is the never-ending need to learn, which is alluded to in the title of the volume, Live and Learn.},
  editor = {Volodina, Elena and Dannélls, Dana and Berdicevskis, Aleksandrs and Forsberg, Markus and Virk, Shafqat},
  year = {2022},
  publisher = {Institutionen för svenska, flerspråkighet och språkteknologi, Göteborgs universitet},
  address = {Göteborg},
  isbn = {978-91-87850-83-7},
}

@techreport{adesam-etal-2020-swedishglue-299130,
  title = {{SwedishGLUE} – Towards a {Swedish} Test Set for Evaluating Natural Language Understanding Models},
  author = {Adesam, Yvonne and Berdicevskis, Aleksandrs and Morger, Felix},
  year = {2020},
  institution = {University of Gothenburg},
}

@misc{berdicevskis-2020-pizzas-297688,
  title = {Pizzas and vermouth},
  author = {Berdicevskis, Aleksandrs},
  year = {2020},
  publisher = {Faber \& Faber},
  isbn = {9781783352203},
  pages = {150--151},
}

@incollection{berdicevskis-2020-kogda-296607,
  title = {Kogda morfologija bessil'na},
  booktitle = {VAProsy jazykoznanija: megasbornik nanostatej},
  author = {Berdicevskis, Aleksandrs},
  year = {2020},
  publisher = {Buki-Vedi},
  address = {Moskva},
  isbn = {978-5-4465-2882-0},
  pages = {56--60},
}

@incollection{berdicevskis-semenuks-2020-different-296274,
  title = {Different trajectories of morphological overspecification and irregularity under imperfect language learning},
  booktitle = {The Complexities of Morphology},
  editor = {Arkadiev, Peter and Gardani, Francesco},
  author = {Berdicevskis, Aleksandrs and Semenuks, Arturs},
  year = {2020},
  publisher = {Oxford University Press},
  address = {Oxford},
  isbn = {9780198861287},
  pages = {283--305},
}

@inproceedings{boguslavsky-etal-2005-interactive-289178,
  title = {Interactive Resolution of Intrinsic and Translational Ambiguity in a Machine Translation System},
  booktitle = {Lecture Notes in Computer Science (3406)},
  author = {Boguslavsky, Igor and Iomdin, Leonid and Lazursky, Alexander and Mityushin, Leonid and Sizov, Viktor and Kreydlin, Leonid and Berdicevskis, Aleksandrs},
  year = {2005},
  publisher = {Springer},
  isbn = {978-3-540-30586-6},
}

@inproceedings{boguslavskij-etal-2005-interaktivnoe-288992,
  title = {Interaktivnoe razreshenie neodnoznachnosti razlichnyx tipov v mashinnom perevode},
  booktitle = {Komp'juternaja lingvistika i intellektual'nye texnologii (Dialogue-2005). Trudy konferencii},
  author = {Boguslavskij, Igor and Iomdin, Leonid and Lazurskij, Aleksandr and Mitjushin, Leonid and Berdicevskis, Aleksandrs},
  year = {2005},
  pages = {216--221},
}

@incollection{berdicevskis-2007-punktuacija-287992,
  title = {Punktuacija kak sredstvo razreshenija neodnoznachnosti},
  booktitle = {Struktury i interpretacii: raboty molodyx issledovatelej po teoreticheskoj i prikladnoj lingvistike},
  author = {Berdicevskis, Aleksandrs},
  year = {2007},
  isbn = {978-5-211-05488-2},
  pages = {11--32},
}

@inproceedings{berdicevskis-2011-mail-287907,
  title = {E-mail vs. Chat: The influence of the communication channel on the language},
  booktitle = {Computational linguistics and intellectual technologies. Papers from the annual international conference "Dialogue"},
  author = {Berdicevskis, Aleksandrs},
  year = {2011},
  volume = {10},
  pages = {84--93},
}

@misc{berdicevskis-piperski-2011-doubts-287904,
  title = {Doubts About a Serial Founder-Effect Model of Language Expansion},
  author = {Berdicevskis, Aleksandrs and Piperski, Aleksandr},
  year = {2011},
}

@inproceedings{berdicevskis-eckhoff-2014-verbal-287581,
  title = {Verbal constructional profiles: reliability, distinction power and practical applications},
  abstract = {In this paper we explore the notion of constructional profiles (the frequency distribution of a given linguistic item across syntactic environments) from two angles, methodological and applied. We concentrate on verbal constructional profiles, using Russian argument frame data in two different dependency formats. We first test the profiles’ stability and distinction power across sample sizes, and then use the profiles in two tasks concerning Russian aspect: to identify the aspectual partner of a given verb and to guess whether a given verb is perfective or imperfective.},
  booktitle = {Proceedings of the Thirteenth International Workshop on Treebanks and Linguistic Theories},
  author = {Berdicevskis, Aleksandrs and Eckhoff, Hanne},
  year = {2014},
  pages = {2--13},
}

@article{eckhoff-berdicevskis-2015-linguistics-287287,
  title = {Linguistics vs. digital editions: The {Tromsø Old Russian and OCS Treebank}},
  abstract = {This article provides a description of the Tromsø Old Russian and OCS Treebank (TOROT), which, along with its parent treebank, the PROIEL corpus (built by members of the project Pragmatic Resources in Old Indo-European Languages), is the only existing treebank of Old Church Slavonic, Old East Slavic and Middle Russian texts. The TOROT is a part of a larger family of treebanks of ancient languages which all use the PROIEL open-source annotation web tool and annotation schemes. In this article we present principles and selected problems at several levels of analysis in the TOROT, and then briefly discuss ways that corpus linguists and edition philologists can fruitfully collaborate and complement each other.},
  journal = {Scripta \& e-Scripta},
  author = {Eckhoff, Hanne and Berdicevskis, Aleksandrs},
  year = {2015},
  number = {14-15},
  pages = {9--25},
}

@inproceedings{berdicevskis-2015-estimating-287288,
  title = {Estimating Grammeme Redundancy by Measuring Their Importance for Syntactic Parser Performance},
  abstract = {Redundancy is an important psycholinguistic concept which is often used for explanations of language change, but is notoriously difficult to operationalize and measure. Assuming that the reconstruction of a syntactic structure by a parser can be used as a rough model of the understanding of a sentence by a human hearer, I propose a method for estimating redundancy. The key idea is to compare performances of a parser on a given treebank before and after artificially removing all information about a certain grammeme from the morphological annotation. The change in performance can be used as an estimate for the redundancy of the grammeme. I perform an experiment, applying MaltParser to an Old Church Slavonic treebank to estimate grammeme redundancy in Proto-Slavic. The results show that those Old Church Slavonic grammemes within the case, number and tense categories that were estimated as most redundant are those that disappeared in modern Russian. Moreover, redundancy estimates serve as a good predictor of case grammeme frequencies in modern Russian. The small sizes of the samples do not allow to make definitive conclusions for number and tense.},
  booktitle = {Proceedings of the Sixth Workshop on Cognitive Aspects of Computational Language Learning},
  author = {Berdicevskis, Aleksandrs},
  year = {2015},
  publisher = {Association for Computational Linguistics},
  pages = {65--73},
}

@inproceedings{berdicevskis-eckhoff-2015-automatic-287516,
  title = {Automatic identification of shared arguments in verbal coordinations},
  booktitle = {Computational linguistics and intellectual technologies. Papers from the annual international conference "Dialogue"},
  author = {Berdicevskis, Aleksandrs and Eckhoff, Hanne},
  year = {2015},
  volume = {14},
  pages = {33--43},
}

@inproceedings{bentz-berdicevskis-2016-learning-286983,
  title = {Learning pressures reduce morphological complexity: Linking corpus, computational and experimental evidence},
  abstract = {The morphological complexity of languages differs widely and changes over time. Pathways of change are often driven by the interplay of multiple competing factors, and are hard to disentangle. We here focus on a paradigmatic scenario of language change: the reduction of morphological complexity from Latin towards the Romance languages. To establish a causal explanation for this phenomenon, we employ three lines of evidence: 1) analyses of parallel corpora to measure the complexity of words in actual language production, 2) applications of NLP tools to further tease apart the contribution of inflectional morphology to word complexity, and 3) experimental data from artificial language learning, which illustrate the learning pressures at play when morphology simplifies. These three lines of evidence converge to show that pressures associated with imperfect language learning are good candidates to causally explain the reduction in morphological complexity in the Latin-to-Romance scenario. More generally, we argue that combining corpus, computational and experimental evidence is the way forward in historical linguistics and linguistic typology.},
  booktitle = {Proceedings of the Workshop on Computational Linguistics for Linguistic Complexity (CL4LC at COLING 2016)},
  author = {Bentz, Christian and Berdicevskis, Aleksandrs},
  year = {2016},
  publisher = {Association for Computational Linguistics},
  pages = {222--232},
}

@inproceedings{eckhoff-berdicevskis-2016-from-287184,
  title = {From diachronic treebank to dictionary resource: the {Varangian Rus'} project},
  booktitle = {Proceedings of the EURALEX 2016 conference},
  author = {Eckhoff, Hanne and Berdicevskis, Aleksandrs},
  year = {2016},
  publisher = {Ivane Javakhishvili Tbilisi State University},
  pages = {335--340},
}

@inproceedings{berdicevskis-eckhoff-2016-redundant-287286,
  title = {Redundant features are less likely to survive: empirical evidence from the {Slavic} languages},
  abstract = {We test whether the functionality (non-redundancy) of morphological features can serve as a predictor of the survivability of those features in the course of language change. We apply a recently proposed method of measuring functionality of a feature by estimating its importance for the performance of an automatic parser to the Slavic language group. We find that the functionality of a Common Slavic grammeme, together with the functionality of its category, is a significant predictor of its survivability in modern Slavic languages. The least functional grammemes within the most functional categories are most likely to die out.},
  booktitle = {The Evolution of Language: Proceedings of the 11th International Conference (EVOLANG11)},
  author = {Berdicevskis, Aleksandrs and Eckhoff, Hanne},
  year = {2016},
  isbn = {978-1-326-61450-8},
  pages = {69--77},
}

@inproceedings{berdicevskis-etal-2018-using-286919,
  title = {Using {Universal Dependencies} in cross-linguistic complexity research},
  abstract = {We evaluate corpus-based measures of linguistic complexity obtained using Universal Dependencies (UD) treebanks. We propose a method of estimating robustness of the complexity values obtained using a given measure and a given treebank. The results indicate that measures of syntactic complexity might be on average less robust than those of morphological complexity. We also estimate the validity of complexity measures by comparing the results for very similar languages and checking for unexpected differences. We show that some of those differences that arise can be diminished by using parallel treebanks and, more importantly from the practical point of view, by harmonizing the language-specific solutions in the UD annotation.},
  booktitle = {Proceedings of the Second Workshop on Universal Dependencies (UDW 2018)},
  author = {Berdicevskis, Aleksandrs and Çöltekin, Çağrı and Ehret, Katharina and von Prince, Kilu and Ross, Daniel and Thompson, Bill and Yan, Chunxiao and Demberg, Vera and Lupyan, Gary and Rama, Taraka and Bentz, Christian},
  year = {2018},
  publisher = {Association for Computational Linguistics},
  pages = {8--17},
}

@misc{berdicevskis-bentz-2018-proceedings-286979,
  title = {Proceedings of the First Shared Task on Measuring Language Complexity},
  author = {Berdicevskis, Aleksandrs and Bentz, Christian},
  year = {2018},
  isbn = {978-91-639-7435-9},
}

@incollection{berdicevskis-zvereva-2014-slangs-287584,
  title = {Slangs Go Online, or The Rise and Fall of the {Olbanian} language},
  booktitle = {Digital Russia: The Language, Culture and Politics of New Media Communication},
  author = {Berdicevskis, Aleksandrs and Zvereva, Vera},
  year = {2014},
  publisher = {Routledge},
  isbn = {978-0415707046},
  pages = {122--140},
}

@incollection{berdicevskis-2014-written-287583,
  title = {The written turn: how computer-mediated communication actuates linguistic change in {Russian}},
  booktitle = {Digital Russia: The Language, Culture and Politics of New Media Communication},
  author = {Berdicevskis, Aleksandrs},
  year = {2014},
  publisher = {Routledge},
  isbn = {978-0415707046},
  pages = {107--122},
}

@article{berdicevskis-2012-jazykovaja-287862,
  title = {Jazykovaja slozhnost’},
  journal = {Voprosy jazykoznanija - Journal of the Institute of Linguistics of the Russian Academy of Sciences},
  author = {Berdicevskis, Aleksandrs},
  year = {2012},
  number = {5},
  pages = {101--124},
}

@incollection{berdicevskis-2014-chto-287749,
  title = {"Ty chto-to pechatala, ja tebja perebil": igra so strukturoj dialoga v kvazisinxronnoj kommunikacii},
  booktitle = {Sovremennyj russkij jazyk v internete},
  author = {Berdicevskis, Aleksandrs},
  year = {2014},
  publisher = {Jazyki slavjanskoj kul'tury},
  isbn = {978-5-9551-0722-6},
  pages = {61--82},
}

@incollection{berdicevskis-2014-predictors-287748,
  title = {Predictors of pluricentricity: lexical divergences between {Latvian Russian} and {Russian Russian}},
  booktitle = {The Russian Language Outside the Nation},
  author = {Berdicevskis, Aleksandrs},
  year = {2014},
  publisher = {Edinburgh University Press},
  isbn = {9780748668458},
  pages = {225--246},
}

@inproceedings{berdicevskis-2021-successes-311655,
  title = {Successes and failures of {Menzerath’s} law at the syntactic level},
  booktitle = {Proceedings of the Second Workshop on Quantitative Syntax (Quasy, SyntaxFest 2021), 21–25 March, 2022, Sofia, Bulgaria},
  editor = {Čech, Radek and Chen, Xinying},
  author = {Berdicevskis, Aleksandrs},
  year = {2021},
  publisher = {Association for Computational Linguistics},
  address = {Stroudsburg, PA},
  isbn = {978-1-955917-15-5},
  pages = {17--33},
}

@article{basirat-etal-2021-empirical-302492,
  title = {An empirical study on the contribution of formal and semantic features to the grammatical gender of nouns},
  abstract = {This study conducts an experimental evaluation of two hypotheses about the contributions of formal and semantic features to the grammatical gender assignment of nouns. One of the hypotheses (Corbett and Fraser 2000) claims that semantic features dominate formal ones. The other hypothesis, formulated within the optimal gender assignment theory (Rice 2006), states that form and semantics contribute equally. Both hypotheses claim that the combination of formal and semantic features yields the most accurate gender identification. In this paper, we operationalize and test these hypotheses by trying to predict grammatical gender using only character-based embeddings (that capture only formal features), only context-based embeddings (that capture only semantic features) and the combination of both. We performed the experiment using data from three languages with different gender systems (French, German and Russian). Formal features are a significantly better predictor of gender than semantic ones, and the difference in prediction accuracy is very large. Overall, formal features are also significantly better than the combination of form and semantics, but the difference is very small and the results for this comparison are not entirely consistent across languages.},
  journal = {Linguistics Vanguard},
  author = {Basirat, Ali and Allassonnière-Tang, Marc and Berdicevskis, Aleksandrs},
  year = {2021},
  volume = {7},
  number = {1},
}

@article{berdicevskis-semenuks-2022-imperfect-313148,
  title = {Imperfect language learning reduces morphological overspecification: Experimental evidence},
  journal = {PLoS ONE},
  author = {Berdicevskis, Aleksandrs and Semenuks, Arturs},
  year = {2022},
  volume = {17},
  number = {1},
  pages = {1--26},
}

@article{ehret-etal-2021-meaning-304914,
  title = {Meaning and Measures: Interpreting and Evaluating Complexity Metrics},
  journal = {Frontiers in Communication},
  author = {Ehret, Katharina and Blumenthal-Dramé, Alice and Bentz, Christian and Berdicevskis, Aleksandrs},
  year = {2021},
  volume = {6},
}

@book{berdicevskis-piperski-2021-skljanki-311612,
  title = {Tri skljanki popoludni i drugie zadachi po lingvistike},
  editor = {Berdicevskis, Aleksandrs and Piperski, Alexander},
  year = {2021},
  publisher = {Alpina Non-Fiction},
  address = {Moskva},
  isbn = {978-5-00139-130-2},
}

@inproceedings{derzhanski-etal-2004-perevodimosti-289179,
  title = {O perevodimosti lingvisticheskih zadach: Uroki {Pervoj} mezhdunarodnoj lingvisticheskoj olimpiady},
  booktitle = {Komp'juternaja lingvistika i intellektual'nye texnologii (Dialogue-2004). Trudy konferencii},
  author = {Derzhanski, Ivan and Berdicevskis, Aleksandrs and Gilyarova, Kseniya and Iomdin, Boris and Rubinshtein, Maria},
  year = {2004},
  pages = {166--171},
}

@incollection{lazurskij-etal-2005-interaktivnoe-288980,
  title = {Interaktivnoe razreshenie leksicheskoj i sintaksicheskoj neodnoznachnosti v sistemah avtomaticheskoj obrabotki estestvennogo jazyka},
  booktitle = {Internet-matematika 2005: sistemy obrabotki veb-dannyh},
  author = {Lazurskij, Aleksandr and Berdicevskis, Aleksandrs and Krejdlin, Leonid and Mitjushin, Leonid and Sizov, Viktor},
  year = {2005},
  pages = {44--66},
}

@inproceedings{berdicevskis-iomdin-2007-rol'-287989,
  title = {Rol' punktuacii v razreshenii neodnoznachnosti},
  booktitle = {Komp'juternaja lingvistika i intellektual'nye texnologii (Dialogue-2007). Trudy konferencii},
  author = {Berdicevskis, Aleksandrs and Iomdin, Boris},
  year = {2007},
  pages = {44--49},
}

@inproceedings{iomdin-berdicevskis-2006-etot-287988,
  title = {A kto ètot ètot? {Imena} sobstvennye i opredelennaja neopredelennost’},
  booktitle = {Komp'juternaja lingvistika i intellektual'nye texnologii (Dialogue-2006). Trudy konferencii},
  author = {Iomdin, Boris and Berdicevskis, Aleksandrs},
  year = {2006},
  pages = {196--201},
}

@inproceedings{iomdin-berdicevskis-2007-combinations-287986,
  title = {Combinations of Probability Qualifiers in {Russian}},
  booktitle = {Meaning - Text theory 2007: proceedings of the 3rd International Conference on Meaning - Text Theory},
  author = {Iomdin, Boris and Berdicevskis, Aleksandrs},
  year = {2007},
  pages = {189--198},
}

@inproceedings{berdicevskis-2012-introducing-287867,
  title = {Introducing pressure for expressivity into language evolution experiments},
  booktitle = {The Evolution of Language: Proceedings of the 9th International Conference on the Evolution of Language},
  author = {Berdicevskis, Aleksandrs},
  year = {2012},
  publisher = {World Scientific},
  address = {Singapore},
  isbn = {9781299742437},
  pages = {64--71},
}

@inproceedings{eckhoff-berdicevskis-2016-automatic-287182,
  title = {Automatic parsing as an efficient pre-annotation tool for historical texts},
  abstract = {Historical treebanks tend to be manually annotated, which is not surprising, since state-of-the-art parsers are not accurate enough to ensure high-quality annotation for historical texts. We test whether automatic parsing can be an efficient pre-annotation tool for Old East Slavic texts. We use the TOROT treebank from the PROIEL treebank family. We convert the PROIEL format to the CONLL format and use MaltParser to create syntactic pre-annotation. Using the most conservative evaluation method, which takes into account PROIEL-specific features, MaltParser by itself yields 0.845 unlabelled attachment score, 0.779 labelled attachment score and 0.741 secondary dependency accuracy (note, though, that the test set comes from a relatively simple genre and contains rather short sentences). Experiments with human annotators show that preparsing, if limited to sentences where no changes to word or sentence boundaries are required, increases their annotation rate. For experienced annotators, the speed gain varies from 5.80\% to 16.57\%, for inexperienced annotators from 14.61\% to 32.17\% (using conservative estimates). There are no strong reliable differences in the annotation accuracy, which means that there is no reason to suspect that using preparsing might lower the final annotation quality.},
  booktitle = {Proceedings of the Workshop on Language Technology Resources and Tools for Digital Humanities (LT4DH at COLING 2016)},
  author = {Eckhoff, Hanne and Berdicevskis, Aleksandrs},
  year = {2016},
  publisher = {Association for Computational Linguistics},
  pages = {62--70},
}

@inproceedings{berdicevskis-etal-2016-beginning-287185,
  title = {The beginning of a beautiful friendship: rule-based and statistical analysis of {Middle Russian}},
  booktitle = {Computational linguistics and intellectual technologies. Papers from the annual international conference "Dialogue"},
  author = {Berdicevskis, Aleksandrs and Eckhoff, Hanne and Gavrilova, Tatjana},
  year = {2016},
  publisher = {Russian State University for the Humanities},
  volume = {15},
  pages = {99--111},
}

@incollection{berdicevskis-2012-<<orfograficheskij>>-287865,
  title = {«Orfograficheskij» srednij rod: grammaticheskaja innovacija v jazyke russkogo {Interneta}},
  booktitle = {Variativnost’ v jazyke i kommunikacii},
  author = {Berdicevskis, Aleksandrs},
  year = {2012},
  isbn = {9785728113430},
  pages = {51--72},
}

@article{berdicevskis-2017-baerman-286981,
  title = {{M. Baerman, D. Brown, G. G. Corbett} (eds.). {Understanding} and measuring morphological complexity},
  journal = {Voprosy jazykoznanija - Journal of the Institute of Linguistics of the Russian Academy of Sciences},
  author = {Berdicevskis, Aleksandrs},
  year = {2017},
  number = {5},
  pages = {123--135},
}

@book{berdicevskis-2013-language-287751,
  title = {Language Change Online: Linguistic Innovations in {Russian} Induced by Computer-Mediated Communication},
  author = {Berdicevskis, Aleksandrs},
  year = {2013},
  isbn = {978-82-308-2334-7},
}