Skip to main content

BibTeX

% EvoLang13 conference paper. The page range is kept in its own field so the
% bibliography style, not the booktitle text, decides how it is printed.
@inProceedings{berdicevskis-2020-older-290636,
	title        = {Older {English} Words Are More Polysemous},
	booktitle    = {The Evolution of Language: Proceedings of the 13th International Conference ({EvoLang13})},
	author       = {Berdicevskis, Aleksandrs},
	year         = {2020},
	pages        = {14--21},
	publisher    = {The Evolution of Language Conferences},
	address      = {Nijmegen},
}

% LREC 2020 conference paper. The editor was previously embedded in the
% booktitle text; the export lists Calzolari as conference chair.
@inProceedings{berdicevskis-eckhoff-2020-diachronic-293349,
	title        = {A Diachronic Treebank of {Russian} Spanning More Than a Thousand Years},
	booktitle    = {Proceedings of the 12th Conference on Language Resources and Evaluation ({LREC} 2020), May 11--16, 2020, Marseille, France},
	editor       = {Calzolari, Nicoletta},
	author       = {Berdicevskis, Aleksandrs and Eckhoff, Hanne},
	year         = {2020},
	publisher    = {European Language Resources Association},
	address      = {Paris},
	ISBN         = {979-10-95546-34-4},
}

% DHN 2020 conference paper.
% NOTE(review): the export had the second author as "Simon, Persson", i.e. with
% given and family name swapped; corrected to "Persson, Simon" - confirm against
% the paper. The citation key is left unchanged so existing citations keep working.
@inProceedings{dannells-simon-2020-supervised-289944,
	title        = {Supervised {OCR} Post-Correction of Historical {Swedish} Texts: What Role Does the {OCR} System Play?},
	abstract     = {Current approaches for post-correction of OCR errors offer solutions that are tailored to a specific OCR system. This can be problematic if the post-correction method was trained on a specific OCR system but have to be applied on the result of another system. Whereas OCR post-correction of historical text has received much attention lately, the question of what role does the OCR system play for the post-correction method has not been addressed. In this study we explore a dataset of 400 documents of historical Swedish text which has been OCR processed by three state-of-the-art OCR systems: Abbyy Finereader, Tesseract and Ocropus. We examine the OCR results of each system and present a supervised machine learning post-correction method that tries to approach the challenges exhibited by each system. We study the performance of our method by using three evaluation tools: PrimA, Språkbanken evaluation tool and Frontiers Toolkit. Based on the evaluation analysis we discuss the impact each of the OCR systems has on the results of the post-correction method. We report on quantitative and qualitative results showing varying degrees of OCR post-processing complexity that are important to consider when developing an OCR post-correction method.},
	booktitle    = {Proceedings of the Digital Humanities in the Nordic Countries, 5th Conference, Riga, Latvia, October 21--23, 2020},
	editor       = {Reinsone, Sanita and Skadiņa, Inguna and Baklāne, Anda and Daugavietis, Jānis},
	author       = {Dannélls, Dana and Persson, Simon},
	year         = {2020},
	publisher    = {CEUR-WS},
}

% LREC 2020 conference paper. The abstract was repaired after PDF extraction
% had fused some words together and doubled the inter-word spaces.
@inProceedings{johansson-adesam-2020-training-293365,
	title        = {Training a {Swedish} Constituency Parser on Six Incompatible Treebanks},
	abstract     = {We investigate a transition-based parser that uses Eukalyptus, a function-tagged constituent treebank for Swedish which includes discontinuous constituents. In addition, we show that the accuracy of this parser can be improved by using a multitask learning architecture that makes it possible to train the parser on additional treebanks that use other annotation models.},
	booktitle    = {Proceedings of the 12th International Conference on Language Resources and Evaluation ({LREC} 2020)},
	author       = {Johansson, Richard and Adesam, Yvonne},
	year         = {2020},
	publisher    = {European Language Resources Association (ELRA)},
}

% Conference paper in the NLPinAI volume of the 12th International Conference
% on Agents and Artificial Intelligence.
@inproceedings{lange-ljunglof-2020-learning-291243,
  author    = {Lange, Herbert and Ljunglöf, Peter},
  title     = {Learning Domain-specific Grammars from a Small Number of Examples},
  booktitle = {12th International Conference on Agents and Artificial Intelligence - Volume 1: NLPinAI},
  publisher = {SciTePress},
  year      = {2020},
  isbn      = {978-989-758-395-7},
  abstract  = {In this paper we investigate the problem of grammar inference from a different perspective. The common approach is to try to infer a grammar directly from example sentences, which either requires a large training set or suffers from bad accuracy. We instead view it as a problem of grammar restriction or sub-grammar extraction. We start from a large-scale resource grammar and a small number of examples, and find a sub-grammar that still covers all the examples. To do this we formulate the problem as a constraint satisfaction problem, and use an existing constraint solver to find the optimal grammar. We have made experiments with English, Finnish, German, Swedish and Spanish, which show that 10–20 examples are often sufficient to learn an interesting domain grammar. Possible applications include computer-assisted language learning, domain-specific dialogue systems, computer games, Q/A-systems, and others.},
}

% Journal article. The page range uses the BibTeX double hyphen, and the CHIELD
% acronym is brace-protected so sentence-casing styles do not lowercase it.
@article{roberts-etal-2020-chield-292421,
	title        = {{CHIELD}: The Causal Hypotheses in Evolutionary Linguistics Database},
	journal      = {Journal of Language Evolution},
	author       = {Roberts, Sean and Killin, Anton and Deb, Angarika and Sheard, Catherine and Greenhill, Simon and Sinnemäki, Kaius and Segovia-Martin, José and Nölle, Jonas and Berdicevskis, Aleksandrs and Humphreys-Balkwill, Archie and Little, Hannah and Opie, Cristopher and Jacques, Guillaume and Bromham, Lindell and Tinits, Peeter and Ross, Robert and Lee, Sean and Gasser, Emily and Calladine, Jasmine and Spike, Matthew and Mann, Stephen and Shcherbakova, Olena and Singer, Ruth and Zhang, Shuya and Benítez-Burraco, Antonio and Kliesch, Christian and Thomas-Colquhoun, Ewan and Skirgård, Hedvig and Tamariz, Monica and Passmore, Sam and Pellard, Thomas and Jordan, Fiona},
	year         = {2020},
	volume       = {5},
	number       = {2},
	pages        = {101--120},
}

% DHN 2020 conference paper. Venue and publisher are spelled as in the other
% DHN 2020 entry of this file, and the booktitle no longer ends in a period
% (the bibliography style supplies the punctuation).
@inProceedings{rouces-etal-2020-creating-290695,
	title        = {Creating an Annotated Corpus for Aspect-Based Sentiment Analysis in {Swedish}},
	abstract     = {Aspect-Based Sentiment Analysis constitutes a more fine-grained alternative to traditional sentiment analysis at sentence level. In addition to a sentiment value denoting how positive or negative a particular opinion or sentiment expression is, it identifies additional aspects or 'slots' that characterize the opinion. Some typical aspects are target and source, i.e. who holds the opinion and about which entity or aspect is the opinion. We present a large Swedish corpus annotated for Aspect-Based Sentiment Analysis. Each sentiment expression is annotated as a tuple that contains the following fields: one among 5 possible sentiment values, the target, the source, and whether the sentiment expressed is ironic. In addition, the linguistic element that conveys the sentiment is identified too. Sentiment for a particular topic is also annotated at title, paragraph and document level. The documents are articles obtained from two Swedish media (Svenska Dagbladet and Aftonbladet) and one online forum (Flashback), totalling around 4000 documents. The corpus is freely available and we plan to use it for training and testing an Aspect-Based Sentiment Analysis system.},
	booktitle    = {Proceedings of the Digital Humanities in the Nordic Countries, 5th Conference, Riga, Latvia, October 21--23, 2020},
	author       = {Rouces, Jacobo and Borin, Lars and Tahmasebi, Nina},
	year         = {2020},
	publisher    = {CEUR-WS},
}

% LREC 2020 conference paper. The editor list was previously embedded in the
% booktitle text as "Nicoletta Calzolari... [et. al.]"; it is now an editor
% field that uses the "and others" token.
% NOTE(review): address changed from Marseille (the conference venue) to Paris,
% the publisher city given by the other entry in this file with the same ISBN
% and publisher - confirm.
@inProceedings{waldispuhl-etal-2020-material-293332,
	title        = {Material Philology Meets Digital Onomastic Lexicography: The {NordiCon} Database of Medieval {Nordic} Personal Names in {Continental} Sources},
	abstract     = {We present NordiCon, a database containing medieval Nordic personal names attested in Continental sources. The database combines formally interpreted and richly interlinked onomastic data with digitized versions of the medieval manuscripts from which the data originate and information on the tokens' context. The structure of NordiCon is inspired by other online historical given name dictionaries. It takes up challenges reported on in previous works, such as how to cover material properties of a name token and how to define lemmatization principles, and elaborates on possible solutions. The lemmatization principles for NordiCon are further developed in order to facilitate the connection to other name dictionaries and corpuses, and the integration of the database into Språkbanken Text, an infrastructure containing modern and historical written data.},
	booktitle    = {Proceedings of the 12th Language Resources and Evaluation Conference, Marseille, 11--16 May 2020},
	editor       = {Calzolari, Nicoletta and others},
	author       = {Waldispühl, Michelle and Dannélls, Dana and Borin, Lars},
	year         = {2020},
	publisher    = {European Language Resources Association},
	address      = {Paris},
	ISBN         = {979-10-95546-34-4},
}