Hoppa till huvudinnehåll

BibTeX

% EvoLang13 conference paper. The page range is stored in `pages`, not in the proceedings title.
@inProceedings{berdicevskis-2020-older-290636,
	title        = {Older {English} Words Are More Polysemous},
	booktitle    = {The Evolution of Language: Proceedings of the 13th International Conference (EvoLang13)},
	author       = {Berdicevskis, Aleksandrs},
	year         = {2020},
	pages        = {14--21},
	publisher    = {The Evolution of Language Conferences},
	address      = {Nijmegen},
}

% LREC 2020 conference paper. The editor credit is stored in `editor`, not in the proceedings title.
% NOTE(review): the record named only the conference chair; `and others` assumes further editors - confirm.
@inProceedings{berdicevskis-eckhoff-2020-diachronic-293349,
	title        = {A Diachronic Treebank of {Russian} Spanning More Than a Thousand Years},
	booktitle    = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020), May 11-16, 2020, Marseille, France},
	editor       = {Calzolari, Nicoletta and others},
	author       = {Berdicevskis, Aleksandrs and Eckhoff, Hanne},
	year         = {2020},
	publisher    = {European Language Resources Association},
	address      = {Paris},
	ISBN         = {979-10-95546-34-4},
}

% DHN 2020 conference paper. The abstract is stored on one line so that "post-correction" is not split by a line wrap.
% NOTE(review): the second author was recorded as `Simon, Persson`; rewritten surname-first as `Persson, Simon` - confirm against the paper.
% The citation key is left unchanged so existing citations keep resolving.
@inProceedings{dannells-simon-2020-supervised-289944,
	title        = {Supervised {OCR} Post-Correction of Historical {Swedish} Texts: What Role Does the {OCR} System Play?},
	abstract     = {Current approaches for post-correction of OCR errors offer solutions that are tailored to a specific OCR system. This can be problematic if the post-correction method was trained on a specific OCR system but have to be applied on the result of another system. Whereas OCR post-correction of historical text has received much attention lately, the question of what role does the OCR system play for the post-correction method has not been addressed. In this study we explore a dataset of 400 documents of historical Swedish text which has been OCR processed by three state-of-the-art OCR systems: Abbyy Finereader, Tesseract and Ocropus. We examine the OCR results of each system and present a supervised machine learning post-correction method that tries to approach the challenges exhibited by each system. We study the performance of our method by using three evaluation tools: PrimA, Språkbanken evaluation tool and Frontiers Toolkit. Based on the evaluation analysis we discuss the impact each of the OCR systems has on the results of the post-correction method. We report on quantitative and qualitative results showing varying degrees of OCR post-processing complexity that are important to consider when developing an OCR post-correction method.},
	booktitle    = {Proceedings of the Digital Humanities in the Nordic Countries, 5th Conference, Riga, Latvia, October 21-23, 2020},
	editor       = {Reinsone, Sanita and Skadiņa, Inguna and Baklāne, Anda and Daugavietis, Jānis},
	author       = {Dannélls, Dana and Persson, Simon},
	year         = {2020},
	publisher    = {CEUR-WS},
}

% LREC 2020 conference paper.
% NOTE(review): `address` now holds the publisher city (Paris, as in the other ELRA entries of this file with the same ISBN);
% the conference venue (Marseille, France) moved into the proceedings title - confirm.
@inProceedings{frossard-etal-2020-dataset-293923,
	title        = {Dataset for Temporal Analysis of {English-French} Cognates},
	abstract     = {Languages change over time and, thanks to the abundance of digital corpora, their evolutionary analysis using computational techniques has recently gained much research attention. In this paper, we focus on creating a dataset to support investigating the similarity in evolution between different languages. We look in particular into the similarities and differences between the use of corresponding words across time in English and French, two languages from different linguistic families yet with shared syntax and close contact. For this we select a set of cognates in both languages and study their frequency changes and correlations over time. We propose a new dataset for computational approaches of synchronized diachronic investigation of language pairs, and subsequently show novel findings stemming from the cognate-focused diachronic comparison of the two chosen languages. To the best of our knowledge, the present study is the first in the literature to use computational approaches and large data to make a cross-language diachronic analysis.},
	booktitle    = {Proceedings of The 12th Language Resources and Evaluation Conference, Marseille, France},
	author       = {Frossard, Esteban and Coustaty, Mickael and Doucet, Antoine and Jatowt, Adam and Hengchen, Simon},
	year         = {2020},
	publisher    = {European Language Resources Association},
	address      = {Paris},
	ISBN         = {979-10-95546-34-4},
}

% LREC 2020 conference paper. The abstract was repaired after text extraction: fused words split, doubled spaces collapsed.
@inProceedings{johansson-adesam-2020-training-293365,
	title        = {Training a {Swedish} Constituency Parser on Six Incompatible Treebanks},
	abstract     = {We investigate a transition-based parser that uses Eukalyptus, a function-tagged constituent treebank for Swedish which includes discontinuous constituents. In addition, we show that the accuracy of this parser can be improved by using a multitask learning architecture that makes it possible to train the parser on additional treebanks that use other annotation models.},
	booktitle    = {Proceedings of the 12th International Conference on Language Resources and Evaluation (LREC 2020)},
	author       = {Johansson, Richard and Adesam, Yvonne},
	year         = {2020},
	publisher    = {European Language Resources Association (ELRA)},
}

% Article in Språkbruk, 2020, with a Swedish-language title.
@article{kokkinakis-lundholmfors-2020-manga-294522,
	author       = {Kokkinakis, Dimitrios and Lundholm Fors, Kristina},
	title        = {Hur många djur du kommer på kan avslöja hur din hjärna mår},
	journal      = {Språkbruk},
	volume       = {2},
	pages        = {48--51},
	year         = {2020},
}

% Conference paper in the NLPinAI volume of the 12th International Conference on Agents and Artificial Intelligence.
@inProceedings{lange-ljunglof-2020-learning-291243,
	author       = {Lange, Herbert and Ljunglöf, Peter},
	title        = {Learning Domain-specific Grammars from a Small Number of Examples},
	booktitle    = {12th International Conference on Agents and Artificial Intelligence - Volume 1: NLPinAI},
	publisher    = {SciTePress},
	year         = {2020},
	ISBN         = {978-989-758-395-7},
	abstract     = {In this paper we investigate the problem of grammar inference from a different perspective. The common approach is to try to infer a grammar directly from example sentences, which either requires a large training set or suffers from bad accuracy. We instead view it as a problem of grammar restriction or sub-grammar extraction. We start from a large-scale resource grammar and a small number of examples, and find a sub-grammar that still covers all the examples. To do this we formulate the problem as a constraint satisfaction problem, and use an existing constraint solver to find the optimal grammar. We have made experiments with English, Finnish, German, Swedish and Spanish, which show that 10–20 examples are often sufficient to learn an interesting domain grammar. Possible applications include computer-assisted language learning, domain-specific dialogue systems, computer games, Q/A-systems, and others.},
}

% White paper. The institution name in the title is brace-protected so sentence-casing styles keep its capitals.
% The abstract is stored on one line; the original record was hard-wrapped mid-sentence.
@misc{mcgillivray-etal-2020-challenges-295208,
	title        = {The challenges and prospects of the intersection of humanities and data science: A White Paper from {The Alan Turing Institute}},
	abstract     = {Since their beginnings, the digital humanities have engaged in an energetic debate about their scope, defining features, and relationship to the wider humanities, and have established themselves as a community of practice (Schreibman et al., 2004; Terras, 2010; Terras, 2013; Terras et al., 2013; Gold and Klein, 2016; The Digital Humanities Manifesto 2.0). The computational focus has characterised the field from its initial explorations (Hockey, 2004; Vanhoutte, 2013; Nyhan and Flinn, 2016) and the shift from the label ‘Humanities Computing’ to ‘Digital Humanities’ was a catalyst for change. In the history of the field, recurring cycles and productive tensions have arisen from the interfolding of computational methodologies and approaches with hermeneutic and critical modes of analysis (see McCarty, 2005; Rockwell and Sinclair, 2016; Jones, 2016). This document postulates that we are currently witnessing another one of these junctures, one that is calling for a critical involvement with data science. In many ways, we are seeing earlier methods blending into, or being extended by data science. Digitisation workflows are being augmented with automatic information extraction, data analysis, automated transcription of handwritten documents, and visualisation of transcribed content. Techniques developed for history, literary studies, and linguistics are being scaled towards larger datasets and more complex problems raising the bar of interpretability and questioning the validity of data collection and analysis methods. On the other hand, the field of data science has recently started to engage with non-STEM (Science, Technology, Engineering, and Mathematics) disciplines, by offering new data-driven modelling frameworks for addressing long-standing research questions (Kitchin, 2014; Lazer et al., 2009) and proposing so-called ‘human-centred approaches’ to data science, focussed on the interpretability of machine learning models and a more active role for human input in algorithms (See Chen et al., 2016). Moreover, in the current historical context we are witnessing an increased awareness of the questions of diversity and inclusion in research and academia, and we are seeing the creation of a strong movement aimed at addressing such issues globally. We believe that this paper can play a role in reinforcing a positive message in this respect.},
	author       = {McGillivray, Barbara and Alex, Beatrice and Ames, Sarah and Armstrong, Guyda and Beavan, David and Ciula, Arianna and Colavizza, Giovanni and Cummings, James and De Roure, David and Farquhar, Adam and Hengchen, Simon and Lang, Anouk and Loxley, James and Goudarouli, Eirini and Nanni, Federico and Nini, Andrea and Nyhan, Julianne and Osborne, Nicola and Poibeau, Thierry and Ridge, Mia and Ranade, Sonia and Smithies, James and Terras, Melissa and Vlachidis, Andreas and Willcox, Pip},
	year         = {2020},
}

% Journal article. The page range uses the BibTeX double hyphen; the acronym in the title is brace-protected.
% NOTE(review): `Opie, Cristopher` may be a misspelling of `Christopher` - confirm against the article before changing.
@article{roberts-etal-2020-chield-292421,
	title        = {{CHIELD}: the causal hypotheses in evolutionary linguistics database},
	journal      = {Journal of Language Evolution},
	author       = {Roberts, Sean and Killin, Anton and Deb, Angarika and Sheard, Catherine and Greenhill, Simon and Sinnemäki, Kaius and Segovia-Martin, José and Nölle, Jonas and Berdicevskis, Aleksandrs and Humphreys-Balkwill, Archie and Little, Hannah and Opie, Cristopher and Jacques, Guillaume and Bromham, Lindell and Tinits, Peeter and Ross, Robert and Lee, Sean and Gasser, Emily and Calladine, Jasmine and Spike, Matthew and Mann, Stephen and Shcherbakova, Olena and Singer, Ruth and Zhang, Shuya and Benítez-Burraco, Antonio and Kliesch, Christian and Thomas-Colquhoun, Ewan and Skirgård, Hedvig and Tamariz, Monica and Passmore, Sam and Pellard, Thomas and Jordan, Fiona},
	year         = {2020},
	volume       = {5},
	number       = {2},
	pages        = {101--120},
}

% DHN 2020 conference paper.
% NOTE(review): the proceedings title and publisher are spelled as in the other DHN 2020 entry of this file;
% "CEUR Workshop Proceedings" was recorded as the publisher and is kept as `series` - confirm.
@inProceedings{rouces-etal-2020-creating-290695,
	title        = {Creating an Annotated Corpus for Aspect-Based Sentiment Analysis in {Swedish}},
	abstract     = {Aspect-Based Sentiment Analysis constitutes a more fine-grained alternative to traditional sentiment analysis at sentence level. In addition to a sentiment value denoting how positive or negative a particular opinion or sentiment expression is, it identifies additional aspects or 'slots' that characterize the opinion. Some typical aspects are target and source, i.e. who holds the opinion and about which entity or aspect is the opinion. We present a large Swedish corpus annotated for Aspect-Based Sentiment Analysis. Each sentiment expression is annotated as a tuple that contains the following fields: one among 5 possible sentiment values, the target, the source, and whether the sentiment expressed is ironic. In addition, the linguistic element that conveys the sentiment is identified too. Sentiment for a particular topic is also annotated at title, paragraph and document level. The documents are articles obtained from two Swedish media (Svenska Dagbladet and Aftonbladet) and one online forum (Flashback), totalling around 4000 documents. The corpus is freely available and we plan to use it for training and testing an Aspect-Based Sentiment Analysis system.},
	booktitle    = {Proceedings of the Digital Humanities in the Nordic Countries, 5th Conference, Riga, Latvia, October 21-23, 2020},
	author       = {Rouces, Jacobo and Borin, Lars and Tahmasebi, Nina},
	year         = {2020},
	series       = {CEUR Workshop Proceedings},
	publisher    = {CEUR-WS},
}

% LDL-2020 workshop paper (at LREC 2020). The editor list is stored in `editor`, not in the proceedings title.
@inProceedings{virk-etal-2020-from-295339,
	title        = {From Linguistic Descriptions to Language Profiles},
	abstract     = {Language catalogues and typological databases are two important types of resources containing different types of knowledge about the world’s natural languages. The former provide metadata such as number of speakers, location (in prose descriptions and/or GPS coordinates), language code, literacy, etc., while the latter contain information about a set of structural and functional attributes of languages. Given that both types of resources are developed and later maintained manually, there are practical limits as to the number of languages and the number of features that can be surveyed. We introduce the concept of a language profile, which is intended to be a structured representation of various types of knowledge about a natural language extracted semi-automatically from descriptive documents and stored at a central location. It has three major parts: (1) an introductory; (2) an attributive; and (3) a reference part, each containing different types of knowledge about a given natural language. As a case study, we develop and present a language profile of an example language. At this stage, a language profile is an independent entity, but in the future it is envisioned to become part of a network of language profiles connected to each other via various types of relations. Such a representation is expected to be suitable both for humans and machines to read and process for further deeper linguistic analyses and/or comparisons.},
	booktitle    = {Proceedings of the 7th Workshop on Linked Data in Linguistics (LDL-2020). Language Resources and Evaluation Conference (LREC 2020), Marseille, 11–16 May 2020},
	editor       = {Ionov, Maxim and McCrae, John P. and Chiarcos, Christian and Declerck, Thierry and Bosque-Gil, Julia and Gracia, Jorge},
	author       = {Virk, Shafqat and Hammarström, Harald and Borin, Lars and Forsberg, Markus and Wichmann, Søren},
	year         = {2020},
	publisher    = {European Language Resources Association},
	address      = {Paris},
	ISBN         = {979-10-95546-36-8},
}

% LREC 2020 conference paper. The editor list is stored in `editor`, not in the proceedings title.
@inProceedings{virk-etal-2020-dream-295338,
	title        = {The {DReaM} Corpus: A Multilingual Annotated Corpus of Grammars for the World’s Languages},
	booktitle    = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020), Marseille, 11–16 May 2020},
	editor       = {Calzolari, Nicoletta and Béchet, Frédéric and Blache, Philippe and Choukri, Khalid and Cieri, Christopher and Declerck, Thierry and Goggi, Sara and Isahara, Hitoshi and Maegaard, Bente and Mariani, Joseph and Mazo, Hélène and Moreno, Asuncion and Odijk, Jan and Piperidis, Stelios},
	author       = {Virk, Shafqat and Hammarström, Harald and Forsberg, Markus and Wichmann, Søren},
	year         = {2020},
	publisher    = {European Language Resources Association},
	address      = {Paris},
	ISBN         = {979-10-95546-34-4},
}

% LREC 2020 conference paper. The editor credit ("Nicoletta Calzolari... [et. al.]") is stored in `editor` using `and others`.
% NOTE(review): `address` now holds the publisher city (Paris, as in the other ELRA entries of this file with the same ISBN);
% the venue (Marseille) remains in the proceedings title - confirm.
@inProceedings{waldispuhl-etal-2020-material-293332,
	title        = {Material Philology Meets Digital Onomastic Lexicography: The {NordiCon} Database of Medieval {Nordic} Personal Names in {Continental} Sources},
	abstract     = {We present NordiCon, a database containing medieval Nordic personal names attested in Continental sources. The database combines formally interpreted and richly interlinked onomastic data with digitized versions of the medieval manuscripts from which the data originate and information on the tokens' context. The structure of NordiCon is inspired by other online historical given name dictionaries. It takes up challenges reported on in previous works, such as how to cover material properties of a name token and how to define lemmatization principles, and elaborates on possible solutions. The lemmatization principles for NordiCon are further developed in order to facilitate the connection to other name dictionaries and corpuses, and the integration of the database into Språkbanken Text, an infrastructure containing modern and historical written data.},
	booktitle    = {Proceedings of The 12th Language Resources and Evaluation Conference, Marseille, 11–16 May 2020},
	editor       = {Calzolari, Nicoletta and others},
	author       = {Waldispühl, Michelle and Dannélls, Dana and Borin, Lars},
	year         = {2020},
	publisher    = {European Language Resources Association},
	address      = {Paris},
	ISBN         = {979-10-95546-34-4},
}