% Alfter & Graën (2019): NoDaLiDa 2019 paper on a particle-verb exercise
% for learners of English and Swedish.
% The original booktitle also carried the series name/number, the conference
% dates/venue and the editors; these now live in their own fields.
% `venue` and `eventdate` are biblatex fields; classic BibTeX styles ignore
% unknown fields, so they are harmless there.
% NOTE(review): the editor surname is kept as "Hartman", exactly as in the
% source record; the published proceedings may spell it "Hartmann" -- confirm.
@inproceedings{alfter-graen-2019-interconnecting-285731,
  author    = {Alfter, David and Graën, Johannes},
  title     = {Interconnecting lexical resources and word alignment: How do learners get on with particle verbs?},
  booktitle = {{NEAL} Proceedings of the 22nd {Nordic} Conference on Computational Linguistics ({NoDaLiDa})},
  editor    = {Hartman, Mareike and Plank, Barbara},
  series    = {Linköping Electronic Conference Proceedings},
  number    = {167},
  year      = {2019},
  publisher = {Linköping University Electronic Press, Linköpings universitet},
  address   = {Linköping},
  venue     = {Turku, Finland},
  eventdate = {2019-09-30/2019-10-02},
  isbn      = {978-91-7929-995-8},
  abstract  = {In this paper, we present a prototype for an online exercise aimed at learners of English and Swedish that serves multiple purposes. The exercise allows learners of the aforementioned languages to train their knowledge of particle verbs receiving clues from the exercise application. The user themselves decide which clue to receive and pay in virtual currency for each, which provides us with valuable information about the utility of the clues that we provide as well as the learners willingness to trade virtual currency versus accuracy of their choice. As resources, we use list with annotated levels from the proficiency scale defined by the Common European Framework of Reference (CEFR) and a multilingual corpus with syntactic dependency relations and word annotation for all language pairs. From the latter resource, we extract translation equivalents for particle verb construction together with a list of parallel corpus examples that can be used as clues in the exercise.},
}

% Graën, Kew, Shaitarova & Volk (2019): CMLC-7 workshop paper describing the
% Zurich Parallel Corpus Collection and its CoNLL-U-based data model.
% The editors, venue and workshop date were previously embedded in booktitle;
% they are now in `editor`, `venue` and `eventdate` (the latter two are
% biblatex fields and are ignored by classic BibTeX styles).
% The corpus name in the title is brace-protected so that sentence-casing
% styles do not lowercase it.
@inproceedings{graen-etal-2019-modelling-284429,
  author    = {Graën, Johannes and Kew, Tannon and Shaitarova, Anastassia and Volk, Martin},
  title     = {Modelling large parallel corpora: The {Zurich Parallel Corpus Collection}},
  booktitle = {Proceedings of the Workshop on Challenges in the Management of Large Corpora ({CMLC-7}) 2019},
  editor    = {Bański, Piotr and Barbaresi, Adrien and Biber, Hanno and Breiteneder, Evelyn and Clematide, Simon and Kupietz, Marc and Lüngen, Harald and Iliadi, Caroline},
  year      = {2019},
  publisher = {Leibniz-Institut für Deutsche Sprache},
  address   = {Mannheim},
  venue     = {Cardiff},
  eventdate = {2019-07-22},
  abstract  = {Text corpora come in many different shapes and sizes and carry heterogeneous annotations, depending on their purpose and design.
The true benefit of corpora is rooted in their annotation and the method by which this data is encoded is an important factor in their interoperability. We have accumulated a large collection of multilingual and parallel corpora and encoded it in a unified format which is compatible with a broad range of NLP tools and corpus linguistic applications. In this paper, we present our corpus collection and describe a data model and the extensions to the popular CoNLL-U format that enable us to encode it.},
}