@techreport{Hammarstedt-Martin2017-256056, title = {Korp 6 - Användarmanual}, author = {Hammarstedt, Martin and Borin, Lars and Forsberg, Markus and Roxendal, Johan and Schumacher, Anne and Öhrman, Maria}, year = {2017}, publisher = {Institutionen för svenska språket, Göteborgs universitet}, } @techreport{Hammarstedt-Martin2017-256055, title = {Korp 6 - Technical Report}, author = {Hammarstedt, Martin and Roxendal, Johan and Öhrman, Maria and Borin, Lars and Forsberg, Markus and Schumacher, Anne}, year = {2017}, publisher = {Institutionen för svenska språket, Göteborgs universitet}, } @article{Pilán-Ildikó2017-260382, title = {Candidate sentence selection for language learning exercises: From a comprehensive framework to an empirical evaluation}, abstract = {We present a framework and its implementation relying on Natural Language Processing methods, which aims at the identification of exercise item candidates from corpora. The hybrid system combining heuristics and machine learning methods includes a number of relevant selection criteria. We focus on two fundamental aspects: linguistic complexity and the dependence of the extracted sentences on their original context. Previous work on exercise generation addressed these two criteria only to a limited extent, and a refined overall candidate sentence selection framework appears also to be lacking. In addition to a detailed description of the system, we present the results of an empirical evaluation conducted with language teachers and learners which indicate the usefulness of the system for educational purposes. We have integrated our system into a freely available online learning platform.}, author = {Pilán, Ildikó and Volodina, Elena and Borin, Lars}, year = {2017}, volume = {57}, number = {3}, pages = {67--91}, } @inProceedings{Borin-Lars2017-261157, title = {Swe-Clarin: Language resources and technology for Digital Humanities}, abstract = {CLARIN is a European Research Infrastructure Consortium (ERIC), which aims at (a) making extensive language-based materials available as primary research data to the humanities and social sciences (HSS); and (b) offering state-of-the-art language technology (LT) as an e-research tool for this purpose, positioning CLARIN centrally in what is often referred to as the digital humanities (DH). The Swedish CLARIN node Swe-Clarin was established in 2015 with funding from the Swedish Research Council. In this paper, we describe the composition and activities of Swe-Clarin, aiming at meeting the requirements of all HSS and other researchers whose research involves using text and speech as primary research data, and spreading the awareness of what Swe-Clarin can offer these research communities. We focus on one of the central means for doing this: pilot projects conducted in collaboration between HSS researchers and Swe-Clarin, together formulating a research question, the addressing of which requires working with large language-based materials. Four such pilot projects are described in more detail, illustrating research on rhetorical history, second-language acquisition, literature, and political science. A common thread to these projects is an aspiration to meet the challenge of conducting research on the basis of very large amounts of textual data in a consistent way without losing sight of the individual cases making up the mass of data, i.e., to be able to move between Moretti’s “distant” and “close reading” modes. While the pilot projects clearly make substantial contributions to DH, they also reveal some needs for more development, and in particular a need for document-level access to the text materials. As a consequence of this, work has now been initiated in Swe-Clarin to meet this need, so that Swe-Clarin together with HSS scholars investigating intricate research questions can take on the methodological challenges of big-data language-based digital humanities.}, booktitle = {Digital Humanities 2016. Extended Papers of the International Symposium on Digital Humanities (DH 2016) Växjö, Sweden, November, 7-8, 2016. Edited by Koraljka Golub, Marcelo Milra. Vol-2021}, author = {Borin, Lars and Tahmasebi, Nina and Volodina, Elena and Ekman, Stefan and Jordan, Caspar and Viklund, Jon and Megyesi, Beáta and Näsman, Jesper and Palmér, Anne and Wirén, Mats and Björkenstam, Kristina and Grigonyte, Gintare and Gustafson Capková, Sofia and Kosiński, Tomasz}, year = {2017}, publisher = {M. Jeusfeld c/o Redaktion Sun SITE, Informatik V, RWTH Aachen.}, adress = {Aachen}, } @inProceedings{Virk-Shafqat2017-261789, title = {Automatic extraction of typological linguistic features from descriptive grammars}, abstract = {The present paper describes experiments on automatically extracting typological linguistic features of natural languages from traditional written descriptive grammars. The feature-extraction task has high potential value in typological, genealogical, historical, and other related areas of linguistics that make use of databases of structural features of languages. Until now, extraction of such features from grammars has been done manually, which is highly time and labor consuming and becomes prohibitive when extended to the thousands of languages for which linguistic descriptions are available. The system we describe here starts from semantically parsed text over which a set of rules are applied in order to extract feature values. We evaluate the system’s performance on the manually curated Grambank database as the gold standard and report the first measures of precision and recall for this problem.}, booktitle = {Text, Speech, and Dialogue 20th International Conference, TSD 2017, Prague, Czech Republic, August 27-31, 2017, Proceedings / edited by Kamil Ekštein, Václav Matoušek.}, author = {Virk, Shafqat and Borin, Lars and Saxena, Anju and Hammarström, Harald}, year = {2017}, publisher = {Springer International Publishing}, adress = {Cham}, ISBN = {978-3-319-64205-5}, } @inProceedings{Volodina-Elena2017-262848, title = {SVALex. En andraspråksordlista med CEFR-nivåer}, abstract = {När man planerar att utveckla en språkkurs i ett andra- eller främmandespråk (L2) ställs man inför utmaningen att definiera vilket ordförråd inlärarna behöver tillägna sig. Forskning inom andraspråksinlärning tyder på att läsaren behöver kunna 95–98 % av löporden i en text för att förstå den (Laufer & Ravenhorst-Kalovski 2010). Sådana studier är användbara för att uppskatta storleken på det ordförråd som behövs för att tillägna sig innehållet i en text, men de ger ingen närmare metodologisk vägledning för den som vill utveckla nivåstrukturerade läromedel eller kurser för andraspråksundervisning. Speciellt tydligt är detta inom CALL, Computer-Assisted Language Learning, där läromaterial (t.ex. övningar) genereras automatiskt, och behöver elektroniska resurser som kunskapskälla. Man kan istället angripa problemet från andra hållet. Om man har en samling nivåklassificerade texter för andraspråksinlärare kan man utifrån dem bygga ordlistor där varje ord är placerat på en färdighetsskala. Om man känner till den förutsatta färdighetsnivån hos läsaren, kan man helt enkelt anta att den textnivå där ett ord dyker upp första gången också anger ordets svårighetsgrad. SVALex är ett lexikon som har byggts enligt den principen. Resursen ska kunna användas av inlärare och lärare i svenska som andraspråk, men även av lexikografer, av kursutvecklare och provkonstruktörer samt av dem som likt oss själva ägnar sig åt utveckling av språkteknologibaserade datorstöd för språkinlärning och språktestning. SVALex utgör en vidareutveckling i förhållande till tidigare lexikonresurser för svenska som andraspråk (se avsnitt 2), genom att den konsekvent relaterar de 15 681 lexikoningångarna till en vida använd färdighetsskala för andra- och främmandespråksinlärning, Europarådets gemensamma europeiska referensram för språk (Common European Framework of Reference, i fortsättningen refererad till som CEFR) (Council of Europe 2001; Skolverket 2009). Nivåklassningen av lexikonenheterna i SVALex görs på basis av deras distribution i COCTAILL, en korpus innehållande lärobokstexter i svenska som andraspråk, där lärare har placerat in varje text i någon av CEFR-nivåerna (Volodina et al. 2014). }, booktitle = {Svenskans beskrivning. 35, Förhandlingar vid trettiofemte sammankomsten : Göteborg 11–13 maj 2016 / Redigerad av Emma Sköldberg, Maia Andréasson, Henrietta Adamsson Eryd, Filippa Lindahl, Sven Lindström, Julia Prentice & Malin Sandberg}, author = {Volodina, Elena and Borin, Lars and Pilán, Ildikó and François, Thomas and Tack, Annaïs}, year = {2017}, publisher = {Göteborgs universitet}, adress = {Göteborg}, ISBN = {978-91-87850-64-6}, } @misc{Volodina-Elena2017-262846, title = {Preface. Proceedings of the Joint 6th Workshop on NLP for Computer Assisted Language Learning and 2nd Workshop on NLP for Research on Language Acquisition at NoDaLiDa 2017, Gothenburg, 22nd May 2017}, abstract = {For the second year in a row we brought two related themes of NLP for Computer-Assisted Language Learning and NLP for Language Acquisition together. The goal of organizing joint workshops is to provide a meeting place for researchers working on language learning issues including both empirical and experimental studies and NLP-based applications. The resulting volume covers a variety of topics from the two fields and - hopefully - showcases the challenges and achievements in the field. The seven papers in this volume cover native language identification in learner writings, using syntactic complexity development in language learner language to identify reading comprehension texts of appropriate level, exploring the potential of parallel corpora to predict mother-language specific problem areas for learners of another language, tools for learning languages - both well-resourced ones such as English as well as endangered or under-resourced ones such as Yakut and Võro, as well as exploring the potential of automatically identifying and correcting word-level errors in Swedish learner writing.}, author = {Volodina, Elena and Pilán, Ildikó and Borin, Lars and Grigonyte, Gintare and Nilsson Björkenstam, Kristina}, year = {2017}, volume = {30}, pages = {i--vi}, } @misc{Volodina-Elena2017-262838, title = {Proceedings of the Joint 6th Workshop on NLP for Computer Assisted Language Learning and 2nd Workshop on NLP for Research on Language Acquisition at NoDaLiDa 2017, Gothenburg, 22nd May 2017}, abstract = {For the second year in a row we have brought the two related themes of NLP for Computer-Assisted Language Learning and NLP for Language Acquisition together under one umbrella. The goal of organizing these joint workshops is to provide a meeting place for researchers working on language learning issues including both empirical and experimental studies and NLP-based applications.}, author = {Volodina, Elena and Pilán, Ildikó and Borin, Lars and Grigonyte, Gintare and Nilsson Björkenstam, Kristina}, year = {2017}, publisher = {Linköping University Press}, adress = {Linköping, Sweden}, ISBN = { 978-91-7685-502-7}, }