Skip to main content
Språkbanken Text is a department within Språkbanken.

BibTeX

% NOTE(review): institution is inferred from the abstract ("Språkbanken initiated a project") -- confirm against the report itself.
@techreport{ljunglof-etal-2019-assessing-281222,
	title        = {Assessing the quality of {Språkbanken}’s annotations},
	abstract     = {Most of the corpora in Språkbanken Text consist of unannotated plain text, such as almost all newspaper texts, social media texts, novels and official documents. We also have some corpora that are manually annotated in different ways, such as Talbanken (annotated for part-of-speech and syntactic structure), and the Stockholm Umeå Corpus (annotated for part-of-speech). Språkbanken’s annotation pipeline Sparv aims to automatise the work of automatically annotating all our corpora, while still keeping the manual annotations intact. When all corpora are annotated, they can be made available, e.g., in the corpus search tools Korp and Strix. Until now there has not been any comprehensive overview of the annotation tools and models that Sparv has been using for the last eight years. Some of them have not been updated since the start, such as the part-of-speech tagger Hunpos and the dependency parser MaltParser. There are also annotation tools that we still have not included, such as a constituency-based parser.
Therefore Språkbanken initiated a project with the aim of conducting such an overview. This document is the outcome of that project, and it contains descriptions of the types of manual and automatic annotations that we currently have in Språkbanken, as well as an incomplete overview of the state-of-the-art with regards to annotation tools and models.},
	author       = {Ljunglöf, Peter and Zechner, Niklas and Nieto Piña, Luis and Adesam, Yvonne and Borin, Lars},
	year         = {2019},
	institution  = {Språkbanken},
}

% NOTE(review): this is a whole proceedings volume, so the people listed are recorded as its editors -- confirm.
@proceedings{alfter-etal-2019-proceedings-285613,
	title        = {Proceedings of the 8th Workshop on Natural Language Processing for Computer Assisted Language Learning ({NLP4CALL} 2019), {September} 30, {Turku}, {Finland}},
	abstract     = {The workshop series on Natural Language Processing (NLP) for Computer-Assisted Language Learning (NLP4CALL) is a meeting place for researchers working on the integration of Natural Language Processing and Speech Technologies in CALL systems and exploring the theoretical and methodological issues arising in this connection. The latter includes, among others, insights from Second Language Acquisition (SLA) research, on the one hand, and promote development of “Computational SLA” through setting up Second Language research infrastructure(s), on the other.

The intersection of Natural Language Processing (or Language Technology / Computational Linguistics) and Speech Technology with Computer-Assisted Language Learning (CALL) brings “understanding” of language to CALL tools, thus making CALL intelligent. This fact has given the name for this area of research – Intelligent CALL, ICALL. As the definition suggests, apart from having excellent knowledge of Natural Language Processing and/or Speech Technology, ICALL researchers need good insights into second language acquisition theories and practices, as well as knowledge of second language pedagogy and didactics. This workshop invites therefore a wide range of ICALL-relevant research, including studies where NLP-enriched tools are used for testing SLA and pedagogical theories, and vice versa, where SLA theories, pedagogical practices or empirical data are modeled in ICALL tools.

The NLP4CALL workshop series is aimed at bringing together competences from these areas for sharing experiences and brainstorming around the future of the field.},
	editor       = {Alfter, David and Volodina, Elena and Borin, Lars and Pilán, Ildikó and Lange, Herbert},
	year         = {2019},
	publisher    = {Linköping University Electronic Press, Linköpings universitet},
	address      = {Linköping},
	ISBN         = {978-91-7929-998-9},
}

% NOTE(review): booktitle reads like a publication series name rather than the title of a specific proceedings volume -- confirm.
@inproceedings{alfter-etal-2019-larka-281344,
	title        = {{Lärka}: From Language Learning Platform to Infrastructure for Research on Language Learning},
	abstract     = {Lärka is an Intelligent Computer-Assisted Language Learning (ICALL) platform developed at Språkbanken, as a flexible and a valuable source of additional learning material (e.g. via corpus-based exercises) and a support tool for both teachers and L2 learners of Swedish and students of (Swedish) linguistics. Nowadays, Lärka is being adapted into a building block in an emerging second language research infrastructure within a larger context of the text-based research infrastructure developed by the national Swedish Language bank, Språkbanken, and SWE-CLARIN. Lärka has recently received a new responsive user interface adapted to different devices with different screen sizes. Moreover, the system has also been augmented with new functionalities. These recent additions aim at improving the usability and the usefulness of the platform for pedagogical purposes. The most important development, though, is the adaptation of the platform to serve as a component in an e-infrastructure supporting research on language learning and multilingualism. Thanks to Lärka’s service-oriented architecture, most functionalities are also available as web services which can be easily re-used by other applications.},
	booktitle    = {Linköping Electronic Conference Proceedings},
	author       = {Alfter, David and Borin, Lars and Pilán, Ildikó and Lindström Tiedemann, Therese and Volodina, Elena},
	year         = {2019},
	publisher    = {Linköping University Press},
	address      = {Linköping},
	ISBN         = {978-91-7685-034-3},
}

@inproceedings{virk-etal-2019-exploiting-290903,
	title        = {Exploiting frame semantics and frame-semantic parsing for automatic extraction of typological information from descriptive grammars of natural languages},
	abstract     = {We describe a novel system for automatic extraction of typological linguistic information from descriptive grammars of natural languages, applying the theory of frame semantics in the form of frame-semantic parsing. The current proof-of-concept system covers a few selected linguistic features, but the methodology is general and can be extended not only to other typological features but also to descriptive grammars written in languages other than English. Such a system is expected to be a useful assistance for automatic curation of typological databases which otherwise are built manually, a very labor and time consuming as well as cognitively taxing enterprise.},
	booktitle    = {12th International Conference on Recent Advances in Natural Language Processing, RANLP 2019, Varna, Bulgaria, 2--4 September 2019},
	author       = {Virk, Shafqat and Muhammad, Azam Sheikh and Borin, Lars and Aslam, Muhammad Irfan and Iqbal, Saania and Khurram, Nazia},
	year         = {2019},
	publisher    = {INCOMA Ltd.},
	address      = {Shoumen, Bulgaria},
	ISBN         = {978-954-452-055-7},
}

% The volume editors live in the editor field: the original booktitle listed them with a raw ampersand, which LaTeX rejects outside tables.
@inproceedings{fridlund-etal-2019-trawling-287968,
	title        = {Trawling for Terrorists: A Big Data Analysis of Conceptual Meanings and Contexts in {Swedish} Newspapers, 1780–1926},
	abstract     = {The conceptual history of terrorism has to a significant extent been studied through canonical texts or historical key figures or organisations. However, through the increasing digitization of text materials conventional research questions can now be approached from new angles or established results verified on the basis of exhaustive collections of data, rather than limited samples. Specifically, we are interested in evaluating and expanding on prior research claims regarding the meanings and contexts associated with the concepts terrorism and terrorist up until the twentieth century in a Swedish context. The investigation is guided by the following research questions: What historical meanings of the concept of terrorism were expressed in the Swedish newspaper discourse? What social and ideological contexts and violent political practices was the concept primarily associated with before the First World War?},
	booktitle    = {Proceedings of the 5th International Workshop on Computational History (HistoInformatics 2019) co-located with the 23rd International Conference on Theory and Practice of Digital Libraries (TPDL 2019) Oslo, Norway, September 12th, 2019},
	editor       = {Wevers, Melvin and Hasanuzzaman, Mohammed and Dias, Gaël and Düring, Marten and Jatowt, Adam},
	author       = {Fridlund, Mats and Olsson, Leif-Jöran and Brodén, Daniel and Borin, Lars},
	year         = {2019},
	publisher    = {CEUR-WS},
	address      = {Aachen},
}

@inproceedings{lindahl-etal-2019-towards-286588,
	title        = {Towards Assessing Argumentation Annotation -- A First Step},
	abstract     = {This paper presents a first attempt at using Walton’s argumentation schemes for annotating arguments in Swedish political text and assessing the feasibility of using this particular set of schemes with two linguistically trained annotators. The texts are not pre-annotated with argumentation structure beforehand. The results show that the annotators differ both in number of annotated arguments and selection of the conclusion and premises which make up the arguments. They also differ in their labeling of the schemes, but grouping the schemes increases their agreement. The outcome from this will be used to develop guidelines for future annotations.},
	booktitle    = {Proceedings of the 6th Workshop on Argument Mining, August 1, 2019, Florence, Italy},
	editor       = {Stein, Benno and Wachsmuth, Henning},
	author       = {Lindahl, Anna and Borin, Lars and Rouces, Jacobo},
	year         = {2019},
	publisher    = {Association for Computational Linguistics},
	address      = {Stroudsburg, PA},
	ISBN         = {978-1-950737-33-8},
}

% Series name and volume number are kept in their own fields instead of being spelled out inside booktitle.
@inproceedings{rouces-etal-2019-tracking-281308,
	title        = {Tracking Attitudes Towards Immigration in {Swedish} Media},
	abstract     = {We use a gold standard under construction for sentiment analysis in Swedish to explore how attitudes towards immigration change across time and media. We track the evolution of attitude starting from the year 2000 for three different Swedish media: the national newspapers Aftonbladet and Svenska Dagbladet, representing different halves of the left–right political spectrum, and the online forum Flashback.},
	booktitle    = {Digital Humanities in the Nordic Countries 4th Conference, Copenhagen, Denmark, March 5--8, 2019},
	series       = {CEUR Workshop Proceedings},
	volume       = {2364},
	author       = {Rouces, Jacobo and Borin, Lars and Tahmasebi, Nina},
	year         = {2019},
	publisher    = {CEUR-WS},
	address      = {Aachen},
}

% Series name and volume number are kept in their own fields instead of being spelled out inside booktitle.
@inproceedings{rouces-etal-2019-political-281307,
	title        = {Political Stance Analysis Using {Swedish} Parliamentary Data},
	abstract     = {We process and visualize Swedish parliamentary data using methods from statistics and machine learning, which allows us to obtain insight into the political processes behind the data. We produce plots that let us infer the relative stance of political parties and their members on different topics. In addition, we can infer the degree of homogeneity of individual votes within different parties, as well as the degree of multi-dimensionality of Swedish politics.},
	booktitle    = {Digital Humanities in the Nordic Countries 4th Conference, Copenhagen, Denmark, March 5--8, 2019},
	series       = {CEUR Workshop Proceedings},
	volume       = {2364},
	author       = {Rouces, Jacobo and Borin, Lars and Tahmasebi, Nina},
	year         = {2019},
	publisher    = {CEUR-WS},
	address      = {Aachen},
}

% NOTE(review): this is a whole proceedings volume, so the people listed are recorded as its editors -- confirm.
@proceedings{tahmasebi-etal-2019-proceedings-285886,
	title        = {Proceedings of the 1st International Workshop on Computational Approaches to Historical Language Change, {August} 2, 2019, {Florence}, {Italy}},
	editor       = {Tahmasebi, Nina and Borin, Lars and Jatowt, Adam and Xu, Yang},
	year         = {2019},
	publisher    = {Association for Computational Linguistics},
	address      = {Stroudsburg, PA},
	ISBN         = {978-1-950737-31-4},
}