Språkbanken Text är en avdelning inom Språkbanken.
BibTeX

% Shared-task paper. Editors, series and volume live in their own fields;
% booktitle holds only the proceedings title, venue and date.
@inProceedings{bouma-2019-exploring-289484,
	title        = {Exploring Combining Training Datasets for the {CLIN} 2019 Shared Task on Cross-genre Gender Detection in {Dutch}},
	abstract     = {We present our entries to the Shared Task on Cross-genre Gender Detection in Dutch at CLIN 2019. We start from a simple logistic regression model with commonly used features, and consider two ways of combining training data from different sources. Our in-genre models do reasonably well, but the cross-genre models are a lot worse. Post-task experiments show no clear systematic advantage of one way of combining training data sources over the other, but do suggest accuracy can be gained from a better way of setting model hyperparameters.},
	booktitle    = {Proceedings of the Shared Task on Cross-Genre Gender Prediction in Dutch at CLIN29 (GxG-CLIN29) co-located with the 29th Conference on Computational Linguistics in The Netherlands (CLIN29), Groningen, The Netherlands, January 31, 2019},
	editor       = {Haagsma, Hessel and Kreutz, Tim and Medvedeva, Masha and Daelemans, Walter and Nissim, Malvina},
	series       = {CEUR Workshop Proceedings},
	volume       = {2453},
	author       = {Bouma, Gerlof},
	year         = {2019},
	publisher    = {CEUR-WS.org},
	address      = {Aachen},
}

% Conference paper (DHN 2019). The title carries no final period (styles add
% punctuation), and the editors are stored in the editor field.
@inProceedings{dannells-etal-2019-evaluation-278761,
	title        = {Evaluation and refinement of an enhanced {OCR} process for mass digitisation},
	abstract     = {Great expectations are placed on the capacity of heritage institutions to make their collections available in digital format. Datadriven research is becoming a key concept within the humanities and social sciences. Kungliga biblioteket’s (National Library of Sweden, KB) collections of digitised newspaper can thus be regarded as unique cultural data sets with information that rarely is conveyed in other media types. The digital format makes it possible to explore these resources in ways not feasible while in printed form. As texts are no longer only read but also subjected to computer based analysis the demand on the correct rendering of the original text increases. OCR technologies for converting images to machine-readable text play a fundamental part in making these resources available, but the effectiveness vary with the type of document being processed. This is evident in relation to the digitisation of newspapers where factors relating to their production, layout and paper quality often impair the OCR production. In order to improve the machine readable text, especially in relation to the digitisation of newspapers, KB initiated the development of an OCR-module where key parameters can be adjusted according to the characteristics of the material being processed. The purpose of this paper is to present the project goals and methods.},
	booktitle    = {Proceedings of the Digital Humanities in the Nordic Countries 4th Conference (DHN 2019), Copenhagen, Denmark, March 5-8, 2019},
	editor       = {Navarretta, Costanza and Agirrezabal, Manex and Maegaard, Bente},
	author       = {Dannélls, Dana and Johansson, Torsten and Björk, Lars},
	year         = {2019},
	publisher    = {University of Copenhagen, Faculty of Humanities},
	address      = {Copenhagen},
}

% Journal article. The corpus name is brace-protected so that sentence-casing
% bibliography styles keep its capitalisation.
@article{volodina-etal-2019-swell-285609,
	title        = {The {SweLL} Language Learner Corpus: From Design to Annotation},
	abstract     = {The article presents a new language learner corpus for Swedish, SweLL, and the methodology from collection and pesudonymisation to protect personal information of learners to annotation adapted to second language learning. The main aim is to deliver a well-annotated corpus of essays written by second language learners of Swedish and make it available for research through a browsable environment. To that end, a new annotation tool and a new project management tool have been implemented, – both with the main purpose to ensure reliability and quality of the final corpus. In the article we discuss reasoning behind metadata selection, principles of gold corpus compilation and argue for separation of normalization from correction annotation.},
	journal      = {Northern European Journal of Language Technology},
	author       = {Volodina, Elena and Granstedt, Lena and Matsson, Arild and Megyesi, Beáta and Pilán, Ildikó and Prentice, Julia and Rosén, Dan and Rudebeck, Lisa and Schenström, Carl-Johan and Sundberg, Gunlög and Wirén, Mats},
	year         = {2019},
	volume       = {6},
	pages        = {67--104},
}

% Magazine article. The number field holds the issue only; the year is
% given once, in the year field.
@article{kokkinakis-edstrom-2019-alderism-284251,
	title        = {Ålderism i dagens mediala {Sverige}},
	journal      = {Språkbruk},
	author       = {Kokkinakis, Dimitrios and Edström, Maria},
	year         = {2019},
	number       = {3},
	pages        = {22--27},
}

% Journal article. The ampersand in the journal name is escaped for LaTeX,
% and the double issue is written as a range with an en dash.
@article{fyndanis-themistocleous-2019-there-268753,
	title        = {Are there prototypical associations between time frames and aspectual values? {Evidence} from {Greek} aphasia and healthy ageing},
	abstract     = {Time reference, which has been found to be selectively impaired in agrammatic aphasia, is often interwoven with grammatical aspect. A recent study on Russian aphasia found that time reference and aspect interact: Past reference was less impaired when tested within a perfective aspect context (compared to when tested within an imperfective aspect context), and reference to the non-past was less impaired when tested within an imperfective aspect context (compared to when tested within a perfective aspect context). To explain this pattern, the authors argued that there are prototypical associations between time frames and aspectual values. The present study explores the relationship between time reference and aspect focusing on Greek aphasia and healthy ageing and using a sentence completion task that crosses time reference and aspect. The findings do not support prototypical matches between different time frames and aspectual values. Building on relevant studies, we propose that patterns of performance of healthy or language-impaired speakers on constrained tasks tapping different combinations of time frames with aspectual values should reflect the relative frequency of these combinations in a given language. The analysis of the results at the individual level revealed a double dissociation, which indicates that a given time frame–aspectual value combination may be relatively easy to process for some persons with aphasia but demanding for some others.},
	journal      = {Clinical Linguistics \& Phonetics},
	author       = {Fyndanis, Valantis and Themistocleous, Charalambos},
	year         = {2019},
	volume       = {33},
	number       = {1--2},
	pages        = {191--217},
}

% Conference abstract (LCR-2019). The tool name is brace-protected against
% sentence-casing styles; the abstract uses single spaces throughout.
@inProceedings{volodina-etal-2019-svala-285617,
	title        = {{SVALA}: an Annotation Tool for Learner Corpora generating parallel texts},
	abstract     = {Learner corpora are actively used for research on Language Acquisition and in Learner Corpus Research (LCR). The data is, however, very expensive to collect and manually annotate, and includes steps like anonymization, normalization, error annotation, linguistic annotation. In the past, projects often re-used tools from a number of different projects for the above steps. As a result, various input and output formats between the tools needed to be converted, which increased the complexity of the task. In the present project, we are developing a tool that handles all of the above-mentioned steps in one environment maintaining a stable interpretable format between the steps. A distinguishing feature of the tool is that users work in a usual environment (plain text) while the tool visualizes all performed edits via a graph that links an original learner text with an edited one, token by token.},
	booktitle    = {Learner Corpus Research conference (LCR-2019), Warsaw, 12-14 September 2019, Book of abstracts},
	author       = {Volodina, Elena and Matsson, Arild and Rosén, Dan and Wirén, Mats},
	year         = {2019},
}

% Workshop paper (6th Workshop on Argument Mining). Editors are stored in the
% editor field rather than inside booktitle.
% NOTE(review): the '?' in the citation key looks like a mis-encoded letter;
% the key is kept unchanged so that existing citations keep resolving.
@inProceedings{r?dveneide-2019-swedish-289474,
	title        = {The {Swedish} {PoliGraph}},
	abstract     = {As part of a larger project on argument mining of Swedish parliamentary data, we have created a semantic graph that, together with named entity recognition and resolution (NER), should make it easier to establish connections between arguments in a given debate. The graph is essentially a semantic database that keeps track of Members of Parliament (MPs), in particular their presence in the parliament and activity in debates, but also party affiliation and participation in commissions. The hope is that the Swedish PoliGraph will enable us to perform named entity resolution on debates in the Swedish parliament with a high accuracy, with the aim of determining to whom an argument is directed.},
	booktitle    = {Proceedings of the 6th Workshop on Argument Mining, August 1, 2019, Florence, Italy},
	editor       = {Stein, Benno and Wachsmuth, Henning},
	author       = {Rødven-Eide, Stian},
	year         = {2019},
	publisher    = {Association for Computational Linguistics},
	address      = {Stroudsburg, PA},
	ISBN         = {978-1-950737-33-8},
}

% Conference paper (ExLing 2019). Publisher and address agree with the other
% entry in this file that cites the same proceedings volume (same ISBN).
@inProceedings{themistocleous-kokkinakis-2019-speech-289021,
	title        = {Speech and Mild Cognitive Impairment detection},
	abstract     = {It is of great importance to detect objective markers that can enable the early and fast identification of individuals with Mild Cognitive Impairment (MCI) from healthy individuals to inform, patient care, family and treatment planning. Connected speech productions can offer such markers. This study analyses recordings from picture description tasks by Swedish individuals with MCI and healthy control individuals (HC) and shows that voice quality, periodicity, and speech rate distinguish individuals with MCI from HC.},
	booktitle    = {Proceedings of the 10th International Conference of Experimental Linguistics, 25-27 September 2019, Lisbon, Portugal},
	editor       = {Antonis Botinis},
	author       = {Themistocleous, Charalambos and Kokkinakis, Dimitrios},
	year         = {2019},
	publisher    = {ExLing Society},
	address      = {Athens, Greece},
	ISBN         = {978-618-84585-0-5},
}

% Journal article. Percent signs in the abstract are escaped because BibTeX
% passes them through literally and LaTeX would read them as comment starts.
@article{themistocleous-2019-dialect-341526,
	title        = {Dialect Classification From a Single Sonorant Sound Using Deep Neural Networks},
	abstract     = {During spoken communication, the fine acoustic properties of human speech can reveal vital sociolinguistic and linguistic information about speakers and thus, these properties can function as reliable identification markers of speakers' identity. One key piece of information speech reveals is speakers' dialect. The first aim of this study is to provide a machine learning method that can distinguish the dialect from acoustic productions of sonorant sounds. The second aim is to determine the classification accuracy of dialects from the temporal and spectral information of a single sonorant sound and the classification accuracy of dialects using additional co-articulatory information from the adjacent vowel. To this end, this paper provides two classification approaches. The first classification approach aims to distinguish two Greek dialects, namely Athenian Greek, the prototypical form of Standard Modern Greek and Cypriot Greek using measures of temporal and spectral information (i.e., spectral moments) from four sonorant consonants /m n l r/. The second classification study aims to distinguish the dialects using coarticulatory information (e.g., formants frequencies F1 - F5, F0, etc.) from the adjacent vowel in addition to spectral and temporal information from sonorants. In both classification approaches, we have employed Deep Neural Networks, which we compared with Support Vector Machines, Random Forests, and Decision Trees. The findings show that neural networks distinguish the two dialects using a combination of spectral moments, temporal information, and formant frequency information with 81\% classification accuracy, which is a 14\% accuracy gain over employing temporal properties and spectral moments alone. In conclusion, Deep Neural Networks can classify the dialect from single consonant productions, making them capable of identifying sociophonetic shibboleths.},
	journal      = {Frontiers in Communication},
	author       = {Themistocleous, Charalambos},
	year         = {2019},
	volume       = {4},
}

% Conference abstract (Svenskans beskrivning 37). Title and booktitle carry no
% final period; the bibliography style supplies punctuation.
@inProceedings{kokkinakis-edstrom-2019-alderism-279386,
	title        = {Ålderism i svenska nyhetsmedier},
	abstract     = {“Ålderdom existerar inte. Det finns människor som är mindre unga än andra. Det är allt.” (Simone de Beauvoir, 1908-1986).
Ålderism syftar till “fördomar eller stereotypa föreställningar som utgår från en människas ålder och som kan leda till diskriminering”. Ålderism och media är ett område som under de senaste åren har uppmärksammats på ett sätt som aldrig tidigare skett (WHO). Detta antyder på att stereotypa beskrivningar och diskriminering av individer eller grupper av individer på grund av sin kronologiska ålder i (tryckta) nyhetsmedier är ett stort problem. För ålderismstudier är det värdefullt och viktigt att förstå hur olika typer av texter och medier beskriver eller presenterar åldrande och ålderdom. Därmed är syftet med denna forskning att samla och sammanställa korpusbaserade data från olika publicerade svenska mediekällor för att kunna svara på frågan om hur utbrett fenomenet är i den svenska verkligheten och därmed kunna frambringa en mer omfattande empirisk bevisning rörande fenomenet. Två pilotstudier har genomförts; en som använde förnamn och deras frekvenser av bärarnas ålder enligt Statistiska centralbyrån (SCB) i olika synkrona on-line tidningskällor och en som använde generella mönstermatchningstekniker som tillämpades på 13 utgåvor av Göteborgs Posten (1994, 2001-13). Äldre, i vår studie, är personer ≥60 år. Preliminära, kvantitativa, resultat tyder på att det finns tydliga och konsekventa skillnader i hur olika åldersgrupper representeras i dessa medier. Ett tydligt band visar att omnämnanden av 25-52-åringar är mycket överrepresenterat än den svenska befolkningspyramiden säger att de borde (SCB). Medan 0-24-åringar och personer över 52 är underrepresenterade. Mönstermatchning pekar åt liknande resultat med undantag av dödsannonser där omnämnanden om äldre är mycket vanligare. Vår pilotstudie bekräftar den introspektiva synen på underrepresentation av ålderdom och äldre i synkrona mediekällor.
Men fler studier krävs och inom den närmaste tiden planerar vi att förbättra, skala upp och tillämpa språkteknologisk metodik på både synkronisk och diakronisk textkorpora och därmed få ett nytt och bredare perspektiv på skillnader och trender om åldrandet och äldre och vad olika publicerade källor ur en större tidsperiod kan avslöja.},
	booktitle    = {Svenskans beskrivning 37, 8–10.5.2019, Åbo, Finland},
	author       = {Kokkinakis, Dimitrios and Edström, Maria},
	year         = {2019},
}

% Workshop paper (IWCS 2019). Editors are stored in the editor field; the
% system name in the title is brace-protected against sentence casing.
@inProceedings{matsson-etal-2019-imagettr-284011,
	title        = {{ImageTTR}: Grounding Type Theory with Records in Image Classification for Visual Question Answering},
	abstract     = {We present ImageTTR, an extension to the Python implementation of Type Theory with Records (pyTTR) which connects formal record type representation with image classifiers implemented as deep neural networks. The Type Theory with Records framework serves as a knowledge representation system for natural language the representations of which are grounded in perceptual information of neural networks. We demonstrate the benefits of this symbolic and data-driven hybrid approach on the task of visual question answering.},
	booktitle    = {Proceedings of the IWCS 2019 Workshop on Computing Semantics with Types, Frames and Related Structures, May 24, 2019, Gothenburg, Sweden},
	editor       = {Osswald, Rainer and Retoré, Christian and Sutton, Peter},
	author       = {Matsson, Arild and Dobnik, Simon and Larsson, Staffan},
	year         = {2019},
	publisher    = {Association for Computational Linguistics},
	address      = {Stroudsburg, PA},
	ISBN         = {978-1-950737-25-3},
}

% Symposium contribution; only author, title, venue and year are recorded.
@inproceedings{dubossarsky-etal-2019-time-295438,
  author    = {Dubossarsky, Haim and Hengchen, Simon and Tahmasebi, Nina and Schlechtweg, Dominik},
  title     = {Time for change: Evaluating models of semantic change without evaluation tasks},
  booktitle = {Cambridge Language Sciences Annual Symposium 2019 : Perspectives on Language Change},
  year      = {2019}
}

% Conference poster (Digital Humanities 2019). The language-family name in the
% title is brace-protected; the conference dates use an en dash for the range.
@inProceedings{bouma-etal-2019-building-289485,
	title        = {Building a Diachronic and Contrastive Parallel Corpus – and an Intended Application in the Form of a Study of {Germanic} Complex Verb Constructions},
	abstract     = {We present a parallel corpus under construction, which is parallel in diachronically (through time) as well as contrastively (between languages). The corpus is made up of Bible texts spanning almost 6 centuries in 4 languages. Our project's direct purpose of building the corpus is to track the development of verb combinations containing multiple auxiliary verbs through time in German, Dutch, English and Swedish. We will also make the corpus available to other researchers.

In this poster, we discuss the design of the corpus, our selection of sources, issues with bringing together a wide variety of sources, and alignment of the data. We will also touch upon intended future work concerning the automatic linguistic processing needed to facilitate the study of verb constructions, and the methodological challenges of doing corpus linguistic research on the varying quality of annotations produced by automatic methods on materials from such a wide range of origins.},
	booktitle    = {Digital Humanities 2019, 9–12 July 2019, Utrecht, the Netherlands},
	author       = {Bouma, Gerlof and Coussé, Evie and de Kooter, Dirk-Jan and van der Sijs, Nicoline},
	year         = {2019},
}

% Conference paper (DHN 2019). Series name and volume number live in their own
% fields; the publisher is the organisation, not the series title.
@inProceedings{tahmasebi-etal-2019-convergence-280684,
	title        = {A Convergence of Methodologies: Notes on Data-Intensive Humanities Research},
	abstract     = {In this paper, we discuss a data-intensive research methodology for the digital humanities. We highlight the differences and commonalities between quantitative and qualitative research methodologies in relation to a data-intensive research process. We argue that issues of representativeness and reduction must be in focus for all phases of the process; from the status of texts as such, over their digitization to pre-processing and methodological exploration.},
	booktitle    = {Proceedings of the 4th Conference on Digital Humanities in the Nordic Countries, Copenhagen, Denmark, March 5-8, 2019},
	editor       = {Costanza Navarretta and Manex Agirrezabal and Bente Maegaard},
	series       = {CEUR Workshop Proceedings},
	volume       = {2364},
	author       = {Tahmasebi, Nina and Hagen, Niclas and Brodén, Daniel and Malm, Mats},
	year         = {2019},
	publisher    = {CEUR-WS.org},
	address      = {Aachen},
}

% Whole proceedings volume, hence the proceedings entry type with the listed
% people recorded as editors.
% NOTE(review): assumes the five names are the volume editors, as the title
% ("Proceedings of ...") suggests; confirm against the publication.
@proceedings{alfter-etal-2019-proceedings-285613,
	title        = {Proceedings of the 8th Workshop on Natural Language Processing for Computer Assisted Language Learning (NLP4CALL 2019), September 30, Turku, Finland},
	abstract     = {The workshop series on Natural Language Processing (NLP) for Computer-Assisted Language Learning (NLP4CALL) is a meeting place for researchers working on the integration of Natural Language Processing and Speech Technologies in CALL systems and exploring the theoretical and methodological issues arising in this connection. The latter includes, among others, insights from Second Language Acquisition (SLA) research, on the one hand, and promote development of “Computational SLA” through setting up Second Language research infrastructure(s), on the other.

The intersection of Natural Language Processing (or Language Technology / Computational Linguistics) and Speech Technology with Computer-Assisted Language Learning (CALL) brings “understanding” of language to CALL tools, thus making CALL intelligent. This fact has given the name for this area of research – Intelligent CALL, ICALL. As the definition suggests, apart from having excellent knowledge of Natural Language Processing and/or Speech Technology, ICALL researchers need good insights into second language acquisition theories and practices, as well as knowledge of second language pedagogy and didactics. This workshop invites therefore a wide range of ICALL-relevant research, including studies where NLP-enriched tools are used for testing SLA and pedagogical theories, and vice versa, where SLA theories, pedagogical practices or empirical data are modeled in ICALL tools.

The NLP4CALL workshop series is aimed at bringing together competences from these areas for sharing experiences and brainstorming around the future of the field.},
	editor       = {Alfter, David and Volodina, Elena and Borin, Lars and Pilán, Ildikó and Lange, Herbert},
	year         = {2019},
	publisher    = {Linköping University Electronic Press, Linköpings universitet},
	address      = {Linköping},
	ISBN         = {978-91-7929-998-9},
}

% Conference paper (22nd Nordic Conference on Computational Linguistics).
% Editors are in the editor field; address is the publisher's city, matching
% the other Linköping University Electronic Press entry in this file.
@inProceedings{hoang-etal-2019-aspect-284269,
	title        = {Aspect-Based Sentiment Analysis using {BERT}},
	booktitle    = {Proceedings of the 22nd Nordic Conference on Computational Linguistics, 30 September–2 October, 2019, Turku, Finland},
	editor       = {Hartmann, Mareike and Plank, Barbara},
	author       = {Hoang, M. and Bihorac, O. A. and Rouces, Jacobo},
	year         = {2019},
	publisher    = {Linköping University Electronic Press},
	address      = {Linköping},
	ISBN         = {978-91-7929-995-8},
}

% Conference paper. The platform name in the title is brace-protected against
% sentence-casing styles.
% NOTE(review): booktitle looks like a series name rather than the title of a
% specific proceedings volume; confirm against the publication.
@inProceedings{alfter-etal-2019-larka-281344,
	title        = {{Lärka}: From Language Learning Platform to Infrastructure for Research on Language Learning},
	abstract     = {Lärka is an Intelligent Computer-Assisted Language Learning (ICALL) platform developed at Språkbanken, as a flexible and a valuable source of additional learning material (e.g. via corpusbased exercises) and a support tool for both teachers and L2 learners of Swedish and students of (Swedish) linguistics. Nowadays, Lärka is being adapted into a building block in an emerging second language research infrastructure within a larger context of the text-based research infrastructure developed by the national Swedish Language bank, Språkbanken, and SWE-CLARIN. Lärka has recently received a new responsive user interface adapted to different devices with different screen sizes. Moreover, the system has also been augmented with new functionalities. These recent additions aim at improving the usability and the usefulness of the platform for pedagogical purposes. The most important development, though, is the adaptation of the platform to serve as a component in an e-infrastructure supporting research on language learning and multilingualism. Thanks to Lärka’s service-oriented architecture, most functionalities are also available as web services which can be easily re-used by other applications.},
	booktitle    = {Linköping Electronic Conference Proceedings},
	author       = {Alfter, David and Borin, Lars and Pilán, Ildikó and Lindström Tiedemann, Therese and Volodina, Elena},
	year         = {2019},
	publisher    = {Linköping University Press},
	address      = {Linköping},
	ISBN         = {978-91-7685-034-3},
}

% Journal article. The tagset name is brace-protected so that sentence-casing
% styles do not lowercase it.
@article{adesam-bouma-2019-koala-288026,
	title        = {The {Koala} Part-of-Speech Tagset},
	abstract     = {We present the Koala part-of-speech tagset for written Swedish. The categorization takes the Swedish Academy Grammar (SAG) as its main starting point, to fit with the current descriptive view on Swedish grammar. We argue that neither SAG, as is, nor any of the existing part-of-speech tagsets meet our requirements for a broadly applicable categorization. Our proposal is outlined and compared to the other descriptions, and motivations for both the tagset as a whole as well as decisions about individual tags are discussed.},
	journal      = {Northern European Journal of Language Technology},
	author       = {Adesam, Yvonne and Bouma, Gerlof},
	year         = {2019},
	volume       = {6},
	pages        = {5--41},
}

% Workshop paper (6th Workshop on Argument Mining). Editors are stored in the
% editor field; the subtitle is set off with an en dash and keeps its capital.
@inProceedings{lindahl-etal-2019-towards-286588,
	title        = {Towards Assessing Argumentation Annotation -- {A} First Step},
	abstract     = {This paper presents a first attempt at using Walton’s argumentation schemes for annotating arguments in Swedish political text and assessing the feasibility of using this particular set of schemes with two linguistically trained annotators. The texts are not pre-annotated with argumentation structure beforehand. The results show that the annotators differ both in number of annotated arguments and selection of the conclusion and premises which make up the arguments. They also differ in their labeling of the schemes, but grouping the schemes increases their agreement. The outcome from this will be used to develop guidelines for future annotations.},
	booktitle    = {Proceedings of the 6th Workshop on Argument Mining, August 1, 2019, Florence, Italy},
	editor       = {Stein, Benno and Wachsmuth, Henning},
	author       = {Lindahl, Anna and Borin, Lars and Rouces, Jacobo},
	year         = {2019},
	publisher    = {Association for Computational Linguistics},
	address      = {Stroudsburg, PA},
	ISBN         = {978-1-950737-33-8},
}

% Conference paper (DHN 2019). The series name sits in the series field and
% the publisher is the organisation.
% NOTE(review): volume 2364 is taken from the other entries in this file that
% cite the same DHN 2019 CEUR volume; confirm.
@inProceedings{adesam-etal-2019-exploring-279948,
	title        = {Exploring the Quality of the Digital Historical Newspaper Archive {KubHist}},
	abstract     = {The KubHist Corpus is a massive corpus of Swedish historical newspapers, digitized by the Royal Swedish library, and available through the Språkbanken corpus infrastructure Korp. This paper contains a first overview of the KubHist corpus, exploring some of the difficulties with the data, such as OCR errors and spelling variation, and discussing possible paths for improving the quality and the searchability.},
	booktitle    = {Proceedings of the 4th Conference of The Association Digital Humanities in the Nordic Countries (DHN), Copenhagen, Denmark, March 5-8, 2019},
	editor       = {Costanza Navarretta and Manex Agirrezabal and Bente Maegaard},
	series       = {CEUR Workshop Proceedings},
	volume       = {2364},
	author       = {Adesam, Yvonne and Dannélls, Dana and Tahmasebi, Nina},
	year         = {2019},
	publisher    = {CEUR-WS.org},
	address      = {Aachen},
}

% Conference paper (DHN 2019). Series and volume live in their own fields;
% booktitle holds only the conference name, venue and date.
@inProceedings{rouces-etal-2019-tracking-281308,
	title        = {Tracking Attitudes Towards Immigration in {Swedish} Media},
	abstract     = {We use a gold standard under construction for sentiment analysis in Swedish to explore how attitudes towards immigration change across time and media. We track the evolution of attitude starting from the year 2000 for three different Swedish media: the national newspapers Aftonbladet and Svenska Dagbladet, representing different halves of the left–right political spectrum, and the online forum Flashback.},
	booktitle    = {Digital Humanities in the Nordic Countries 4th Conference, Copenhagen, Denmark, March 5-8, 2019},
	series       = {CEUR Workshop Proceedings},
	volume       = {2364},
	author       = {Rouces, Jacobo and Borin, Lars and Tahmasebi, Nina},
	year         = {2019},
	publisher    = {CEUR-WS.org},
	address      = {Aachen},
}

% Conference paper (RANLP 2019) with publisher, address and ISBN recorded.
@inproceedings{virk-etal-2019-exploiting-290903,
  author    = {Virk, Shafqat and Muhammad, Azam Sheikh and Borin, Lars and Aslam, Muhammad Irfan and Iqbal, Saania and Khurram, Nazia},
  title     = {Exploiting frame semantics and frame-semantic parsing for automatic extraction of typological information from descriptive grammars of natural languages},
  booktitle = {12th International Conference on Recent Advances in Natural Language Processing, RANLP 2019, Varna, Bulgaria, 2-4 September 2019},
  year      = {2019},
  publisher = {INCOMA Ltd.},
  address   = {Shoumen, Bulgaria},
  isbn      = {978-954-452-055-7},
  abstract  = {We describe a novel system for automatic extraction of typological linguistic information from descriptive grammars of natural languages, applying the theory of frame semantics in the form of frame-semantic parsing. The current proof-of-concept system covers a few selected linguistic features, but the methodology is general and can be extended not only to other typological features but also to descriptive grammars written in languages other than English. Such a system is expected to be a useful assistance for automatic curation of typological databases which otherwise are built manually, a very labor and time consuming as well as cognitively taxing enterprise.}
}

% Conference paper (ExLing 2019) with editor, publisher, address and ISBN.
@inproceedings{johansson-etal-2019-lexical-284330,
  author    = {Johansson, Sofie and Lundholm Fors, Kristina and Antonsson, Malin and Kokkinakis, Dimitrios},
  title     = {Lexical diversity and mild cognitive impairment},
  booktitle = {Proceedings of the 10th International Conference of Experimental Linguistics, 25-27 September 2019, Lisbon, Portugal},
  editor    = {Antonis Botinis},
  year      = {2019},
  publisher = {ExLing Society},
  address   = {Athens, Greece},
  isbn      = {978-618-84585-0-5},
  abstract  = {This paper explores the role that various lexical-based measures play for differentiating between individuals with mild forms of cognitive impairment (MCI) and healthy controls (HC). Recent research underscores the importance of language and linguistic analysis as essential components that can contribute to a variety of sensitive cognitive measures for the identification of milder forms of cognitive
impairment. Subtle language changes serve as a sign that an individual’s cognitive functions have been impacted, potentially leading to early diagnosis. Our research aims to identify linguistic biomarkers that could distinguish between individuals with MCI and HC and also be useful in predicting MCI.}
}

% Conference paper (ExLing 2019). Publisher and address agree with the other
% entry in this file that cites the same proceedings volume (same ISBN).
@inProceedings{lundholmfors-etal-2019-reading-284036,
	title        = {Reading and mild cognitive impairment},
	abstract     = {In the present study, we investigated the discriminatory power of eye-tracking features in distinguishing between individuals with mild cognitive impairment (MCI) and healthy controls (HC). The eye movements of the study participants were recorded at two different time points, 18 months apart. Using a machine learning approach with leave-one-out cross-validation, we were able to discriminate between the groups with 73.6 AUC. However, somewhat surprisingly the classification was less successful using data from the second recording session, which might be attributed to the non-static nature of cognitive status. Still, the outcome suggests that eye-tracking measures can be exploited as useful markers of MCI.},
	booktitle    = {Proceedings of the 10th International Conference of Experimental Linguistics, 25-27 September 2019, Lisbon, Portugal},
	editor       = {Antonis Botinis},
	author       = {Lundholm Fors, Kristina and Antonsson, Malin and Kokkinakis, Dimitrios and Fraser, Kathleen},
	year         = {2019},
	publisher    = {ExLing Society},
	address      = {Athens, Greece},
	ISBN         = {978-618-84585-0-5},
}

@inProceedings{linz-etal-2019-temporal-279131,
	title        = {Temporal Analysis of Semantic Verbal Fluency Tasks in Persons with Subjective and Mild Cognitive Impairment},
	abstract     = {The Semantic Verbal Fluency (SVF) task is a classical neuropsychological assessment where persons are asked to produce words belonging to a semantic category (e.g., animals) in a given time. This paper introduces a novel method of temporal analysis for SVF tasks utilizing time intervals and applies it to a corpus of elderly Swedish subjects (mild cognitive impairment, subjective cognitive impairment and healthy controls). A general decline in word count and lexical frequency over the course of the task is revealed, as well as an increase in word transition times. Persons with subjective cognitive impairment had a higher word count during the last intervals, but produced words of the same lexical frequencies. Persons with MCI had a steeper decline in both word count and lexical frequencies during the third interval. Additional correlations with neuropsychological scores suggest these findings are linked to a person’s overall vocabulary size and processing speed, respectively. Classification results improved when adding the novel features (AUC = 0.72), supporting their diagnostic value.},
	booktitle    = {Sixth Workshop on Computational Linguistics and Clinical Psychology: Reconciling Outcomes. Minneapolis, Minnesota, USA, June 6, 2019},
	editor       = {Niederhoffer, Kate and Hollingshead, Kristy and Resnik, Philip and Resnik, Rebecca and Loveys, Kate},
	author       = {Linz, Nicklas and Lundholm Fors, Kristina and Lindsay, Hali and Eckerström, Marie and Alexandersson, Jan and Kokkinakis, Dimitrios},
	year         = {2019},
	publisher    = {Association for Computational Linguistics},
	address      = {Stroudsburg, PA},
	ISBN         = {978-1-948087-95-7},
}

@inProceedings{antonsson-etal-2019-discourse-284038,
	title        = {Discourse in Mild Cognitive Impairment},
	abstract     = {This paper reports on how persons with mild cognitive impairment (MCI) perform on two types of narrative tasks compared to a group of healthy controls (HC). The first task is a widely used picture description task and the other task is a more complex discourse task. Since the latter task puts higher demands on cognitive linguistic skills, as seen in previous research, we expected this task to be more efficient in discriminating between the two groups. The results confirm this hypothesis.},
	booktitle    = {Proceedings of the 10th International Conference of Experimental Linguistics, 25--27 September 2019, Lisbon, Portugal},
	editor       = {Botinis, Antonis},
	author       = {Antonsson, Malin and Lundholm Fors, Kristina and Kokkinakis, Dimitrios},
	year         = {2019},
	publisher    = {ExLing Society},
	address      = {Athens, Greece},
	ISBN         = {978-618-84585-0-5},
}

@inProceedings{kokkinakis-lundholmfors-2019-"hund-279384,
	title        = {”hund, katt, ko...”: Semantiskt ordflödestest som indikator på kognitiv nedsättning hos äldre},
	abstract     = {Ordflödestest är en typ av test som ofta ingår vid språkliga och neuropsykologiska utredningar, och de används för att bedöma språkliga förmågor, så som ordmobilisering, och exekutiva funktioner, så som verbalt arbetsminne och bearbetningshastighet. Vid ett fonologiskt ordflödestest får personen i uppgift att på en begränsad tid (oftast 60 sekunder) producera så många ord som möjlighet som börjar med en viss bokstav (ofta F, A och S), medan vid ett semantiskt ordflödestest får personen istället i uppgift att producera ord som tillhör en viss kategori (t ex djur eller grönsaker). Dessa tester tar liten tid att genomföra, är lätta att administrera och ger värdefull information om kognitiva färdigheter och begränsningar.  Tidigare forskning har visat att ordflödestester har hög reliabilitet och är känsliga för kognitiva nedsättningar. Vid analys av testen mäts traditionellt enbart antalet korrekta ord som producerats, men med hjälp av digital ljudinspelning samt den utveckling som skett inom språkteknologi kan man nu göra mer detaljerade analyser och få ny information om de strategier man använder vid exempelvis ordgenereringen; nämligen klustring (produktion av en grupp relaterade ord inom den redan identifierade subkategorin) och växling (sökning efter och växling till nya subkategorier). I vår forskning studerar vi bl.a. semantiskt ordflödestest som nyanserad indikator på olika aspekter av exekutiva och språkliga förmågor hos personer med degenerativa lindriga eller milda kognitiva nedsättningar samt en kontrollgrupp med kognitivt friska individer. Studien kommer presentera detaljer av vår språkteknologiska analys, visa på de skillnader som finns mellan grupperna och de samband som eventuellt finns med andra, redan genomförda, neuropsykiatriska tester för samma population.},
	booktitle    = {Svenskans beskrivning 37, 8–10.5.2019, Åbo, Finland},
	author       = {Kokkinakis, Dimitrios and Lundholm Fors, Kristina},
	year         = {2019},
}

@inProceedings{kokkinakis-etal-2019-multifaceted-278217,
	title        = {A Multifaceted Corpus for the Study of Cognitive Decline in a {Swedish} Population},
	abstract     = {A potential, early-stage diagnostic marker for neurodegenerative diseases, such as Alzheimer’s disease, is the onset of language disturbances which is often characterized by subtle word-finding difficulties, impaired spontaneous speech, slight speech hesitancy, object naming difficulties and phonemic errors. Connected speech provides valuable information in a non-invasive and easy-to-assess way for determining aspects of the severity of language impairment. Data elicitation is an established method of obtaining highly constrained samples of connected speech that allows us to study the intricate interactions between various linguistic levels and cognition. In the paper, we describe the collection and content of a corpus consisting of spontaneous Swedish speech from individuals with Mild Cognitive Impairment (MCI), with Subjective Cognitive Impairment (SCI) and healthy, age-matched controls (HC). The subjects were pooled across homogeneous subgroups for age and education, a sub-cohort from the Gothenburg-MCI study. The corpus consists of high quality audio recordings (including transcriptions) of several tasks, namely:
(i)	a picture description task – the Cookie-theft picture, an ecologically valid approximation to spontaneous discourse that has been widely used to elicitate speech from speakers with different types of language and communication disorders;
(ii)	a read aloud task (including registration of eye movements) – where participants read a text from the IREST collection twice, both on a computer screen (while eye movements are registered), and the same text on paper;
(iii)	a complex planning task – a subset of executive functioning that tests the ability to identify, organize and carry out (complex) steps and elements that are required to achieve a goal;
(iv)	a map task – a spontaneous speech production/semi-structured conversation in which the participants are encouraged to talk about a predefined, cooperative task-oriented topic;
(v)	a semantic verbal fluency task – category animals: where participants have to produce as many words as possible from a category in a given time (60 seconds). The fluency tests require an elaborate retrieval of words from conceptual (semantic) and lexical (phonetic) memory involving specific areas of the brain in a restricted timeframe.
All samples are produced by Swedish speakers after obtaining written consent approved by the local ethics committee. Tasks (i) and (ii) have been collected twice in a diachronically apart period of 18 months between 2016 and 2018.
The corpus represents an approximation to speech in a natural setting: The material for elicitation is controlled in the sense that the speakers are given specific tasks to talk about, and they do so in front of a microphone. The corpus may serve as a basis for many linguistic and/or speech technological investigations and has being already used for various investigations of language features.},
	booktitle    = {CLARe4 : Corpora for Language and Aging Research, 27 February – 1 March 2019, Helsinki, Finland},
	author       = {Kokkinakis, Dimitrios and Lundholm Fors, Kristina and Fraser, Kathleen and Eckerström, Marie and Horn, Greta and Themistocleous, Charalambos},
	year         = {2019},
}

@article{fraser-etal-2019-multilingual-270713,
	title        = {Multilingual word embeddings for the assessment of narrative speech in mild cognitive impairment},
	abstract     = {We analyze the information content of narrative speech samples from individuals with mild cognitive impairment (MCI), in both English and Swedish, using a combination of supervised and unsupervised learning techniques. We extract information units using topic models trained on word embeddings in monolingual and multilingual spaces, and find that the multilingual approach leads to significantly better classification accuracies than training on the target language alone. In many cases, we find that augmenting the topic model training corpus with additional clinical data from a different language is more effective than training on additional monolingual data from healthy controls. Ultimately we are able to distinguish MCI speakers from healthy older adults with accuracies of up to 63\% (English) and 72\% (Swedish) on the basis of information content alone. We also compare our method against previous results measuring information content in Alzheimer's disease, and report an improvement over other topic-modeling approaches. Furthermore, our results support the hypothesis that subtle differences in language can be detected in narrative speech, even at the very early stages of cognitive decline, when scores on screening tools such as the Mini-Mental State Exam are still in the “normal” range.},
	journal      = {Computer Speech and Language},
	author       = {Fraser, Kathleen and Lundholm Fors, Kristina and Kokkinakis, Dimitrios},
	year         = {2019},
	volume       = {53},
	pages        = {121--139},
}

@inProceedings{fraser-etal-2019-multilingual-280280,
	title        = {Multilingual prediction of {Alzheimer’s} disease through domain adaptation and concept-based language modelling},
	abstract     = {There is growing evidence that changes in speech and language may be early markers of dementia, but much of the previous NLP work in this area has been limited by the size of the available datasets. Here, we compare several methods of domain adaptation to augment a small French dataset of picture descriptions (n = 57) with a much larger English dataset (n = 550), for the task of automatically distinguishing participants with dementia from controls. The first challenge is to identify a set of features that transfer across languages; in addition to previously used features based on information units, we introduce a new set of features to model the order in which information units are produced by dementia patients and controls. These concept-based language model features improve classification performance in both English and French separately, and the best result (AUC = 0.89) is achieved using the multilingual training set with a combination of information and language model features.},
	booktitle    = {Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers), June 2 - June 7, 2019, Minneapolis, Minnesota},
	editor       = {Burstein, Jill and Doran, Christy and Solorio, Thamar},
	author       = {Fraser, Kathleen and Linz, Nicklas and Lundholm Fors, Kristina and Rudzicz, Frank and König, Alexandra and Alexandersson, Jan and Robert, Philippe and Kokkinakis, Dimitrios},
	year         = {2019},
	publisher    = {Association for Computational Linguistics},
	address      = {Stroudsburg, PA},
	ISBN         = {978-1-950737-13-0},
}

@proceedings{tahmasebi-etal-2019-proceedings-285886,
	title        = {Proceedings of the 1st International Workshop on Computational Approaches to Historical Language Change, August 2, 2019, Florence, Italy},
	editor       = {Tahmasebi, Nina and Borin, Lars and Jatowt, Adam and Xu, Yang},
	year         = {2019},
	publisher    = {Association for Computational Linguistics},
	address      = {Stroudsburg, PA},
	ISBN         = {978-1-950737-31-4},
}

@article{skoldberg-etal-2019-state-279701,
	title        = {State-of-the-art on monolingual lexicography for {Sweden}},
	abstract     = {The minireview describes the state-of-the-art of Swedish monolingual lexicography. The main actors in the field, both commercial and non-commercial, are mentioned alongside with the description of lexicographic products that have been offered by them to the lexicon users. The minireview makes it clear that there is an obvious tendency among the Swedish dictionary users to abandon paper-based dictionaries and switch over to online portals and apps, which influences the practices adopted by commercial publishing houses, such as Norstedts, Bonniers, Natur \& Kultur. Among the leading non-commercial players, the Swedish Academy, the Swedish Language Bank, Institute for Language and Folklore are named. Swedish monolingual lexicography offers, however, dictionaries produced not only by experts but also by non-experts (i.e. using the efforts of the crowd).},
	journal      = {Slovenščina 2.0: Empirical, Applied and Interdisciplinary Research},
	author       = {Sköldberg, Emma and Holmer, Louise and Volodina, Elena and Pilán, Ildikó},
	year         = {2019},
	volume       = {7},
	number       = {1},
	pages        = {13--24},
}

@inProceedings{fridlund-etal-2019-trawling-287968,
	title        = {Trawling for Terrorists: A Big Data Analysis of Conceptual Meanings and Contexts in {Swedish} Newspapers, 1780–1926},
	abstract     = {The conceptual history of terrorism has to a significant extent been studied through canonical texts or historical key figures or organisations. However, through the increasing digitization of text materials conventional research questions can now be approached from new angles or established results verified on the basis of exhaustive collections of data, rather than limited samples. Specifically, we are interested in evaluating and expanding on prior research claims regarding the meanings and contexts associated with the concepts terrorism and terrorist up until the twentieth century in a Swedish context. The investigation is guided by the following research questions: What historical meanings of the concept of terrorism were expressed in the Swedish newspaper discourse? What social and ideological contexts and violent political practices was the concept primarily associated with before the First World War?},
	booktitle    = {Proceedings of the 5th International Workshop on Computational History (HistoInformatics 2019) co-located with the 23rd International Conference on Theory and Practice of Digital Libraries (TPDL 2019), Oslo, Norway, September 12th, 2019},
	editor       = {Wevers, Melvin and Hasanuzzaman, Mohammed and Dias, Gaël and Düring, Marten and Jatowt, Adam},
	author       = {Fridlund, Mats and Olsson, Leif-Jöran and Brodén, Daniel and Borin, Lars},
	year         = {2019},
	publisher    = {CEUR-WS},
	address      = {Aachen},
}

@inProceedings{rouces-etal-2019-political-281307,
	title        = {Political Stance Analysis Using {Swedish} Parliamentary Data},
	abstract     = {We process and visualize Swedish parliamentary data using methods from statistics and machine learning, which allows us to obtain insight into the political processes behind the data. We produce plots that let us infer the relative stance of political parties and their members on different topics. In addition, we can infer the degree of homogeneity of individual votes within different parties, as well as the degree of multi-dimensionality of Swedish politics.},
	booktitle    = {Digital Humanities in the Nordic Countries 4th Conference, Copenhagen, Denmark, March 5--8, 2019},
	series       = {CEUR Workshop Proceedings},
	volume       = {2364},
	author       = {Rouces, Jacobo and Borin, Lars and Tahmasebi, Nina},
	year         = {2019},
	publisher    = {CEUR},
	address      = {Aachen},
}

@book{nietopina-2019-splitting-282680,
	author       = {Nieto Piña, Luis},
	title        = {Splitting rocks: Learning word sense representations from corpora and lexica},
	year         = {2019},
	publisher    = {University of Gothenburg},
	address      = {Gothenburg},
	isbn         = {978-91-87850-75-2},
	abstract     = {The representation of written language semantics is a central problem of language technology and a crucial component of many natural language processing applications, from part-of-speech tagging to text summarization. These representations of linguistic units, such as words or sentences, allow computer applications that work with language to process and manipulate the meaning of text. In particular, a family of models has been successfully developed based on automatically learning semantics from large collections of text and embedding them into a vector space, where semantic or lexical similarity is a function of geometric distance. Co-occurrence information of words in context is the main source of data used to learn these representations.

Such models have typically been applied to learning representations for word forms, which have been widely applied, and proven to be highly successful, as characterizations of semantics at the word level. However, a word-level approach to meaning representation implies that the different meanings, or senses, of any polysemic word share one single representation. This might be problematic when individual word senses are of interest and explicit access to their specific representations is required. For instance, in cases such as an application that needs to deal with word senses rather than word forms, or when a digital lexicon's sense inventory has to be mapped to a set of learned semantic representations.

In this thesis, we present a number of models that try to tackle this problem by automatically learning representations for word senses instead of for words. In particular, we try to achieve this by using two separate sources of information: corpora and lexica for the Swedish language. Throughout the five publications compiled in this thesis, we demonstrate that it is possible to generate word sense representations from these sources of data individually and in conjunction, and we observe that combining them yields superior results in terms of accuracy and sense inventory coverage. Furthermore, in our evaluation of the different representational models proposed here, we showcase the applicability of word sense representations both to downstream natural language processing applications and to the development of existing linguistic resources.},
}

@inProceedings{bamutura-ljunglof-2019-towards-284293,
	title        = {Towards a resource grammar for {Runyankore} and {Rukiga}},
	abstract     = {Currently, there is a lack of computational grammar resources for many under-resourced languages which limits the ability to develop Natural Language Processing (NLP) tools and applications such as Multilingual Document Authoring, Computer-Assisted Language Learning (CALL) and Low-Coverage Machine Translation (MT) for these languages. In this paper, we present our attempt to formalise the grammar of two such languages: Runyankore and Rukiga. For this formalisation we use the Grammatical Framework (GF) and its Resource Grammar Library (GF-RGL).},
	booktitle    = {WiNLP 2019, the 3rd Workshop on Widening NLP, Florence, Italy, 28th July 2019},
	author       = {Bamutura, David and Ljunglöf, Peter},
	year         = {2019},
}

@inProceedings{alfter-etal-2019-legato-285625,
	title        = {{LEGATO}: A flexible lexicographic annotation tool},
	abstract     = {This article is a report from an ongoing project aiming at analyzing lexical and grammatical competences of Swedish as a Second language (L2). To facilitate lexical analysis, we need access to metalinguistic information about relevant vocabulary that L2 learners can use and understand. The focus of the current article is on the lexical annotation of the vocabulary scope for a range of lexicographical aspects, such as morphological analysis, valency, types of multi-word units, etc. We perform parts of the analysis automatically, and other parts manually. The rationale behind this is that where there is no possibility to add information automatically, manual effort needs to be added. To facilitate the latter, a tool LEGATO has been designed, implemented and currently put to active testing.},
	booktitle    = {NEAL Proceedings of the 22nd Nordic Conference on Computational Linguistics (NoDaLiDa), September 30-October 2, Turku, Finland},
	series       = {Linköping Electronic Conference Proceedings},
	number       = {167},
	editor       = {Hartman, Mareike and Plank, Barbara},
	author       = {Alfter, David and Lindström Tiedemann, Therese and Volodina, Elena},
	year         = {2019},
	publisher    = {Linköping University Electronic Press},
	address      = {Linköping},
	ISBN         = {978-91-7929-995-8},
}

@article{kosem-etal-2019-image-275354,
	title        = {The image of the monolingual dictionary across {Europe}. Results of the {European} survey of dictionary use and culture},
	abstract     = {The article presents the results of a survey on dictionary use in Europe, focusing on general monolingual dictionaries. The survey is the broadest survey of dictionary use to date, covering close to 10,000 dictionary users (and non-users) in nearly thirty countries. Our survey covers varied user groups, going beyond the students and translators who have tended to dominate such studies thus far. The survey was delivered via an online survey platform, in language versions specific to each target country. It was completed by 9,562 respondents, over 300 respondents per country on average. The survey consisted of the general section, which was translated and presented to all participants, as well as country-specific sections for a subset of 11 countries, which were drafted by collaborators at the national level. The present report covers the general section.},
	journal      = {International Journal of Lexicography},
	author       = {Kosem, Iztok and Lew, Robert and Müller-Spitzer, Carolin and Ribeiro Silveira, Maria and Wolfer, Sascha and Volodina, Elena and Pilán, Ildikó and Sköldberg, Emma and Holmer, Louise and Dorn, Amelie and Gurrutxaga, Antton and Lorentzen, Henrik and Kallas, Jelena and Abel, Andrea and Tiberius, Carole and {Local Partners}},
	year         = {2019},
	volume       = {32},
	number       = {1},
	pages        = {92--114},
}

@article{fraser-etal-2019-predicting-282807,
	title        = {Predicting {MCI} Status From Multimodal Language Data Using Cascaded Classifiers},
	abstract     = {Recent work has indicated the potential utility of automated language analysis for the detection of mild cognitive impairment (MCI). Most studies combining language processing and machine learning for the prediction of MCI focus on a single language task; here, we consider a cascaded approach to combine data from multiple language tasks. A cohort of 26 MCI participants and 29 healthy controls completed three language tasks: picture description, reading silently, and reading aloud. Information from each task is captured through different modes (audio, text, eye-tracking, and comprehension questions). Features are extracted from each mode, and used to train a series of cascaded classifiers which output predictions at the level of features, modes, tasks, and finally at the overall session level. The best classification result is achieved through combining the data at the task level (AUC = 0.88, accuracy = 0.83). This outperforms a classifier trained on neuropsychological test scores (AUC = 0.75, accuracy = 0.65) as well as the "early fusion" approach to multimodal classification (AUC = 0.79, accuracy = 0.70). By combining the predictions from the multimodal language classifier and the neuropsychological classifier, this result can be further improved to AUC = 0.90 and accuracy = 0.84. In a correlation analysis, language classifier predictions are found to be moderately correlated (rho = 0.42) with participant scores on the Rey Auditory Verbal Learning Test (RAVLT). The cascaded approach for multimodal classification improves both system performance and interpretability. This modular architecture can be easily generalized to incorporate different types of classifiers as well as other heterogeneous sources of data (imaging, metabolic, etc.).},
	journal      = {Frontiers in Aging Neuroscience},
	author       = {Fraser, Kathleen and Lundholm Fors, Kristina and Eckerström, Marie and Öhman, Fredrik and Kokkinakis, Dimitrios},
	year         = {2019},
	volume       = {11},
	number       = {205},
}

@inProceedings{stemle-etal-2019-working-319453,
	title        = {Working together towards an ideal infrastructure for language learner corpora},
	abstract     = {In this article we provide an overview of first-hand experiences and vantage points for best practices from projects in seven European countries dedicated to learner corpus research (LCR) and the creation of language learner corpora. The corpora and tools involved in LCR are becoming more and more important, as are careful preparation and easy retrieval and reusability of corpora and tools. But the lack of commonly agreed solutions for many aspects of LCR, interoperability between learner corpora and the exchange of data from different learner corpus projects remains a challenge. We show how concepts like metadata, anonymization, error taxonomies and linguistic annotations as well as tools, toolchains and data formats can be individually challenging and how the challenges can be solved.},
	booktitle    = {Widening the Scope of Learner Corpus Research. Selected papers from the fourth Learner Corpus Research Conference},
	series       = {Corpora and Language in Use – Proceedings},
	volume       = {5},
	editor       = {Abel, Andrea and Glaznieks, Aivars and Lyding, Verena and Nicolas, Lionel},
	author       = {Stemle, Egon and Boyd, Adriane and Janssen, Maarten and Preradović, Nives Mikelić and Rosen, Alexandr and Rosén, Dan and Volodina, Elena},
	year         = {2019},
	publisher    = {PUL, Presses Universitaires de Louvain},
	address      = {Louvain-la-Neuve},
	ISBN         = {978-2-87558-868-5},
}

@article{sandberg-etal-2019-issue-285614,
	title        = {Issue Salience on {Twitter} During {Swedish} Party Leaders’ Debates},
	abstract     = {The objective of this study is to contribute knowledge about formation of political agendas on Twitter during mediated political events, using the party leaders’ debates in Sweden before the general election of 2014 as a case study. Our findings show that issues brought up during the debates were largely mirrored on Twitter, with one striking discrepancy. Contrary to our expectations, issues on the left-right policy dimension were more salient on Twitter than in the debates, whereas issues such as the environment, immigration and refugees, all tied to a liberal-authoritarian value axis, were less salient on Twitter.},
	journal      = {Nordicom Review},
	author       = {Sandberg, Linn and Bjereld, Ulf and Bunyik, Karina and Forsberg, Markus and Johansson, Richard},
	year         = {2019},
	volume       = {40},
	number       = {2},
	pages        = {49--61},
}

@techreport{ljunglof-etal-2019-assessing-281222,
	title        = {Assessing the quality of {Språkbanken’s} annotations},
	abstract     = {Most of the corpora in Språkbanken Text consist of unannotated plain text, such as almost all newspaper texts, social media texts, novels and official documents. We also have some corpora that are manually annotated in different ways, such as Talbanken (annotated for part-of-speech and syntactic structure), and the Stockholm Umeå Corpus (annotated for part-of-speech). Språkbanken’s annotation pipeline Sparv aims to automatise the work of automatically annotating all our corpora, while still keeping the manual annotations intact. When all corpora are annotated, they can be made available, e.g., in the corpus search tools Korp and Strix. Until now there has not been any comprehensive overview of the annotation tools and models that Sparv has been using for the last eight years. Some of them have not been updated since the start, such as the part-of-speech tagger Hunpos and the dependency parser MaltParser. There are also annotation tools that we still have not included, such as a constituency-based parser.
Therefore Språkbanken initiated a project with the aim of conducting such an overview. This document is the outcome of that project, and it contains descriptions of the types of manual and automatic annotations that we currently have in Språkbanken, as well as an incomplete overview of the state-of-the-art with regards to annotation tools and models.},
	author       = {Ljunglöf, Peter and Zechner, Niklas and Nieto Piña, Luis and Adesam, Yvonne and Borin, Lars},
	year         = {2019},
	institution  = {Språkbanken Text},
}

@inProceedings{alfter-volodina-2019-from-285728,
	title        = {From river to bank: The importance of sense-based graded word lists},
	booktitle    = {EUROCALL 2019 - CALL and Complexity, Book of Abstracts, Louvain-la-Neuve, Belgium, 28--31 August 2019},
	author       = {Alfter, David and Volodina, Elena},
	year         = {2019},
}

@article{agebjorn-alfter-2019-review-281196,
	title        = {Review of Advanced Proficiency and Exceptional Ability in Second Languages},
	journal      = {Linguist List},
	author       = {Agebjörn, Anders and Alfter, David},
	year         = {2019},
	number       = {Jan 16},
}

@inProceedings{dubossarsky-etal-2019-time-281304,
	title        = {Time-Out: Temporal Referencing for Robust Modeling of Lexical Semantic Change},
	abstract     = {State-of-the-art models of lexical semantic change detection suffer from noise stemming from vector space alignment. We have empirically tested the Temporal Referencing method for lexical semantic change and show that, by avoiding alignment, it is less affected by this noise. We show that, trained on a diachronic corpus, the skip-gram with negative sampling architecture with temporal referencing outperforms alignment models on a synthetic task as well as a manual testset. We introduce a principled way to simulate lexical semantic change and systematically control for possible biases.},
	booktitle    = {Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics, Florence, Italy, July 28 - August 2, 2019},
	editor       = {Korhonen, Anna and Traum, David and Màrquez, Lluís},
	author       = {Dubossarsky, Haim and Hengchen, Simon and Tahmasebi, Nina and Schlechtweg, Dominik},
	year         = {2019},
	publisher    = {Association for Computational Linguistics},
	address      = {Stroudsburg, PA},
	ISBN         = {978-1-950737-48-2},
}

@article{tahmasebi-hengchen-2019-strengths-291189,
	title        = {The Strengths and Pitfalls of Large-Scale Text Mining for Literary Studies},
	abstract     = {This paper is an overview of the opportunities and challenges of using large-scale text mining to answer research questions that stem from the humanities in general and literature specifically. In this paper, we will discuss a data-intensive research methodology and how different views of digital text affect answers to research questions. We will discuss results derived from text mining, how these results can be evaluated, and their relation to hypotheses and research questions. Finally, we will discuss some pitfalls of computational literary analysis and give some pointers as to how these can be avoided.},
	journal      = {Samlaren : tidskrift för svensk litteraturvetenskaplig forskning},
	author       = {Tahmasebi, Nina and Hengchen, Simon},
	year         = {2019},
	volume       = {140},
	pages        = {198--227},
}

@inProceedings{alfter-graen-2019-interconnecting-285731,
	title        = {Interconnecting lexical resources and word alignment: How do learners get on with particle verbs?},
	abstract     = {In this paper, we present a prototype for an online exercise aimed at learners of English and Swedish that serves multiple purposes. The exercise allows learners of the aforementioned languages to train their knowledge of particle verbs receiving clues from the exercise application. The user themselves decide which clue to receive and pay in virtual currency for each, which provides us with valuable information about the utility of the clues that we provide as well as the learners willingness to trade virtual currency versus accuracy of their choice. As resources, we use list with annotated levels from the proficiency scale defined by the Common European Framework of Reference (CEFR) and a multilingual corpus with syntactic dependency relations and word annotation for all language pairs. From the latter resource, we extract translation equivalents for particle verb construction together with a list of parallel corpus examples that can be used as clues in the exercise.},
	booktitle    = {NEAL Proceedings of the 22nd Nordic Conference on Computational Linguistics (NoDaLiDa), September 30-October 2, Turku, Finland},
	series       = {Linköping Electronic Conference Proceedings},
	number       = {167},
	editor       = {Hartman, Mareike and Plank, Barbara},
	author       = {Alfter, David and Graën, Johannes},
	year         = {2019},
	publisher    = {Linköping University Electronic Press},
	address      = {Linköping},
	ISBN         = {978-91-7929-995-8},
}
Sidansvarig: sb-webb