BibTeX

@inProceedings{dubossarsky-etal-2019-time-281304,
	title        = {Time-Out: Temporal Referencing for Robust Modeling of Lexical Semantic Change},
	abstract     = {State-of-the-art models of lexical semantic change detection suffer from noise stemming from vector space alignment. We have empirically tested the Temporal Referencing method for lexical semantic change and show that, by avoiding alignment, it is less affected by this noise. We show that, trained on a diachronic corpus, the skip-gram with negative sampling architecture with temporal referencing outperforms alignment models on a synthetic task as well as a manual testset. We introduce a principled way to simulate lexical semantic change and systematically control for possible biases.},
	booktitle    = {Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics, Florence, Italy, July 28 - August 2, 2019 / Anna Korhonen, David Traum, Lluís Màrquez (Editors)},
	author       = {Dubossarsky, Haim and Hengchen, Simon and Tahmasebi, Nina and Schlechtweg, Dominik},
	year         = {2019},
	publisher    = {Association for Computational Linguistics},
	address      = {Stroudsburg, PA},
	ISBN         = {978-1-950737-48-2},
}

@inProceedings{abualhajia-etal-2017-parameter-256642,
	title        = {Parameter Transfer across Domains for Word Sense Disambiguation},
	abstract     = {Word sense disambiguation is defined as finding the corresponding sense for a target word in a given context, which comprises a major step in text applications. Recently, it has been addressed as an optimization problem. The idea behind this is to find a sequence of senses that corresponds to the words in a given context with a maximum semantic similarity. Metaheuristics like simulated annealing and D-Bees provide approximate good-enough solutions, but are usually influenced by the starting parameters. In this paper, we study the parameter tuning for both algorithms within the word sense disambiguation problem. The experiments are conducted on different datasets to cover different disambiguation scenarios. We show that D-Bees is robust and less sensitive towards the initial parameters compared to simulated annealing, hence it is sufficient to tune the parameters once and reuse them for different datasets, domains or languages.},
	booktitle    = {Proceedings of Recent Advances in Natural Language Processing Meet Deep Learning, Varna, Bulgaria, 2–8 September 2017 / Edited by Galia Angelova, Kalina Bontcheva, Ruslan Mitkov, Ivelina Nikolova, Irina Temnikova},
	author       = {Abualhaija, Sallam and Tahmasebi, Nina and Forin, Diane and Zimmermann, Karl-Heinz},
	year         = {2017},
	ISBN         = {978-954-452-048-9},
}

@article{adesam-etal-2016-sprakteknologi-237884,
	title        = {Språkteknologi för svenska språket genom tiderna},
	abstract     = {Språkbanken, the Swedish Language Bank, is a language technology research unit at the Department of Swedish, University of Gothenburg. We develop language resources – such as corpora, lexical resources, and analytical tools – for all variants of Swedish, from Old Swedish laws to present-day social media. Historical texts offer exciting theoretical and methodological challenges for language technology because they often defy the assumption inherent in most automatic analysis tools that the texts contain a standardized written language. In this article, we describe our ongoing work on the development of annotated historical corpora, as well as our efforts on linking various resources (both corpora and lexical resources). This research advances the state of the art of language technology as well as enables new research for scholars in other disciplines.},
	journal      = {Kungliga Skytteanska Samfundets Handlingar},
	author       = {Adesam, Yvonne and Ahlberg, Malin and Andersson, Peter and Borin, Lars and Bouma, Gerlof and Forsberg, Markus},
	year         = {2016},
	volume       = {76},
	number       = {Studier i svensk språkhistoria 13},
	pages        = {65--87},
}

@inProceedings{adesam-etal-2014-computer-198794,
	title        = {Computer-aided Morphology Expansion for Old Swedish},
	abstract     = {In this paper we describe and evaluate a tool for paradigm induction and lexicon extraction that has been applied to Old Swedish. The tool is semi-supervised and uses a small seed lexicon and unannotated corpora to derive full inflection tables for input lemmata. In the work presented here, the tool has been modified to deal with the rich spelling variation found in Old Swedish texts. We also present some initial experiments, which are the first steps towards creating a large-scale morphology for Old Swedish.},
	booktitle    = {Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC'14), May 26-31, 2014, Reykjavik, Iceland},
	author       = {Adesam, Yvonne and Ahlberg, Malin and Andersson, Peter and Bouma, Gerlof and Forsberg, Markus and Hulden, Mans},
	year         = {2014},
	ISBN         = {978-2-9517408-8-4},
	pages        = {1102--1105},
}

@inProceedings{adesam-etal-2012-processing-166657,
	title        = {Processing spelling variation in historical text},
	booktitle    = {Proceedings of the Fourth Swedish Language Technology Conference (SLTC)},
	author       = {Adesam, Yvonne and Ahlberg, Malin and Bouma, Gerlof},
	year         = {2012},
}

@inProceedings{adesam-etal-2012-bokstaffua-163218,
	title        = {bokstaffua, bokstaffwa, bokstafwa, bokstaua, bokstawa... Towards lexical link-up for a corpus of Old Swedish},
	booktitle    = {Proceedings of the LTHist workshop at Konvens},
	author       = {Adesam, Yvonne and Ahlberg, Malin and Bouma, Gerlof},
	year         = {2012},
}

@inProceedings{adesam-etal-2018-fsvreader-267311,
	title        = {FSvReader – Exploring Old Swedish Cultural Heritage Texts},
	abstract     = {This paper describes FSvReader, a tool for easier access to Old Swedish (13th–16th century) texts. Through automatic fuzzy linking of words in a text to a dictionary describing the language of the time, the reader has direct access to dictionary pop-up definitions, in spite of the large amount of morphological and spelling variation. The linked dictionary entries can also be used for simple searches in the text, highlighting possible further instances of the same entry.},
	booktitle    = {CEUR Workshop Proceedings, vol. 2084. Proceedings of the Digital Humanities in the Nordic Countries 3rd Conference, Helsinki, Finland, March 7-9, 2018. Edited by Eetu Mäkelä, Mikko Tolonen, Jouni Tuominen},
	author       = {Adesam, Yvonne and Ahlberg, Malin and Bouma, Gerlof},
	year         = {2018},
	publisher    = {University of Helsinki, Faculty of Arts},
	address      = {Helsinki},
}

@inProceedings{adesam-etal-2014-koala-211376,
	title        = {Koala – Korp’s Linguistic Annotations: Developing an infrastructure for text-based research with high-quality annotations},
	booktitle    = {Proceedings of the Fifth Swedish Language Technology Conference, Uppsala, 13-14 November 2014},
	author       = {Adesam, Yvonne and Borin, Lars and Bouma, Gerlof and Forsberg, Markus and Johansson, Richard},
	year         = {2014},
}

@inProceedings{adesam-bouma-2016-swedish-251827,
	title        = {Old Swedish Part-of-Speech Tagging between Variation and External Knowledge},
	booktitle    = {Proceedings of the 10th SIGHUM Workshop on Language Technology for Cultural Heritage, Social Sciences, and Humanities, Berlin, Germany, August 11, 2016},
	author       = {Adesam, Yvonne and Bouma, Gerlof},
	year         = {2016},
	publisher    = {Association for Computational Linguistics},
	address      = {Stroudsburg, PA},
	ISBN         = {978-1-945626-09-8},
}

@article{adesam-bouma-2019-koala-288026,
	title        = {The Koala Part-of-Speech Tagset},
	abstract     = {We present the Koala part-of-speech tagset for written Swedish. The categorization takes the Swedish Academy Grammar (SAG) as its main starting point, to fit with the current descriptive view on Swedish grammar. We argue that neither SAG, as is, nor any of the existing part-of-speech tagsets meet our requirements for a broadly applicable categorization. Our proposal is outlined and compared to the other descriptions, and motivations for both the tagset as a whole as well as decisions about individual tags are discussed.},
	journal      = {Northern European Journal of Language Technology},
	author       = {Adesam, Yvonne and Bouma, Gerlof},
	year         = {2019},
	volume       = {6},
	pages        = {5--41},
}

@inProceedings{adesam-etal-2015-multiwords-228833,
	title        = {Multiwords, Word Senses and Multiword Senses in the Eukalyptus Treebank of Written Swedish},
	abstract     = {Multiwords reside at the intersection of the lexicon and syntax and in an annotation project, they will affect both levels.  In the Eukalyptus treebank of written Swedish, we treat multiwords formally as syntactic objects, which are assigned a lexical type and sense. With the help of a simple dichotomy, analyzed vs unanalyzed multiwords, and the expressiveness of the syntactic annotation formalism employed, we are able to flexibly handle most multiword types and usages.},
	booktitle    = {Proceedings of the Fourteenth International Workshop on Treebanks and Linguistic Theories (TLT14), 11–12 December 2015, Warsaw, Poland},
	author       = {Adesam, Yvonne and Bouma, Gerlof and Johansson, Richard},
	year         = {2015},
	ISBN         = {978-83-63159-18-4},
	pages        = {3--12},
}

@inProceedings{adesam-etal-2015-defining-217815,
	title        = {Defining the Eukalyptus forest – the Koala treebank of Swedish},
	abstract     = {This paper details the design of the lexical and syntactic layers of a new annotated corpus of Swedish contemporary texts. In order to make the corpus adaptable into a variety of representations, the annotation is of a hybrid type with head-marked constituents and function-labeled edges, and with a rich annotation of non-local dependencies. The source material has been taken from public sources, to allow the resulting corpus to be made freely available.},
	booktitle    = {Proceedings of the 20th Nordic Conference of Computational Linguistics, NODALIDA 2015, May 11-13, 2015, Vilnius, Lithuania. Edited by Beáta Megyesi},
	author       = {Adesam, Yvonne and Bouma, Gerlof and Johansson, Richard},
	year         = {2015},
	ISBN         = {978-91-7519-098-3},
	pages        = {1--9},
}

@inProceedings{adesam-etal-2018-koala-273841,
	title        = {The Koala Part-of-Speech and Morphological Tagset for Swedish},
	booktitle    = {Seventh Swedish Language Technology Conference (SLTC), Stockholm, 7–9 November 2018},
	author       = {Adesam, Yvonne and Bouma, Gerlof and Johansson, Richard},
	year         = {2018},
}

@inProceedings{adesam-etal-2018-eukalyptus-273839,
	title        = {The Eukalyptus Treebank of Written Swedish},
	booktitle    = {Seventh Swedish Language Technology Conference (SLTC), Stockholm, 7–9 November 2018},
	author       = {Adesam, Yvonne and Bouma, Gerlof and Johansson, Richard and Borin, Lars and Forsberg, Markus},
	year         = {2018},
}

@inProceedings{adesam-etal-2018-exploring-273835,
	title        = {Exploring the Quality of the Digital Historical Newspaper Archive KubHist},
	booktitle    = {Seventh Swedish Language Technology Conference (SLTC), Stockholm, 7–9 November 2018},
	author       = {Adesam, Yvonne and Dannélls, Dana and Tahmasebi, Nina},
	year         = {2018},
}

@inProceedings{adesam-etal-2019-exploring-279948,
	title        = {Exploring the Quality of the Digital Historical Newspaper Archive KubHist},
	abstract     = {The KubHist Corpus is a massive corpus of Swedish historical newspapers, digitized by the Royal Swedish Library, and available through the Språkbanken corpus infrastructure Korp. This paper contains a first overview of the KubHist corpus, exploring some of the difficulties with the data, such as OCR errors and spelling variation, and discussing possible paths for improving the quality and the searchability.},
	booktitle    = {Proceedings of the 4th Conference of The Association Digital Humanities in the Nordic Countries (DHN), Copenhagen, Denmark, March 5-8, 2019},
	editor       = {Costanza Navarretta and Manex Agirrezabal and Bente Maegaard},
	author       = {Adesam, Yvonne and Dannélls, Dana and Tahmasebi, Nina},
	year         = {2019},
	publisher    = {CEUR Workshop Proceedings},
	address      = {Aachen},
}

@inProceedings{adouane-johansson-2016-gulf-242243,
	title        = {Gulf Arabic Resource Building for Sentiment Analysis},
	abstract     = {This paper deals with building linguistic resources for Gulf Arabic, one of the Arabic varieties, for the sentiment analysis task using machine learning. To our knowledge, no previous work has been done on Gulf Arabic sentiment analysis despite the fact that it is present on different online platforms. Hence, the first challenge is the absence of annotated data and sentiment lexicons. To fill this gap, we created these two main linguistic resources. Then we conducted different experiments: using a Naive Bayes classifier without any lexicon; adding a sentiment lexicon designed basically for MSA; using only the compiled Gulf Arabic sentiment lexicon; and finally using both MSA and Gulf Arabic sentiment lexicons. The Gulf Arabic lexicon gives a good improvement in classifier accuracy (90.54%) over a baseline that does not use the lexicon (82.81%), while the MSA lexicon causes the accuracy to drop to 76.83%. Moreover, mixing MSA and Gulf Arabic lexicons causes the accuracy to drop to 84.94% compared to using only the Gulf Arabic lexicon. This indicates that it is useless to use MSA resources to deal with Gulf Arabic due to the considerable differences and conflicting structures between these two languages.},
	booktitle    = {Proceedings of the Language Resources and Evaluation Conference (LREC), 23-28 May 2016, Portorož, Slovenia},
	author       = {Adouane, Wafia and Johansson, Richard},
	year         = {2016},
	publisher    = {European Language Resources Association},
	ISBN         = {978-2-9517408-9-1},
}

@inProceedings{adouane-etal-2016-romanized-255457,
	title        = {Romanized Arabic and Berber Detection Using Prediction by Partial Matching and Dictionary Methods},
	abstract     = {Arabic is one of the Semitic languages written in Arabic script in its standard form. However, the recent rise of social media and new technologies has contributed considerably to the emergence of a new form of Arabic, namely Arabic written in Latin script, often called Romanized Arabic or Arabizi. While Romanized Arabic is an informal language, Berber or Tamazight uses Latin script in its standard form with some orthographic differences depending on the country it is used in. Both these languages are under-resourced and unknown to the state-of-the-art language identifiers. In this paper, we present an automatic language identifier for both Romanized Arabic and Romanized Berber. We also describe the built linguistic resources (large dataset and lexicons) including a wide range of Arabic dialects (Algerian, Egyptian, Gulf, Iraqi, Levantine, Moroccan and Tunisian dialects) as well as the most popular Berber varieties (Kabyle, Tashelhit, Tarifit, Tachawit and Tamzabit). We use the Prediction by Partial Matching (PPM) and dictionary-based methods. The methods reach a macro-average F-measure of 98.74% and 97.60% respectively.},
	booktitle    = {2016 IEEE/ACS 13th International Conference of Computer Systems and Applications (AICCSA)},
	author       = {Adouane, Wafia and Semmar, Nasredine and Johansson, Richard},
	year         = {2016},
	ISBN         = {978-1-5090-4320-0},
}

@inProceedings{adouane-etal-2016-arabicized-252492,
	title        = {Arabicized and Romanized Berber Automatic Identification},
	abstract     = {We present an automatic language identification tool for both Arabicized Berber (Berber written in the Arabic script) and Romanized Berber (Berber written in the Latin script). The focus is on short texts (social media content). We use a supervised machine learning method with character- and word-based n-gram models as features. We also describe the corpora used in this paper. For both Arabicized and Romanized Berber, character-based 5-grams score the best, giving an F-score of 99.50%.},
	booktitle    = {Proceedings of TICAM 2016},
	author       = {Adouane, Wafia and Semmar, Nasredine and Johansson, Richard},
	year         = {2016},
	publisher    = {IRCAM},
	address      = {Morocco},
}

@inProceedings{adouane-etal-2016-romanized-246849,
	title        = {Romanized Berber and Romanized Arabic Automatic Language Identification Using Machine Learning},
	abstract     = {The identification of the language of text/speech input is the first step to be able to properly do any language-dependent natural language processing. The task is called Automatic Language Identification (ALI). Being a well-studied field since the early 1960s, various methods have been applied to many standard languages. The standard ALI methods require datasets for training and use character/word-based n-gram models. However, social media and new technologies have contributed to the rise of informal and minority languages on the Web. The state-of-the-art automatic language identifiers fail to properly identify many of them. Romanized Arabic (RA) and Romanized Berber (RB) are cases of these informal languages which are under-resourced. The goal of this paper is twofold: detect RA and RB, at a document level, as separate languages and distinguish between them as they coexist in North Africa. We consider the task as a classification problem and use supervised machine learning to solve it. For both languages, character-based 5-grams combined with additional lexicons score the best, F-score of 99.75% and 97.77% for RB and RA respectively.},
	booktitle    = {Proceedings of the Third Workshop on NLP for Similar Languages, Varieties and Dialects, December 12, 2016, Osaka, Japan},
	pages        = {53--61},
	author       = {Adouane, Wafia and Semmar, Nasredine and Johansson, Richard},
	year         = {2016},
	publisher    = {Association for Computational Linguistics},
}

@inProceedings{adouane-etal-2016-asirem-246853,
	title        = {ASIREM Participation at the Discriminating Similar Languages Shared Task 2016},
	booktitle    = {Proceedings of the Third Workshop on NLP for Similar Languages, Varieties and Dialects, December 12, 2016, Osaka, Japan},
	pages        = {163--169},
	author       = {Adouane, Wafia and Semmar, Nasredine and Johansson, Richard},
	year         = {2016},
}

@inProceedings{adouane-etal-2016-automatic-246765,
	title        = {Automatic Detection of Arabicized Berber and Arabic Varieties},
	abstract     = {Automatic Language Identification (ALI) is the detection of the natural language of an input text by a machine. It is the first necessary step for any language-dependent natural language processing task. Various methods have been successfully applied to a wide range of languages, and the state-of-the-art automatic language identifiers are mainly based on character n-gram models trained on huge corpora. However, there are many languages which are not yet automatically processed, for instance minority and informal languages. Many of these languages are only spoken and do not exist in a written format. Social media platforms and new technologies have facilitated the emergence of written formats for these spoken languages based on pronunciation. The latter are not well represented on the Web, are commonly referred to as under-resourced languages, and the currently available ALI tools fail to properly recognize them. In this paper, we revisit the problem of ALI with a focus on Arabicized Berber and dialectal Arabic short texts. We introduce new resources and evaluate the existing methods. The results show that machine learning models combined with lexicons are well suited for detecting Arabicized Berber and different Arabic varieties and distinguishing between them, giving a macro-average F-score of 92.94%.},
	booktitle    = {Proceedings of the Third Workshop on NLP for Similar Languages, Varieties and Dialects, December 12, 2016, Osaka, Japan},
	pages        = {63--72},
	author       = {Adouane, Wafia and Semmar, Nasredine and Johansson, Richard and Bobicev, Victoria},
	year         = {2016},
}

@article{agebjorn-alfter-2019-review-281196,
	title        = {Review of Advanced Proficiency and Exceptional Ability in Second Languages},
	journal      = {Linguist List},
	author       = {Agebjörn, Anders and Alfter, David},
	year         = {2019},
	number       = {Jan 16},
}

@inProceedings{agfjord-etal-2014-grammar-208776,
	title        = {Grammar-based Suggestion Engine with Keyword Search},
	booktitle    = {The Fifth Swedish Language Technology Conference},
	author       = {Agfjord, Martin and Angelov, Krasimir and Fredelius, Per and Marinov, Svetoslav},
	year         = {2014},
}

@inProceedings{ahlberg-etal-2015-case-217988,
	title        = {A case study on supervised classification of Swedish pseudo-coordination},
	abstract     = {We present a case study on supervised classification of Swedish pseudo-coordination (SPC). The classification is attempted on the type-level with data collected from two data sets: a blog corpus and a fiction corpus. Two small experiments were designed to evaluate the feasibility of this task. The first experiment explored a classifier’s ability to discriminate pseudo-coordinations from ordinary verb coordinations, given a small labeled data set created during the experiment. The second experiment evaluated how well the classifier performed at detecting and ranking SPCs in a set of unlabeled verb coordinations, to investigate if it could be used as a semi-automatic discovery procedure to find new SPCs.},
	booktitle    = {Proceedings of the 20th Nordic Conference of Computational Linguistics, NODALIDA 2015, May 11-13, 2015, Vilnius, Lithuania},
	author       = {Ahlberg, Malin and Andersson, Peter and Forsberg, Markus and Tahmasebi, Nina},
	year         = {2015},
	publisher    = {Linköping University Electronic Press},
	address      = {Linköping},
	ISBN         = {978-91-7519-098-3},
}

@inProceedings{ahlberg-etal-2014-swedish-210083,
	title        = {Swedish FrameNet++: The Beginning of the End and the End of the Beginning},
	booktitle    = {Proceedings of the Fifth Swedish Language Technology Conference, Uppsala, 13-14 November 2014},
	author       = {Ahlberg, Malin and Borin, Lars and Dannélls, Dana and Forsberg, Markus and Toporowska Gronostaj, Maria and Friberg Heppin, Karin and Johansson, Richard and Kokkinakis, Dimitrios and Olsson, Leif-Jöran and Uppström, Jonatan},
	year         = {2014},
}

@inProceedings{ahlberg-etal-2013-korp-178355,
	title        = {Korp and Karp – a bestiary of language resources: the research infrastructure of Språkbanken},
	abstract     = {A central activity in Språkbanken, an R&D unit at the University of Gothenburg, is the systematic construction of a research infrastructure based on interoperability and widely accepted standards for metadata and data. The two main components of this infrastructure deal with text corpora and with lexical resources. For modularity and flexibility, both components have a backend, or server-side part, accessed through an API made up of a set of well-defined web services. This means that there can be any number of different user interfaces to these components, corresponding, e.g., to different research needs. Here, we will demonstrate the standard corpus and lexicon search interfaces, designed primarily for linguistic searches: Korp and Karp.},
	booktitle    = {Proceedings of the 19th Nordic Conference of Computational Linguistics (NODALIDA 2013), May 22–24, 2013, Oslo University, Norway. NEALT Proceedings Series 16},
	author       = {Ahlberg, Malin and Borin, Lars and Forsberg, Markus and Hammarstedt, Martin and Olsson, Leif-Jöran and Olsson, Olof and Roxendal, Johan and Uppström, Jonatan},
	year         = {2013},
	publisher    = {Linköping University Electronic Press},
	address      = {Linköping},
}

@inProceedings{ahlberg-etal-2016-sprakbankens-246063,
	title        = {Språkbanken’s Open Lexical Infrastructure},
	abstract     = {Karp is an open lexical infrastructure and a web-based tool for searching, exploring and developing lexical resources. Språkbanken currently hosts a number of lexicons in Karp and ongoing work aims at broadening the types of resources that can be developed in the system. This abstract gives a short overview of Karp's basic functionality, and describes some current projects and ongoing work.},
	booktitle    = {SLTC 2016. The Sixth Swedish Language Technology Conference. Umeå University, 17-18 November 2016},
	author       = {Ahlberg, Malin and Borin, Lars and Forsberg, Markus and Olsson, Olof and Schumacher, Anne and Uppström, Jonatan},
	year         = {2016},
}

@inProceedings{ahlberg-etal-2016-karp-246072,
	title        = {Karp: Språkbanken’s Open Lexical Infrastructure},
	booktitle    = {Globalex 2016, May 24, Portorož, Slovenia},
	author       = {Ahlberg, Malin and Borin, Lars and Forsberg, Markus and Olsson, Olof and Schumacher, Anne and Uppström, Jonatan},
	year         = {2016},
}

@inProceedings{ahlberg-bouma-2012-best-172769,
	title        = {A best-first anagram hashing filter for approximate string matching with generalized edit distance},
	abstract     = {This paper presents an efficient method for approximate string matching against a lexicon. We define a filter that for each source word selects a small set of target lexical entries, from which the best match is then selected using generalized edit distance, where edit operations can be assigned an arbitrary weight. The filter combines a specialized hash function with best-first search. Our work extends and improves upon a previously proposed hash-based filter, developed for matching with uniform-weight edit distance. We evaluate an approximate matching system implemented with the new best-first filter, by conducting several experiments on a historical corpus and a set of weighted rules taken from the literature. We present running times and discuss how performance varies using different stopping criteria and target lexica. The results show that the filter is suitable for large rule sets and million word corpora, and encourage further development.},
	booktitle    = {24th International Conference on Computational Linguistics COLING,  8-15 December 2012, Mumbai, India. Proceedings},
	author       = {Ahlberg, Malin and Bouma, Gerlof},
	year         = {2012},
}

@inProceedings{ahlberg-enache-2012-type-166722,
	title        = {A Type-Theoretical Wide-Coverage Computational Grammar for Swedish},
	booktitle    = {Proceedings of the 15th International Conference, TSD (Text, Speech and Dialogue) 2012, Brno, Czech Republic, September 3-7, 2012. LNCS series "Text, Speech and Dialogue"},
	author       = {Ahlberg, Malin and Enache, Ramona},
	year         = {2012},
	volume       = {7499},
	ISBN         = {978-3-642-32790-2},
	pages        = {183--190},
}

@inProceedings{ahlberg-etal-2014-semi-198791,
	title        = {Semi-supervised learning of morphological paradigms and lexicons},
	abstract     = {We present a semi-supervised approach to the problem of paradigm induction from inflection tables. Our system extracts generalizations from inflection tables, representing the resulting paradigms in an abstract form. The process is intended to be language-independent, and to provide human-readable generalizations of paradigms. The tools we provide can be used by linguists for the rapid creation of lexical resources. We evaluate the system through an inflection table reconstruction task using Wiktionary data for German, Spanish, and Finnish. With no additional corpus information available, the evaluation yields per word form accuracy scores on inflecting unseen base forms in different languages ranging from 87.81% (German nouns) to 99.52% (Spanish verbs); with additional unlabeled text corpora available for training, the scores range from 91.81% (German nouns) to 99.58% (Spanish verbs). We separately evaluate the system in a simulated task of Swedish lexicon creation, and show that on the basis of a small number of inflection tables, the system can accurately collect from a list of noun forms a lexicon with inflection information ranging from 100.0% correct (collect 100 words), to 96.4% correct (collect 1000 words).},
	booktitle    = {Proceedings of the 14th Conference of the European Chapter of the Association for Computational Linguistics, Gothenburg, Sweden, 26–30 April 2014},
	author       = {Ahlberg, Malin and Forsberg, Markus and Hulden, Mans},
	year         = {2014},
	ISBN         = {978-1-937284-78-7},
	pages        = {569--578},
}

@inProceedings{ahlberg-etal-2015-paradigm-217987,
	title        = {Paradigm classification in supervised learning of morphology},
	abstract     = {Supervised morphological paradigm learning by identifying and aligning the longest common subsequence found in inflection tables has recently been proposed as a simple yet competitive way to induce morphological patterns. We combine this non-probabilistic strategy of inflection table generalization with a discriminative classifier to permit the reconstruction of complete inflection tables of unseen words. Our system learns morphological paradigms from labeled examples of inflection patterns (inflection tables) and then produces inflection tables from unseen lemmas or base forms. We evaluate the approach on datasets covering 11 different languages and show that this approach results in consistently higher accuracies vis-a-vis other methods on the same task, thus indicating that the general method is a viable approach to quickly creating high-accuracy morphological resources.},
	booktitle    = {Proceedings of the 2015 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies},
	author       = {Ahlberg, Malin and Forsberg, Markus and Hulden, Mans},
	year         = {2015},
}

@inProceedings{alfter-2016-learning-241664,
	title        = {Learning the Learner: User Modeling in Intelligent Computer Assisted Language Learning Systems},
	booktitle    = {CEUR Workshop Proceedings, vol. 1618. UMAP 2016 Extended Proceedings. Halifax, Canada, July 13-16, 2016. Edited by: Federica Cena, Michel Desmarais, Darina Dicheva, Jie Zhang},
	author       = {Alfter, David},
	year         = {2016},
}

@article{alfter-agebjorn-2017-review-253359,
	title        = {Review of Developing, Modelling and Assessing Second Languages},
	journal      = {Linguist List},
	author       = {Alfter, David and Agebjörn, Anders},
	year         = {2017},
}

@inProceedings{alfter-bizzoni-2016-hybrid-246348,
	title        = {Hybrid Language Segmentation for Historical Documents},
	booktitle    = {Proceedings CLiC-it 2016 and EVALITA 2016, Napoli, Italy, December 5-7, 2016. Edited by: Pierpaolo Basile, Anna Corazza, Franco Cutugno, Simonetta Montemagni, Malvina Nissim, Viviana Patti, Giovanni Semeraro, Rachele Sprugnoli},
	author       = {Alfter, David and Bizzoni, Yuri},
	year         = {2016},
}

@inProceedings{alfter-etal-2016-from-246345,
	title        = {From Distributions to Labels: A Lexical Proficiency Analysis using Learner Corpora},
	abstract     = {In this work we look at how information from second language learner essay corpora can be used for the evaluation of unseen learner essays. Using a corpus of learner essays which have been graded by well-trained human assessors using the CEFR scale, we extract a list of word distributions over CEFR levels. For the analysis of unseen essays, we want to map each word to a so-called target CEFR level using this word list. However, the task of mapping from a distribution to a single label is not trivial. We are also investigating how we can evaluate the mapping from distribution to label. We show that the distributional profile of words from the essays, informed with the essays’ levels, consistently overlaps with our frequency-based method, in the sense that words holding the same level of proficiency as predicted by our mapping tend to cluster together in a semantic space. In the absence of a gold standard, this information can be useful to see how often a word is associated with the same level in two different models. Also, in this case we have a similarity measure that can show which words are more central to a given level and which words are more peripheral.},
	booktitle    = {Linköping Electronic Conference Proceedings},
	author       = {Alfter, David and Bizzoni, Yuri and Agebjörn, Anders and Volodina, Elena and Pilán, Ildikó},
	year         = {2016},
	publisher    = {Linköping University Electronic Press},
	ISBN         = {978-91-7685-633-8},
}

@inProceedings{alfter-etal-2018-from-275364,
	title        = {From Language Learning Platform to Infrastructure for Research on Language Learning},
	abstract     = {Lärka is an Intelligent Computer-Assisted Language Learning (ICALL) platform developed at Språkbanken, as a flexible and valuable source of additional learning material (e.g. via corpus-based exercises) and a support tool for both teachers and L2 learners of Swedish and students of (Swedish) linguistics. Nowadays, Lärka is being adapted into a central building block in an emerging second language research infrastructure within a larger context of the text-based research infrastructure developed by the national Swedish Language Bank, Språkbanken, and SWE-CLARIN.},
	booktitle    = {Proceedings of CLARIN-2018 conference, Pisa, Italy},
	author       = {Alfter, David and Borin, Lars and Pilán, Ildikó and Lindström Tiedemann, Therese and Volodina, Elena},
	year         = {2018},
}

@inProceedings{alfter-etal-2019-larka-281344,
	title        = {Lärka: From Language Learning Platform to Infrastructure for Research on Language Learning},
	abstract     = {Lärka is an Intelligent Computer-Assisted Language Learning (ICALL) platform developed at Språkbanken, as a flexible and valuable source of additional learning material (e.g. via corpus-based exercises) and a support tool for both teachers and L2 learners of Swedish and students of (Swedish) linguistics. Nowadays, Lärka is being adapted into a building block in an emerging second language research infrastructure within a larger context of the text-based research infrastructure developed by the national Swedish Language Bank, Språkbanken, and SWE-CLARIN. Lärka has recently received a new responsive user interface adapted to different devices with different screen sizes. Moreover, the system has also been augmented with new functionalities. These recent additions aim at improving the usability and the usefulness of the platform for pedagogical purposes. The most important development, though, is the adaptation of the platform to serve as a component in an e-infrastructure supporting research on language learning and multilingualism. Thanks to Lärka’s service-oriented architecture, most functionalities are also available as web services which can be easily re-used by other applications.},
	booktitle    = {Linköping Electronic Conference Proceedings},
	author       = {Alfter, David and Borin, Lars and Pilán, Ildikó and Lindström Tiedemann, Therese and Volodina, Elena},
	year         = {2019},
	publisher    = {Linköping University Press},
	address      = {Linköping},
	ISBN         = {978-91-7685-034-3},
}

@inProceedings{alfter-graen-2019-interconnecting-285731,
	title        = {Interconnecting lexical resources and word alignment: How do learners get on with particle verbs?},
	abstract     = {In this paper, we present a prototype for an online exercise aimed at learners of English and Swedish that serves multiple purposes. The exercise allows learners of the aforementioned languages to train their knowledge of particle verbs, receiving clues from the exercise application. The users themselves decide which clue to receive and pay in virtual currency for each, which provides us with valuable information about the utility of the clues that we provide as well as the learners’ willingness to trade virtual currency for accuracy in their choice. As resources, we use a list with annotated levels from the proficiency scale defined by the Common European Framework of Reference (CEFR) and a multilingual corpus with syntactic dependency relations and word alignment for all language pairs. From the latter resource, we extract translation equivalents for particle verb constructions together with a list of parallel corpus examples that can be used as clues in the exercise.},
	booktitle    = {Linköping Electronic Conference Proceedings, No. 167, NEALT Proceedings of the 22nd Nordic Conference on Computational Linguistics (NoDaLiDa), September 30 - October 2, Turku, Finland / Editor(s): Mareike Hartmann and Barbara Plank},
	author       = {Alfter, David and Graën, Johannes},
	year         = {2019},
	publisher    = {Linköping University Electronic Press, Linköpings universitet},
	address      = {Linköping},
	ISBN         = {978-91-7929-995-8},
}

@inProceedings{alfter-etal-2019-legato-285625,
	title        = {LEGATO: A flexible lexicographic annotation tool},
	abstract     = {This article is a report from an ongoing project aiming at analyzing lexical and grammatical competences of Swedish as a second language (L2). To facilitate lexical analysis, we need access to metalinguistic information about relevant vocabulary that L2 learners can use and understand. The focus of the current article is on the lexical annotation of the vocabulary scope for a range of lexicographical aspects, such as morphological analysis, valency, types of multi-word units, etc. We perform parts of the analysis automatically, and other parts manually. The rationale behind this is that where there is no possibility to add information automatically, manual effort needs to be added. To facilitate the latter, a tool, LEGATO, has been designed, implemented and is currently undergoing active testing.},
	booktitle    = {Linköping Electronic Conference Proceedings, No. 167, NEALT Proceedings of the 22nd Nordic Conference on Computational Linguistics (NoDaLiDa), September 30 - October 2, Turku, Finland / Editor(s): Mareike Hartmann and Barbara Plank},
	author       = {Alfter, David and Lindström Tiedemann, Therese and Volodina, Elena},
	year         = {2019},
	publisher    = {Linköping University Electronic Press},
	address      = {Linköping},
	ISBN         = {978-91-7929-995-8},
}

@inProceedings{alfter-pilan-2018-complex-276407,
	title        = {SB@GU at the Complex Word Identification 2018 Shared Task},
	booktitle    = {Proceedings of the Thirteenth Workshop on Innovative Use of NLP for Building Educational Applications, New Orleans, Louisiana, June 5, 2018},
	author       = {Alfter, David and Pilán, Ildikó},
	year         = {2018},
	publisher    = {Association for Computational Linguistics},
	address      = {Stroudsburg, PA, USA},
	ISBN         = {978-1-948087-11-7},
}

@inProceedings{alfter-volodina-2016-modeling-246347,
	title        = {Modeling Individual Learner Knowledge in a Computer Assisted Language Learning System},
	booktitle    = {Proceedings of the Sixth Swedish Language Technology Conference. Umeå University, 17-18 November 2016},
	author       = {Alfter, David and Volodina, Elena},
	year         = {2016},
}

@inProceedings{alfter-volodina-2018-whole-275362,
	title        = {Is the whole greater than the sum of its parts? A corpus-based pilot study of the lexical complexity in multi-word expressions},
	abstract     = {Multi-word expressions (MWEs) are assumed to be good predictors of language learner proficiency; however, there are no methods to establish at which level which MWEs can be assumed to be known. In this study we look at whether the target (proficiency) level of MWEs can be calculated based on the known levels of their constituents.},
	booktitle    = {Proceedings of SLTC 2018, Stockholm, October 7-9, 2018},
	author       = {Alfter, David and Volodina, Elena},
	year         = {2018},
}

@inProceedings{alfter-volodina-2018-towards-275368,
	title        = {Towards Single Word Lexical Complexity Prediction},
	abstract     = {In this paper we present work-in-progress where we investigate the usefulness of previously created word lists to the task of single-word lexical complexity analysis and prediction of the complexity level for learners of Swedish as a second language. The word lists used map each word to a single CEFR level, and the task consists of predicting CEFR levels for unseen words. In contrast to previous work on word-level lexical complexity, we experiment with topics as additional features and show that linking words to topics significantly increases accuracy of classification.},
	booktitle    = {Proceedings of the Thirteenth Workshop on Innovative Use of NLP for Building Educational Applications, New Orleans, Louisiana, June 5, 2018},
	author       = {Alfter, David and Volodina, Elena},
	year         = {2018},
	publisher    = {Association for Computational Linguistics},
	address      = {Stroudsburg, PA},
	ISBN         = {978-1-948087-11-7},
}

@inProceedings{alfter-volodina-2019-from-285728,
	title        = {From river to bank: The importance of sense-based graded word lists},
	booktitle    = {EUROCALL 2019 - CALL and Complexity, Book of Abstracts, Louvain-la-Neuve, Belgium, 28-31 August 2019},
	author       = {Alfter, David and Volodina, Elena},
	year         = {2019},
}

@misc{alfter-etal-2019-proceedings-285613,
	title        = {Proceedings of the 8th Workshop on Natural Language Processing for Computer Assisted Language Learning (NLP4CALL 2019), September 30, Turku, Finland},
	abstract     = {The workshop series on Natural Language Processing (NLP) for Computer-Assisted Language Learning (NLP4CALL) is a meeting place for researchers working on the integration of Natural Language Processing and Speech Technologies in CALL systems and exploring the theoretical and methodological issues arising in this connection. The latter include, among others, insights from Second Language Acquisition (SLA) research, on the one hand, and the promotion of “Computational SLA” through setting up Second Language research infrastructure(s), on the other.

The intersection of Natural Language Processing (or Language Technology / Computational Linguistics) and Speech Technology with Computer-Assisted Language Learning (CALL) brings “understanding” of language to CALL tools, thus making CALL intelligent. This fact has given this area of research its name – Intelligent CALL, ICALL. As the definition suggests, apart from having excellent knowledge of Natural Language Processing and/or Speech Technology, ICALL researchers need good insights into second language acquisition theories and practices, as well as knowledge of second language pedagogy and didactics. This workshop therefore invites a wide range of ICALL-relevant research, including studies where NLP-enriched tools are used for testing SLA and pedagogical theories, and vice versa, where SLA theories, pedagogical practices or empirical data are modeled in ICALL tools.

The NLP4CALL workshop series is aimed at bringing together competences from these areas for sharing experiences and brainstorming around the future of the field.},
	author       = {Alfter, David and Volodina, Elena and Borin, Lars and Pilán, Ildikó and Lange, Herbert},
	year         = {2019},
	publisher    = {Linköping University Electronic Press, Linköpings universitet},
	address      = {Linköping},
	ISBN         = {978-91-7929-998-9},
}

@edited_book{allen-etal-2009-svensk-99825,
	title        = {Svensk ordbok utgiven av Svenska Akademien. 1-2},
	editor       = {Allén, Sture (scientific adviser) and Berg, Daniel and Berg, Sture and Gellerstam, Martin and Holmer, Louise and Hult, Ann-Kristin and Lindstrand, Susanne and Lövfors, Sven and Malmgren, Sven-Göran and Sjögreen, Christian and Sköldberg, Emma and Tegner, Lennart and Toporowska Gronostaj, Maria},
	year         = {2009},
	ISBN         = {978-91-1-302267-3},
}

@inProceedings{allvin-etal-2010-characteristics-120479,
	title        = {Characteristics and Analysis of Finnish and Swedish Clinical Intensive Care Nursing Narratives},
	abstract     = {We present a comparative study of Finnish and Swedish free-text nursing narratives from intensive care. Although the two languages are linguistically very dissimilar, our hypothesis is that there are similarities that are important and interesting from a language technology point of view. This may have implications when building tools to support producing and using health care documentation. We perform a comparative qualitative analysis based on structure and content, as well as a comparative quantitative analysis, on Finnish and Swedish Intensive Care Unit (ICU) nursing narratives. Our findings are that ICU nursing narratives in Finland and Sweden have many properties in common, but that many of these are challenging when it comes to developing language technology tools.},
	booktitle    = {Proceedings of the NAACL HLT 2010 Second Louhi Workshop on Text and Data Mining of Health Documents},
	author       = {Allvin, H. and Carlsson, E. and Dalianis, H. and Danielsson-Ojala, R. and Daudaravicius, V. and Hassel, M. and Kokkinakis, Dimitrios and Lundgren-Laine, H. and Nilsson, G. and Nytrø, Ø. and Salanterä, S. and Skeppstedt, M. and Suominen, H. and Velupillai, S.},
	year         = {2010},
	pages        = {53--60},
}

@misc{andersen-forsberg-2012-sibirientyska-162958,
	title        = {Sibirientyska},
	abstract     = {German in Siberia is a corpus of transcriptions of German spoken in the region of Krasnoyarsk (Russia). The corpus contains about 34 000 running words. Code-switching to Russian and verb forms are annotated (Russian word forms in brackets like [vot], finite verb forms (FINIT), non-finite verb forms (INFIN)). The transcription and annotation of the corpus have been established in collaboration with the Astafyev University Krasnoyarsk. The corpus is part of a research project at the University of Gothenburg, see http://www.sprak.gu.se/kontakta-oss/larare/andersen-christiane/syntax-in-contact/ The database is currently in the test phase.},
	author       = {Andersen, Christiane and Forsberg, Markus},
	year         = {2012},
	publisher    = {University of Gothenburg},
	address      = {Göteborg},
}

@misc{andersen-etal-2015-sibirientyska-215757,
	title        = {Sibirientyska kvinnor (Siberian German women)},
	abstract     = {The corpus consists of dialogs between four women born between 1927 and 1937 in the Soviet Volga Republic. Their mother tongue is a German variety spoken in Russia since the second half of the 18th century. Since the end of the Second World War, the women have lived in the region of Krasnoyarsk. They talk about their backgrounds and their everyday lives in the village. The corpus consists of about 16 000 words. Russian words and hybrids are given in [brackets], the turns of the interviewers are in {brackets}; all verb forms have the attribute FINIT or INFINIT. For more information on the research project, see Syntax in Contact.},
	author       = {Andersen, Christiane and Forsberg, Markus and Hammarstedt, Martin and Pankow, Alexander},
	year         = {2015},
	publisher    = {University of Gothenburg},
	address      = {Göteborg},
}

@article{andersson-ahlberg-2013-towards-181972,
	title        = {Towards automatic tracking of lexical change: linking historical lexical resources},
	journal      = {NEALT Proceedings Series},
	author       = {Andersson, Peter and Ahlberg, Malin},
	year         = {2013},
	volume       = {18},
}

@inProceedings{andreasson-etal-2009-swedish-102211,
	title        = {Swedish CLARIN activities},
	booktitle    = {Proceedings of the Nodalida 2009 workshop on CLARIN activities in the Nordic countries. NEALT Proceedings Series},
	author       = {Andréasson, Maia and Borin, Lars and Forsberg, Markus and Beskow, Jonas and Carlson, Rolf and Edlund, Jens and Elenius, Kjell and Hellmer, Kahl and House, David and Merkel, Magnus and Forsbom, Eva and Megyesi, Beáta and Eriksson, Anders and Strömqvist, Sven},
	year         = {2009},
	volume       = {5},
	pages        = {1--5},
}

@techreport{andreasson-etal-2008-habeas-102220,
	title        = {Habeas Corpus: A survey for SNK - a Swedish national corpus},
	author       = {Andréasson, Maia and Borin, Lars and Merkel, Magnus},
	year         = {2008},
	publisher    = {University of Gothenburg},
	address      = {Göteborg},
}

@inProceedings{antonsson-etal-2019-discourse-284038,
	title        = {Discourse in Mild Cognitive Impairment},
	abstract     = {This paper reports on how persons with mild cognitive impairment (MCI) perform on two types of narrative tasks compared to a group of healthy controls (HC). The first task is a widely used picture description task and the other task is a more complex discourse task. Since the latter task puts higher demands on cognitive linguistic skills, as seen in previous research, we expected this task to be more efficient in discriminating between the two groups. The results confirm this hypothesis.},
	booktitle    = {Proceedings of the 10th International Conference of Experimental Linguistics, 25-27 September 2019, Lisbon, Portugal},
	editor       = {Antonis Botinis},
	author       = {Antonsson, Malin and Lundholm Fors, Kristina and Kokkinakis, Dimitrios},
	year         = {2019},
	publisher    = {ExLing Society},
	ISBN         = {978-618-84585-0-5},
}

@inProceedings{bamutura-ljunglof-2019-towards-284293,
	title        = {Towards a resource grammar for Runyankore and Rukiga},
	abstract     = {Currently, there is a lack of computational grammar resources for many under-resourced languages which limits the ability to develop Natural Language Processing (NLP) tools and applications such as Multilingual Document Authoring, Computer-Assisted Language Learning (CALL) and Low-Coverage Machine Translation (MT) for these languages. In this paper, we present our attempt to formalise the grammar of two such languages: Runyankore and Rukiga. For this formalisation we use the Grammatical Framework (GF) and its Resource Grammar Library (GF-RGL).},
	booktitle    = {WiNLP 2019, the 3rd Workshop on Widening NLP, Florence, Italy, 28th July 2019},
	author       = {Bamutura, David and Ljunglöf, Peter},
	year         = {2019},
}

@techreport{barnett-etal-2015-state-234687,
	title        = {State Chart XML (SCXML): State Machine Notation for Control Abstraction},
	abstract     = {This document describes SCXML, or the "State Chart extensible Markup Language". SCXML provides a generic state-machine based execution environment based on CCXML and Harel State Tables.},
	author       = {Barnett, Jim and Akolkar, Rahul and Auburn, RJ and Bodell, Michael and Burnett, Daniel C. and Carter, Jerry and McGlashan, Scott and Lager, Torbjörn and Helbing, Mark and Hosn, Rafah and Raman, T.V. and Reifenrath, Klaus and Rosenthal, No'am and Roxendal, Johan},
	year         = {2015},
	publisher    = {World Wide Web Consortium},
	address      = {Massachusetts, USA},
}

@inProceedings{baud-etal-2005-interchanging-33867,
	title        = {Interchanging lexical information for a multilingual dictionary},
	booktitle    = {AMIA 2005 Proceedings},
	author       = {Baud, Robert and Nyström, Mikael and Borin, Lars and Evans, Roger and Schulz, Stefan and Zweigenbaum, Pierre},
	year         = {2005},
	pages        = {31--35},
}

@inProceedings{bennaceur-etal-2012-machine-160393,
	title        = {Machine Learning for Emergent Middleware},
	abstract     = {Highly dynamic and heterogeneous distributed systems are challenging today's middleware technologies. Existing middleware paradigms are unable to deliver on their most central promise, which is offering interoperability. In this paper, we argue for the need to dynamically synthesise distributed system infrastructures according to the current operating environment, thereby generating "Emergent Middleware" to mediate interactions among heterogeneous networked systems that interact in an ad hoc way. The paper outlines the overall architecture of Enablers underlying Emergent Middleware, and in particular focuses on the key role of learning in supporting such a process, spanning statistical learning to infer the semantics of networked system functions and automata learning to extract the related behaviours of networked systems.},
	booktitle    = {Proceedings of the Joint Workshop on Intelligent Methods for Software System Engineering (JIMSE)},
	author       = {Bennaceur, Amel and Howar, Falk and Issarny, Valérie and Johansson, Richard and Moschitti, Alessandro and Spalazzese, Romina and Steffen, Bernhard and Sykes, Daniel},
	year         = {2012},
}

@inProceedings{bennaceur-etal-2013-automatic-158812,
	title        = {Automatic Service Categorisation through Machine Learning in Emergent Middleware},
	booktitle    = {Lecture Notes in Computer Science},
	author       = {Bennaceur, Amel and Johansson, Richard and Moschitti, Alessandro and Sykes, Daniel and Issarny, Valérie},
	year         = {2013},
	volume       = {7542},
	pages        = {133--149},
}

@inProceedings{berdicevskis-2020-older-290636,
	title        = {Older English Words Are More Polysemous},
	booktitle    = {The Evolution of Language: Proceedings of the 13th International Conference (EvoLang13)},
	pages        = {14--21},
	author       = {Berdicevskis, Aleksandrs},
	year         = {2020},
	publisher    = {The Evolution of Language Conferences},
	address      = {Nijmegen},
}

@inProceedings{bergenmar-olsson-2012-connecting-169845,
	title        = {Connecting European Women Writers. The Selma Lagerlöf Archive and Women Writers Database},
	booktitle    = {Digital Humanities 2012. 16-20 July 2012, Hamburg. Book of Abstracts},
	author       = {Bergenmar, Jenny and Olsson, Leif-Jöran},
	year         = {2012},
}

@inProceedings{bergenmar-olsson-2015-tracing-228773,
	title        = {Tracing Cultural Transfer Through Multiple Translation Analysis. The Case of the Swedish 19th-Century Bourgeois Novel in German and Czech},
	abstract     = {In the last decades, Comparative Literature has become more directed towards questions of transculturality. This gives translations of literary texts an important role as a vehicle not just for the transfer of text and language, but also of ideas and cultures. Digital methods for comparing multiple translations within and across languages might prove to be important for exploring how, for example, a Swedish 19th-century bourgeois novel is reframed in Czech translations. The chosen example is A Merchant House (1859) by Emilie Flygare-Carlén (1807–1892), who was one of the most popular authors in Czech-speaking regions in the late 19th century. In this paper, existing collation tools are used for comparing two different Czech translations (1872 and 1910), by two different translators. This might both reveal how the gender, context and position of the translator colours the literary text and how the translations are adapted to changing literary trends. Furthermore, parallel text alignment is tried as a method for comparing across languages, since the Czech translation is made from a German translation. Are the Czech translations subject to "foreignization" or "domestication"? Or do they retain the same traits as the German translation, which is the source of the first Czech translation? Does the systematic comparison of multiple translations contribute to the understanding of how texts move from certain gendered cultural contexts and ideologies to others?},
	booktitle    = {Digital Literary Studies. International Conference May 14-15 2015, Coimbra, Portugal},
	author       = {Bergenmar, Jenny and Olsson, Leif-Jöran},
	year         = {2015},
}

@inProceedings{bjorkner-etal-2017-voice-256522,
	title        = {Voice acoustic parameters for detecting signs of early cognitive impairment},
	abstract     = {Aiding the detection of very early cognitive impairment in Alzheimer's disease (AD) and assessing the disease progression are essential foundations for effective psychological assessment, diagnosis and planning. Efficient tools for routine dementia screening in primary health care, particularly non-invasive and cost-effective methods, are desirable. The aim of this study is to find out if voice acoustic analysis can be a useful tool for detecting signs of early cognitive impairment.},
	booktitle    = {PEVOC (PanEuropean Voice Conference) 12, August 30th - September 1st 2017, Ghent, Belgium},
	author       = {Björkner, Eva and Lundholm Fors, Kristina and Kokkinakis, Dimitrios and Nordlund, Arto},
	year         = {2017},
}

@incollection{borin-2004-language-33976,
	title        = {Language technology resources for less prevalent languages: Will the Münchhausen Model work?},
	booktitle    = {Holmboe, H. (ed). Nordisk sprogteknologi 2003. Nordic language technology. Årbog for Nordisk Sprogteknologisk Forskningsprogram 2000-2004},
	author       = {Borin, Lars},
	year         = {2004},
	publisher    = {Museum Tusculanums Forlag},
	address      = {København},
	ISBN         = {87-7289-997-2},
	pages        = {71--82},
}

@article{borin-2005-mannen-33865,
	title        = {Mannen är faderns mormor: Svenskt associationslexikon reinkarnerat},
	journal      = {LexicoNordica},
	author       = {Borin, Lars},
	year         = {2005},
	volume       = {12},
	pages        = {39--54},
}

@incollection{borin-2006-sparv-44950,
	title        = {Sparv i tranedansen eller fisken i vattnet? Språkteknologi och språklärande},
	booktitle    = {Från vision till praktik: Språkutbildning och informationsteknik},
	author       = {Borin, Lars},
	year         = {2006},
	publisher    = {NSHU - Myndigheten för nätverk och samarbete inom högre utbildning},
	address      = {Härnösand},
	ISBN         = {978-91-975425-8-6},
	pages        = {25--49},
}

@incollection{borin-2006-supporting-33863,
	title        = {Supporting lesser-known languages: The promise of language technology},
	booktitle    = {Saxena, A. & Borin, L. (eds). Lesser-known languages of South Asia. Status and policies, case studies and applications of information technology},
	author       = {Borin, Lars},
	year         = {2006},
	publisher    = {Mouton de Gruyter},
	address      = {Berlin},
	ISBN         = {3-11-018976-3},
	pages        = {317--337},
}

@incollection{borin-2006-gar-33864,
	title        = {Vi som går köksvägen: Språkteknologer och korpuslingvister i Litteraturbanken},
	booktitle    = {Börjesson, M. (red). Fältanteckningar: Utbildnings- och kultursociologiska texter tillägnade Donald Broady},
	author       = {Borin, Lars},
	year         = {2006},
	publisher    = {Forskningsgruppen för utbildnings- och kultursociologi (ILU), Uppsala universitet},
	address      = {Uppsala},
	ISBN         = {91-631-8807-4},
	pages        = {399--404},
}

@article{borin-2008-review-72506,
	title        = {Review of Stig Johansson: Seeing through multilingual corpora: On the use of corpora in contrastive studies},
	journal      = {ICAME Journal},
	author       = {Borin, Lars},
	year         = {2008},
	volume       = {32},
	pages        = {261--267},
}

@incollection{borin-2008-lemma-72507,
	title        = {Lemma, lexem eller mittemellan? Ontologisk ångest i den digitala domänen},
	booktitle    = {Nog ordat? Festskrift till Sven-Göran Malmgren},
	author       = {Borin, Lars},
	year         = {2008},
	publisher    = {University of Gothenburg},
	address      = {Göteborg},
	pages        = {59--67},
}

@inProceedings{borin-2009-linguistic-102209,
	title        = {Linguistic diversity in the information society},
	booktitle    = {Proceedings of the SALTMIL 2009 workshop on Information Retrieval and Information Extraction for Less Resourced Languages},
	author       = {Borin, Lars},
	year         = {2009},
	ISBN         = {978-84-692-4940-6},
	pages        = {1--7},
}

@techreport{borin-2009-bush-102214,
	title        = {One in the bush: Low-density language technology},
	author       = {Borin, Lars},
	year         = {2009},
	publisher    = {University of Gothenburg},
	address      = {Göteborg},
}

@incollection{borin-2010-avtryck-136656,
	title        = {Avtryck från WGLN-projekten i forskningen},
	booktitle    = {Kunskapens nya världar},
	author       = {Borin, Lars},
	year         = {2010},
	publisher    = {Uppsala universitet, Uppsala Learning Lab},
	address      = {Uppsala},
	ISBN         = {978-91-506-2189-1},
	pages        = {127--133},
}

@article{borin-2010-zipf-130257,
	title        = {Med Zipf mot framtiden - en integrerad lexikonresurs för svensk språkteknologi},
	journal      = {LexicoNordica},
	author       = {Borin, Lars},
	year         = {2010},
	volume       = {17},
	pages        = {35--54},
}

@incollection{borin-2012-core-162377,
	title        = {Core vocabulary: A useful but mystical concept in some kinds of linguistics},
	booktitle    = {Shall we play the festschrift game? Essays on the Occasion of Lauri Carlson's 60th Birthday},
	author       = {Borin, Lars},
	year         = {2012},
	publisher    = {Springer},
	address      = {Berlin},
	ISBN         = {978-3-642-30772-0},
	pages        = {53--65},
}

@incollection{borin-2013-measuring-184758,
	title        = {The why and how of measuring linguistic differences},
	booktitle    = {Approaches to Measuring Linguistic Differences},
	editor       = {Lars Borin and Anju Saxena},
	author       = {Borin, Lars},
	year         = {2013},
	publisher    = {De Gruyter Mouton},
	address      = {Berlin},
	ISBN         = {978-3-11-030525-8},
	pages        = {3--26},
}

@incollection{borin-2016-lexikografi-246607,
	title        = {Lexikografi för maskiner och lexikografi för människor},
	booktitle    = {Framtidens lexikografi: Rapport från ett symposium i Göteborg 5 oktober 2012},
	author       = {Borin, Lars},
	year         = {2016},
	publisher    = {Meijerbergs institut vid Göteborgs universitet},
	address      = {Göteborg},
	ISBN         = {978-91-87850-01-1},
	pages        = {9--27},
}

@inProceedings{borin-etal-2014-bring-198549,
	title        = {Bring vs. MTRoget: Evaluating automatic thesaurus translation},
	booktitle    = {Proceedings of LREC 2014, May 26-31, 2014, Reykjavik, Iceland},
	author       = {Borin, Lars and Allwood, Jens and de Melo, Gerard},
	year         = {2014},
	publisher    = {European Language Resources Association},
	ISBN         = {978-2-9517408-8-4},
}

@techreport{borin-etal-2016-free-233768,
	title        = {A free cloud service for OCR / En fri molntjänst för OCR},
	author       = {Borin, Lars and Bouma, Gerlof and Dannélls, Dana},
	year         = {2016},
	publisher    = {University of Gothenburg},
	address      = {Göteborg},
}

@book{borin-etal-2012-svenska-163410,
	title        = {Svenska språket i den digitala tidsåldern},
	author       = {Borin, Lars and Brandt, Martha and Edlund, Jens and Lindh, Jonas and Parkvall, Mikael},
	year         = {2012},
	publisher    = {Springer},
	address      = {Berlin},
	ISBN         = {978-3-642-30831-4},
}

@incollection{borin-etal-2013-intercontinental-184760,
	title        = {The Intercontinental Dictionary Series – a rich and principled database for language comparison},
	booktitle    = {Approaches to Measuring Linguistic Differences},
	editor       = {Lars Borin and Anju Saxena},
	author       = {Borin, Lars and Comrie, Bernard and Saxena, Anju},
	year         = {2013},
	publisher    = {De Gruyter Mouton},
	address      = {Berlin},
	ISBN         = {978-3-11-030525-8},
	pages        = {285--302},
}

@inProceedings{borin-etal-2014-representing-204731,
	title        = {Representing Swedish Lexical Resources in RDF with lemon},
	abstract     = {The paper presents an ongoing project which aims to publish Swedish lexical-semantic resources using Semantic Web and Linked Data technologies. In this article, we highlight the practical conversion methods and challenges of converting three of the Swedish language resources in RDF with lemon.},
	booktitle    = {Proceedings of the ISWC 2014 Posters & Demonstrations Track, a track within the 13th International Semantic Web Conference (ISWC 2014)},
	author       = {Borin, Lars and Dannélls, Dana and Forsberg, Markus and McCrae, John P.},
	year         = {2014},
	volume       = {1272},
	pages        = {329--332},
}

@inProceedings{borin-etal-2009-thinking-110343,
	title        = {Thinking Green: Toward Swedish FrameNet++},
	abstract     = {Access to multi-layered lexical, grammatical and semantic information representing text content is a prerequisite for efficient automatic understanding and generation of natural language. A FrameNet is considered a valuable resource for both linguistics and language technology research that may contribute to the achievement of these goals. Currently, FrameNet-like resources exist for a few languages, including some domain-specific and multilingual initiatives (Dolbey et al., 2006; Boas, 2009; Uematsu et al., 2009; Venturi et al., 2009), but are unavailable for most languages, including Swedish, although there have been some pilot studies exploring the semi-automatic acquisition of Swedish frames (Johansson & Nugues, 2006; Borin et al., 2007). At the University of Gothenburg, we are now embarking on a project to build a Swedish FrameNet-like resource. A novel feature of this project is that the Swedish FrameNet will be an integral part of a larger many-faceted lexical resource. Hence the name Swedish FrameNet++ (SweFN++).},
	booktitle    = {FrameNet Masterclass and Workshop},
	author       = {Borin, Lars and Dannélls, Dana and Forsberg, Markus and Toporowska Gronostaj, Maria and Kokkinakis, Dimitrios},
	year         = {2009},
}

@inProceedings{borin-etal-2010-past-110368,
	title        = {The past meets the present in Swedish FrameNet++},
	abstract     = {The paper is about a recently initiated project which aims at the development of a Swedish FrameNet as an integral part of a larger lexical resource, hence the name “Swedish FrameNet++” (SweFN++). It focuses on reuse of free electronic resources and their role in the acquisition and population of Swedish frames. After a brief overview of Swedish resources, we reflect on three approaches to recycling the available lexical data in a semi-automatic manner. SweFN++ will be a multi-functional resource supporting research within lexicology and linguistics as well as different applications within computational lexicography and language technology, not to mention e-science.},
	booktitle    = {14th EURALEX International Congress},
	author       = {Borin, Lars and Dannélls, Dana and Forsberg, Markus and Toporowska Gronostaj, Maria and Kokkinakis, Dimitrios},
	year         = {2010},
	pages        = {269--281},
}

@article{borin-etal-2010-swedish-129126,
	title        = {Swedish FrameNet++},
	journal      = {Swedish Language Technology Conference 2010},
	author       = {Borin, Lars and Dannélls, Dana and Forsberg, Markus and Toporowska Gronostaj, Maria and Kokkinakis, Dimitrios},
	year         = {2010},
}

@incollection{borin-etal-2018-linguistics-269084,
	title        = {Linguistics vs. language technology in constructicon building and use},
	abstract     = {In this chapter, we describe the close interaction of linguists and language technologists in the Swedish constructicon project. This kind of collaboration is not so common today, because of the way that language technology has developed in recent decades, but in our case the collaboration has been very successful, and constituted a genuine instance of cross-fertilization, where an evolving language technology infrastructure and a computational lexical macroresource described in the chapter has formed an integral part of the Swedish constructicon development environment, while at the same time the structured linguistic knowledge described in the constructicon has informed the language technology making up the infrastructure.},
	booktitle    = {Constructicography: Constructicon development across languages},
	editor       = {Benjamin Lyngfelt and Lars Borin and Kyoko Ohara and Tiago Timponi Torrent},
	author       = {Borin, Lars and Dannélls, Dana and Gruzitis, Normunds},
	year         = {2018},
	publisher    = {John Benjamins},
	address      = {Amsterdam},
	ISBN         = {9789027263865},
	pages        = {229--253},
}

@article{borin-etal-2014-geographic-198286,
	title        = {Geographic visualization of place names in Swedish literary texts},
	abstract     = {This article describes the development of a geographical information system (GIS) at Språkbanken as part of a visualization solution to be used in an archive of historical Swedish literary texts. The research problems we are aiming to address concern orthographic and morphological variation, missing place names, and missing place name coordinates. Some of these problems form a central part in the development of methods and tools for the automatic analysis of historical Swedish literary texts at our research unit. We discuss the advantages and challenges of covering large-scale spelling variation in place names from different sources and in generating maps with focus on different time periods. },
	journal      = {Literary & Linguistic Computing},
	author       = {Borin, Lars and Dannélls, Dana and Olsson, Leif-Jöran},
	year         = {2014},
	volume       = {29},
	number       = {3},
	pages        = {400--404},
}

@article{borin-etal-2014-introduction-202127,
	title        = {Introduction: Constructions and frames meet language technology},
	journal      = {Constructions and Frames},
	author       = {Borin, Lars and de Melo, Gerard and Friberg Heppin, Karin and Torrent, Tiago Timponi},
	year         = {2014},
	volume       = {6},
	number       = {1},
	pages        = {1--8},
}

@inProceedings{borin-etal-2013-mining-188846,
	title        = {Mining semantics for culturomics: towards a knowledge-based approach},
	abstract     = {The massive amounts of text data made available through the Google Books digitization project have inspired a new field of big-data textual research. Named culturomics, this field has attracted the attention of a growing number of scholars over recent years. However, initial studies based on these data have been criticized for not referring to relevant work in linguistics and language technology. This paper provides some ideas, thoughts and first steps towards a new culturomics initiative, based this time on Swedish data, which pursues a more knowledge-based approach than previous work in this emerging field. The amount of new Swedish text produced daily and older texts being digitized in cultural heritage projects grows at an accelerating rate. These volumes of text being available in digital form have grown far beyond the capacity of human readers, leaving automated semantic processing of the texts as the only realistic option for accessing and using the information contained in them. The aim of our recently initiated research program is to advance the state of the art in language technology resources and methods for semantic processing of Big Swedish text and focus on the theoretical and methodological advancement of the state of the art in extracting and correlating information from large volumes of Swedish text using a combination of knowledge-based and statistical methods.},
	booktitle    = {2013 ACM International Workshop on Mining Unstructured Big Data Using Natural Language Processing, UnstructureNLP 2013, Held at 22nd ACM International Conference on Information and Knowledge Management, CIKM 2013; San Francisco, CA; United States; 28 October 2013 through 28 October 2013},
	author       = {Borin, Lars and Dubhashi, Devdatt and Forsberg, Markus and Johansson, Richard and Kokkinakis, Dimitrios and Nugues, Pierre},
	year         = {2013},
	ISBN         = {978-1-4503-2415-1},
	pages        = {3--10},
}

@incollection{borin-edlund-2018-language-269047,
	title        = {Language technology and 3rd wave HCI: Towards phatic communication and situated interaction},
	abstract     = {In the field of language technology, researchers are starting to pay more attention to various interactional aspects of language – a development prompted by a confluence of factors, and one which applies equally to the processing of written and spoken language. Notably, the so-called ‘phatic’ aspects of linguistic communication are coming into focus in this work, where linguistic interaction is increasingly recognized as being fundamentally situated. This development resonates well with the concerns of third wave HCI, which involves a shift in focus from stating the requirements on HCI design primarily in terms of “context-free” information flow, to a view where it is recognized that HCI – just like interaction among humans – is indissolubly embedded in complex, shifting contexts. These – together with the different backgrounds and intentions of interaction participants – shape the interaction in ways which are not readily understandable in terms of rational information exchange, but which are nevertheless central aspects of the interaction, and which therefore must be taken into account in HCI design, including its linguistic aspects, forming the focus of this chapter.},
	booktitle    = {New Directions in Third Wave Human-Computer Interaction: Volume 1 - Technologies},
	editor       = {Michael Filimowicz and Veronika Tzankova},
	author       = {Borin, Lars and Edlund, Jens},
	year         = {2018},
	publisher    = {Springer International Publishing},
	address      = {Cham},
	ISBN         = {978-3-319-73355-5},
	pages        = {251--264},
}

@edited_book{borin-etal-2013-proceedings-190260,
	title        = {Proceedings of the workshop on lexical semantic resources for NLP at NODALIDA 2013, May 22-24, 2013, Oslo, Norway},
	editor       = {Borin, Lars and Fjeld, Ruth Vatvedt and Forsberg, Markus and Nimb, Sanni and Nugues, Pierre and Pedersen, Bolette Sandford},
	year         = {2013},
	publisher    = {Linköping University Electronic Press},
	address      = {Linköping},
	ISBN         = {978-91-7519-586-5},
}

@inProceedings{borin-forsberg-2008-something-72502,
	title        = {Something old, something new: A computational morphological description of Old Swedish},
	booktitle    = {LREC 2008 Workshop on Language Technology for Cultural Heritage Data (LaTeCH 2008)},
	author       = {Borin, Lars and Forsberg, Markus},
	year         = {2008},
	pages        = {9--16},
}

@inProceedings{borin-forsberg-2009-family-102212,
	title        = {All in the family: A comparison of SALDO and WordNet},
	booktitle    = {Proceedings of the Nodalida 2009 Workshop on WordNets and other Lexical Semantic Resources - between Lexical Semantics, Lexicography, Terminology and Formal Ontologies. NEALT Proceedings Series},
	author       = {Borin, Lars and Forsberg, Markus},
	year         = {2009},
	volume       = {7},
}

@article{borin-forsberg-2010-beyond-129125,
	title        = {Beyond the synset: Swesaurus – a fuzzy Swedish wordnet},
	journal      = {Re-thinking synonymy: semantic sameness and similarity in languages and their description},
	author       = {Borin, Lars and Forsberg, Markus},
	year         = {2010},
}

@inProceedings{borin-forsberg-2010-from-118908,
	title        = {From the People’s Synonym Dictionary to fuzzy synsets - first steps},
	booktitle    = {Proceedings of the LREC 2010 workshop Semantic relations. Theory and Applications},
	author       = {Borin, Lars and Forsberg, Markus},
	year         = {2010},
	pages        = {18--25},
}

@article{borin-forsberg-2011-swesaurus-151331,
	title        = {Swesaurus – ett svenskt ordnät med fria tyglar},
	journal      = {LexicoNordica},
	author       = {Borin, Lars and Forsberg, Markus},
	year         = {2011},
	volume       = {18},
	pages        = {17--39},
}

@incollection{borin-forsberg-2011-diachronic-144291,
	title        = {A diachronic computational lexical resource for 800 years of Swedish},
	booktitle    = {Language technology for cultural heritage},
	author       = {Borin, Lars and Forsberg, Markus},
	year         = {2011},
	publisher    = {Springer},
	address      = {Berlin},
	ISBN         = {978-3-642-20226-1},
	pages        = {41--61},
}

@inProceedings{borin-forsberg-2014-swesaurus;-193085,
	title        = {Swesaurus; or, The Frankenstein Approach to Wordnet Construction},
	abstract     = {Swesaurus is a freely available (under a CC-BY license) Swedish wordnet under construction, built primarily by scavenging and recycling information from a number of existing lexical resources. Among its more unusual characteristics are graded lexical-semantic relations and inclusion of all parts of speech, not only open-class items.
},
	booktitle    = {Proceedings of the Seventh Global WordNet Conference (GWC 2014)},
	author       = {Borin, Lars and Forsberg, Markus},
	year         = {2014},
	ISBN         = {978-9949-32-492-7},
}

@inProceedings{borin-etal-2011-semantic-140686,
	title        = {Semantic Search in Literature as an e-Humanities Research Tool: CONPLISIT – Consumption Patterns and Life-Style in 19th Century Swedish Literature},
	abstract     = {We present our ongoing work on language technology-based e-science in the humanities, with a focus on text-based research in the historical sciences. Currently, we are working on the adaptation and integration of lexical resources representing different historical stages of Swedish into a lexical and morphological toolbox that will allow us to develop semantically oriented text search applications for historical research on Swedish text. We describe a semantic search prototype which was built using REST web services from this toolbox as components, and which has been evaluated by historians interested in using digitized 19th century novels as primary data for an historical investigation of the emerging consumer society in 19th century Sweden.},
	booktitle    = {NEALT Proceedings Series (NODALIDA 2011 Conference Proceedings)},
	author       = {Borin, Lars and Forsberg, Markus and Ahlberger, Christer},
	year         = {2011},
	volume       = {11},
	pages        = {58--65},
}

@inProceedings{borin-etal-2012-search-157338,
	title        = {Search Result Diversification Methods to Assist Lexicographers},
	abstract     = {We show how the lexicographic task of finding informative and diverse example sentences can be cast as a search result diversification problem, where an objective based on relevance and diversity is maximized. This problem has been studied intensively in the information retrieval community during recent years, and efficient algorithms have been devised.

We finally show how the approach has been implemented in a lexicographic project, and describe the relevance and diversity functions used in that context.},
	booktitle    = {Proceedings of the 6th Linguistic Annotation Workshop},
	author       = {Borin, Lars and Forsberg, Markus and Friberg Heppin, Karin and Johansson, Richard and Kjellandsson, Annika},
	year         = {2012},
	pages        = {113--117},
}

@inProceedings{borin-etal-2016-sparv-246053,
	title        = {Sparv: Språkbanken’s corpus annotation pipeline infrastructure},
	abstract     = {Sparv is Språkbanken's corpus annotation pipeline infrastructure. The easiest way to use the pipeline is from its web interface with a plain text document. The pipeline uses in-house and external tools on the text to segment it into sentences and paragraphs, tokenise, tag parts-of-speech, look up in dictionaries and analyse compounds. The pipeline can also be run using a web API with XML results, and it is run locally at Språkbanken to prepare the documents in Korp, our corpus search tool. While the most sophisticated support is for modern Swedish, the pipeline supports 15 languages.},
	booktitle    = {SLTC 2016. The Sixth Swedish Language Technology Conference, Umeå University, 17-18 November, 2016},
	author       = {Borin, Lars and Forsberg, Markus and Hammarstedt, Martin and Rosén, Dan and Schäfer, Roland and Schumacher, Anne},
	year         = {2016},
}

@inProceedings{borin-etal-2012-transferring-157213,
	title        = {Transferring Frames: Utilization of Linked Lexical Resources},
	abstract     = {In our experiment, we evaluate the transferability of frames from Swedish to Finnish in parallel corpora. We evaluate both the theoretical possibility of transferring frames and the possibility of performing it using available lexical resources. We add the frame information to an extract of the Swedish side of the Kotus and JRC-Acquis corpora using an automatic frame labeler and copy it to the Finnish side. We focus on evaluating the results to get an estimation of how often the parallel sentences can be said to express the same frame. This sheds light on the questions: Are the same situations in the two languages expressed using different frames, i.e. are the frames transferable even in theory? How well can the frame information of running text be transferred from one language to another?},
	booktitle    = {Proceedings of the Workshop on Inducing Linguistic Structure Submission (WILS)},
	author       = {Borin, Lars and Forsberg, Markus and Johansson, Richard and Muhonen, Kristiina and Purtonen, Tanja and Voionmaa, Kaarlo},
	year         = {2012},
	pages        = {8--15},
}

@inProceedings{borin-etal-2010-diabase-118907,
	title        = {Diabase: Towards a diachronic BLARK in support of historical studies},
	booktitle    = {Proceedings of LREC 2010},
	author       = {Borin, Lars and Forsberg, Markus and Kokkinakis, Dimitrios},
	year         = {2010},
}

@article{borin-etal-2013-close-187063,
	title        = {Close encounters of the fifth kind: Some linguistic and computational aspects of the Swedish FrameNet++ project},
	abstract     = {The Swedish FrameNet++ (SweFN++) project aims at developing an integrated Swedish lexical macro-resource to be used primarily in language technology R&D to build natural language processing (NLP) applications. Most of the component resources making up SweFN++ are existing digital lexical resources; in their case the central project effort is directed at making them interoperable on as many levels as possible. An important new resource being created in the project is a Swedish framenet. Now a sister project is starting with the aim of adding a Swedish constructicon (SweCxn) to the macro-resource. In this paper, we discuss some theoretical and conceptual issues which have arisen in the course of our work on the SweFN++ and the planning of the SweCxn, in the close encounter between the practical requirements of NLP and the theory and practice of linguistic – lexical and grammatical – description.
},
	journal      = {Veredas},
	author       = {Borin, Lars and Forsberg, Markus and Lyngfelt, Benjamin},
	year         = {2013},
	volume       = {17},
	number       = {1},
	pages        = {28--43},
}

@inProceedings{borin-etal-2012-growing-171988,
	title        = {Growing a Swedish constructicon in lexical soil},
	booktitle    = {Proceedings of the Swedish Language Technology Conference. Lund, October 24-26, 2012},
	author       = {Borin, Lars and Forsberg, Markus and Lyngfelt, Benjamin and Prentice, Julia and Rydstedt, Rudolf and Sköldberg, Emma and Tingsell, Sofia},
	year         = {2012},
	pages        = {10--11},
}

@incollection{borin-etal-2008-hunting-72504,
	title        = {The hunting of the BLARK - SALDO, a freely available lexical database for Swedish language technology},
	booktitle    = {Resourceful language technology. Festschrift in honor of Anna Sågvall Hein},
	author       = {Borin, Lars and Forsberg, Markus and Lönngren, Lennart},
	year         = {2008},
	publisher    = {Uppsala University},
	address      = {Uppsala},
	pages        = {21--32},
}

@article{borin-etal-2008-saldo-110525,
	title        = {SALDO 1.0 (Svenskt associationslexikon version 2)},
	journal      = {Språkbanken, Göteborg universitet},
	author       = {Borin, Lars and Forsberg, Markus and Lönngren, Lennart},
	year         = {2008},
}

@article{borin-etal-2013-saldo-188604,
	title        = {SALDO: a touch of yin to WordNet's yang},
	abstract     = {The English-language Princeton WordNet (PWN) and some wordnets for other languages have been extensively used as lexical–semantic knowledge sources in language technology applications, due to their free availability and their size. The ubiquitousness of PWN-type wordnets tends to overshadow the fact that they represent one out of many possible choices for structuring a lexical-semantic resource, and it could be enlightening to look at a differently structured resource both from the point of view of theoretical–methodological considerations and from the point of view of practical text processing requirements. The resource described here—SALDO—is such a lexical–semantic resource, intended primarily for use in language technology applications, and offering an alternative organization to PWN-style wordnets. We present our work on SALDO, compare it with PWN, and discuss some implications of the differences. We also describe an integrated infrastructure for computational lexical resources where SALDO forms the central component.},
	journal      = {Language resources and evaluation},
	author       = {Borin, Lars and Forsberg, Markus and Lönngren, Lennart},
	year         = {2013},
	volume       = {47},
	number       = {4},
	pages        = {1191--1211},
}

@inProceedings{borin-etal-2013-lexical-186032,
	title        = {The lexical editing system of Karp},
	abstract     = {Karp is the open lexical infrastructure of Språkbanken (the Swedish Language Bank). The infrastructure has three main functions: (1) to support the work on creating, curating, and integrating our various lexical resources; (2) to publish the resources, making them searchable and downloadable; and (3) to offer advanced editing functionalities. An important feature of the lexical infrastructure is also that we maintain a strong bidirectional connection to our corpus infrastructure. At the heart of the infrastructure is the SweFN++ project with the goal to create free Swedish lexical resources geared towards language technology applications. The infrastructure currently hosts 23 Swedish lexical resources. The resources are integrated through links to a pivot lexical resource, SALDO, a large morphological and lexical-semantic resource for modern Swedish.},
	booktitle    = {Kosem, I., Kallas, J., Gantar, P., Krek, S., Langemets, M., Tuulik, M. (eds.) 2013. Electronic lexicography in the 21st century: thinking outside the paper. Proceedings of the eLex 2013 conference, 17-19 October 2013, Tallinn, Estonia.},
	author       = {Borin, Lars and Forsberg, Markus and Olsson, Leif-Jöran and Olsson, Olof and Uppström, Jonatan},
	year         = {2013},
	publisher    = {Trojina, Institute for Applied Slovene Studies / Eesti Keele Instituut},
	address      = {Ljubljana/Tallinn},
	ISBN         = {978-961-93594-0-2},
}

@inProceedings{borin-etal-2012-open-156079,
	title        = {The open lexical infrastructure of Språkbanken},
	abstract     = {We present our ongoing work on Karp, Språkbanken’s (the Swedish Language Bank) open lexical infrastructure, which has two main functions: (1) to support the work on creating, curating, and integrating our various lexical resources; and (2) to publish daily versions of the resources, making them searchable and downloadable. An important requirement on the lexical infrastructure is also that we maintain a strong bidirectional connection to our corpus infrastructure. At the heart of the infrastructure is the SweFN++ project with the goal to create free Swedish lexical resources geared towards language technology applications. The infrastructure currently hosts 15 Swedish lexical resources, including historical ones, some of which have been created from scratch using existing free resources, both external and in-house. The resources are integrated through links to a pivot lexical resource, SALDO, a large morphological and lexical-semantic resource for modern Swedish. SALDO has been selected as the pivot partly because of its size and quality, but also because its form and sense units have been assigned persistent identifiers (PIDs) to which the lexical information in other lexical resources and in corpora are linked.},
	booktitle    = {Proceedings of the 8th International Conference on Language Resources and Evaluation: May 23-25, 2012 / eds. Nicoletta Calzolari},
	author       = {Borin, Lars and Forsberg, Markus and Olsson, Leif-Jöran and Uppström, Jonatan},
	year         = {2012},
	ISBN         = {978-2-9517408-7-7},
	pages        = {3598--3602},
}

@inProceedings{borin-etal-2012-korp-156080,
	title        = {Korp – the corpus infrastructure of Språkbanken},
	abstract     = {We present Korp, the corpus infrastructure of Språkbanken (the Swedish Language Bank). The infrastructure consists of three main components: the Korp corpus pipeline, the Korp backend, and the Korp frontend. The Korp corpus pipeline is used for importing corpora, annotating them, and then exporting the annotated corpora into different formats. An essential feature of the pipeline is the ability to leave existing annotations untouched, both structural and word level annotations, and to use the existing annotations as the foundation of other annotations. The Korp backend consists of a set of REST-based web services for searching in and retrieving information about the corpora. Finally, the Korp frontend is a graphical search interface that interacts with the Korp backend. The interface has been inspired by corpus search interfaces such as SketchEngine, Glossa, and DeepDict, and it uses State Chart XML (SCXML) in order to enable users to bookmark interaction states. We give a functional and technical overview of the three components, followed by a discussion of planned future work.
},
	booktitle    = {Proceedings of LREC 2012. Istanbul: ELRA},
	author       = {Borin, Lars and Forsberg, Markus and Roxendal, Johan},
	year         = {2012},
	pages        = {474--478},
}

@techreport{borin-etal-2007-empowering-53590,
	title        = {Empowering the patient with language technology},
	author       = {Borin, Lars and Grabar, Natalia and Hallett, Catalina and Hardcastle, David and Toporowska Gronostaj, Maria and Kokkinakis, Dimitrios and Williams, Sandra and Willis, Alistair},
	year         = {2007},
	publisher    = {Göteborg University},
	address      = {Göteborg},
}

@article{borin-johansson-2014-kulturomik-192931,
	title        = {Kulturomik: Att spana efter språkliga och kulturella förändringar i digitala textarkiv},
	journal      = {Historia i en digital värld},
	author       = {Borin, Lars and Johansson, Richard},
	year         = {2014},
}

@incollection{borin-kokkinakis-2010-literary-124517,
	title        = {Literary onomastics and language technology},
	booktitle    = {Literary education and digital learning},
	author       = {Borin, Lars and Kokkinakis, Dimitrios},
	year         = {2010},
	publisher    = {Information Science Reference},
	address      = {Hershey - New York},
	ISBN         = {978-1-60566-932-8},
	pages        = {53--78},
}

@inProceedings{borin-etal-2007-naming-44954,
	title        = {Naming the past: Named entity and animacy recognition in 19th century Swedish literature},
	booktitle    = {ACL 2007 Workshop on Language Technology for Cultural Heritage Data (LaTeCH 2007)},
	author       = {Borin, Lars and Kokkinakis, Dimitrios and Olsson, Leif-Jöran},
	year         = {2007},
	pages        = {1--8},
}

@inProceedings{borin-kosinski-2016-towards-238147,
	title        = {Towards interactive visualization of public discourse in time and space},
	abstract     = {We report on a proof-of-concept study where we (1) apply NLP tools for extracting political-discourse topics from a large Swedish Twitter dataset; and (2) design an interactive spatiotemporal visualization application allowing humanities and social-science scholars to explore how the tweet topics vary over space and time.},
	booktitle    = {Linköping Electronic Conference Proceedings},
	author       = {Borin, Lars and Kosiński, Tomasz},
	year         = {2016},
	volume       = {126},
	ISBN         = {978-91-7685-733-5},
	pages        = {1--7},
}

@techreport{borin-etal-2011-metadata-142495,
	title        = {Metadata descriptions and other interoperability standards},
	abstract     = {An important aim of META-NORD is to upgrade and harmonize national language resources and tools in order to make them interoperable, within languages and across languages, with respect to their data formats and as far as possible also as regards their content. Since resources and to some extent tools will remain in one location – one of a number of META-NORD centers – the preferred way of accessing and utilizing resources and tools will be through metadata and APIs, allowing the assembly of on-the-fly tool-chains made up of standardized component language technology tools, processing distributed – and in many cases interlinked – language resources in standardized formats.},
	author       = {Borin, Lars and Lindh, Jonas and Brandt, Martha and Olsson, Leif-Jöran},
	year         = {2011},
}

@inProceedings{borin-etal-2015-here-217351,
	title        = {Here be dragons? The perils and promises of inter-resource lexical-semantic mapping},
	abstract     = {Lexical-semantic knowledge sources are a stock item in the language technologist’s toolbox, having proved their practical worth in many and diverse natural language processing (NLP) applications. In linguistics, lexical semantics comes in many flavors, but in the NLP world, wordnets reign more or less supreme. There has been some promising work utilizing Roget-style thesauruses instead, but wider experimentation is hampered by the limited availability of such resources. The work presented here is a first step in the direction of creating a freely available Roget-style lexical resource for modern Swedish. Here, we explore methods for automatic disambiguation of inter-resource mappings with the longer-term goal of utilizing similar techniques for automatic enrichment of lexical-semantic resources.},
	booktitle    = {Linköping Electronic Conference Proceedings. Semantic resources and semantic annotation for Natural Language Processing and the Digital Humanities. Workshop at NODALIDA, May 11, 13-18 2015, Vilnius},
	author       = {Borin, Lars and Nieto Piña, Luis and Johansson, Richard},
	year         = {2015},
	volume       = {112},
	ISBN         = {978-91-7519-049-5},
	pages        = {1--11},
}

@inProceedings{borin-olsson-2006-plattformen-116093,
	title        = {ITG-plattformen som korpusverktyg},
	abstract     = {En genomgång och handfast presentation om hur ITG-plattformen kan användas som korpusverktyg.},
	booktitle    = {Fjärde svenska lingvistikkonferensen (Sling 2006), 27–28 april 2006, Stockholm},
	author       = {Borin, Lars and Olsson, Leif-Jöran},
	year         = {2006},
}

@incollection{borin-prutz-2004-wine-33945,
	title        = {New wine in old skins? A corpus investigation of L1 syntactic transfer in learner language},
	booktitle    = {Aston, G., Bernardini, S. & Stewart, D. (eds). Corpora and language learners},
	author       = {Borin, Lars and Prütz, Klas},
	year         = {2004},
	publisher    = {John Benjamins},
	address      = {Amsterdam},
	ISBN         = {90-272-2288-6},
	pages        = {67--87},
}

@incollection{borin-saxena-2004-grammar-33944,
	title        = {Grammar, incorporated},
	booktitle    = {Henrichsen, P. J. (ed). CALL for the Nordic languages},
	author       = {Borin, Lars and Saxena, Anju},
	year         = {2004},
	publisher    = {Samfundslitteratur},
	address      = {Frederiksberg},
	ISBN         = {87-593-1176-2},
	pages        = {125--145},
}

@edited_book{borin-saxena-2013-approaches-184757,
	title        = {Approaches to Measuring Linguistic Differences},
	abstract     = {The present volume collects contributions addressing different aspects of the measurement of linguistic differences, a topic which probably is as old as language itself but at the same time has acquired renewed interest over the last decade or so, reflecting a rapid development of data-intensive computing in all fields of research, including linguistics.},
	editor       = {Borin, Lars and Saxena, Anju},
	year         = {2013},
	publisher    = {De Gruyter Mouton},
	address      = {Berlin},
	ISBN         = {978-3-11-030525-8},
}

@inProceedings{borin-etal-2014-linguistic-198551,
	title        = {Linguistic landscaping of South Asia using digital language resources: Genetic vs. areal linguistics},
	booktitle    = {Proceedings of LREC, May 26-31, 2014, Reykjavik, Iceland},
	author       = {Borin, Lars and Saxena, Anju and Rama, Taraka and Comrie, Bernard},
	year         = {2014},
	ISBN         = {978-2-9517408-8-4},
	pages        = {3137--3144},
}

@inProceedings{borin-etal-2017-clarin-261157,
	title        = {Swe-Clarin: Language resources and technology for Digital Humanities},
	abstract     = {CLARIN is a European Research Infrastructure Consortium (ERIC), which aims at (a) making extensive language-based materials available as primary research data to the humanities and social sciences (HSS); and (b) offering state-of-the-art language technology (LT) as an e-research tool for this purpose, positioning CLARIN centrally in what is often referred to as the digital humanities (DH). The Swedish CLARIN node Swe-Clarin was established in 2015 with funding from the Swedish Research Council.

In this paper, we describe the composition and activities of Swe-Clarin, aiming at meeting the requirements of all HSS and other researchers whose research involves using text and speech as primary research data, and spreading the awareness of what Swe-Clarin can offer these research communities. We focus on one of the central means for doing this: pilot projects conducted in collaboration between HSS researchers and Swe-Clarin, together formulating a research question, the addressing of which requires working with large language-based materials. Four such pilot projects are described in more detail, illustrating research on rhetorical history, second-language acquisition, literature, and political science. A common thread to these projects is an aspiration to meet the challenge of conducting research on the basis of very large amounts of textual data in a consistent way without losing sight of the individual cases making up the mass of data, i.e., to be able to move between Moretti’s “distant” and “close reading” modes. 

While the pilot projects clearly make substantial contributions to DH, they also reveal some needs for more development, and in particular a need for document-level access to the text materials. As a consequence of this, work has now been initiated in Swe-Clarin to meet this need, so that Swe-Clarin together with HSS scholars investigating intricate research questions can take on the methodological challenges of big-data language-based digital humanities.},
	booktitle    = {Digital Humanities 2016. Extended Papers of the International Symposium on Digital Humanities (DH 2016) Växjö, Sweden, November 7-8, 2016. Edited by Koraljka Golub, Marcelo Milrad. Vol-2021},
	author       = {Borin, Lars and Tahmasebi, Nina and Volodina, Elena and Ekman, Stefan and Jordan, Caspar and Viklund, Jon and Megyesi, Beáta and Näsman, Jesper and Palmér, Anne and Wirén, Mats and Björkenstam, Kristina and Grigonyte, Gintare and Gustafson Capková, Sofia and Kosiński, Tomasz},
	year         = {2017},
	publisher    = {M. Jeusfeld c/o Redaktion Sun SITE, Informatik V, RWTH Aachen.},
	address      = {Aachen},
}

@inProceedings{borin-etal-2007-medical-44951,
	title        = {Medical frames as target and tool},
	booktitle    = {FRAME 2007: Building Frame Semantics resources for Scandinavian and Baltic languages. (Nodalida 2007 workshop proceedings)},
	author       = {Borin, Lars and Toporowska Gronostaj, Maria and Kokkinakis, Dimitrios},
	year         = {2007},
	ISBN         = {978-91-976939-0-5},
	pages        = {11--18},
}

@inProceedings{borin-etal-2016-towards-253952,
	title        = {Towards a Big Data View on South Asian Linguistic Diversity},
	abstract     = {South Asia with its rich and diverse linguistic tapestry of hundreds of languages, including many from four major language families, and a long history of intensive language contact, provides rich empirical data for studies of linguistic genealogy, linguistic typology, and language contact. South Asia is often referred to as a linguistic area, a region where, due to close contact and widespread multilingualism, languages have influenced one another to the extent that both related and unrelated languages are more similar on many linguistic levels than we would expect. However, with some rare exceptions, most studies are largely impressionistic, drawing examples from a few languages. In this paper we present our ongoing work aiming at turning the linguistic material available in Grierson’s Linguistic Survey of India (LSI) into a digital language resource, a database suitable for a broad array of linguistic investigations of the languages of South Asia. In addition to this, we aim to contribute to the methodological development of large-scale comparative linguistics drawing on digital language resources, by exploring NLP techniques for extracting linguistic information from free-text language descriptions of the kind found in the LSI.},
	booktitle    = {WILDRE-3 – 3rd Workshop on Indian Language Data: Resources and Evaluation},
	author       = {Borin, Lars and Virk, Shafqat and Saxena, Anju},
	year         = {2016},
	publisher    = {ELRA},
	address      = {Paris},
}

@inProceedings{borin-etal-2018-language-290841,
	title        = {Language technology for digital linguistics: Turning the Linguistic Survey of India into a rich source of linguistic information},
	abstract     = {We present our work aiming at turning the linguistic material available in Grierson’s classical Linguistic Survey of India (LSI) from a printed discursive textual description into a formally structured digital language resource, a database suitable for a broad array of linguistic investigations of the languages of South Asia. While doing so, we develop state-of-the-art language technology for automatically extracting the relevant grammatical information from the text of the LSI, and interactive linguistic information visualization tools for better analysis and comparisons of languages based on their structural and functional features.},
	booktitle    = {Lecture Notes in Computer Science. Computational Linguistics and Intelligent Text Processing, 18th International Conference, CICLing 2017, Budapest, Hungary, April 17–23, 2017},
	author       = {Borin, Lars and Virk, Shafqat and Saxena, Anju},
	year         = {2018},
	publisher    = {Springer},
	address      = {Cham},
}

@inProceedings{borin-etal-2018-many-267534,
	title        = {Many a little makes a mickle - infrastructure component reuse for a massively multilingual linguistic study},
	abstract     = {We present ongoing work aiming at turning the linguistic material available in Grierson’s classical Linguistic Survey of India (LSI) into a digital language resource, a database suitable for a broad array of linguistic investigations of the languages of South Asia and studies relating to language typology and contact linguistics. The project has two concrete main aims: (1) to conduct a linguistic investigation of the claim that South Asia constitutes a linguistic area; (2) to develop state-of-the-art language technology for automatically extracting the relevant information from the text of the LSI. In this presentation we focus on how, in the first part of the project, a number of existing research infrastructure components provided by Swe-Clarin, the Swedish CLARIN consortium, have been ‘recycled’ in order to allow the linguists involved in the project to quickly orient themselves in the vast LSI material, and to be able to provide input to the language technologists designing the tools for information extraction from the descriptive grammars.},
	booktitle    = {Selected papers from the CLARIN Annual Conference 2017, Budapest, 18–20 September 2017},
	author       = {Borin, Lars and Virk, Shafqat and Saxena, Anju},
	year         = {2018},
	publisher    = {Linköping University Electronic Press},
	address      = {Linköping},
	ISBN         = {978-91-7685-273-6},
}

@edited_book{borin-volodina-2012-proceedings-188679,
	title        = {Proceedings of the SLTC 2012 workshop on NLP for CALL},
	editor       = {Borin, Lars and Volodina, Elena},
	year         = {2012},
	publisher    = {LiU Electronic Press},
	address      = {Linköping},
}

@inProceedings{bouma-2012-real-158261,
	title        = {Real-Time Persistent Queues and Deques with Logic Variables (Declarative Pearl)},
	abstract     = {We present a Prolog implementation of real-time persistent queues and double-ended queues. Our implementation is inspired by Okasaki’s lazy-functional approach, but relies only on standard Prolog, comprising the pure subset plus if-then-else constructs to efficiently implement guards and meta-calls for convenience. The resulting data structure is a nice demonstration of the fact that the use of logic variables to hold the outcome of an unfinished computation can sometimes give the same kind of elegant and compact solutions as lazy evaluation.},
	booktitle    = {Proceedings of the 11th International Symposium on Functional and Logic Programming (FLOPS 2012)},
	author       = {Bouma, Gerlof},
	year         = {2012},
	ISBN         = {978-3-642-29821-9},
	pages        = {62--73},
}

@inProceedings{bouma-2019-exploring-289484,
	title        = {Exploring Combining Training Datasets for the CLIN 2019 Shared Task on Cross-genre Gender Detection in Dutch},
	abstract     = {We present our entries to the Shared Task on Cross-genre Gender Detection in Dutch at CLIN 2019. We start from a simple logistic regression model with commonly used features, and consider two ways of combining training data from different sources. Our in-genre models do reasonably well, but the cross-genre models are a lot worse. Post-task experiments show no clear systematic advantage of one way of combining training data sources over the other, but do suggest accuracy can be gained from a better way of setting model hyperparameters.},
	booktitle    = {CEUR Workshop Proceedings, vol 2453. Proceedings of the Shared Task on Cross-Genre Gender Prediction in Dutch at CLIN29 (GxG-CLIN29) co-located with the 29th Conference on Computational Linguistics in The Netherlands (CLIN29). Groningen, The Netherlands, January 31, 2019. Edited by Hessel Haagsma, Tim Kreutz, Masha Medvedeva, Walter Daelemans and Malvina Nissim},
	author       = {Bouma, Gerlof},
	year         = {2019},
	publisher    = {CEUR-WS.org},
	address      = {Aachen},
}

@inProceedings{bouma-adesam-2013-experiments-177631,
	title        = {Experiments on sentence segmentation in Old Swedish editions},
	booktitle    = {NEALT Proceedings Series},
	author       = {Bouma, Gerlof and Adesam, Yvonne},
	year         = {2013},
	volume       = {18},
	ISBN         = {978-91-7519-587-2},
}

@inProceedings{bouma-adesam-2016-part-254389,
	title        = {Part-of-speech and Morphology Tagging Old Swedish},
	booktitle    = {Proceedings of the Sixth Swedish Language Technology Conference (SLTC) Umeå University, 17-18 November, 2016},
	author       = {Bouma, Gerlof and Adesam, Yvonne},
	year         = {2016},
}

@inProceedings{bouma-adesam-2016-multiword-251825,
	title        = {Multiword Annotation in the Eukalyptus Treebank of Written Swedish},
	booktitle    = {PARSEME, 6th general meeting, 7-8 April 2016, Struga, FYR Macedonia},
	author       = {Bouma, Gerlof and Adesam, Yvonne},
	year         = {2016},
}

@misc{bouma-adesam-2017-proceedings-254435,
	title        = {Proceedings of the NoDaLiDa 2017 Workshop on Processing Historical Language},
	author       = {Bouma, Gerlof and Adesam, Yvonne},
	year         = {2017},
	publisher    = {Linköping University Electronic Press, Linköpings universitet},
	address      = {Linköping},
	ISBN         = {978-91-7685-503-4},
}

@inProceedings{bouma-etal-2019-building-289485,
	title        = {Building a Diachronic and Contrastive Parallel Corpus – and an Intended Application in the Form of a Study of Germanic Complex Verb Constructions},
	abstract     = {We present a parallel corpus under construction, which is parallel diachronically (through time) as well as contrastively (between languages). The corpus is made up of Bible texts spanning almost 6 centuries in 4 languages. Our project's direct purpose in building the corpus is to track the development of verb combinations containing multiple auxiliary verbs through time in German, Dutch, English and Swedish. We will also make the corpus available to other researchers.

In this poster, we discuss the design of the corpus, our selection of sources, issues with bringing together a wide variety of sources, and alignment of the data. We will also touch upon intended future work concerning the automatic linguistic processing needed to facilitate the study of verb constructions, and the methodological challenges of doing corpus linguistic research on the varying quality of annotations produced by automatic methods on materials from such a wide range of origins.},
	booktitle    = {Digital Humanities 2019, 9 -12 July 2019, Utrecht, the Netherlands},
	author       = {Bouma, Gerlof and Coussé, Evie and de Kooter , Dirk-Jan and van der Sijs, Nicoline},
	year         = {2019},
}

@inProceedings{backstrom-etal-2013-automatic-178351,
	title        = {Automatic identification of construction candidates for a Swedish constructicon},
	abstract     = {We present an experiment designed for extracting construction candidates for a Swedish constructicon from text corpora. We have explored the use of hybrid n-grams with the practical goal of discovering previously undescribed partially schematic constructions. The experiment was successful, in that quite a few new constructions were discovered. The precision is low, but as a push-button tool for construction discovery it has proven valuable for the work on a Swedish constructicon.},
	booktitle    = {Proceedings of the workshop on lexical semantic resources for NLP at NODALIDA 2013, May 22-24, 2013, Oslo, Norway. NEALT Proceedings Series 19},
	author       = {Bäckström, Linnéa and Borin, Lars and Forsberg, Markus and Lyngfelt, Benjamin and Prentice, Julia and Sköldberg, Emma},
	year         = {2013},
	pages        = {2--11},
}

@inProceedings{cap-etal-2016-sword-254388,
	title        = {SWORD: Towards Cutting-Edge Swedish Word Processing},
	abstract     = {Despite many years of research on Swedish language technology, there is still no well-documented standard for Swedish word processing covering the whole spectrum from low-level tokenization to morphological analysis and disambiguation. SWORD is a new initiative within the SWE-CLARIN consortium aiming to develop documented standards for Swedish word processing. In this paper, we report on a pilot study of Swedish tokenization, where we compare the output of six different tokenizers on four different text types.  For one text type (Wikipedia articles), we also compare to the tokenization produced by six manual annotators.},
	booktitle    = {Proceedings of the Sixth Swedish Language Technology Conference (SLTC) Umeå University, 17-18 November, 2016},
	author       = {Cap, Fabienne and Adesam, Yvonne and Ahrenberg, Lars and Borin, Lars and Bouma, Gerlof and Forsberg, Markus and Kann, Viggo and Östling, Robert and Smith, Aaron and Wirén, Mats and Nivre, Joakim},
	year         = {2016},
}

@incollection{damova-etal-2014-natural-178094,
	title        = {Natural Language Interaction with Semantic Web Knowledge Bases and Linked Open Data},
	abstract     = {Cultural heritage appears to be a very useful use case for Semantic Web technologies. The domain provides plenty of circumstances where linkages between different knowledge sources are required to ensure access to rich information and respond to the needs of professionals dealing with cultural heritage content. Semantic Web technologies offer the technological backbone to meet the requirement of integrating heterogeneous data easily, but they are still more adapted to be consumed by computers than by humans, especially non-engineers or developers. This chapter is about a technique which allows interaction in natural language with semantic knowledge bases. The proposed technique offers a method that allows querying a semantic repository in natural language and obtaining results from it as a coherent text. This unique solution includes several steps of transition from natural language to SPARQL and from RDF to coherent multilingual descriptions, using the Grammatical Framework, GF. The approach builds on a semantic knowledge infrastructure in RDF; it is based on OWLIM-SE and the data integration method Reason-able View supplied with an ontological reference layer. The latter is connected via formal rules with abstract representations derived from the syntactic trees of natural language input using the GF resource grammar library.},
	booktitle    = {Towards multilingual Semantic Web},
	author       = {Damova, Mariana and Dannélls, Dana and Mateva, Maria and Enache, Ramona and Ranta, Aarne},
	year         = {2014},
	publisher    = {Springer},
	address      = {Berlin},
	ISBN         = {978-3-662-43585-4},
	pages        = {211--226},
}

@inProceedings{dannells-borin-2012-toward-156502,
	title        = {Toward language independent methodology for generating artwork descriptions – Exploring FrameNet information},
	abstract     = {Today museums and other cultural heritage institutions are increasingly storing object descriptions using semantic web domain ontologies. To make this content accessible in a multilingual world, it will need to be conveyed in many languages, a language generation task which is domain specific and language dependent. This paper describes how semantic and syntactic information such as that provided in a framenet can contribute to solving this task. It is argued that the kind of information offered by such lexical resources enhances the output quality of a multilingual language generation application, in particular when generating domain specific content.},
	booktitle    = {EACL 2012 workshop on Language Technology for Cultural Heritage, Social Sciences, and Humanities (LaTeCH)},
	author       = {Dannélls, Dana and Borin, Lars},
	year         = {2012},
}

@inProceedings{dannells-etal-2013-mapserver-178095,
	title        = {MapServer for Swedish Language Technology},
	abstract     = {The MapServer application used by the Swedish Language Bank provides new opportunities for visualizing geographical information found in its large repository of written texts, in particular literary texts. The application is capable of performing coordinate search on the basis of recognized place names and rendering both static and dynamic maps that display their geographical locations.},
	booktitle    = {Digital Humanities},
	author       = {Dannélls, Dana and Borin, Lars and Olsson, Leif-Jöran},
	year         = {2013},
}

@inProceedings{dannells-etal-2014-multilingual-204733,
	title        = {A Multilingual SPARQL-Based Retrieval Interface for Cultural Heritage Objects},
	booktitle    = {Proceedings of the ISWC 2014 Posters & Demonstrations Track, a track within the 13th International Semantic Web Conference (ISWC 2014)},
	author       = {Dannélls, Dana and Enache, Ramona and Damova, Mariana},
	year         = {2014},
	volume       = {1272},
	pages        = {205--208},
}

@inProceedings{dannells-etal-2014-using-201951,
	title        = {Using language technology resources and tools to construct Swedish FrameNet},
	abstract     = {Having access to large lexical and grammatical resources when creating a new language resource is essential for its enhancement and enrichment. This paper describes the interplay and interactive utilization of different language technology tools and resources, in particular the Swedish lexicon SALDO and the Swedish Constructicon, in the creation of Swedish FrameNet. We show how integrating resources in a larger infrastructure is much more than the sum of the parts.},
	booktitle    = {Proceedings of the Workshop on Lexical and Grammatical Resources for Language Processing, Dublin Ireland, August 24, 2014},
	author       = {Dannélls, Dana and Friberg Heppin, Karin and Ehrlemark, Anna},
	year         = {2014},
	ISBN         = {978-1-873769-44-7},
	pages        = {8--17},
}

@inProceedings{dannells-gruzitis-2014-extracting-198499,
	title        = {Extracting a bilingual semantic grammar from FrameNet-annotated corpora},
	abstract     = {We present the creation of an English-Swedish FrameNet-based grammar in Grammatical Framework. The aim of this research is to make existing framenets computationally accessible for multilingual natural language applications via a common semantic grammar API, and to facilitate the porting of such a grammar to other languages. In this paper, we describe the abstract syntax of the semantic grammar while focusing on its automatic extraction possibilities. We have extracted a shared abstract syntax from ~58,500 annotated sentences in Berkeley FrameNet (BFN) and ~3,500 annotated sentences in Swedish FrameNet (SweFN). The abstract syntax defines 769 frame-specific valence patterns that cover 77.8% of the examples in BFN and 74.9% in SweFN belonging to the shared set of 471 frames. As a side result, we provide a unified method for comparing semantic and syntactic valence patterns across framenets.},
	booktitle    = {Proceedings of the 9th International Conference on Language Resources and Evaluation (LREC)},
	author       = {Dannélls, Dana and Gruzitis, Normunds},
	year         = {2014},
	publisher    = {European Language Resources Association},
	ISBN         = {978-2-9517408-8-4},
}

@inProceedings{dannells-gruzitis-2014-controlled-201944,
	title        = {Controlled Natural Language Generation from a Multilingual FrameNet-based Grammar},
	abstract     = {This paper presents a currently bilingual but potentially multilingual FrameNet-based grammar library implemented in Grammatical Framework. The contribution of this paper is two-fold. First, it offers a methodological approach to automatically generate the grammar based on semantico-syntactic valence patterns extracted from FrameNet-annotated corpora. Second, it provides a proof of concept for two use cases illustrating how the acquired multilingual grammar can be exploited in different CNL applications in the domains of arts and tourism.},
	booktitle    = {Lecture Notes in Computer Science},
	author       = {Dannélls, Dana and Gruzitis, Normunds},
	year         = {2014},
	volume       = {8625},
	ISBN         = {978-3-319-10222-1},
	pages        = {155--166},
}

@inProceedings{dannells-etal-2019-evaluation-278761,
	title        = {Evaluation and refinement of an enhanced OCR process for mass digitisation},
	abstract     = {Great expectations are placed on the capacity of heritage institutions to make their collections available in digital format. Data-driven research is becoming a key concept within the humanities and social sciences. Kungliga biblioteket's (National Library of Sweden, KB) collections of digitised newspapers can thus be regarded as unique cultural data sets with information that rarely is conveyed in other media types. The digital format makes it possible to explore these resources in ways not feasible while in printed form. As texts are no longer only read but also subjected to computer-based analysis, the demand on the correct rendering of the original text increases. OCR technologies for converting images to machine-readable text play a fundamental part in making these resources available, but their effectiveness varies with the type of document being processed. This is evident in relation to the digitisation of newspapers, where factors relating to their production, layout and paper quality often impair the OCR results. In order to improve the machine-readable text, especially in relation to the digitisation of newspapers, KB initiated the development of an OCR module where key parameters can be adjusted according to the characteristics of the material being processed. The purpose of this paper is to present the project goals and methods.},
	booktitle    = {Proceedings of the Digital Humanities in the Nordic Countries 4th Conference (DHN 2019), Copenhagen, Denmark, March 5-8, 2019. Edited by: Costanza Navarretta, Manex Agirrezabal, Bente Maegaard},
	author       = {Dannélls, Dana and Johansson, Torsten  and Björk, Lars },
	year         = {2019},
	publisher    = {University of Copenhagen, Faculty of Humanities},
	address      = {Copenhagen},
}

@inProceedings{dannells-olsson-2018-integrating-271181,
	title        = {Integrating language resources in two OCR engines to improve processing of historical Swedish text},
	abstract     = {We aim to address the difficulties that many history and social sciences researchers struggle with when bringing non-digitized text into language analysis workflows. In this paper we present the language resources and material we used for training two Optical Character Recognition engines for processing historical Swedish text written in Fraktur (blackletter). The trained models, resources and dictionaries are freely available and accessible through our web service, hosted at Språkbanken, to give users and developers easy access to extraction of historical Swedish texts that are only available as images, for further processing.},
	booktitle    = {CLARIN Annual Conference},
	author       = {Dannélls, Dana and Olsson, Leif-Jöran},
	year         = {2018},
}

@inProceedings{dannells-simon-2020-supervised-289944,
	title        = {Supervised OCR Post-Correction of Historical Swedish Texts: What Role Does the OCR System Play?},
	abstract     = {Current approaches for post-correction of OCR errors offer solutions that are tailored to a specific OCR system. This can be problematic if the post-correction method was trained on a specific OCR system but has to be applied to the result of another system. Whereas OCR post-correction of historical text has received much attention lately, the question of what role the OCR system plays for the post-correction method has not been addressed. In this study we explore a dataset of 400 documents of historical Swedish text which has been OCR processed by three state-of-the-art OCR systems: Abbyy FineReader, Tesseract and Ocropus. We examine the OCR results of each system and present a supervised machine learning post-correction method that tries to approach the challenges exhibited by each system. We study the performance of our method by using three evaluation tools: PrimA, the Språkbanken evaluation tool and the Frontiers Toolkit. Based on the evaluation analysis we discuss the impact each of the OCR systems has on the results of the post-correction method. We report on quantitative and qualitative results showing varying degrees of OCR post-processing complexity that are important to consider when developing an OCR post-correction method.},
	booktitle    = {Proceedings of the Digital Humanities in the Nordic Countries, 5th Conference, Riga, Latvia, October 21-23, 2020},
	editor       = {Sanita Reinsone and Inguna Skadiņa and Anda Baklāne and Jānis Daugavietis},
	author       = {Dannélls, Dana and Persson, Simon},
	year         = {2020},
	publisher    = {CEUR-WS},
}

@edited_book{desmedt-etal-2013-proceedings-190263,
	title        = {Proceedings of the workshop on Nordic language research infrastructure at NODALIDA 2013, May 22-24, 2013, Oslo, Norway},
	editor       = {De Smedt, Koenraad and Borin, Lars and Lindén, Krister and Maegaard, Bente and Rögnvaldsson, Eiríkur and Vider, Kadri},
	year         = {2013},
	publisher    = {Linköping University Electronic Press},
	address      = {Linköping},
	ISBN         = {978-91-7519-585-8},
}

@inProceedings{derbring-etal-2009-subtts-148340,
	title        = {SubTTS: Light-weight automatic reading of subtitles},
	abstract     = {We present a simple tool that enables the computer to read subtitles of movies and TV shows aloud. The tool works by reading subtitle files, which can be freely downloaded or extracted from a DVD using existing tools, and reading the text aloud through a speech synthesizer.

The target audience is people who have trouble reading subtitles while watching a movie, for example people with visual impairments and people with reading difficulties, such as dyslexia. The application will be evaluated together with users from these groups to see if this could be an accepted solution to their needs.},
	booktitle    = {Proceedings of the 17th Nordic Conference of Computational Linguistics NODALIDA 2009},
	author       = {Derbring, Sandra and Ljunglöf, Peter and Olsson, Maria},
	year         = {2009},
}

@article{dupplaw-etal-2014-information-195563,
	title        = {Information extraction from multimedia web documents: an open-source platform and testbed},
	abstract     = {The LivingKnowledge project aimed to enhance the current state of the art in search, retrieval and knowledge management on the web by advancing the use of sentiment and opinion analysis within multimedia applications. To achieve this aim, a diverse set of novel and complementary analysis techniques have been integrated into a single, but extensible software platform on which such applications can be built. The platform combines state-of-the-art techniques for extracting facts, opinions and sentiment from multimedia documents, and unlike earlier platforms, it exploits both visual and textual techniques to support multimedia information retrieval. Foreseeing the usefulness of this software in the wider community, the platform has been made generally available as an open-source project. This paper describes the platform design, gives an overview of the analysis algorithms integrated into the system and describes two applications that utilise the system for multimedia information retrieval.},
	journal      = {International Journal of Multimedia Information Retrieval},
	author       = {Dupplaw, David and Matthews, Michael and Johansson, Richard and Boato, Giulia and Costanzo, Andrea and Fontani, Marco and Minack, Enrico and Demidova, Elena and Blanco, Roi and Griffiths, Thomas and Lewis, Paul and Hare, Jonathon and Moschitti, Alessandro},
	year         = {2014},
	volume       = {3},
	number       = {2},
	pages        = {97--111},
}

@article{eckhoff-etal-2018-proiel-265108,
	title        = {The PROIEL treebank family: a standard for early attestations of Indo-European languages},
	abstract     = {This article describes a family of dependency treebanks of early attestations of Indo-European languages originating in the parallel treebank built by the members of the project pragmatic resources in old Indo-European languages. The treebanks all share a set of open-source software tools, including a web annotation interface, and a set of annotation schemes and guidelines developed especially for the project languages. The treebanks use an enriched dependency grammar scheme complemented by detailed morphological tags, which have proved sufficient to give detailed descriptions of these richly inflected languages, and which have been easy to adapt to new languages. We describe the tools and annotation schemes and discuss some challenges posed by the various languages that have been annotated. We also discuss problems with tokenisation, sentence division and lemmatisation, commonly encountered in ancient and mediaeval texts, and challenges associated with low levels of standardisation and ongoing morphological and syntactic change.},
	journal      = {Language Resources and Evaluation},
	author       = {Eckhoff, H. and Bech, K. and Bouma, Gerlof and Eide, K. and Haug, D. and Haugen, O. E. and Johndal, M.},
	year         = {2018},
	volume       = {52},
	number       = {1},
	pages        = {29--65},
}

@inProceedings{edstrom-etal-2018-ageism-267250,
	title        = {Ageism and Swedish news media},
	abstract     = {Ageism can be seen as a “social disease”: a casual or systematic prejudice, stereotyping and discrimination against individuals or groups on the basis of their age. This is an area of growing concern, particularly the role of mainstream media in relation to ageism. A valuable and important step is to understand how ageing and older age are present in different types of online news media. The main objective of this pilot work is to test, collate and produce evidence from Swedish news media representations of older ages and ageing. METHODS: Two pilot studies/experiments: first names and the age frequencies of their carriers according to Statistics Sweden (SCB), and their presence in 39 online news sources between 2015 and 2018 (4.7 million texts); and general pattern matching techniques with regular expressions applied to 13 issues (1994, 2001-13) of Göteborgs-Posten (Swedish news corpora). Definition: older persons are those ≥60 years (25% of the population in Sweden is over 60 years). RESULTS AND CONCLUSIONS: There are clear and consistent differences in how various age spans are represented in the news. 20-50 year olds are highly overrepresented compared with the Swedish population, while 0-24 year olds and people over 54 are underrepresented, especially women. Pattern matching exhibits similar characteristics, with the exception of obituaries, where mentions of the elderly are much more frequent. Our pilot studies confirm the introspective view of underrepresentation of old age and older people in synchronic media sources. More studies are required, and in the near future we plan to improve, scale and apply our methodology on both synchronic and diachronic data, using e.g. available text corpora, to get a solid perspective on whether any differences or trends can be revealed within a larger time span.},
	booktitle    = {24th Nordic Congress of Gerontology (NKG). Oslo, Norway: 2-4 May 2018},
	author       = {Edström, Maria and Kokkinakis, Dimitrios and Berggren, Max},
	year         = {2018},
}

@inProceedings{ehrlemark-etal-2016-retrieving-242241,
	title        = {Retrieving Occurrences of Grammatical Constructions},
	abstract     = {Finding authentic examples of grammatical constructions is central in constructionist approaches to linguistics, language processing, and second language learning. In this paper, we address this problem as an information retrieval (IR) task. To facilitate research in this area, we built a benchmark collection by annotating the occurrences of six constructions in a Swedish corpus. Furthermore, we implemented a simple and flexible retrieval system for finding construction occurrences, in which the user specifies a ranking function using lexical-semantic similarities (lexicon-based or distributional). The system was evaluated using standard IR metrics on the new benchmark, and we saw that lexical-semantic rerankers improve significantly over a purely surface-oriented system, but must be carefully tailored for each individual construction.},
	booktitle    = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics : Technical Papers, December 11–17; Osaka, Japan},
	author       = {Ehrlemark, Anna and Johansson, Richard and Lyngfelt, Benjamin},
	year         = {2016},
	ISBN         = {978-4-87974-702-0},
}

@inProceedings{rodveneide-2019-swedish-289474,
	title        = {The Swedish PoliGraph},
	abstract     = {As part of a larger project on argument mining of Swedish parliamentary data, we have created a semantic graph that, together with named entity recognition and resolution (NER), should make it easier to establish connections between arguments in a given debate. The graph is essentially a semantic database that keeps track of Members of Parliament (MPs), in particular their presence in the parliament and activity in debates, but also party affiliation and participation in commissions. The hope is that the Swedish PoliGraph will enable us to perform named entity resolution on debates in the Swedish parliament with high accuracy, with the aim of determining to whom an argument is directed.},
	booktitle    = {Proceedings of the 6th Workshop on Argument Mining, August 1, 2019 Florence, Italy / Benno Stein, Henning Wachsmuth (Editors)},
	author       = {Rødven-Eide, Stian },
	year         = {2019},
	publisher    = {Association for Computational Linguistics},
	address      = {Stroudsburg, PA},
	ISBN         = {978-1-950737-33-8},
}

@inProceedings{rodveneide-etal-2016-swedish-250073,
	title        = {The Swedish Culturomics Gigaword Corpus: A One Billion Word Swedish Reference Dataset for NLP},
	abstract     = {In this paper we present a dataset of contemporary Swedish containing one billion words. The dataset consists of a wide range of sources, all annotated using a state-of-the-art corpus annotation pipeline, and is intended to be a static and clearly versioned dataset. This will facilitate reproducibility of experiments across institutions and make it easier to compare NLP algorithms on contemporary Swedish. The dataset contains sentences from 1950 to 2015 and has been carefully designed to feature a good mix of genres balanced over each included decade. The sources include literary, journalistic, academic and legal texts, as well as blogs and web forum entries.},
	booktitle    = {Linköping Electronic Conference Proceedings. Digital Humanities 2016. From Digitization to Knowledge 2016: Resources and Methods for Semantic Processing of Digital Works/Texts, July 11, 2016, Krakow, Poland},
	author       = {Rødven-Eide, Stian  and Tahmasebi, Nina and Borin, Lars},
	year         = {2016},
	publisher    = {Linköping University Electronic Press},
	address      = {Linköping},
	ISBN         = {978-91-7685-733-5},
}

@inProceedings{eklund-kokkinakis-2012-drug-165309,
	title        = {Drug interests revealed by a public health portal},
	abstract     = {Online health information seeking has become an important part of people's everyday lives. However, studies have shown that many of these users have problems forming effective queries. In order to develop better support and tools for assisting people in health-related query formation, we have to gain a deeper understanding of their information seeking behaviour in relation to key issues, such as medication and drugs. The present study attempts to understand the semantics of the users' information needs with respect to medication-related information. Search log queries from the Swedish 1177.se health portal were automatically annotated and categorized according to relevant background knowledge sources. Understanding the semantics of information needs can enable optimization and tailoring of (official) health-related information presented to the online consumer, provide better terminology support and thematic coding of the queries, and in the long run better models of consumers' information needs.},
	booktitle    = {Proceedings of the SLTC-Workshop: Exploratory Query-log Analysis. Lund, Sweden.},
	author       = {Eklund, Ann-Marie and Kokkinakis, Dimitrios},
	year         = {2012},
	pages        = {2},
}

@misc{ellison-wilhelmsson-2001-implementation-249271,
	title        = {En implementation för domänoberoende textkategorisering},
	author       = {Ellison, Magnus and Wilhelmsson, Kenneth},
	year         = {2001},
	publisher    = {Datavetenskapligt program, Datalingvistikprogrammet},
	address      = {Göteborg},
}

@edited_book{eythorsson-etal-2013-proceedings-190256,
	title        = {Proceedings of the workshop on computational historical linguistics at NODALIDA 2013, May 22-24, 2013, Oslo, Norway},
	editor       = {Eyþórsson, Þórhallur and Borin, Lars and Haug, Dag and Rögnvaldsson, Eiríkur},
	year         = {2013},
	publisher    = {Linköping University Electronic Press},
	address      = {Linköping},
	ISBN         = {978-91-7519-587-2},
}

@inProceedings{forsberg-etal-2015-forensic-222113,
	title        = {A forensic and sociophonetic perspective on a new corpus of young urban Swedish},
	booktitle    = {10th UK Language Variation and Change (UKLVC) conference 1-3/9 2015, York, UK},
	author       = {Forsberg, Julia and Gross, Johan and Lindh, Jonas and Åkesson, Joel},
	year         = {2015},
}

@inProceedings{forsberg-etal-2015-speaker-220340,
	title        = {Speaker comparison evaluation using a new corpus of urban speech},
	booktitle    = {24th Annual Conference of the International Association for Forensic Phonetics and Acoustics, 8-10/7 2015, Leiden},
	author       = {Forsberg, Julia and Gross, Johan and Lindh, Jonas and Åkesson, Joel},
	year         = {2015},
	pages        = {46--47},
}

@article{forsberg-2011-green-140694,
	title        = {Green resources in plain sight: opening up the SweFN++ project},
	abstract     = {SweFN++ is a project focused on the creation and curation of Swedish lexical resources geared towards language technology applications. An important theme of the project is openness and its realization as a lexical infrastructure. We give a short overview of the project, elaborate on what we mean by openness, and present the current state of the lexical infrastructure.},
	journal      = {Proceedings of the Nodalida 2011 Workshop on visibility and availability of LT resources},
	author       = {Forsberg, Markus},
	year         = {2011},
}

@inProceedings{forsberg-hulden-2016-deriving-237061,
	title        = {Deriving Morphological Analyzers from Example Inflections},
	abstract     = {This paper presents a semi-automatic method to derive morphological analyzers from a limited number of example inflections, suitable for languages with alphabetic writing systems. The system we present learns the inflectional behavior of morphological paradigms from examples and converts the learned paradigms into a finite-state transducer that is able to map inflected forms of previously unseen words into lemmas and corresponding morphosyntactic descriptions. We evaluate the system when provided with inflection tables for several languages collected from Wiktionary.},
	booktitle    = {Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC-2016) May 23-28, 2016, Portorož, Slovenia},
	author       = {Forsberg, Markus and Hulden, Mans},
	year         = {2016},
	ISBN         = {978-2-9517408-9-1},
}

@incollection{forsberg-hulden-2016-learning-240208,
	title        = {Learning Transducer Models for Morphological Analysis from Example Inflections},
	abstract     = {In this paper, we present a method to convert morphological inflection tables into unweighted and weighted finite transducers that perform parsing and generation. These transducers model the inflectional behavior of morphological paradigms induced from examples and can map inflected forms of previously unseen word forms into their lemmas and give morphosyntactic descriptions of them. The system is evaluated on several languages with data collected from Wiktionary.},
	booktitle    = {Proceedings of the SIGFSM Workshop on Statistical NLP and Weighted Automata. Association for Computational Linguistics. August 12, 2016 Berlin, Germany},
	author       = {Forsberg, Markus and Hulden, Mans},
	year         = {2016},
	publisher    = {ACL},
	address      = {Stroudsburg, PA, USA},
	ISBN         = {978-1-945626-13-5 },
	pages        = {42--50},
}

@article{forsberg-etal-2014-from-208123,
	title        = {From construction candidates to constructicon entries: An experiment using semi-automatic methods for identifying constructions in corpora},
	abstract     = {We present an experiment where natural language processing tools are used to automatically identify potential constructions in a corpus. The experiment was conducted as part of the ongoing efforts to develop a Swedish constructicon. Using an automatic method to suggest constructions has advantages not only for efficiency but also methodologically: it forces the analyst to look more objectively at the constructions actually occurring in corpora, as opposed to focusing on “interesting” constructions only. As a heuristic for identifying potential constructions, the method has proved successful, yielding about 200 (out of 1,200) highly relevant construction candidates.},
	journal      = {Constructions and Frames},
	author       = {Forsberg, Markus and Johansson, Richard and Bäckström, Linnéa and Borin, Lars and Lyngfelt, Benjamin and Olofsson, Joel and Prentice, Julia},
	year         = {2014},
	volume       = {6},
	number       = {1},
	pages        = {114--135},
}

@inProceedings{forsberg-lager-2012-cloud-156078,
	title        = {Cloud Logic Programming for Integrating Language Technology Resources},
	abstract     = {The main goal of the CLT Cloud project is to equip lexica, morphological processors, parsers and other software components developed within CLT (Centre of Language Technology) with so-called web APIs, thus making them available on the Internet in the form of web services. We present a proof-of-concept implementation of the CLT Cloud server where we use the logic programming language Prolog for composing and aggregating existing web services into new web services in a way that encourages creative exploration and rapid prototyping of LT applications.},
	booktitle    = {Proceedings of LREC 2012},
	author       = {Forsberg, Markus and Lager, Torbjörn},
	year         = {2012},
}

@inProceedings{forsbom-wilhelmsson-2010-revision-259876,
	title        = {Revision of Part-of-Speech Tagging in Stockholm Umeå Corpus 2.0},
	abstract     = {Many parsers use a part-of-speech tagger as a first step in parsing. The accuracy of the tagger naturally affects the performance of the parser. In this experiment, we revise 1500+ proposed errors in SUC 2.0 that were mainly found during work with schema parsing, and evaluate tagger instances trained on the revised corpus. The revisions turned out to be beneficial also for the taggers.},
	booktitle    = {Proceedings of the Third Swedish Language Technology Conference (SLTC), Linköping, Sverige},
	author       = {Forsbom, Eva and Wilhelmsson, Kenneth},
	year         = {2010},
	address      = {Linköping},
}

@inProceedings{francois-etal-2016-svalex-248142,
	title        = {SVALex: a CEFR-graded lexical resource for Swedish foreign and second language learners.},
	abstract     = {The paper introduces SVALex, a lexical resource primarily aimed at learners and teachers of Swedish as a foreign and second language that describes the distribution of 15,681 words and expressions across the Common European Framework of Reference (CEFR). The resource is based on a corpus of coursebook texts, and thus describes receptive vocabulary learners are exposed to during reading activities, as opposed to productive vocabulary they use when speaking or writing. The paper describes the methodology applied to create the list and to estimate the frequency distribution. It also discusses some characteristics of the resulting resource and compares it to other lexical resources for Swedish. An interesting feature of this resource is the possibility to separate the wheat from the chaff, identifying the core vocabulary at each level, i.e. vocabulary shared by several coursebook writers at each level, from peripheral vocabulary which is used by a minority of the coursebook writers.},
	booktitle    = {Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC 2016), May 23-28, 2016   Portorož, Slovenia},
	author       = {François, Thomas  and Volodina, Elena and Pilán, Ildikó and Tack, Anaïs },
	year         = {2016},
	publisher    = {European Language Resources Association},
	address      = {Paris},
	ISBN         = {978-2-9517408-9-1},
}

@inProceedings{fraser-etal-2019-multilingual-280280,
	title        = {Multilingual prediction of Alzheimer’s disease through domain adaptation and concept-based language modelling},
	abstract     = {There is growing evidence that changes in speech and language may be early markers of dementia, but much of the previous NLP work in this area has been limited by the size of the available datasets. Here, we compare several methods of domain adaptation to augment a small French dataset of picture descriptions (n = 57) with a much larger English dataset (n = 550), for the task of automatically distinguishing participants with dementia from controls. The first challenge is to identify a set of features that transfer across languages; in addition to previously used features based on information units, we introduce a new set of features to model the order in which information units are produced by dementia patients and controls. These concept-based language model features improve classification performance in both English and French separately, and the best result (AUC = 0.89) is achieved using the multilingual training set with a combination of information and language model features.},
	booktitle    = {Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers), June 2 - June 7, 2019, Minneapolis, Minnesota /  Jill Burstein, Christy Doran, Thamar Solorio (Editors) },
	author       = {Fraser, Kathleen and Linz, Nicklas and Lundholm Fors, Kristina and Rudzicz, Frank and König, Alexandra and Alexandersson, Jan and Robert, Philippe and Kokkinakis, Dimitrios},
	year         = {2019},
	publisher    = {Association for Computational Linguistics},
	address      = {Stroudsburg, PA },
	ISBN         = {978-1-950737-13-0},
}

@inProceedings{fraser-etal-2018-improving-264397,
	title        = {Improving the Sensitivity and Specificity of MCI Screening with Linguistic Information.},
	abstract     = {The Mini-Mental State Exam (MMSE) is a screening tool for cognitive impairment. It has been extensively validated and is widely used, but has been criticized as not being effective in detecting mild cognitive impairment (MCI). In this study, we examine the utility of augmenting MMSE scores with automatically extracted linguistic information from a narrative speech task to better differentiate between individuals with MCI and healthy controls in a Swedish population. We find that with the addition of just four linguistic features, the F score (measuring a trade-off between sensitivity and specificity) is improved from 0.67 to 0.81 in logistic regression classification. These preliminary results suggest that the accuracy of traditional screening tools may be improved through the addition of computerized language analysis.},
	booktitle    = {Proceedings of the LREC workshop: Resources and ProcessIng of linguistic, para-linguistic and extra-linguistic Data from people with various forms of cognitive/psychiatric impairments (RaPID-2). 8th  of May 2018,  Miyazaki, Japan  / Dimitrios  Kokkinakis  (ed.)},
	author       = {Fraser, Kathleen and Lundholm Fors, Kristina and Eckerström, Marie and Themistocleous, Charalambos and Kokkinakis, Dimitrios},
	year         = {2018},
	ISBN         = {979-10-95546-26-9},
}

@article{fraser-etal-2019-predicting-282807,
	title        = {Predicting MCI Status From Multimodal Language Data Using Cascaded Classifiers},
	abstract     = {Recent work has indicated the potential utility of automated language analysis for the detection of mild cognitive impairment (MCI). Most studies combining language processing and machine learning for the prediction of MCI focus on a single language task; here, we consider a cascaded approach to combine data from multiple language tasks. A cohort of 26 MCI participants and 29 healthy controls completed three language tasks: picture description, reading silently, and reading aloud. Information from each task is captured through different modes (audio, text, eye-tracking, and comprehension questions). Features are extracted from each mode, and used to train a series of cascaded classifiers which output predictions at the level of features, modes, tasks, and finally at the overall session level. The best classification result is achieved through combining the data at the task level (AUC = 0.88, accuracy = 0.83). This outperforms a classifier trained on neuropsychological test scores (AUC = 0.75, accuracy = 0.65) as well as the "early fusion" approach to multimodal classification (AUC = 0.79, accuracy = 0.70). By combining the predictions from the multimodal language classifier and the neuropsychological classifier, this result can be further improved to AUC = 0.90 and accuracy = 0.84. In a correlation analysis, language classifier predictions are found to be moderately correlated (rho = 0.42) with participant scores on the Rey Auditory Verbal Learning Test (RAVLT). The cascaded approach for multimodal classification improves both system performance and interpretability. This modular architecture can be easily generalized to incorporate different types of classifiers as well as other heterogeneous sources of data (imaging, metabolic, etc.).},
	journal      = {Frontiers in Aging Neuroscience},
	author       = {Fraser, Kathleen and Lundholm Fors, Kristina and Eckerström, Marie and Öhman, Fredrik and Kokkinakis, Dimitrios},
	year         = {2019},
	volume       = {11},
	number       = {205},
}

@article{fraser-etal-2019-multilingual-270713,
	title        = {Multilingual word embeddings for the assessment of narrative speech in mild cognitive impairment},
	abstract     = {We analyze the information content of narrative speech samples from individuals with mild cognitive impairment (MCI), in both English and Swedish, using a combination of supervised and unsupervised learning techniques. We extract information units using topic models trained on word embeddings in monolingual and multilingual spaces, and find that the multilingual approach leads to significantly better classification accuracies than training on the target language alone. In many cases, we find that augmenting the topic model training corpus with additional clinical data from a different language is more effective than training on additional monolingual data from healthy controls. Ultimately we are able to distinguish MCI speakers from healthy older adults with accuracies of up to 63% (English) and 72% (Swedish) on the basis of information content alone. We also compare our method against previous results measuring information content in Alzheimer's disease, and report an improvement over other topic-modeling approaches. Furthermore, our results support the hypothesis that subtle differences in language can be detected in narrative speech, even at the very early stages of cognitive decline, when scores on screening tools such as the Mini-Mental State Exam are still in the “normal” range.},
	journal      = {Computer Speech and Language},
	author       = {Fraser, Kathleen and Lundholm Fors, Kristina and Kokkinakis, Dimitrios},
	year         = {2019},
	volume       = {53},
	pages        = {121--139},
}

@inProceedings{fraser-etal-2017-analysis-257840,
	title        = {An analysis of eye-movements during reading for the detection of mild cognitive impairment},
	abstract     = {We present a machine learning analysis of eye-tracking data for the detection of mild cognitive impairment, a decline in cognitive abilities that is associated with an increased risk of developing dementia. We compare two experimental configurations (reading aloud versus reading silently), as well as two methods of combining information from the two trials (concatenation and merging). Additionally, we annotate the words being read with information about their frequency and syntactic category, and use these annotations to generate new features. Ultimately, we are able to distinguish between participants with and without cognitive impairment with up to 86% accuracy.},
	booktitle    = {Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing. September 9-11, 2017 Copenhagen, Denmark / Editors Martha Palmer, Rebecca Hwa, Sebastian Riedel   },
	author       = {Fraser, Kathleen and Lundholm Fors, Kristina and Kokkinakis, Dimitrios and Nordlund, Arto},
	year         = {2017},
	publisher    = {Association for Computational Linguistics },
	ISBN         = {978-1-945626-83-8},
}

@inProceedings{fribergheppin-dannells-2015-polysemy-218276,
	title        = {Polysemy and questions of lumping or splitting in the construction of Swedish FrameNet},
	abstract     = {When working on a lexical resource, such as Swedish FrameNet (SweFN), assumptions based on linguistic theories are made, and methodological directions based upon them are taken. These directions often need to be revised when unforeseen problems arise. One assumption that was made already in the early development stages of SweFN was that each lexical entry from the reference lexicon, SALDO, would evoke only one semantic frame in SweFN. If a lexical entry evoked more than one frame, it entailed more than one sense and therefore required a new entry in the lexicon. As work progressed, this inclination towards splitting, in the perpetual lumpers-and-splitters discussion, proved to be progressively untenable. This paper gives an account of the problems which were encountered and suggests solutions to the polysemy issues that force a discussion on lumping or splitting.},
	booktitle    = {Proceedings of the Workshop on Semantic resources and Semantic Annotation for Natural Language Processing and the Digital Humanities at NODALIDA 2015, Vilnius, 11th May, 2015},
	author       = {Friberg Heppin, Karin and Dannélls, Dana},
	year         = {2015},
	pages        = {12--20},
}

@inProceedings{fribergheppin-toporowskagronostaj-2012-rocky-158473,
	title        = {The Rocky Road towards a Swedish FrameNet – Creating SweFN},
	abstract     = {The Swedish FrameNet project, SweFN, is a lexical resource under development, designed to support both humans and different applications within language technology, such as text generation, text understanding and information extraction. SweFN is constructed in line with the Berkeley FrameNet and the project is aiming to make it a free, full-scale, multi-functional lexical resource covering morphological, syntactic, and semantic descriptions of 50,000 entries. Frames populated by lexical units belonging to the general vocabulary dominate in SweFN, but there are also frames from the medical and the art domain. As Swedish is a language with very productive compounding, special attention is paid to semantic relations within the one-word compounds which populate the frames. This is of relevance for understanding the meaning of the compounds and for capturing the semantic and syntactic alternations which are brought about in the course of compounding. SweFN is a component within a complex of modern and historical lexicon resources named SweFN++, available at <http://spraakbanken.gu.se/eng/swefn>.},
	booktitle    = {Proceedings of the Eighth conference on International Language Resources and Evaluation (LREC-2012); Istanbul, Turkey},
	author       = {Friberg Heppin, Karin and Toporowska Gronostaj, Maria},
	year         = {2012},
	pages        = {256--261},
}

@article{fribergheppin-toporowskagronostaj-2014-exploiting-210058,
	title        = {Exploiting FrameNet for Swedish: Mismatch?},
	abstract     = {This paper presents work on developing Swedish FrameNet (SweFN) as a resource analogous to the original Berkeley-based FrameNet. We describe the theoretical and practical basics of FrameNet, and articulate some multilingual issues that arise in expanding a linguistic resource from one language to another. SweFN uses FrameNet as a starting point in order to save time and effort, and to make it compatible with other FrameNet-based resources. The lexical units are from the pivot lexicon SALDO, making SweFN compatible with other resources of the larger project SweFN++. It is a corpus-based resource, meant to support tasks within natural language processing relying on semantic data.},
	journal      = {Constructions and Frames},
	author       = {Friberg Heppin, Karin and Toporowska Gronostaj, Maria},
	year         = {2014},
	volume       = {6},
	number       = {1},
	pages        = {52--72},
}

@inProceedings{fridlund-etal-2019-trawling-287968,
	title        = {Trawling for Terrorists: A Big Data Analysis of Conceptual Meanings and Contexts in Swedish Newspapers, 1780–1926},
	abstract     = {The conceptual history of terrorism has to a significant extent been studied through canonical texts or historical key figures or organisations. However, through the increasing digitization of text materials, conventional research questions can now be approached from new angles or established results verified on the basis of exhaustive collections of data, rather than limited samples. Specifically, we are interested in evaluating and expanding on prior research claims regarding the meanings and contexts associated with the concepts terrorism and terrorist up until the twentieth century in a Swedish context. The investigation is guided by the following research questions: What historical meanings of the concept of terrorism were expressed in the Swedish newspaper discourse? What social and ideological contexts and violent political practices was the concept primarily associated with before the First World War?},
	booktitle    = {Proceedings of the 5th International Workshop on Computational History (HistoInformatics 2019) co-located with the 23rd International Conference on Theory and Practice of Digital Libraries (TPDL 2019) Oslo, Norway, September 12th, 2019, Melvin Wevers, Mohammed Hasanuzzaman, Gaël Dias, Marten Düring, & Adam Jatowt, eds. },
	author       = {Fridlund, Mats and Olsson, Leif-Jöran and Brodén, Daniel and Borin, Lars},
	year         = {2019},
	publisher    = {CEUR-WS},
	address      = {Aachen},
}

@inProceedings{ghanimifard-johansson-2015-enriching-222749,
	title        = {Enriching Word-sense Embeddings with Translational Context},
	abstract     = {Vector-space models derived from corpora are an effective way to learn a representation of word meaning directly from data, and these models have many uses in practical applications. A number of unsupervised approaches have been proposed to automatically learn representations of word senses directly from corpora, but since these methods use no information but the words themselves, they sometimes miss distinctions that could be possible to make if more information were available.

In this paper, we present a general framework that we call context enrichment that incorporates external information during the training of multi-sense vector-space models. Our approach is agnostic as to which external signal is used to enrich the context, but in this work we consider the use of translations as the source of enrichment. We evaluated the models trained using the translation-enriched context using several similarity benchmarks and a word analogy test set. In all our evaluations, the enriched model outperformed the purely word-based baseline soundly.},
	booktitle    = {Proceedings of Recent Advances in Natural Language Processing, International Conference, Hissar, Bulgaria, 7–9 September 2015},
	editor       = {Galia Angelova and Kalina Bontcheva and Ruslan Mitkov},
	author       = {Ghanimifard, Mehdi and Johansson, Richard},
	year         = {2015},
	pages        = {208--215},
}

@inProceedings{ghosh-etal-2011-shallow-151356,
	title        = {Shallow Discourse Parsing with Conditional Random Fields},
	abstract     = {Parsing discourse is a challenging natural language processing task. In this paper we take a data-driven approach to identify arguments of explicit discourse connectives. In contrast to previous work we do not make any assumptions on the span of arguments and consider parsing as a token-level sequence labeling task. We design the argument segmentation task as a cascade of decisions based on conditional random fields (CRFs). We train the CRFs on lexical, syntactic and semantic features extracted from the Penn Discourse Treebank and evaluate feature combinations on the commonly used test split. We show that the best combination of features includes syntactic and semantic features. The comparative error analysis investigates the performance variability over connective types and argument positions.},
	booktitle    = {Proceedings of 5th International Joint Conference on Natural Language Processing; editors Haifeng Wang and David Yarowsky; Chiang Mai, Thailand; November 8-13, 2011},
	author       = {Ghosh, Sucheta and Johansson, Richard and Riccardi, Giuseppe and Tonelli, Sara},
	year         = {2011},
	pages        = {1071--1079},
}

@inProceedings{ghosh-etal-2012-improving-156399,
	title        = {Improving the Recall of a Discourse Parser by Constraint-based Postprocessing},
	abstract     = {We describe two constraint-based methods that can be used to improve the recall of a shallow discourse parser based on conditional random field chunking. These methods use a set of natural structural constraints as well as others that follow from the annotation guidelines of the Penn Discourse Treebank. We evaluated the resulting systems on the standard test set of the PDTB and achieved a rebalancing of precision and recall with improved F-measures across the board. This was especially notable when we used evaluation metrics taking partial matches into account; for these measures, we achieved F-measure improvements of several points.},
	booktitle    = {Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC'12); Istanbul, Turkey; May 23-25},
	author       = {Ghosh, Sucheta and Johansson, Richard and Riccardi, Giuseppe and Tonelli, Sara},
	year         = {2012},
	ISBN         = {978-2-9517408-7-7},
	pages        = {2791--2794},
}

@inProceedings{ghosh-etal-2012-global-157440,
	title        = {Global Features for Shallow Discourse Parsing},
	abstract     = {A coherently related group of sentences may be referred to as a discourse. In this paper we address the problem of parsing coherence relations as defined in the Penn Discourse Tree Bank (PDTB). A good model for discourse structure analysis needs to account both for local dependencies at the token level and for global dependencies and statistics. We present techniques for using inter-sentential or sentence-level (global), data-driven, non-grammatical features in the task of parsing discourse. The parser model follows up on a previous approach based on using token-level (local) features with conditional random fields for shallow discourse parsing, which is lacking in structural knowledge of discourse. The parser adopts a two-stage approach where first the local constraints are applied and then global constraints are used on a reduced weighted search space (n-best). In the latter stage we experiment with different rerankers trained on the first-stage n-best parses, which are generated using lexico-syntactic local features. The two-stage parser yields significant improvements over the best-performing discourse parser model on the PDTB corpus.},
	booktitle    = {Proceedings of the 13th Annual Meeting of the Special Interest Group on Discourse and Dialogue (SIGDIAL)},
	author       = {Ghosh, Sucheta and Riccardi, Giuseppe and Johansson, Richard},
	year         = {2012},
	pages        = {150--159},
}

@inProceedings{ghosh-etal-2013-mining-188844,
	title        = {Mining Fine-grained Opinion Expressions with Shallow Parsing},
	abstract     = {Opinion analysis deals with public opinions and trends, but subjective language is highly ambiguous. In this paper, we follow a simple data-driven technique to learn fine-grained opinions. We select an intersection set of Wall Street Journal documents that is included both in the Penn Discourse Tree Bank (PDTB) and in the Multi-Perspective Question Answering (MPQA) corpus. This is done in order to explore the usefulness of discourse-level structure to facilitate the extraction of fine-grained opinion expressions. Here we perform shallow parsing of MPQA expressions with connective-based discourse structure, and then also with Named Entities (NE) and some syntax features using conditional random fields; the latter feature set is basically a collection of NEs and a bundle of features that has proved to be useful in a shallow discourse parsing task. We found that both of the feature sets are useful for improving on our baseline at different levels of this fine-grained opinion expression mining task.},
	booktitle    = {Proceedings of the International Conference Recent Advances in Natural Language Processing},
	author       = {Ghosh, Sucheta and Tonelli, Sara and Johansson, Richard},
	year         = {2013},
	pages        = {302--310},
}

@inProceedings{ghosh-etal-2011-discourse-151350,
	title        = {End-to-End Discourse Parser Evaluation},
	abstract     = {We are interested in the problem of discourse parsing of textual documents. We present a novel end-to-end discourse parser that, given a plain text document in input, identifies the discourse relations in the text, assigns them a semantic label and detects discourse argument spans. The parsing architecture is based on a cascade of decisions supported by Conditional Random Fields (CRF). We train and evaluate three different parsers using the PDTB corpus. The three system versions are compared to evaluate their robustness with respect to deep/shallow and automatically extracted syntactic features.},
	booktitle    = {Fifth IEEE International Conference on Semantic Computing (ICSC), 2011; September 18-21, 2011; Palo Alto, United States},
	author       = {Ghosh, Sucheta and Tonelli, Sara and Riccardi, Giuseppe and Johansson, Richard},
	year         = {2011},
	ISBN         = {978-1-4577-1648-5},
}

@inProceedings{graen-etal-2019-modelling-284429,
	title        = {Modelling large parallel corpora: The Zurich Parallel Corpus Collection},
	abstract     = {Text corpora come in many different shapes and sizes and carry heterogeneous annotations, depending on their purpose and design. The true benefit of corpora is rooted in their annotation and the method by which this data is encoded is an important factor in their interoperability. We have accumulated a large collection of multilingual and parallel corpora and encoded it in a unified format which is compatible with a broad range of NLP tools and corpus linguistic applications. In this paper, we present our corpus collection and describe a data model and the extensions to the popular CoNLL-U format that enable us to encode it.},
	booktitle    = {Proceedings of the Workshop on Challenges in the Management of Large Corpora (CMLC-7) 2019. Cardiff, 22nd July 2019 / Piotr Bański, Adrien Barbaresi, Hanno Biber, Evelyn Breiteneder, Simon Clematide, Marc Kupietz, Harald Lüngen, Caroline Iliadi (eds.)},
	author       = {Graën, Johannes and Kew, Tannon and Shaitarova, Anastassia and Volk, Martin},
	year         = {2019},
	publisher    = {Leibniz-Institut für Deutsche Sprache},
	address      = {Mannheim},
}

@inProceedings{grahn-kokkinakis-2014-legitimating-216142,
	title        = {Legitimating the visit - a recurrent challenge among patients with medically unexplained symptoms},
	abstract     = {The doctor's evaluation of presented symptoms as doctorable is a legitimation of the patient's decision to seek medical care. It is also a confirmation of the rational, and even the moral, status of the patient, since consulting a doctor without good reasons is considered irrational.

The analysis focuses on how patients take initiatives to present problems and on the doctors' responses and evaluations regarding doctorability. Situations where participants seem to have different views of the doctorability of the problems are examined in relation to conversational practices and social actions.

The analyses show that the doctor as well as the patient orients to the potential doctorability of the problems and to the moral challenges related to it, but that their different expectations and roles lead to communicatively unclear situations.

Further analyses will illustrate in what ways the MUS patients' recurrent challenge of legitimating their visits could be influenced by the interaction, and hence in what ways conscious conversational practices from the caregivers might facilitate these situations.},
	booktitle    = {Conference on Communication, Medicine and Ethics (COMET), Lugano, 26-28 June 2014},
	author       = {Grahn, Inga-Lill and Kokkinakis, Dimitrios},
	year         = {2014},
}

@article{gruzitis-dannells-2017-multilingual-225789,
	title        = {A multilingual FrameNet-based grammar and lexicon for controlled natural language},
	abstract     = {Berkeley FrameNet is a lexico-semantic resource for English based on the theory of frame semantics. It has been exploited in a range of natural language processing applications and has inspired the development of framenets for many languages. We present a methodological approach to the extraction and generation of a computational multilingual FrameNet-based grammar and lexicon. The approach leverages FrameNet-annotated corpora to automatically extract a set of cross-lingual semantico-syntactic valence patterns. Based on data from Berkeley FrameNet and Swedish FrameNet, the proposed approach has been implemented in Grammatical Framework (GF), a categorial grammar formalism specialized for multilingual grammars. The implementation of the grammar and lexicon is supported by the design of FrameNet, providing a frame semantic abstraction layer, an interlingual semantic application programming interface (API), over the interlingual syntactic API already provided by GF Resource Grammar Library. The evaluation of the acquired grammar and lexicon shows the feasibility of the approach. Additionally, we illustrate how the FrameNet-based grammar and lexicon are exploited in two distinct multilingual controlled natural language applications. The produced resources are available under an open source license.},
	journal      = {Language resources and evaluation},
	author       = {Gruzitis, Normunds and Dannélls, Dana},
	year         = {2017},
	volume       = {51},
	number       = {1},
	pages        = {37–66},
}

@inProceedings{gruzitis-etal-2015-formalising-220419,
	title        = {Formalising the Swedish Constructicon in Grammatical Framework},
	abstract     = {This paper presents a semi-automatic approach to acquire a computational construction grammar from the semi-formal Swedish Constructicon. The implementation is based on the resource grammar library provided by Grammatical Framework and can be seen as an extension to the existing Swedish resource grammar. An important consequence of this work is that it generates feedback, explicit and implicit, on how to improve the annotation consistency and adequacy of the original construction resource.},
	booktitle    = {Proceedings of the Grammar Engineering Across Frameworks (GEAF) Workshop, 53rd Annual Meeting of the ACL and 7th IJCNLP, Beijing, China, July 26-31, 2015},
	author       = {Gruzitis, Normunds and Dannélls, Dana and Lyngfelt, Benjamin and Ranta, Aarne},
	year         = {2015},
	ISBN         = {978-1-932432-66-4},
	pages        = {49--56},
}

@inProceedings{gruzitis-etal-2016-grammatical-233921,
	title        = {Grammatical Framework for implementing multilingual frames and constructions},
	booktitle    = {Book of Abstracts. The 9th International Conference on Construction Grammar (ICCG9) theme session on Computational Semantics with Frames and Constructions. October 05-­09, 2016, Juiz de Fora, Brazil },
	author       = {Gruzitis, Normunds and Dannélls, Dana and Ranta, Aarne and Tyers, Francis  M.},
	year         = {2016},
}

@inProceedings{gustavsson-etal-2013-neural-177670,
	title        = {Neural processing of voices - Familiarity},
	abstract     = {Brain responses to familiar and unfamiliar voices were investigated with ERPs (Event Related Potentials). Presentation of a stream of one-syllable utterances from a female voice established a standard expectation, and similar samples from four other male voices were inserted as unexpected deviants in a typical mismatch paradigm. The participants were 12 students from the basic course in linguistics. Two of the deviant voices were familiar voices of their teachers. The two other deviant voices were matched (same age, sex and dialect) but unfamiliar to the participants. A typical MMN (Mismatch Negativity) was elicited, i.e. a more negative response to the deviants compared to the standards. In contrast to verbal reports, where only one participant identified any of the deviant voices, the MMN response differed at group level between familiar and unfamiliar voices: the MMN to familiar voices was larger. Using teachers' voices ensured naturalistic long-term exposure, but did not allow for random assignment to conditions of familiarity, making the design quasi-experimental. Thus acoustic analysis of voice characteristics as well as follow-up studies with randomized exposure to voices are needed to rule out possible confounds and establish a causal effect of voice familiarity.},
	booktitle    = {Proceedings of 21st International Congress on Acoustics},
	author       = {Gustavsson, Lisa and Kallioinen, Petter and Klintfors, Eeva and Lindh, Jonas},
	year         = {2013},
	volume       = {19},
	number       = {I},
	pages        = {060204--6},
}

@inProceedings{gustavsson-etal-2012-neural-162455,
	title        = {Neural processing of familiar and unfamiliar voices},
	booktitle    = {Proceedings of IAFPA2012},
	author       = {Gustavsson, Lisa and Lindh, Jonas and Kallioinen, Petter and Markelius, Marie and Ericsson, Anna and Moniri, Sadegheh Farah and Klintfors, Eeva},
	year         = {2012},
	volume       = {21},
}

@inProceedings{gunther-etal-2014-rtrgo-201512,
	title        = {RTRGO: Enhancing the GU-MLT-LT System for Sentiment Analysis of Short Messages},
	abstract     = {This paper describes the enhancements made to our GU-MLT-LT system (Günther and Furrer, 2013) for the SemEval-2014 re-run of the SemEval-2013 shared task on sentiment analysis in Twitter. The changes include the usage of a Twitter-specific tokenizer, additional features and sentiment lexica, feature weighting and random subspace learning. The improvements result in an increase of 4.18 F-measure points on this year’s Twitter test set, ranking 3rd.
},
	booktitle    = {Proceedings of the 8th International Workshop on Semantic Evaluation (SemEval 2014) August 23-24, 2014 Dublin, Ireland},
	author       = {Günther, Tobias and Vancoppenolle, Jean and Johansson, Richard},
	year         = {2014},
	ISBN         = {978-1-941643-24-2},
	pages        = {497--502},
}

@techreport{hammarstedt-etal-2017-korp-256056,
	title        = {Korp 6 - Användarmanual},
	author       = {Hammarstedt, Martin and Borin, Lars and Forsberg, Markus and Roxendal, Johan and Schumacher, Anne and Öhrman, Maria},
	year         = {2017},
	publisher    = {Institutionen för svenska språket, Göteborgs universitet},
}

@techreport{hammarstedt-etal-2017-korp-256055,
	title        = {Korp 6 - Technical Report},
	author       = {Hammarstedt, Martin and Roxendal, Johan and Öhrman, Maria and Borin, Lars and Forsberg, Markus and Schumacher, Anne},
	year         = {2017},
	publisher    = {Institutionen för svenska språket, Göteborgs universitet},
}

@article{hammarstrom-borin-2011-unsupervised-141707,
	title        = {Unsupervised learning of morphology},
	journal      = {Computational Linguistics},
	author       = {Hammarström, Harald and Borin, Lars},
	year         = {2011},
	volume       = {37},
	number       = {2},
	pages        = {309--350},
}

@inProceedings{hammarstrom-etal-2017-poor-261851,
	title        = {Poor man's OCR post-correction: Unsupervised recognition of variant spelling applied to a multilingual document collection},
	abstract     = {The accuracy of Optical Character Recognition (OCR) sets the limit for the success of subsequent applications in a text-analysis pipeline. Recent models of OCR post-processing significantly improve the quality of OCR-generated text but require engineering work or resources such as human-labeled data or a dictionary to perform with such accuracy on novel datasets. In the present paper we introduce a technique for OCR post-processing that runs off-the-shelf with no resources or parameter tuning required. In essence, words which are similar in form and also distributionally more similar than expected at random are deemed OCR variants. As such it can be applied to any language or genre (as long as the orthography segments the language at the word level). The algorithm is illustrated and evaluated using a multilingual document collection and a benchmark English dataset.},
	booktitle    = {DATeCH2017, Proceedings of the 2nd International Conference on Digital Access to Textual Cultural Heritage, Göttingen, Germany — June 01 - 02, 2017 },
	author       = {Hammarström, Harald and Virk, Shafqat and Forsberg, Markus},
	year         = {2017},
	publisher    = {Association for Computing Machinery (ACM)},
	address      = {New York},
	ISBN         = {978-1-4503-5265-9},
}
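
The unsupervised criterion sketched in the abstract above, treating two words as OCR variants when they are similar both in form and in distribution, can be illustrated with a toy example. The window-based context vectors, the fixed thresholds and the miniature corpus below are illustrative assumptions, not the paper's actual statistical test:

from collections import Counter
from difflib import SequenceMatcher
import math

def context_vector(target, tokens, window=2):
    """Counts of words co-occurring with `target` within `window` tokens."""
    vec = Counter()
    for i, tok in enumerate(tokens):
        if tok == target:
            lo, hi = max(0, i - window), min(len(tokens), i + window + 1)
            for j in range(lo, hi):
                if j != i:
                    vec[tokens[j]] += 1
    return vec

def cosine(u, v):
    num = sum(u[w] * v[w] for w in set(u) & set(v))
    den = math.sqrt(sum(c * c for c in u.values())) * \
          math.sqrt(sum(c * c for c in v.values()))
    return num / den if den else 0.0

def likely_ocr_variants(w1, w2, tokens, form_t=0.7, dist_t=0.5):
    form_sim = SequenceMatcher(None, w1, w2).ratio()      # form similarity
    dist_sim = cosine(context_vector(w1, tokens),         # distributional
                      context_vector(w2, tokens))         # similarity
    return form_sim >= form_t and dist_sim >= dist_t

corpus = "the old ship sailed north the old sbip sailed north again".split()
print(likely_ocr_variants("ship", "sbip", corpus))  # True on this toy corpus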

@inProceedings{hamon-etal-2013-medication-189545,
	title        = {Medication Extraction and Guessing in Swedish, French and English. },
	abstract     = {Extraction of information related to medication is an important task within the biomedical area. Since the elaboration and updating of drug vocabularies cannot keep up with the rapid evolution of drug development, we propose an automatic method for the extraction of known and new drug names. Our method combines internal and contextual clues, and is applied to different types of documents in three languages (Swedish, French and English). The results indicate that with this kind of approach we can efficiently update and enrich existing drug vocabularies (probably with rapid manual browsing). Precision varied between 81% and 91%, and recall between 85% and 100%. As future work we intend to continuously refine the approach, for instance through better integration of semantic patterns and fuzzy matching, which should hopefully improve the results further.},
	booktitle    = {Proceedings of the 14th World Congress on Medical and Health Informatics (MEDINFO). Studies in Health Technology and Informatics. Copenhagen, Denmark.},
	author       = {Hamon, Thierry and Grabar, Natalia and Kokkinakis, Dimitrios},
	year         = {2013},
	volume       = {192},
}

@incollection{haugen-borin-2018-danish-267403,
	title        = {Danish, Norwegian and Swedish},
	booktitle    = {The world's major languages},
	editor       = {Bernard Comrie},
	author       = {Haugen, Einar and Borin, Lars},
	year         = {2018},
	publisher    = {Routledge},
	address      = {London and New York},
	ISBN         = {9781138184824},
	pages        = {127--150},
}

@inProceedings{hoang-etal-2019-aspect-284269,
	title        = {Aspect-Based Sentiment Analysis using BERT},
	booktitle    = {Proceedings of the 22nd Nordic Conference on Computational Linguistics, 30 September–2 October, 2019, Turku, Finland / Mareike Hartmann, Barbara Plank (Editors)},
	author       = {Hoang, M. and Bihorac, O. A. and Rouces, Jacobo},
	year         = {2019},
	publisher    = {Linköping University Electronic Press},
	address      = {Sweden},
	ISBN         = {978-91-7929-995-8},
}

@article{holzmann-etal-2015-named-209780,
	title        = {Named entity evolution recognition on the Blogosphere},
	abstract     = {Advancements in technology and culture lead to changes in our language. These changes create a gap between the language known by users and the language stored in digital archives. It affects users’ ability, first, to find content and, second, to interpret that content. In previous work, we introduced our approach for named entity evolution recognition (NEER) in newspaper collections. Lately, increasing efforts in Web preservation have led to increased availability of Web archives covering longer time spans. However, language on the Web is more dynamic than in traditional media and many of the basic assumptions from the newspaper domain do not hold for Web data. In this paper we discuss the limitations of existing methodology for NEER. We approach these by adapting an existing NEER method to work on noisy data like the Web and the Blogosphere in particular. We develop novel filters that reduce the noise and make use of Semantic Web resources to obtain more information about terms. Our evaluation shows the potential of the proposed approach.},
	journal      = {International Journal on Digital Libraries},
	author       = {Holzmann, Helge and Tahmasebi, Nina and Risse, Thomas},
	year         = {2015},
	volume       = {15},
	number       = {2-4},
	pages        = {209--235},
}

@inProceedings{hu-lindh-2010-perceptual-125330,
	title        = {PERCEPTUAL MISTAKES OF CHINESE TONES IN 2-SYLLABLE WORDS BY SWEDISH LISTENERS},
	abstract     = {Earlier studies on the perception of Chinese tones have almost exclusively used 1-syllable words for the listening tests (Kiriloff, 1969; Chuang, 1971; Klatt, 1973; Gandour, 1978). In these earlier studies the misperception between tone 2 and tone 3 has been shown to be the most common. However, no studies that we have found have looked at the perception of 2-syllable words besides Chuang (1971), who only used nonsense words.

By tradition, the teaching of Chinese as a foreign language has concentrated on training the perception and production of tones, since adult students have been shown to have particular difficulties in perceiving their differences. Experienced teachers have established through tests that this assumption is not valid when it comes to the so-called static tone. To communicate in Chinese and to be able to use the separate tones, it is not enough to know the difference in 1-syllable words, especially since most modern words in standard Chinese contain 2 or more. Guo (1993) has shown that the more syllables a word contains, the higher the ratio of misperceived tones.

So far, no investigations of Swedish students have been performed. A possible hypothesis could be that Swedish listeners would perform better due to the Swedish grave and acute accents. By asking experienced teachers in Sweden, however, we knew that this should not be the case. The general impression among teachers is also that Swedish students have the largest proportion of misperceptions between tone 2 and 3. To test this we conducted a listening test on 27 native speakers of Swedish (9 bilingual Chinese speakers with native ability in Swedish) on 25 Chinese 2-syllable lexical words with 15 different tone combinations. One male and one female native speaker of Chinese pronounced the words in isolation. The words were taken from a random selection of 2-syllable vocabulary. Each speaker repeated each word once, with a 1-second pause between the repetitions and a 2-second pause before the next word. The audio was presented in high-quality headphones in the student language lab at the University of Gothenburg. The participants were all second-semester students of Chinese, and the listening test was also an exam, which made the participants want to perform as well as possible. If they wanted, they could repeat the sequence as many times as they liked until satisfied with their answer.

The results show that produced tone 1 and tone 2 are confused more than 3 and 4 (tone 4 more than 3, see figure 1). However, the misperceptions seem to be rather equally distributed if we exclude the static tone (below called 0), in contradiction to earlier studies claiming misperception mostly between tone 2 and 3. We also notice that certain types of syllables containing different vowels are misperceived differently. The next step is to figure out whether certain syllable nuclei are more misperceived than others, and in certain positions. These conclusions can in the future lead to new approaches to teaching students the production and perception of tones.},
	booktitle    = {Proceedings of the Fourth European Conference on Tone and Intonation (TIE4)},
	author       = {Hu, Guohua and Lindh, Jonas},
	year         = {2010},
}

@inProceedings{hu-lindh-2014-effects-203082,
	title        = {Effects of initial sounds on the perception of Chinese disyllable tones by Swedish students of Chinese},
	abstract     = {This paper extends previous research on the effects of initial sounds on the perception of Chinese disyllable tones. A perception test was performed on Swedish adult students of Chinese using disyllable words (most previous studies have used solely monosyllable words). The main results indicate that voiced initial sounds, e.g. [l], have a strong connection to the tone confusion pattern of Tone 2 perceived as Tone 3. On the contrary, a voiceless aspirated initial sound, e.g. [th], is mostly connected to misidentifications of Tone 3 as Tone 2. Unvoiced unaspirated initial sounds affect tone perception heavily, especially when they occur in the second syllable of a disyllabic word.},
	booktitle    = {2014  International  Conference  on  Phonetic Research and Language Learning (ICPRLL) & English Phonetic Conference in China (EPCC)},
	author       = {Hu, Guohua and Lindh, Jonas},
	year         = {2014},
}

@inProceedings{hughes-etal-2012-operavox-201897,
	title        = {operAVoX - On PErson RApid VOice eXaminer},
	abstract     = {At present, objective analysis of voice quality using acoustic parameters is only possible within a voice laboratory using specialist hardware and software. We have developed an easy-to-use portable voice analysis and feedback application running on the Apple iPhone, iPad, or iPod Touch. OperaVOX™ combines the signal processing power, easy connectivity, user-friendly interface, high-quality microphones and portability of these handheld devices with novel acoustic voice analysis algorithms to provide a powerful voice quality measurement tool that you can carry in your pocket. OperaVOX™ is designed for anyone who is interested in measuring the quality of their voice, such as a patient recovering from a stroke, a professional voice user such as a singer, or an aspiring actor. Built into OperaVOX™ are the validated Voice Handicap Index questionnaires and the ability for the user to record their voice for acoustic and perceptual analysis, both on board the device and externally in the voice laboratory. Furthermore, the user can instruct OperaVOX™ to automatically and confidentially send these data via email to their speech therapist, voice coach or research team. OperaVOX™ makes it easy for everyone to accurately measure changes in the quality of their voice every hour, day, or week, without having to travel to the hospital. Two versions of OperaVOX™ will soon be available on the Apple App Store, one for the general public and another for professionals such as speech and language therapists. We have also worked with world-leading university research teams in both the UK and North America to develop bespoke versions of OperaVOX™ specifically tailored to their research and clinical requirements.},
	booktitle    = {5th national Conference in Logopedics},
	author       = {Hughes, Owain Rhys and Alexander, Anil and Forth, Oscar and Lindh, Jonas},
	year         = {2012},
	number       = {5},
}

@techreport{hoglund-etal-2012-maskininlarningsbaserad-159347,
	title        = {Maskininlärningsbaserad indexering av digitaliserade museiartefakter - projektrapport},
	abstract     = {The project has carried out experiments with machine-based analysis and machine learning for automatic indexing and analysis of images, as support for the registration of objects in museum collections. The results show that this is possible for delimited subsets, with machine learning as a support for, but not a replacement of, manual analysis. The project has also identified the need for a user interface for both text and image search, and has developed a prototype solution for this, documented in this report and in a separate appendix to the report. The material forms a basis for implementations offering extended search capabilities, more efficient registration and a user-friendly interface. The work is at the forefront of the research field’s results and established methods, and combines statistical, linguistic and computer-science methods.},
	author       = {Höglund, Lars and Eklund, Johan and Wilhelmsson, Kenneth},
	year         = {2012},
	publisher    = {University of Gothenburg},
	address      = {Göteborg},
}

@inProceedings{jatowt-etal-2018-every-272054,
	title        = {Every Word Has Its History: Interactive Exploration and Visualization of Word Sense Evolution},
	booktitle    = {CIKM '18 Proceedings of the 27th ACM International Conference on Information and Knowledge Management, October 22 - 26, 2018, Torino, Italy},
	author       = {Jatowt, Adam and Campos, Ricardo and Bhowmick, Sourav S. and Tahmasebi, Nina and Doucet, Antoine},
	year         = {2018},
	publisher    = {ACM},
	address      = {New York, NY, USA},
	ISBN         = {978-1-4503-6014-2},
}

@article{johanssonkokkinakis-kokkinakis-1999-beskrivning-55910,
	title        = {Beskrivning av några problem vid automatisk analys av text},
	journal      = {Från dataskärm och forskarpärm, "Språkliga studier tillägnade Birgitta Ernby", MISS, Göteborgs universitet},
	author       = {Johansson Kokkinakis, Sofie and Kokkinakis, Dimitrios},
	year         = {1999},
	volume       = {No 25},
	pages        = {88--95},
}

@inProceedings{johansson-2012-bridging-163602,
	title        = {Bridging the Gap between Two Different Swedish Treebanks},
	abstract     = {We present two simple adaptation methods to train a dependency parser in the situation when there are multiple treebanks available, and these treebanks are annotated according to different linguistic conventions. To test the methods, we train parsers on the Talbanken and Syntag treebanks of Swedish. The results show that the methods are effective for low-to-medium training set sizes.},
	booktitle    = {Proceedings of the Fourth Swedish Language Technology Conference (SLTC)},
	author       = {Johansson, Richard},
	year         = {2012},
	volume       = {Accepted},
}

@inProceedings{johansson-2012-atomic-156993,
	title        = {Non-atomic Classification to Improve a Semantic Role Labeler for a Low-resource Language},
	abstract     = {Semantic role classification accuracy for most languages other than English is constrained by the small amount of annotated data. In this paper, we demonstrate how the frame-to-frame relations described in the FrameNet ontology can be used to improve the performance of a FrameNet-based semantic role classifier for Swedish, a low-resource language. In order to make use of the FrameNet relations, we cast the semantic role classification task as a non-atomic label prediction task.

The experiments show that the cross-frame generalization methods lead to a 27% reduction in the number of errors made by the classifier. For previously unseen frames, the reduction is even more significant: 50%.},
	booktitle    = {Proceedings of the First Joint Conference on Lexical and Computational Semantics (*SEM); June 7-8; Montréal, Canada},
	author       = {Johansson, Richard},
	year         = {2012},
	publisher    = {Association for Computational Linguistics},
	address      = {Montréal, Canada},
}

@inProceedings{johansson-2013-training-173587,
	title        = {Training Parsers on Incompatible Treebanks},
	abstract     = {We consider the problem of training a statistical parser in the situation when there are multiple treebanks available, and these treebanks are annotated according to different linguistic conventions.

To address this problem, we present two simple adaptation methods: the first method is based on the idea of using a shared feature representation when parsing multiple treebanks, and the second method on guided parsing where the output of one parser provides features for a second one.

To evaluate and analyze the adaptation methods, we train parsers on treebank pairs in four languages: German, Swedish, Italian, and English. We see significant improvements for all eight treebanks when training on the full training sets. However, the clearest benefits are seen when we consider smaller training sets. Our experiments were carried out with unlabeled dependency parsers, but the methods can easily be generalized to other feature-based parsers.},
	booktitle    = {Proceedings of the 2013 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies},
	author       = {Johansson, Richard},
	year         = {2013},
	pages        = {127--137},
}

@article{johansson-2014-automatic-201874,
	title        = {Automatic Expansion of the Swedish FrameNet Lexicon},
	abstract     = {We evaluate several lexicon-based and corpus-based methods to automatically induce new lexical units for the Swedish FrameNet, and we see that the best-performing setup uses a combination of both types of methods. A particular challenge for Swedish is the absence of a lexical resource such as WordNet; however, we show that the semantic network SALDO, which is organized according to lexicographical principles quite different from those of WordNet, is very useful for our purposes.},
	journal      = {Constructions and Frames},
	author       = {Johansson, Richard},
	year         = {2014},
	volume       = {6},
	number       = {1},
	pages        = {92--113},
}

@inProceedings{johansson-etal-2016-multi-233140,
	title        = {A Multi-domain Corpus of Swedish Word Sense Annotation},
	abstract     = {We describe the word sense annotation layer in Eukalyptus, a freely available five-domain corpus of contemporary Swedish with several annotation layers. The annotation uses the SALDO lexicon to define the sense inventory, and allows word sense annotation of compound segments and multiword units. We give an overview of the new annotation tool developed for this project, and finally present an analysis of the inter-annotator agreement between two annotators.
},
	booktitle    = {10th edition of the Language Resources and Evaluation Conference, 23-28 May 2016, Portorož (Slovenia)},
	author       = {Johansson, Richard and Adesam, Yvonne and Bouma, Gerlof and Hedberg, Karin},
	year         = {2016},
	publisher    = {European Language Resources Association},
	ISBN         = {978-2-9517408-9-1},
}

@inProceedings{johansson-etal-2012-semantic-156400,
	title        = {Semantic Role Labeling with the Swedish FrameNet},
	abstract     = {We present the first results on semantic role labeling using the Swedish FrameNet, which is a lexical resource currently in development. Several aspects of the task are investigated, including the selection of machine learning features, the effect of choice of syntactic parser, and the ability of the system to generalize to new frames and new genres. In addition, we evaluate two methods to make the role label classifier more robust: cross-frame generalization and cluster-based features. Although the small amount of training data limits the performance achievable at the moment, we reach promising results. In particular, the classifier that extracts the boundaries of arguments works well for new frames, which suggests that it already at this stage can be useful in a semi-automatic setting.},
	booktitle    = {Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC'12); Istanbul, Turkey; May 23-25},
	author       = {Johansson, Richard and Friberg Heppin, Karin and Kokkinakis, Dimitrios},
	year         = {2012},
	ISBN         = {978-2-9517408-7-7},
	pages        = {3697--3700},
}

@article{johansson-moschitti-2013-relational-158811,
	title        = {Relational Features in Fine-grained Opinion Analysis},
	abstract     = {Fine-grained opinion analysis often makes use of linguistic features but typically does not take the interaction between opinions into account. This article describes a set of experiments that demonstrate that relational features, mainly derived from dependency-syntactic and semantic role structures, can significantly improve the performance of automatic systems for a number of fine-grained opinion analysis tasks: marking up opinion expressions, finding opinion holders, and determining the polarities of opinion expressions. These features make it possible to model the way opinions expressed in natural-language discourse interact in a sentence over arbitrary distances. The use of relations requires us to consider multiple opinions simultaneously, which makes exact inference intractable. However, a reranker can be used as a sufficiently accurate and efficient approximation.

A number of feature sets and machine learning approaches for the rerankers are evaluated. For the task of opinion expression extraction, the best model shows a 10-point absolute improvement in soft recall on the MPQA corpus over a conventional sequence labeler based on local contextual features, while precision decreases only slightly. Significant improvements are also seen for the extended tasks where holders and polarities are considered: 10 and 7 points in recall, respectively. In addition, the systems outperform previously published results for unlabeled (6 F-measure points) and polarity-labeled (10–15 points) opinion expression extraction. Finally, as an extrinsic evaluation, the extracted MPQA-style opinion expressions are used in practical opinion mining tasks. In all scenarios considered, the machine learning features derived from the opinion expressions lead to statistically significant improvement.},
	journal      = {Computational Linguistics},
	author       = {Johansson, Richard and Moschitti, Alessandro},
	year         = {2013},
	volume       = {39},
	number       = {3},
	pages        = {473--509},
}

@inProceedings{johansson-nietopina-2015-embedding-217863,
	title        = {Embedding a Semantic Network in a Word Space},
	abstract     = {We present a framework for using continuous-space vector representations of word meaning to derive new vectors representing the meaning of senses listed in a semantic network. It is a post-processing approach that can be applied to several types of word vector representations. It uses two ideas: first, that vectors for polysemous words can be decomposed into a convex combination of sense vectors; secondly, that the vector for a sense is kept similar to those of its neighbors in the network. This leads to a constrained optimization problem, and we present an approximation for the case when the distance function is the squared Euclidean.

We applied this algorithm on a Swedish semantic network, and we evaluate the quality of the resulting sense representations extrinsically by showing that they give large improvements when used in a classifier that creates lexical units for FrameNet frames.},
	booktitle    = {Proceedings of the 2015 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies. Denver, United States, May 31 – June 5, 2015},
	author       = {Johansson, Richard and Nieto Piña, Luis},
	year         = {2015},
	ISBN         = {978-1-941643-49-5},
	pages        = {1428--1433},
}
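
The two ideas in the abstract above, a polysemous word's vector as a convex combination of sense vectors and each sense vector kept close to its network neighbours, can be sketched as a toy alternating scheme. This is not the paper's optimizer; the dimensionality, the mixture weights and the neighbour centroids below are invented for illustration:

import numpy as np

rng = np.random.default_rng(0)
dim = 5
word_vec = rng.normal(size=dim)          # vector of a polysemous word
centroids = [rng.normal(size=dim),       # mean vector of each sense's
             rng.normal(size=dim)]       # neighbours in the network
mix = np.array([0.6, 0.4])               # convex mixture weights

senses = [c.copy() for c in centroids]
for _ in range(20):
    # Pull each sense vector toward its neighbour centroid ...
    senses = [0.5 * (s + c) for s, c in zip(senses, centroids)]
    # ... then redistribute the residual so the convex combination
    # of sense vectors reconstructs the word vector exactly.
    residual = word_vec - sum(m * s for m, s in zip(mix, senses))
    scale = residual / float(np.sum(mix ** 2))
    senses = [s + m * scale for m, s in zip(mix, senses)]

recon = sum(m * s for m, s in zip(mix, senses))
print(np.allclose(recon, word_vec))  # True: the combination is preserved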

@inProceedings{johansson-nietopina-2015-combining-216865,
	title        = {Combining Relational and Distributional Knowledge for Word Sense Disambiguation},
	abstract     = {We present a new approach to word sense disambiguation derived from recent ideas in distributional semantics. The input to the algorithm is a large unlabeled corpus and a graph describing how senses are related; no sense-annotated corpus is needed. The fundamental idea is to embed meaning representations of senses in the same continuous-valued vector space as the representations of words. In this way, the knowledge encoded in the lexical resource is combined with the information derived by the distributional methods. Once this step has been carried out, the sense representations can be plugged back into e.g. the skip-gram model, which allows us to compute scores for the different possible senses of a word in a given context.

We evaluated the new word sense disambiguation system on two Swedish test sets annotated with senses defined by the SALDO lexical resource. In both evaluations, our system soundly outperformed random and first-sense baselines. Its accuracy was slightly above that of a well-known graph-based system, while being computationally much more efficient.},
	booktitle    = {Proceedings of the 20th Nordic Conference of Computational Linguistics, May 12-13, Vilnius, Lithuania. Linköping Electronic Conference Proceedings 109, Linköping University Electronic Press..},
	author       = {Johansson, Richard and Nieto Piña, Luis},
	year         = {2015},
	ISBN         = {978-91-7519-098-3},
	pages        = {69--78},
}
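
The scoring step described at the end of the abstract above becomes simple once senses and words share one vector space. The sketch below scores each sense by cosine similarity to the centroid of the context word vectors, which is a simplification of the skip-gram scoring the system actually uses; the two-dimensional embeddings are invented:

import numpy as np

def cos(u, v):
    return float(u @ v / (np.linalg.norm(u) * np.linalg.norm(v)))

# Hypothetical 2-d embeddings, chosen only for readability.
word_vecs = {"river": np.array([0.9, 0.1]),
             "water": np.array([0.8, 0.3]),
             "loan":  np.array([0.1, 0.9])}
sense_vecs = {"bank/shore":   np.array([1.0, 0.0]),
              "bank/finance": np.array([0.0, 1.0])}

def disambiguate(context_words):
    """Pick the sense whose vector best matches the context centroid."""
    centroid = np.mean([word_vecs[w] for w in context_words], axis=0)
    return max(sense_vecs, key=lambda s: cos(sense_vecs[s], centroid))

print(disambiguate(["river", "water"]))  # bank/shore
print(disambiguate(["loan"]))            # bank/finance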

@inProceedings{johansson-etal-2019-lexical-284330,
	title        = {Lexical diversity and mild cognitive impairment},
	abstract     = {This paper explores the role that various lexical-based measures play for differentiating between individuals with mild forms of cognitive impairment (MCI) and healthy controls (HC). Recent research underscores the importance of language and linguistic analysis as essential components that can contribute to a variety of sensitive cognitive measures for the identification of milder forms of cognitive impairment. Subtle language changes serve as a sign that an individual’s cognitive functions have been impacted, potentially leading to early diagnosis. Our research aims to identify linguistic biomarkers that could distinguish between individuals with MCI and HC and also be useful in predicting MCI.},
	booktitle    = {Proceedings of the 10th International Conference of Experimental Linguistics, 25-27 September 2019, Lisbon, Portugal},
	editor       = {Antonis Botinis},
	author       = {Johansson, Sofie and Lundholm Fors, Kristina and Antonsson, Malin and Kokkinakis, Dimitrios},
	year         = {2019},
	publisher    = {ExLing Society},
	address      = {Athens, Greece},
	ISBN         = {978-618-84585-0-5},
}

@inProceedings{ju-etal-2011-towards-151361,
	title        = {Towards Using Reranking in Hierarchical Classification},
	abstract     = {We consider the use of reranking as a way to relax typical independence assumptions often made in hierarchical multilabel classification. Our reranker is based on (i) an algorithm that generates promising k-best classification hypotheses from the output of local binary classifiers that classify nodes of a target tree-shaped hierarchy; and (ii) a tree kernel-based reranker applied to the classification tree associated with the hypotheses above. We carried out a number of experiments with this model on the Reuters corpus: we first show the potential of our algorithm by computing the oracle classification accuracy. This demonstrates that there is significant room for potential improvement of the hierarchical classifier. Then, we measured the accuracy achieved by the reranker, which shows a significant performance improvement over the baseline.},
	booktitle    = {Proceedings of the Joint ECML/PKDD-PASCAL Workshop on Large-Scale Hierarchical Classification; September 5, 2011; Athens, Greece},
	author       = {Ju, Qi and Johansson, Richard and Moschitti, Alessandro},
	year         = {2011},
}
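
Step (i) of the model in the abstract above, generating promising k-best hypotheses from local binary classifiers while respecting the tree-shaped hierarchy, can be illustrated by brute force on a toy tree. The hierarchy and node probabilities below are invented, exhaustive enumeration stands in for the paper's generation algorithm, and the tree-kernel reranker of step (ii) is not shown:

from itertools import product

tree = {"root": ["A", "B"], "A": ["A1", "A2"]}        # toy hierarchy
prob = {"A": 0.8, "A1": 0.6, "A2": 0.3, "B": 0.4}     # P(label on), per node

parent = {child: p for p, children in tree.items() for child in children}
nodes = list(prob)

def consistent(assign):
    """A label may be on only if its parent is on (root is always on)."""
    return all(not on or parent[n] == "root" or assign[parent[n]]
               for n, on in assign.items())

def score(assign):
    """Global probability of a joint assignment, assuming independence."""
    s = 1.0
    for n, on in assign.items():
        s *= prob[n] if on else 1.0 - prob[n]
    return s

hypotheses = (dict(zip(nodes, bits))
              for bits in product([True, False], repeat=len(nodes)))
k_best = sorted(filter(consistent, hypotheses), key=score, reverse=True)[:3]
for h in k_best:
    print(sorted(n for n, on in h.items() if on), round(score(h), 4))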

@inProceedings{ju-etal-2013-learning-166990,
	title        = {Learning to Rank from Structures in Hierarchical Text Classification},
	abstract     = {In this paper, we model learning to rank algorithms based on structural dependencies in hierarchical multi-label text categorization (TC). Our method uses the classification probability of the binary classifiers of a standard top-down approach to generate k-best hypotheses. The latter are generated according to their global probability while at the same time satisfying the structural constraints between father and child nodes. The rank is then refined using Support Vector Machines and tree kernels applied to a structural representation of hypotheses, i.e., a hierarchy tree in which the outcome of binary one-vs-all classifiers is directly marked in its nodes. Our extensive experiments on the whole Reuters Corpus Volume 1 show that our models significantly improve over the state of the art in TC, thanks to the use of structural dependencies.},
	booktitle    = {Advances in Information Retrieval; 35th European Conference on IR Research, ECIR 2013, Moscow, Russia, March 24-27, 2013; P. Serdyukov et al. (ed)},
	author       = {Ju, Qi and Moschitti, Alessandro and Johansson, Richard},
	year         = {2013},
	volume       = {Lecture Notes in Computer Science 7814},
	ISBN         = {978-3-642-36972-8},
	pages        = {183--194},
}

@inProceedings{junger-etal-2012-scxml-164522,
	title        = {SCXML for Building Conversational Agents in the Dialog Web Lab},
	abstract     = {The W3C has selected Harel Statecharts, under the name of State Chart XML (SCXML), as the basis for future standards in the area of (multimodal) dialog systems (Barnett et al. 2012). In an effort to educate people about SCXML we are building a web-based development environment where the dialogs of embodied, spoken conversational agents can be managed and controlled using SCXML, in a playful and interesting manner.},
	booktitle    = {Proceedings of The Swedish Language Technology Conference (SLTC) 2012},
	author       = {Junger, David and Lager, Torbjörn and Roxendal, Johan},
	year         = {2012},
}

@inProceedings{karsvall-borin-2018-sdhk-265603,
	title        = {SDHK meets NER: Linking place names with medieval charters and historical maps},
	booktitle    = {CEUR Workshop Proceedings, vol. 2084. Proceedings of the Digital Humanities in the Nordic Countries 3rd Conference, Helsinki, Finland, March 7-9, 2018. Edited by Eetu Mäkelä, Mikko Tolonen, Jouni Tuominen},
	author       = {Karsvall, Olof and Borin, Lars},
	year         = {2018},
	publisher    = {University of Helsinki, Faculty of Arts},
	address      = {Helsinki},
}

@inProceedings{kelly-etal-2016-identifying-242814,
	title        = {Identifying Perceptually Similar Voices with a Speaker Recognition System Using Auto-Phonetic Features},
	booktitle    = {17th Annual Conference of the International Speech Communication Association (Interspeech 2016), San Francisco, CA, USA, 8-12 September 2016},
	author       = {Kelly, Finnian and Alexander, Anil and Forth, Oscar and Kent, Samuel and Lindh, Jonas and Åkesson, Joel},
	year         = {2016},
	pages        = {1567--1568},
}

@inProceedings{kelly-etal-2016-automatically-242810,
	title        = {Automatically identifying perceptually similar voices for voice  parades},
	booktitle    = {Proceedings of IAFPA25. 25th Annual Conference of the International Association for Forensic Phonetics and Acoustics. York, UK 24th – 27th July 2016},
	author       = {Kelly, Finnian and Alexander, Anil and Forth, Oscar and Kent, Samuel and Lindh, Jonas and Åkesson, Joel},
	year         = {2016},
	pages        = {25--26},
}

@book{kokkinakis-2001-framework-125224,
	title        = {A Framework for the Acquisition of Lexical Knowledge; Description and Application},
	author       = {Kokkinakis, Dimitrios},
	year         = {2001},
	address      = {Göteborg},
	ISBN         = {LIBRIS-ID:8245865},
}

@inProceedings{kokkinakis-2004-reducing-33928,
	title        = {Reducing the Effect of Name Explosion.},
	abstract     = {The problem of new vocabulary is particularly frustrating once one begins to work with large corpora of real texts. The identification of unknown proper nouns, chains of non-proper nouns and even common words that function as names (i.e. named entities) in unrestricted text, and their subsequent classification into some sort of semantic type, is a challenging and difficult problem in Natural Language Processing (NLP). Systems that perform Information Extraction, Information Retrieval, Question-Answering, Topic Detection, Text Mining, Machine Translation and annotation for the Semantic Web have highlighted the need for the automatic recognition of such entities, since their constant introduction in any domain, however narrow, is very common and needs special attention. Proper names are usually not listed in defining or other common types of dictionaries; they may appear in many alias forms and abbreviated variations, which makes their listing infeasible. This paper deals with some extensions to the “traditional” named entity recognition approaches. It puts emphasis on more name classes and their further subclassification into finer sets. An operative system that can be tested and evaluated on-line implements the ideas described in this paper.},
	booktitle    = {Proceedings of the LREC Workshop: Beyond Named Entity Recognition, Semantic labelling for NLP tasks. Fourth Language Resources and Evaluation Conference (LREC)},
	author       = {Kokkinakis, Dimitrios},
	year         = {2004},
}

@inProceedings{kokkinakis-2005-identification-33934,
	title        = {Identification of Named Entities and Medical Terminology in Swedish Patient Records.},
	abstract     = {An anonymisation or de-identification system can provide a broad spectrum of services related to the growing demands for better forms of dissemination of information about individuals found in electronic patient records. The range of these services includes: health care statistics and sharing clinical information across institutions; validation and monitoring of new diagnostic tests; release of individual data by protecting identities or hints that can identify individuals; and appropriate mechanisms to provide only the information necessary to the professional who has the need to know. This paper describes our first experiments intended for automatic anonymisation of Swedish electronic patient records using a generic system for Named Entity Recognition. There are eight main types of entities that the system recognizes: “person”, “location”, “organisation”, “event”, “object”, “work & art”, “time” and “measure”. To this set, two new modules have been recently developed. One is dedicated to animacy recognition, a module based on a number of clues (such as key words utilized in the person module’s grammar and verbs requiring an animate subject), and another is designed to identify and annotate medical terminology. The latter module annotates names of drugs and chemical substances, diseases, symptoms, organisms and anatomical terms. A detailed evaluation of the system, on authentic patient records, is given for the named, medical and animate entities.},
	booktitle    = {WSEAS Transactions on BIOLOGY and BIOMEDICINE},
	author       = {Kokkinakis, Dimitrios},
	year         = {2005},
	volume       = {2},
	number       = {3},
	pages        = {312--317},
}

@article{kokkinakis-2006-towards-45197,
	title        = {Towards a Swedish Medical Treebank},
	abstract     = {In this paper, we present our current activities towards the compilation and the multi-layered annotation of a domain-dependent corpus for Swedish in the area of medicine. The focus of the paper is the description of the constituent structure and functionally oriented annotation of the corpus. Moreover, the annotation scheme adopted, which incorporates three main layers of linguistic processing (lexical analysis, shallow semantic analysis and syntactic processing), will be exemplified. For the syntactic analysis we use a cascaded finite-state parser, aware of the shallow semantic annotations produced. The result of this analysis, including syntactic parsing and shallow semantic analysis, is transformed into the TIGER-XML interchange format. Our goal is to produce a large, richly annotated medical treebank suitable for corpus-based grammar learning systems, for semantic relation extraction and for linguistic exploration of a theoretical nature.},
	journal      = {Proceedings of the 5th Conference on Treebanks and Linguistic Theories},
	author       = {Kokkinakis, Dimitrios},
	year         = {2006},
}

@inProceedings{kokkinakis-2006-towards-34033,
	title        = {Towards a Swedish Medical Treebank},
	booktitle    = {5th Conference on Treebanks and Linguistic Theories},
	author       = {Kokkinakis, Dimitrios},
	year         = {2006},
}

@inProceedings{kokkinakis-2006-collection-33937,
	title        = {Collection, Encoding and Linguistic Processing of a Swedish Medical Corpus - The MEDLEX Experience.},
	abstract     = {Corpora annotated with structural and linguistic characteristics play a major role in nearly every area of language processing. During recent years a number of corpora and large data sets have become known and available to research even in specialized fields such as medicine, though predominantly for the English language. This paper provides a description of the collection, encoding and linguistic processing of an ever-growing Swedish medical corpus, the MEDLEX Corpus. MEDLEX consists of a variety of text documents related to various medical text genres. The MEDLEX Corpus has been structurally annotated using the Corpus Encoding Standard for XML (XCES), lemmatized, and automatically annotated with part-of-speech and semantic information (extended named entities and the Medical Subject Headings, MeSH, terminology). The results from the processing stages (part-of-speech, entities and terminology) have been merged into a single representation format and syntactically analysed using a cascaded finite-state parser. Finally, the parser’s results are converted into a tree structure that follows the TIGER-XML coding scheme, resulting in a fairly large treebank of Swedish medical texts suitable for further exploration.
},
	booktitle    = {Proceedings of the 5th Language Resources and Evaluation Conference (LREC)},
	author       = {Kokkinakis, Dimitrios},
	year         = {2006},
}

@inProceedings{kokkinakis-2006-developing-33925,
	title        = {Developing Resources for Swedish Bio-Medical Text Mining},
	abstract     = {Collection and annotation of corpora in specialized fields such as medicine, particularly for languages less widely spoken than, for instance, English, is an important enterprise for the continuous development and growth of language technology research, for resource development and for the implementation of practical applications for these languages. In this paper, we describe our ongoing efforts to build a large Swedish medical corpus, the MEDLEX Corpus, how we combine generic named entity and terminology recognition for the detailed annotation of the corpus, and how these annotations are further utilized by an annotations-aware cascaded finite-state parser.},
	booktitle    = {Proceedings of the 2nd International Symposium on Semantic Mining in Biomedicine (SMBM)},
	author       = {Kokkinakis, Dimitrios},
	year         = {2006},
}

@inProceedings{kokkinakis-2007-automatic-47933,
	title        = {Automatic Indexing using the English and Swedish MeSH®, a Note on Coverage},
	abstract     = {The identification and mapping of terminology onto a concept hierarchy is the very first stage of semantic, deeper analysis of textual documents. Work regarding automatic terminology recognition using the Swedish MeSH® thesaurus (Medical Subject Headings, edition 2006) and its corresponding English source is reported. A number of transformations and refinements were applied to the original lexical database in order to enhance the automatic process of mapping the extensive variability of lexical terms in authentic data to structured MeSH codes. Means to increase the coverage of both thesauruses for automatic indexing of Swedish medical data are investigated.},
	booktitle    = {Svenska Läkaresällskapets Riksstämma 2007},
	author       = {Kokkinakis, Dimitrios},
	year         = {2007},
}
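
The first step in the automatic indexing described above is mapping term variants in running text onto thesaurus codes, typically longest match first. A toy sketch with an invented two-term code table follows; the transformations for lexical variability that the abstract mentions are omitted:

mesh = {("diabetes", "mellitus"): "D003920",   # illustrative code table
        ("diabetes",): "D003920",
        ("insulin",): "D007328"}
max_len = max(len(term) for term in mesh)

def index_text(tokens):
    """Greedy longest-match lookup of thesaurus terms in a token list."""
    hits, i = [], 0
    while i < len(tokens):
        for n in range(min(max_len, len(tokens) - i), 0, -1):
            candidate = tuple(tokens[i:i + n])
            if candidate in mesh:
                hits.append((" ".join(candidate), mesh[candidate]))
                i += n
                break
        else:
            i += 1                    # no term starts here; move on
    return hits

print(index_text("patients with diabetes mellitus received insulin".split()))
# [('diabetes mellitus', 'D003920'), ('insulin', 'D007328')]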

@inProceedings{kokkinakis-2008-semantic-73975,
	title        = {Semantic Relation Mining of Solid Compounds in Medical Corpora.},
	abstract     = {In the context of scientific and technical texts, meaning is usually embedded in noun compounds, and the semantic interpretation of these compounds deals with the detection and semantic classification of the relation that holds between the compound’s constituents. Semantic relation mining, the technology applied for marking up, interpreting, extracting and classifying relations that hold between pairs of words, is an important enterprise that contributes to deeper means of enhancing document understanding technologies, such as Information Extraction, Question Answering, Summarization, Paraphrasing, Ontology Building and Textual Entailment. This paper explores the application of assigning semantic descriptors taken from a multilingual medical thesaurus to a large sample of solid (closed-form) compounds taken from large Swedish medical corpora, and determining the relation(s) that may hold between the compound constituents. Our work is inspired by previous research in the area of using lexical hierarchies for identifying relations between two-word noun compounds in the medical domain. In contrast to previous research, Swedish, like other Germanic languages, requires further means of analysis, since compounds are written as one sequence with no white space between the words, e.g. virus diseases vs. virussjukdomar. This makes the problem more challenging, since solid compounds are harder to identify and segment.},
	booktitle    = {Proceedings of the 21th Conference on the European Federation for Medical Informatics (MIE 2008)},
	author       = {Kokkinakis, Dimitrios},
	year         = {2008},
	ISBN         = {9786611733414},
}
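
The segmentation difficulty pointed out at the end of the abstract above, compounds written as one unbroken sequence, can be seen in a small recursive splitter. The three-word lexicon is a stand-in; real systems rely on full morphological analysis and also handle linking elements such as -s-:

lexicon = {"virus", "sjukdom", "sjukdomar"}

def split_compound(word):
    """Return all segmentations of `word` into known lexicon items."""
    if word in lexicon:
        return [[word]]
    splits = []
    for i in range(2, len(word) - 1):     # require parts of length >= 2
        head, rest = word[:i], word[i:]
        if head in lexicon:
            splits += [[head] + tail for tail in split_compound(rest)]
    return splits

print(split_compound("virussjukdomar"))   # [['virus', 'sjukdomar']]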

@inProceedings{kokkinakis-2008-semantically-73974,
	title        = {A Semantically Annotated Swedish Medical Corpus},
	abstract     = {With the information overload in the life sciences there is an increasing need for annotated corpora, particularly with biological and biomedical entities, which is the driving force for data-driven language processing applications and the empirical approach to language study. Inspired by the work in the GENIA Corpus, which is one of the very few of such corpora, extensively used in the biomedical field, and in order to fulfil the needs of our research, we have collected a Swedish medical corpus, the MEDLEX Corpus. MEDLEX is a large structurally and linguistically annotated document collection, consisting of a variety of text documents related to various medical text subfields, and does not focus at a particular medical genre, due to the lack of large Swedish resources within a particular medical subdomain. Out of this collection we selected 300 documents which were manually examined by two human experts who inspected, corrected and/or accordingly modified the automatically provided annotations according to a set of provided labelling guidelines. The annotations consist of medical terminology provided by the Swedish and English MeSH® (Medical Subject Headings) thesauri as well as named entity labels provided by an enhanced named entity recognition software.},
	booktitle    = {Proceedings of the 6th Language Resources and Evaluation Conference (LREC)},
	author       = {Kokkinakis, Dimitrios},
	year         = {2008},
}

@inProceedings{kokkinakis-2008-semantic-73977,
	title        = {Semantic Pre-processing for Complexity Reduction in Parsing Medical Texts},
	abstract     = {Collection and multilayer annotation of textual corpora in specialized fields, such as (bio)medicine, is an important enterprise for empirically-based, data-driven language processing, human language technologies and linguistic research. One of the most important and most difficult annotation layers to make available is the syntactic and functional level, i.e. parsing, particularly in sublanguages, where specialized tools have to be adapted, which is considered too expensive for many applications. In this paper, we describe a way to reduce the complexity of parsing in medical discourse by the use of a semantic pre-processing stage guided by annotations provided by medical thesauri and other domain-specific lexical resources. Parsing biomedical texts, apart from the challenge it poses (deviant and idiosyncratic uses of vocabulary and syntax), is required in order to support and improve technologies such as Information Extraction and Retrieval, enhance the acquisition of relations between terms, support terminology management, and aid the population of medical semantic resources.},
	booktitle    = {Proceedings of the 21th Conference on the European Federation for Medical Informatics (MIE 2008)},
	author       = {Kokkinakis, Dimitrios},
	year         = {2008},
}

@inProceedings{kokkinakis-2008-mesh(R)-73973,
	title        = {MeSH® - From a Controlled Vocabulary to a Processable Resource},
	abstract     = {Large repositories of life science data in the form of domain-specific literature, textual databases and other large specialised textual collections (corpora) in electronic form increase on a daily basis, to a level beyond what the human mind can grasp and interpret. As the volume of data continues to increase, the need for substantial support from new information technologies and computational techniques, grounded in the ever-increasing applications of the mining paradigm, is becoming apparent. These emerging technologies play an increasingly critical role in aiding research productivity, and they provide the means for reducing the workload for information access and decision support and for speeding up and enhancing the knowledge discovery process. In order to accomplish these higher-level goals and support the mining approach, however, a fundamental and unavoidable starting point is the identification and mapping of terminology from the textual, unstructured data onto biomedical knowledge sources and concept hierarchies. In this paper, we provide a description of the work regarding terminology recognition using the Swedish MeSH® thesaurus and its corresponding English original source. We explain the various transformation and refinement steps applied to the original database tables to produce a fully-fledged processing-oriented annotation resource. Particular attention has been given to a number of these steps in order to automatically map the extensive variability of lexical terms to structured MeSH® nodes. Issues of annotation and coverage are also discussed.
},
	booktitle    = {Proceedings of the 6th Language Resources and Evaluation Conference (LREC)},
	author       = {Kokkinakis, Dimitrios},
	year         = {2008},
}

@article{kokkinakis-2009-shallow-105133,
	title        = {Shallow Features for Differentiating Disease-Treatment Relations using Supervised Learning; a pilot study.},
	abstract     = {Clinical narratives provide an information-rich, nearly unexplored corpus of evidential knowledge that is considered a challenge for practitioners in the language technology field, particularly because of the nature of the texts (excessive use of terminology, abbreviations, orthographic term variation), the significant opportunities for clinical research that such material can provide, and the potentially broad impact that clinical findings may have on everyday life. It is therefore recognized that the capability to automatically extract key concepts and their relationships from such data will allow systems to properly understand the content and knowledge embedded in the free text, which can be of great value for applications such as information extraction and question answering. This paper gives a brief presentation of such textual data and its semantic annotation, and discusses the set of semantic relations that can be observed between diseases and treatments in the sample. The problem is then cast as a supervised machine learning task in which the relations are learned from pre-annotated data. The challenges in designing the problem, as well as empirical results, are presented.},
	journal      = {Lecture Notes in Computer Science},
	author       = {Kokkinakis, Dimitrios},
	year         = {2009},
	volume       = {5729},
	pages        = {395--402},
}

@incollection{kokkinakis-2009-lexical-73979,
	title        = {Lexical granularity for automatic indexing and means to achieve it - the case of Swedish MeSH®},
	abstract     = {The identification and mapping of terminology from large repositories of life science data onto concept hierarchies constitute an important initial step for a deeper semantic exploration of unstructured textual content. Accurate and efficient mapping of this kind is likely to provide better means of enhancing indexing and retrieval of text, uncovering subtle differences, similarities and useful patterns, and hopefully new knowledge, among complex surface realisations, overlooked by shallow techniques based on various forms of lexicon look-up approaches. However, a finer-grained level of mapping between terms as they occur in natural language and domain concepts is a cumbersome enterprise that requires various levels of processing in order to make explicit relevant linguistic structures. This chapter highlights some of the challenges encountered in the process of bridging free to controlled vocabularies and thesauri and vice versa. We investigate how the extensive variability of lexical terms in authentic data can be efficiently projected to hierarchically structured codes, while means to increase the coverage of the underlying lexical resources are also investigated.},
	booktitle    = {Information Retrieval in Biomedicine : Natural Language Processing for Knowledge Integration},
	author       = {Kokkinakis, Dimitrios},
	year         = {2009},
	publisher    = {IGI Global },
	address      = {Hershey, Pennsylvania},
}

@inProceedings{kokkinakis-2009-shallow-94705,
	title        = {Shallow Features for Differentiating Disease-Treatment Relations using Supervised Learning, a pilot study},
	abstract     = {Clinical narratives provide an information-rich, nearly unexplored corpus of evidential knowledge that is considered a challenge for practitioners in the language technology field, particularly because of the nature of the texts (excessive use of terminology, abbreviations, orthographic term variation), the significant opportunities for clinical research that such material can provide, and the potentially broad impact that clinical findings may have on everyday life. It is therefore recognized that the capability to automatically extract key concepts and their relationships from such data will allow systems to properly understand the content and knowledge embedded in the free text, which can be of great value for applications such as information extraction and question answering. This paper gives a brief presentation of such textual data and its semantic annotation, and discusses the set of semantic relations that can be observed between diseases and treatments in the sample. The problem is then cast as a machine learning task in which the relations are learned in a supervised fashion from pre-annotated data. The challenges in designing the problem, as well as empirical results, are presented.},
	booktitle    = {Proceedings of the 12th International Conference TSD (Text, Speech and Dialogue). Springer Verlag, LNCS/LNAI series.},
	author       = {Kokkinakis, Dimitrios},
	year         = {2009},
}

@article{kokkinakis-2010-complementary-125644,
	title        = {Complementary Methods for De-identifying Sensitive Data with a focus on Clinical Discourse},
	abstract     = {In the era of the Electronic Health Record (EHR), the release of individual data for research, public health planning, health care statistics, monitoring of diagnostic tests, automated data collection for health care registries and the tracking of disease outbreaks are some of the areas in which the protection of Personal Health Information (PHI) has become an important concern. The purpose of this study is to adapt and apply synergetic methods to document de-identification, particularly of clinical records and other sources of sensitive data. The main challenge and goal of this research is to retain important concepts and PHI in the documents in a standardized and neutral manner, as a means of encryption, without violating the integrity of the PHI and without sacrificing the quality and intended meaning of the authors.},
	journal      = {Revista de Procesamiento de Lenguaje Natural (SEPLN)},
	author       = {Kokkinakis, Dimitrios},
	year         = {2010},
	volume       = {45},
	pages        = {243--246},
}
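A minimal sketch of the placeholder-based de-identification idea described above, assuming simple regular-expression surrogates for a few PHI types; a real system would combine gazetteers, NER and checksum validation, e.g. for Swedish personal identity numbers:

```python
# Minimal sketch of placeholder-based de-identification; the patterns
# below are toy assumptions, not the paper's actual method.
import re

PHI_PATTERNS = {
    "DATE": re.compile(r"\b\d{4}-\d{2}-\d{2}\b"),
    "ID_NUMBER": re.compile(r"\b\d{6}[-+]\d{4}\b"),  # Swedish personnummer shape
    "PHONE": re.compile(r"\b0\d{1,3}-\d{5,8}\b"),
}

def deidentify(text: str) -> str:
    """Replace each PHI match with a neutral, typed placeholder."""
    for label, pattern in PHI_PATTERNS.items():
        text = pattern.sub(f"[{label}]", text)
    return text

print(deidentify("Pat. 450612-1234 ringde 031-123456 den 2010-03-01."))
```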

@article{kokkinakis-2010-"data-130213,
	title        = {Är "data scrubbing" en användbar metod för att anonymisera känsliga patientdata?.},
	abstract     = {De senaste årens ökande användning av modern informationsteknik inom sjukvården har medfört en kraftig ökning av elektronisk dokumentation som rör patientens hälsotillstånd, vård och behandling. Vårddokumentationen blir både mer detaljerad och mer individuell, samtidigt som den uppdateras och förändras regelbundet. Patientjournalen är i första hand till för att bidra till en god och säker vård av patienten, men också en viktig informationskälla för FoU. Ett stort hinder för utnyttjandet av journalinformation som forskningskälla är de etiska och rättsliga problemen. För att kunna hantera och utnyttja dessa stora och ständigt växande informationsmängder ställs därmed högre krav på säker, skyddad och effektiv informationshantering.},
	journal      = {Svenska Läkaresällskapets Riksstämman },
	author       = {Kokkinakis, Dimitrios},
	year         = {2010},
}

@article{kokkinakis-2010-data-130212,
	title        = {Is data scrubbing useful for anonymizing sensitive data?},
	abstract     = {The release of individual data for research, public health planning, health care statistics, monitoring of diagnostic tests, automated data collection for health care registries and tracking disease outbreaks are some of the areas in which the protection of Personal Health Information (PHI) has become an important concern. The purpose of this study is to adapt and apply synergetic methods to document de-identification, particularly in the clinical setting. The main challenge is to retain important concepts and PHI in the documents in a standardized and neutral manner as means of encryption without violating the integrity of the PHI and without sacrificing the quality and intended meaning of the authors.},
	journal      = {The Third Swedish Language Technology Conference},
	author       = {Kokkinakis, Dimitrios},
	year         = {2010},
}

@article{kokkinakis-2010-initiala-130210,
	title        = {Initiala resultat av en storskalig automatisk indexering av vetenskaplig litteratur med hela det svenska SNOMED CT - problem och möjligheter.},
	abstract     = {The aim of this study is partly to create a large collection of Swedish medical electronic texts, a corpus, and partly to validate and quality-assure existing terms from SNOMED CT (the Systematized NOmenclature of MEDicine - Clinical Terms) against the corpus content. In this way, an objective view of the validity, coverage and reliability of SNOMED CT can be obtained. The terminology can also be enriched with new terms or term variants by automatically extracting term candidates in various subdomains from the corpus using different statistical and linguistic methods. The results of the corpus-based, empirical studies are intended to be used by terminologists in their work to make SNOMED CT more comprehensive, reliable and consistent. At the same time, the use of authentic data makes it possible to ensure that the term variants (existing or new) are terms accepted by domain experts. If several established term variants (new term candidates) occur in the corpus, these can, after manual review, be added as synonyms of the recommended terms (with the support of a suitable review interface) and thereby further develop the content of SNOMED CT. Our presentation will accordingly rest on three main pillars: corpus construction, term validation and term extraction. The corpus was collected from two sources after permission had been obtained: the digital archive of Läkartidningen (LT) <http://ltarkiv.lakartidningen.se> and the digital archive of DiabetologNytt (DN) <http://diabetolognytt.se/aterkommande/arkivet.html>.},
	journal      = {2010-års nationella termkonferens: Professionen i språket - språket i professionen.},
	author       = {Kokkinakis, Dimitrios},
	year         = {2010},
}

@inProceedings{kokkinakis-2010-korpus-119444,
	title        = {Korpus för vårdens och omsorgens fackspråk.},
	abstract     = {Within the framework of the government initiative "National IT Strategy for Health and Social Care", the National Board of Health and Welfare (Socialstyrelsen) has been commissioned to translate and adapt the concept system 'the Systematized Nomenclature of Medicine, Clinical Terms' into Swedish. Using the digital archive of Läkartidningen, we have developed methods to make the quality assurance of the term content more efficient.},
	booktitle    = {Humanistdagen 2010 - humaniora i dagens samhälle.},
	author       = {Kokkinakis, Dimitrios},
	year         = {2010},
}

@inProceedings{kokkinakis-2011-what-141312,
	title        = {What is the Coverage of SNOMED CT® on Scientific Medical Corpora?},
	abstract     = {This paper reports on the results of a large-scale mapping of SNOMED CT onto scientific medical corpora. The aim is to automatically assess the validity, reliability and coverage of the Swedish SNOMED CT translation, the largest and most extensive available resource of medical terminology. The method described here is based on the generation of predominantly safe-harbor term variants which, together with simple linguistic processing and the already available SNOMED term content, are mapped to large corpora. The results show that term variations are very frequent, and this may have implications for technological applications (such as indexing and information retrieval, decision support systems, and text mining) using SNOMED CT. Naïve approaches to terminology mapping and indexing would critically affect the performance, success and results of such applications. SNOMED CT appears not to be well suited for automatically capturing the enormous variety of concepts in scientific corpora (only 6.3% of all SNOMED terms could be directly matched to the corpus) unless extensive variant forms are generated and fuzzy and partial matching techniques are applied, with the risk of allowing the recognition of a large number of false positives and spurious results.},
	booktitle    = {Studies in Health Technology and Informatics / XXIII  International Conference of the European Federation for Medical Informatics},
	author       = {Kokkinakis, Dimitrios},
	year         = {2011},
	volume       = {169},
}
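As a rough illustration of the coverage measurement described above, a minimal sketch that generates simple "safe" term variants before matching; the term list, corpus and suffix rules are toy assumptions, not the study's actual resources:

```python
# Minimal sketch of direct terminology coverage with crude variant
# generation (case folding plus Swedish-like suffixes). Toy data only.
def variants(term: str) -> set[str]:
    t = term.lower()
    return {t, t + "en", t + "er", t + "erna"}

terms = ["diabetes", "hjärtinfarkt", "osteoporos"]
corpus_tokens = {"patienten", "hjärtinfarkten", "diabetes", "behandling"}

matched = [t for t in terms if variants(t) & corpus_tokens]
print(f"coverage: {len(matched)}/{len(terms)} = {len(matched)/len(terms):.1%}")
```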

@article{kokkinakis-2011-medicinska-149931,
	title        = {Medicinska terminologier - officiella standarder och verklighet},
	abstract     = {Official medical term lists never manage to become fully complete or updated in time with the latest discoveries in the (bio)medical field.
There is a growing need to link specialised and everyday language for practical (medically oriented) applications, e.g. the "your record online" project.
Applications take input that mixes specialised and general language, but there is a lack of comprehensive medical (electronic) dictionaries and term lists with integrated, detailed linguistic and medical information for laypeople, as well as of transcribed patient-doctor conversations.
Existing medical terminologies can be used in language technology research to support information extraction, creating structured representations of texts (co-occurrence analysis; fact extraction and synthesis; relation and event extraction, e.g. between disease, treatment and outcome, in order to obtain a good basis for predicting how future treatments will turn out).
Terminology can also be used as a medium to facilitate communication between care recipients and care providers, e.g. making medical terms easier for the general public to understand.
},
	journal      = {Terminologiworkshop i Karlstad},
	author       = {Kokkinakis, Dimitrios},
	year         = {2011},
}

@article{kokkinakis-2011-natural-149930,
	title        = {Natural language processing of clinical data with a focus on diffuse symptoms},
	abstract     = {The medical domain is well supported with a wealth of large, rich and varied controlled vocabularies and terminological resources. This paper investigates the extent to which the largest available medical nomenclature for Swedish, the Systematized Nomenclature of Medicine Clinical Terms (SNOMED CT), can handle a particularly challenging type of terminology that is difficult to acquire automatically, namely (clinical) phenotypes. The aim of the study is to better understand phenotype contextualization in order to improve and enhance our knowledge of communicative events in various healthcare settings. Our approach is an exploratory one, which we believe yields useful insights into how findings, symptoms and signs (i.e. clinical phenotypes in general) are expressed in real data. The study was initiated in the context of the project "Interpretation and understanding of functional symptoms in primary health care", whose main research goal is to study health care interactions with patients suffering from Functional Somatic Syndromes (FSS). FSS are characterized by particular constellations of medically unexplained, often chronic symptoms, such as dizziness, fatigue, dyspepsia, and muscle and joint pain. We use methods from the natural language processing field to investigate how symptom mentions are expressed and how successful available automated means are at capturing symptom descriptions, both in collected written material (patient records) and in transcribed material (patient/nurse and patient/doctor encounters). We manually evaluated the content of the resource on the collected data, and our results indicate that a large number of such phenotypes are expressed using figurative language, or contextualized using a number of variant expressions. SNOMED CT cannot easily accommodate such variation and vagueness as expressed in real text data, unless we devise means to handle the variation, e.g. through near-synonym dictionaries or the development and linking of consumer health vocabularies. The presented research has several implications, since accurate identification of phenotypes can, for instance, increase the value of available data in decision making and thus allow automatic systems to dynamically correct inappropriate domain decisions. We have evaluated the content of a large controlled vocabulary for Swedish on symptom descriptions in clinical texts.},
	journal      = {Läkaresällskapets Riksstämman },
	author       = {Kokkinakis, Dimitrios},
	year         = {2011},
}

@inProceedings{kokkinakis-2011-reducing-143877,
	title        = {Reducing Complexity in Parsing Scientific Medical Data, a Diabetes Case Study},
	abstract     = {The aim of this study is to assemble and deploy various NLP components and resources in order to parse scientific medical data and evaluate the degree to which these resources contribute to the overall parsing performance. With parsing we limit our efforts to the identification of unrestricted noun phrases with full phrase structure, and investigate the effects of using layers of semantic annotation prior to parsing. Scientific medical texts exhibit complex linguistic structure but also regularities that can be captured by pre-processing the texts with specialized semantically-aware tools. Our results show evidence of improved performance while the complexity of parsing is reduced. Parsed scientific texts and inferred syntactic information can be leveraged to improve the accuracy of higher-level tasks such as information extraction, and to enhance the acquisition of semantic relations and events.},
	booktitle    = {Workshop: Biomedical Natural Language Processing in conjunction with Recent Advances in Natural Language Processing (RANLP). Hissar, Bulgaria.},
	author       = {Kokkinakis, Dimitrios},
	year         = {2011},
}
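A minimal sketch of finite-state noun-phrase chunking over part-of-speech tags, in the spirit of the parsing set-up above; the tagged sentence and NP pattern are invented for illustration (the study additionally layers semantic annotation before parsing):

```python
import re

# Toy POS-tagged sentence (word, tag); tags and sentence are invented.
tagged = [("den", "DET"), ("kroniska", "ADJ"), ("sjukdomen", "NOUN"),
          ("behandlas", "VERB"), ("med", "PREP"), ("insulin", "NOUN")]

tagstring = " ".join(tag for _, tag in tagged)
# NP := optional determiner, any number of adjectives, one or more nouns
NP = re.compile(r"(DET )?(ADJ )*(NOUN ?)+")

for m in NP.finditer(tagstring + " "):
    start = tagstring[: m.start()].count(" ")   # token index of match start
    length = m.group(0).strip().count(" ") + 1  # number of tokens matched
    print([word for word, _ in tagged[start : start + length]])
```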

@inProceedings{kokkinakis-2011-health-141311,
	title        = {Health Portals and Clinical Phenotypes - Recognition using SNOMED CT},
	abstract     = {The medical domain is particularly well endowed with various sources of terminology. Usually, such sources vary with respect to size, structure, depth and breadth of descriptive power, granularity and applicability. This paper investigates the extent to which the largest available medical nomenclature for Swedish can cope with a particularly challenging type of terminology that is difficult to acquire automatically, namely (clinical) phenotypes. We evaluated the content of the resource on reference symptom lists extracted from several popular health portals. The results indicate that a large number of such phenotypes are expressed using figurative language, or contextualized using a number of variant expressions. SNOMED CT cannot easily accommodate such variation and vagueness as expressed in real text data, unless we devise means to handle the variation, e.g. through near-synonym dictionaries or the development and linking of consumer health vocabularies. The presented research has several implications, since accurate identification of phenotypes can, for instance, increase the value of available data in decision making and thus allow automatic systems to dynamically correct inappropriate domain decisions.},
	booktitle    = {9th Scandinavian Conference on Health Informatics},
	author       = {Kokkinakis, Dimitrios},
	year         = {2011},
}

@inProceedings{kokkinakis-2011-evaluating-139977,
	title        = {Evaluating the Coverage of three Controlled Health Vocabularies with Focus on Findings, Signs & Symptoms},
	abstract     = {The medical domain is blessed with a multitude of terminological resources of varying characteristics, size, structure, and depth and breadth of descriptive power and granularity. In this domain, a particularly interesting and difficult entity type is that of signs, symptoms and findings, which to a large extent are expressed in a periphrastic manner, sometimes through figurative or metaphorical language, or contextualized using a wealth of vague variant expressions. We therefore hypothesize that no major official terminology source alone can accommodate the variation and complexity present in real text data, such as electronic medical records, notes or health-related documents. In this paper we evaluate the content of the three largest controlled medical vocabularies available for Swedish on extracted reference symptom lists, and initiate a discussion on how to proceed in order to achieve increased coverage on similar genres.
},
	booktitle    = {Workshop on Creation, Harmonization and Application of Terminology Resources Co-located with NODALIDA 2011},
	author       = {Kokkinakis, Dimitrios},
	year         = {2011},
	pages        = {5},
}

@inProceedings{kokkinakis-2012-initial-164788,
	title        = {Initial Experiments of Medication Event Extraction Using Frame Semantics},
	abstract     = {Semantic annotation of text corpora for mining complex relations and events has gained considerable and growing attention in the medical domain. The goal of this paper is to present a snapshot of ongoing work that aims to develop and apply an appropriate infrastructure for automatic event labelling and extraction in the Swedish medical domain. Annotated text samples, appropriate lexical resources (e.g. term lists and the Swedish FrameNet++) and hybrid techniques are currently being developed in order to alleviate some of the difficulties of the task. As a case study, this paper presents a pilot approach based on the application of the theory of frame semantics to automatically identify and extract detailed medication information from medical texts. Medication information is often written in narrative form (e.g. in clinical records) and is therefore difficult to acquire and use in computerized systems (e.g. decision support). Currently our approach uses a combination of generic entity and terminology taggers, specifically designed medical frames and various frame-related patterns. Future work intends to improve and enhance the current results by using more annotated samples, more medically relevant frames and a combination of supervised learning techniques with the regular expression patterns.},
	booktitle    = {Scandinavian Conference on Health Informatics (SHI)},
	author       = {Kokkinakis, Dimitrios},
	year         = {2012},
	volume       = {Linköping Electronic Conference Proceedings},
	ISBN         = {978-91-7519-758-6},
	pages        = {41--47},
}
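As a rough illustration of the pattern-based frame-element filling described above, a minimal sketch with one invented pattern for a hypothetical MEDICATION_EVENT frame (drug, dose, frequency); the project itself combines entity and terminology taggers with many such frame-specific patterns:

```python
# Minimal sketch of rule-based frame-element filling for a medication
# event. The frame, pattern and example sentence are invented.
import re

# Hypothetical frame: MEDICATION_EVENT(drug, dose, freq)
PATTERN = re.compile(
    r"(?P<drug>[A-ZÅÄÖ][a-zåäö]+)\s+(?P<dose>\d+\s*mg)\s+(?P<freq>\d+\s*gånger\s+dagligen)"
)

def extract_medication_events(text: str) -> list[dict]:
    return [m.groupdict() for m in PATTERN.finditer(text)]

print(extract_medication_events("Ordinerades Metformin 500 mg 2 gånger dagligen."))
```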

@inProceedings{kokkinakis-2012-journal-155893,
	title        = {The Journal of the Swedish Medical Association - a Corpus Resource for Biomedical Text Mining in Swedish.},
	abstract     = {Biomedical text mining applications are largely dependent on high quality knowledge resources. Traditionally, these include lexical databases, terminologies, nomenclatures and ontologies and, during the last decade, also corpora of various sizes, variety and diversity. Some of these corpora are annotated with an expanding range of information types and metadata while others become available with a minimal set of annotations. At the same time, it is of great importance that biomedical corpora for lesser-spoken languages also get developed in order to support and facilitate the implementation of practical applications for such languages and to stimulate the development of language technology research and innovation infrastructures in the domain. This paper provides a detailed description of a Swedish biomedical corpus based on the electronic editions of the Journal of the Swedish Medical Association "Läkartidningen" of the years 1996-2010. The corpus consists of a variety of documents that can be related to different medical domains, developed as a response to the increasing needs for large and reliable medical information for Swedish biomedical NLP. The corpus has been structurally annotated with a minimal set of meta information and automatically indexed with the largest and systematically organised computer processable collection of medical terminology, the Swedish SNOMED CT (Systematized Nomenclature of Medicine -- Clinical Terms). This way topic-focused subcorpora, e.g. with diabetes-related content, can be easily developed.},
	booktitle    = {The Third Workshop on Building and Evaluating Resources for Biomedical Text Mining (BioTxtM), an LREC Workshop. Turkey.},
	author       = {Kokkinakis, Dimitrios},
	year         = {2012},
	volume       = {Accepted},
}

@inProceedings{kokkinakis-2013-annotation-189536,
	title        = {Annotation of interpersonal relations in Swedish prose fiction.},
	abstract     = {This paper describes the manual annotation of a small sample of Swedish 19th and 20th century prose fiction with interpersonal relations between characters in six literary works. An interpersonal relationship is an association between two or more people that may range in duration from brief to enduring. The annotation is guided by a named entity recognition step. Our goal is to get an in-depth understanding of the difficulties of such a task and elaborate a model that can be applied for similar annotation on a larger scale, both manually as well as automatically. The identification of interpersonal relations can, hopefully, aid the reader of a Swedish literary work to better understand its content and plot, and get a bird’s eye view on the landscape of the core story. Our aim is to use such annotations in a hybrid context, i.e., using machine learning and rule-based methods, which, in conjunction with named entity recognition, can provide the necessary infrastructure for creating detailed biographical sketches and extracting facts for various named entities which can be exploited in various possible ways by Natural Language Processing (NLP) technologies such as summarization, question answering, as well as visual analytic techniques.},
	booktitle    = {Proceedings of the 3rd Workshop on Annotation of Corpora for Research in the Humanities (ACRH-3). Sofia, Bulgaria.},
	author       = {Kokkinakis, Dimitrios},
	year         = {2013},
	ISBN         = {978-954-91700-5-4},
	pages        = {37--47},
}

@inProceedings{kokkinakis-2013-figurative-168227,
	title        = {Figurative Language in Swedish Clinical Texts},
	abstract     = {Automated processing of clinical texts, with the intention of linking all important text fragments to various established terminologies and ontologies for relation or event extraction, commonly faces various less exposed and not so regularly discussed linguistically motivated issues that need to be addressed. One of these issues is the use of figurative language. Figurative language, that is, the use of words that go beyond their ordinary meaning, is not only a linguistically complex and challenging problem but also one that causes great difficulty for the field of natural language processing (NLP), both in the processing of general language and of various sublanguages, such as clinical medicine. A comprehensive model of, e.g., clinical language processing therefore needs to account for figurative language usage, and this paper provides a description towards this goal. Since the empirical, clinical data used in the study is limited in size, no formal distinction is made between different sub-classifications of figurative language, e.g. metaphors, idioms or similes. As a matter of fact, all these types of expressions form a continuum with fuzzy boundaries, and most of the NLP-oriented approaches discussed in the past have used either very large data sets or hand-annotated samples, a situation that has so far been prohibitive in our project. The distinction is therefore based solely on a more general level, namely literal versus figurative language, and on a more quantitative, corpus-based level, supported with concrete examples that illustrate several types of figurative expressions in the clinical discourse. The main research questions that this paper asks are whether there are traces of figurative language (or at least a subset of such types) in patient-doctor and patient-nurse interactions, how they can be found in a convenient way, and whether and to what degree they are transferred into the electronic health records.
},
	booktitle    = {Computational Semantics in Clinical Text workshop. Part of the 10th International Conference on Computational Semantics, Potsdam, Germany},
	author       = {Kokkinakis, Dimitrios},
	year         = {2013},
	ISBN         = {978-1-62748-398-8},
	pages        = {6},
}

@inProceedings{kokkinakis-2013-terminologihantering-189541,
	title        = {Terminologihantering i medicinska loggfiler.},
	booktitle    = {Proceedings of the "Nationell termkonferens". Göteborg},
	author       = {Kokkinakis, Dimitrios},
	year         = {2013},
}

@inProceedings{kokkinakis-2013-medical-188517,
	title        = {Medical Event Extraction using Frame Semantics - Challenges and Opportunities},
	abstract     = {The aim of this paper is to present some findings from a study into how a large-scale semantic resource, FrameNet, can be applied to event extraction in the (Swedish) biomedical domain. Combining lexical resources with domain-specific knowledge provides a powerful modeling mechanism that can be utilized for event extraction and other advanced text-mining activities. The results of developing a rule-based approach showed only small discrepancies and omissions between the semantic descriptions, the corpus data examined and the domain-specific semantics provided by SNOMED CT (medical terminology), NPL (medicinal products) and various semi-automatically developed clue lists (e.g., domain-related abbreviations). Although the described experiment is based on only four domain-specific frames, the methodology extends to the remaining ones, and there is much room for improvement, for instance by combining rule-based and machine learning techniques, and by using more advanced syntactic representations.},
	booktitle    = {Proceedings of the 14th International Conference on Intelligent Text Processing and Computational Linguistics (CICLing), Samos, Greece},
	author       = {Kokkinakis, Dimitrios},
	year         = {2013},
}

@misc{kokkinakis-2016-proceedings-252412,
	title        = {Proceedings of LREC 2016 Workshop: Resources and Processing of Linguistic and Extra-Linguistic Data from People with Various Forms of Cognitive/Psychiatric Impairments (RaPID-2016), Monday 23rd of May 2016. Linköping electronic conference proceedings.},
	abstract     = {The purpose of the Workshop on “Resources and ProcessIng of linguistic and extra-linguistic Data
from people with various forms of cognitive/psychiatric impairments” (RaPID-2016) was to provide
a snapshot view of some of the current technological landscape, resources, data samples and also
needs and challenges in the area of processing various data from individuals with various types of
mental and neurological health impairments and similar conditions at various stages; increase the
knowledge, understanding, awareness and ability to achieve useful outcomes in this area and
strengthen the collaboration between researchers and workers in the field of clinical/nursing/medical
sciences and those in the field of language technology/computational linguistics/Natural Language
Processing (NLP).
Although many of the causes of cognitive and neuropsychiatric impairments are difficult to foresee
and accurately predict, physicians and clinicians work with a wide range of factors that potentially
contribute to such impairments, e.g., traumatic brain injuries, genetic predispositions, side effects of
medication, and congenital anomalies. In this context, there is new evidence that the acquisition and
processing of linguistic data (e.g., spontaneous story telling) and extra-linguistic and production
measures (e.g., eye tracking) could be used as a complement to clinical diagnosis and provide the
foundation for future development of objective criteria to be used for identifying progressive decline
or degeneration of normal mental and brain functioning.
An important new area of research in NLP emphasizes the processing, analysis, and interpretation of
such data and current research in this field, based on linguistic-oriented analysis of text and speech
produced by such a population and compared to healthy adults, has shown promising outcomes. This
is manifested in early diagnosis and prediction of individuals at risk, the differentiation of individuals
with varying degrees of severity of brain and mental illness, and the monitoring of the
progression of such conditions through the diachronic analysis of language samples or other extra-linguistic
measurements. Initially, work was based on written data but there is a rapidly growing body
of research based on spoken samples and other modalities.
Nevertheless, there remains significant work to be done to arrive at more accurate estimates for
prediction purposes in the future and more research is required in order to reliably complement the
battery of medical and clinical examinations currently undertaken for the early diagnosis or
monitoring of, e.g., neurodegenerative and other brain and mental disorders and accordingly, aid the
development of new, non-invasive, time and cost-effective and objective (future) clinical tests in
neurology, psychology, and psychiatry.},
	author       = {Kokkinakis, Dimitrios},
	year         = {2016},
	publisher    = {Linköping University Electronic Press},
	address      = {Linköping},
	ISBN         = {978-91-7685-730-4},
}

@inProceedings{kokkinakis-2016-linguistic-243100,
	title        = {Linguistic and extra-linguistic parameters for early detection of cognitive impairment},
	abstract     = {AIM: to adapt, develop and test methods that in isolation have shown promising outcomes on tasks related to the (early) detection of dementia, differentiating between various dementia types and controls, and also to increase our understanding of the cognitive processes that underlie written text and certain forms of spoken language production. Unlike previous models, based solely on a certain aspect of language abilities (i.e. on written or spoken language alone), the project is comprehensive and more likely to provide new insights in the area of dementia detection and to improve practices applied so far. The project builds on the success stories of the past and focuses on the interplay between various types of technologies that hold the potential to provide reliable estimates for the detection of cognitive decline. The project emphasizes its interdisciplinary nature by bringing together researchers from the humanities (computational linguistics / language technology), computer science and medicine, and foresees the development of a comprehensive set of novel analytic approaches not explored jointly in the past.
GOAL: discovering evidence about linguistic performance and identifying whether the addition of new ways of investigating, combining and evaluating measurements and other parameters for the improvement of established models can advance our understanding of: i) the boundaries between normal aging and dementia; ii) its effects on linguistic performance extrapolated from various sources; and iii) whether effects of cognitive decline can be seen across (daily) language production.
},
	booktitle    = {European Summer School on Eye Movements (ESSEM),   11-17 september, 2016  Athens, Greece.},
	author       = {Kokkinakis, Dimitrios},
	year         = {2016},
}

@misc{kokkinakis-2018-resources-265118,
	title        = {Resources and ProcessIng of linguistic, para-linguistic and extra-linguistic Data from people with various forms of cognitive/psychiatric impairments (RaPID-2)},
	abstract     = {Proceedings of the second RaPID: "Resources and ProcessIng of linguistic, para-linguistic and extra-linguistic Data from people with various forms of cognitive/psychiatric impairments". An LREC workshop. 8th of May 2018, Miyazaki, Japan},
	author       = {Kokkinakis, Dimitrios},
	year         = {2018},
	ISBN         = {979-10-95546-26-9},
}

@inProceedings{kokkinakis-dannells-2006-recognizing-33936,
	title        = {Recognizing Acronyms and their Definitions in Swedish Medical Texts},
	abstract     = {This paper addresses the task of recognizing acronym-definition pairs in Swedish (medical) texts, as well as the compilation of a freely available sample of such manually annotated pairs: a material suitable not only for supervised learning experiments, but also as a testbed for the evaluation of the quality of future acronym-definition recognition systems. There are a number of approaches to the identification described in the literature, particularly within the biomedical domain, but none of them addresses the variation and complexity exhibited in a language other than English. This is reflected in the fact that we can have a mixture of two languages in the same document and/or sentence, i.e. Swedish and English; that Swedish is a compounding language, which significantly degrades the performance of previous approaches (without adaptations); and, most importantly, the fact that there is a large variation of possible acronym-definition permutations realized in the analysed corpora, a variation that is usually ignored in previous studies.
},
	booktitle    = {Proceedings of the 5th International Conference on Language Resources and Evaluation (LREC).},
	author       = {Kokkinakis, Dimitrios and Dannélls, Dana},
	year         = {2006},
}
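A minimal sketch of acronym-definition pair detection in the spirit of the abstract above, simplified along the lines of Schwartz & Hearst (2003); the paper targets the richer permutations found in Swedish, mixed-language and compound-heavy text, which this toy subsequence test does not handle:

```python
import re

def find_pairs(sentence: str) -> list[tuple[str, str]]:
    """Find (acronym, definition) pairs for parenthesized acronyms."""
    pairs = []
    for m in re.finditer(r"\(([A-ZÅÄÖ]{2,10})\)", sentence):
        acro = m.group(1)
        words = sentence[: m.start()].split()
        window = words[-len(acro) * 2:]            # candidate words before "("
        for i in range(len(window) - 1, -1, -1):   # prefer the shortest span
            candidate = " ".join(window[i:])
            it = iter(candidate.lower())           # subsequence test: letters
            if all(ch in it for ch in acro.lower()):  # of acro appear in order
                pairs.append((acro, candidate))
                break
    return pairs

print(find_pairs("Behandlas med kognitiv beteendeterapi (KBT) sedan mars."))
```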

@article{kokkinakis-edstrom-2019-alderism-284251,
	title        = {Ålderism i dagens mediala Sverige},
	journal      = {Språkbruk},
	author       = {Kokkinakis, Dimitrios and Edström, Maria},
	year         = {2019},
	number       = {3/2019},
	pages        = {22--27},
}

@inProceedings{kokkinakis-edstrom-2019-alderism-279386,
	title        = {Ålderism i svenska nyhetsmedier.},
	abstract     = {"Old age does not exist. There are people who are less young than others. That is all." (Simone de Beauvoir, 1908-1986).
Ageism refers to "prejudices or stereotyped notions based on a person's age, which may lead to discrimination". Ageism and the media is an area that has received unprecedented attention in recent years (WHO). This suggests that stereotyped descriptions and discrimination of individuals or groups of individuals on the basis of their chronological age in (printed) news media is a substantial problem. For studies of ageism it is valuable and important to understand how different types of texts and media describe or present ageing and old age. The aim of this research is therefore to collect and compile corpus-based data from various published Swedish media sources in order to answer the question of how widespread the phenomenon is in Sweden, and thereby produce more comprehensive empirical evidence about it. Two pilot studies have been carried out: one that used first names and the age distributions of their bearers according to Statistics Sweden (SCB) in various synchronic online newspaper sources, and one that applied general pattern-matching techniques to 13 volumes of Göteborgs Posten (1994, 2001-13). In our study, older people are persons ≥60 years of age. Preliminary, quantitative results indicate that there are clear and consistent differences in how different age groups are represented in these media. A distinct band shows that mentions of 25-52-year-olds are strongly overrepresented relative to what the Swedish population pyramid says they should be (SCB), while 0-24-year-olds and persons over 52 are underrepresented. Pattern matching points to similar results, with the exception of obituaries, where mentions of older people are much more common. Our pilot study confirms the introspective view of the underrepresentation of old age and older people in synchronic media sources. More studies are needed, however, and in the near future we plan to improve, scale up and apply language technology methods to both synchronic and diachronic text corpora, thereby gaining a new and broader perspective on differences and trends concerning ageing and older people, and on what various published sources from a longer time period can reveal.},
	booktitle    = {Svenskans beskrivning 37, 8–10.5.2019, Åbo, Finland.},
	author       = {Kokkinakis, Dimitrios and Edström, Maria},
	year         = {2019},
}

@inProceedings{kokkinakis-eklund-2013-query-189552,
	title        = {Query Logs as a Corpus.},
	abstract     = {This paper provides a detailed description of a large Swedish health-related query log corpus and explores means to derive useful statistics, their distributions and analytics from its content across several dimensions. Information acquisition from query logs can be useful for several purposes and potential types of users, such as terminologists, infodemiologists / epidemiologists, medical data and web analysts, specialists in NLP technologies such as information retrieval and text mining but also public officials in health and safety organizations.},
	booktitle    = {Corpus Linguistics 2013 : abstract book. Lancaster: UCREL},
	editor       = {Andrew Hardie and Robbie Love},
	author       = {Kokkinakis, Dimitrios and Eklund, Ann-Marie},
	year         = {2013},
	pages        = {329},
}

@inProceedings{kokkinakis-etal-2012-literacy-164587,
	title        = {Literacy Demands and Information to Cancer Patients},
	abstract     = {This study examines the language complexity of written health information materials for patients undergoing colorectal cancer surgery. Written and printed patient information from 28 Swedish clinics is automatically analyzed by means of language technology. The analysis reveals several problematic issues that might affect readability. The study is a first step, and part of a larger project about patients' health-information-seeking behavior in relation to written information material. Our study aims to provide support for producing more individualized, person-centered information materials according to preferences for complex and detailed or legible texts, and thus to support a movement from receiving information and instructions to participating in knowing. In the near future the study will continue by integrating focus groups with patients, which may provide valuable feedback and enhance our knowledge about patients' use and preferences of different information materials.},
	booktitle    = {Proceedings of the 15th International Conference on Text, Speech and Dialogue},
	author       = {Kokkinakis, Dimitrios and Forsberg, Markus and Johansson Kokkinakis, Sofie and Smith, Frida and Öhlén, Joakim},
	year         = {2012},
	ISBN         = {978-364232789-6},
}
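As an illustration of the kind of measure such automatic complexity analyses typically include, a minimal sketch of the classic Swedish readability index LIX (words per sentence plus the percentage of words longer than six characters); whether LIX was among the paper's actual measures is an assumption here:

```python
# Minimal sketch of the LIX readability index; the example sentence
# is invented.
import re

def lix(text: str) -> float:
    sentences = [s for s in re.split(r"[.!?]+", text) if s.strip()]
    words = re.findall(r"[A-Za-zÅÄÖåäö]+", text)
    long_words = [w for w in words if len(w) > 6]
    return len(words) / len(sentences) + 100 * len(long_words) / len(words)

print(round(lix("Operationen genomförs i narkos. Du får information efteråt."), 1))
```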

@inProceedings{kokkinakis-gerdin-2009-kvalitetssakring-105141,
	title        = {Kvalitetssäkring av SNOMED CT med hjälp av Läkartidningens arkiv. },
	abstract     = {Within the framework of the government initiative "National IT Strategy for Health and Social Care", the National Board of Health and Welfare (Socialstyrelsen) has been commissioned to translate and adapt the concept system 'the Systematized Nomenclature of Medicine, Clinical Terms' (SNOMED CT) into Swedish. The work is both extensive and time-consuming, while the contractor is also required to quality-assure the translation.

How can the archive of Läkartidningen contribute to quality assurance? Using the digital archive of Läkartidningen, LDA (volumes 1996-2009), we have developed methods to make the quality assurance of various SNOMED CT subsets (e.g. diabetes terms) more efficient. This facilitates empirical, SNOMED CT-related studies, such as producing evidence about the use, variation and frequency distribution of terms over time.

Refining the archive: the LDA was converted to a uniform text-based format, and the text content was normalized with respect to document format and character encoding in order to create a good basis for the subsequent language technology analysis. All articles in each published volume were extracted and marked up partly with various kinds of meta information (e.g. genre) and partly with linguistic and semantic information, in total 27,000 articles. The language technology processing included the automatic addition of linguistic information, such as part of speech for every word in the corpus, and automatic semantic mapping both to the Swedish MeSH thesaurus and to parts of the Swedish translation of the SNOMED hierarchy.

The LDA in a new guise: the LDA has long been a valuable Swedish medical resource for everyone working professionally with terms and language. We have, however, helped make the text material even more well structured and refined, which can support exploratory studies in which searches can be refined in several ways, giving researchers the opportunity to carry out deeper content analyses of the texts and to gather basic knowledge in various subject areas. The combination of individual terms and words with linguistic and semantic information provides unique opportunities to obtain information and generate facts that can lead to new hypotheses and possibly new knowledge about various aspects of term use and variation, and we will present examples of such analyses.
},
	booktitle    = {Svenska Läkaresällskapets Riksstämman },
	author       = {Kokkinakis, Dimitrios and Gerdin, Ulla},
	year         = {2009},
}

@article{kokkinakis-gerdin-2009-issues-105140,
	title        = {Issues on Quality Assessment of SNOMED CT® Subsets - Term Validation and Term Extraction},
	abstract     = {The aim of this paper is to apply and develop methods based on Natural Language Processing for automatically testing the validity, reliability and coverage of various Swedish SNOMED CT subsets; the Systematized NOmenclature of MEDicine - Clinical Terms is a multiaxial, hierarchical classification system which is currently being translated from English into Swedish. Our work has developed across two dimensions. First, a Swedish electronic text collection of scientific medical documents was collected and processed into a uniform format. Second, a term processing activity took place. In the first phase of this activity, various SNOMED CT subsets were mapped to the text collection in order to evaluate the validity and reliability of the translated terms. In parallel, a large number of term candidates were extracted from the corpus in order to examine the coverage of SNOMED CT. Term candidates that are currently not included in the Swedish SNOMED CT can be parts of compounds, parts of potential multiword terms, terms that have not yet been translated, or potentially new candidates. In order to achieve these goals, a number of automatic term recognition algorithms were applied to the corpus. The results of the latter process are to be reviewed, through a suitable interface, by domain experts (relevant to the subsets extracted), who can decide whether a new set of terms should be incorporated in the Swedish translation of SNOMED CT or not.
},
	journal      = {Proceedings of RANLP-2009 Workshop: Biomedical Information Extraction.},
	author       = {Kokkinakis, Dimitrios and Gerdin, Ulla},
	year         = {2009},
}

@article{kokkinakis-gerdin-2009-uppbyggandet-105136,
	title        = {Uppbyggandet av en svensk medicinsk korpus för termvalidering och termextrahering - hur bra täcker SNOMED CT olika delfackområden?},
	abstract     = {The aim of this study is partly to create a large collection of Swedish medical electronic texts, a corpus, and partly to validate and quality-assure existing terms from SNOMED CT (the Systematized NOmenclature of MEDicine - Clinical Terms) against the corpus content. In this way, an objective view of the validity, coverage and reliability of SNOMED CT can be obtained. The terminology can also be enriched with new terms or term variants by automatically extracting term candidates in various subdomains from the corpus using different statistical and linguistic methods. The results of the corpus-based, empirical studies are intended to be used by terminologists in their work to make SNOMED CT more comprehensive, reliable and consistent. At the same time, the use of authentic data makes it possible to ensure that the term variants (existing or new) are terms accepted by domain experts. If several established term variants (new term candidates) occur in the corpus, these can, after manual review, be added as synonyms of the recommended terms (with the support of a suitable review interface) and thereby further develop the content of SNOMED CT. Our presentation will accordingly rest on three main pillars: corpus construction, term validation and term extraction. The corpus was collected from two sources after permission had been obtained: the digital archive of Läkartidningen (LT) <http://ltarkiv.lakartidningen.se> and the digital archive of DiabetologNytt (DN) <http://diabetolognytt.se/aterkommande/arkivet.html>.
},
	journal      = {2009 års nationella termkonferens Språk och Kommunikation},
	author       = {Kokkinakis, Dimitrios and Gerdin, Ulla},
	year         = {2009},
}

@article{kokkinakis-gerdin-2010-lakartidningens-120480,
	title        = {Läkartidningens arkiv i en ny skepnad - En resurs för forskare, läkare och allmänhet},
	abstract     = {In Sweden, a medical corpus has been created based on the digital archive of Läkartidningen. This resource enables precise searches and valuable access to medical terminological information at various levels. Dimitrios Kokkinakis from the University of Gothenburg and Ulla Gerdin from the National Board of Health and Welfare present the project.
},
	journal      = {Språkbruk},
	author       = {Kokkinakis, Dimitrios and Gerdin, Ulla},
	year         = {2010},
	volume       = {1/2010},
	pages        = {22--28},
}

@inProceedings{kokkinakis-gerdin-2010-swedish-113194,
	title        = {A Swedish Scientific Medical Corpus for Terminology Management and Linguistic Exploration},
	abstract     = {This paper describes the development of a new Swedish scientific medical corpus. We provide a detailed description of the characteristics of this new collection, as well as results for a number of term management tasks, including terminology validation and terminology extraction based on this material. Although the corpus is representative of the scientific medical domain, it still covers many specialised sub-disciplines, such as "diabetes" and "osteoporosis", which makes it suitable for facilitating the production of smaller and more focused subcorpora. We have tried to address this issue by making some features of the corpus explicit in order to demonstrate its usefulness, particularly for the quality assessment of official terminologies such as the Systematized NOmenclature of MEDicine - Clinical Terms (SNOMED CT).},
	booktitle    = {Proceedings of the 7th international conference on Language Resources and Evaluation (LREC), Malta},
	author       = {Kokkinakis, Dimitrios and Gerdin, Ulla},
	year         = {2010},
}

@inProceedings{kokkinakis-grahn-2014-corpus-209807,
	title        = {A corpus-based approach to the identification of non-literal language in a medical setting.},
	abstract     = {Automated processing of clinical texts commonly faces various less exposed, and not so regularly discussed, linguistically complex problems that need to be addressed. One of these issues concerns the use of figurative language. Figurative language implies the use of words that go beyond their ordinary meaning, a linguistically complex and challenging problem, and also a problem that causes great difficulty for the field of natural language processing (NLP). The problem is equally prevalent in general language and in various sublanguages, such as clinical medicine. We therefore believe that a comprehensive model of, e.g., clinical language processing needs to account for figurative language usage, and this paper provides a description and preliminary results towards this goal. Since the empirical, clinical data used in the study is limited in size, no formal distinction is made between different sub-classifications of figurative language, e.g. metaphors, idioms or similes. We illustrate several types of figurative expressions in the clinical discourse and apply a quantitative, corpus-based analysis. The main research questions that this paper asks are whether there are traces of figurative language (or at least a subset of such types) in patient-doctor and patient-nurse interactions, how they can be found in a convenient way, and whether and to what degree they are transferred into the electronic health records.},
	booktitle    = {Proceedings of the Conference on Communication, Medicine and Ethics (COMET), Lugano, 26-28 June 2014},
	author       = {Kokkinakis, Dimitrios and Grahn, Inga-Lill},
	year         = {2014},
	pages        = {1},
}

@inProceedings{kokkinakis-etal-2014-vocation-209808,
	title        = {Vocation Identification in Swedish Fiction. },
	abstract     = {This paper presents a system for the automatic annotation of vocational signals in 19th century Swedish prose fiction. Besides vocation identification, the system assigns gender (male, female, unknown) to the vocation words. Since gender is a prominent attribute of first names, we apply a named-entity recognizer (NER) that uses first-name gazetteers where each name has been pre-assigned a gender, which aids gender assignment for vocations with unknown gender when appropriate context is available. We also use a statistical modelling method, conditional random fields (CRF), for learning gender-assigned vocations in combination with the results of the NER and other pattern-matching techniques. The purpose of this work is to develop and apply tools to literature as a means to expand our understanding of history in the area of literature-based gender studies, e.g. to investigate how women enter literature, which functions they assume, and their working patterns. Vocation identification can be used as one indicator for achieving some of these goals.},
	booktitle    = {Proceedings of the Fifth Swedish Language Technology Conference (SLTC)},
	author       = {Kokkinakis, Dimitrios and Ighe, Ann and Malm, Mats},
	year         = {2014},
	pages        = {3},
}

@inProceedings{kokkinakis-etal-2015-gender-215535,
	title        = {Gender-Based Vocation Identification in Swedish 19th Century Prose Fiction using Linguistic Patterns, NER and CRF Learning},
	abstract     = {This paper investigates how literature could be used as a means to expand our understanding of history. By applying macroanalytic techniques we aim to investigate how women enter literature, and particularly which functions they assume, their working patterns, and whether we can spot differences in how often male and female characters are mentioned with various types of occupational titles (vocations) in Swedish literary texts. Modern historiography, and especially feminist and women's history, has emphasized a relative invisibility of women's work and women workers. The reasons behind this are manifold, and the extent of the margin of error in terms of women's work activities is of course hard to assess. Vocation identification can therefore be used as an indicator for such exploration, and we present a hybrid system for the automatic annotation of vocational signals in 19th century Swedish prose fiction. Besides vocations, the system also assigns gender (male, female or unknown) to the vocation words, a prerequisite for the goals of the study and for future in-depth explorations of the corpora.},
	booktitle    = {Proceedings of the Fourth Workshop on Computational Linguistics for Literature (Clfl). Co-located with the NAACL/HLT. Denver, Colorado, USA},
	author       = {Kokkinakis, Dimitrios and Ighe, Ann and Malm, Mats},
	year         = {2015},
	pages        = {9},
}
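A minimal sketch of the gazetteer-based part of the pipeline described in the two entries above: spotting vocation words and assigning gender, falling back on an adjacent first name when the vocation itself is gender-neutral. The lexicons are toy assumptions; the papers add a full NER step and CRF learning:

```python
# Minimal sketch: vocation spotting with gender assignment. Toy lexicons.
VOCATIONS = {"prästen": "unknown", "sömmerskan": "female", "drängen": "male"}
FEMALE_NAMES, MALE_NAMES = {"Anna", "Greta"}, {"Karl", "Anders"}

def tag_vocations(tokens: list[str]) -> list[tuple[str, str]]:
    hits = []
    for i, tok in enumerate(tokens):
        if tok.lower() in VOCATIONS:
            gender = VOCATIONS[tok.lower()]
            # disambiguate "unknown" gender from an adjacent first name
            if gender == "unknown" and i + 1 < len(tokens):
                nxt = tokens[i + 1]
                gender = ("female" if nxt in FEMALE_NAMES
                          else "male" if nxt in MALE_NAMES else gender)
            hits.append((tok, gender))
    return hits

print(tag_vocations("Prästen Anders talade med sömmerskan".split()))
```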

@techreport{kokkinakis-johanssonkokkinakis-1998-cascaded-56209,
	title        = {A Cascaded Finite-State Parser for Syntactic Analysis of Swedish},
	author       = {Kokkinakis, Dimitrios and Johansson Kokkinakis, Sofie},
	year         = {1998},
	publisher    = {Svenska språket},
	address      = {Göteborg},
}

@article{kokkinakis-johanssonkokkinakis-1999-cascaded-56216,
	title        = {A Cascaded Finite-State Parser for Syntactic Analysis of Swedish},
	journal      = {European Chapter of the Association of Computational Linguistics (EACL)},
	author       = {Kokkinakis, Dimitrios and Johansson Kokkinakis, Sofie},
	year         = {1999},
}

@techreport{kokkinakis-johanssonkokkinakis-1999-sense-56213,
	title        = {Sense Tagging at the Cycle-Level Using GLDB},
	author       = {Kokkinakis, Dimitrios and Johansson Kokkinakis, Sofie},
	year         = {1999},
}

@article{kokkinakis-johanssonkokkinakis-1999-automatisk-56218,
	title        = {Automatisk betydelseidentifiering på cykelnivå m.h.a. GLDB},
	journal      = {Proceedings från NFL Symposiet (Nordisk Förening i Lexikografi) och Nordiska studier i Lexikografi},
	author       = {Kokkinakis, Dimitrios and Johansson Kokkinakis, Sofie},
	year         = {1999},
}

@inProceedings{kokkinakis-etal-2012-contextualisation-155530,
	title        = {Contextualisation of functional symptoms in primary health care},
	abstract     = {Background: a number of patients consulting primary health care have physical symptoms that may be labeled "medically unexplained", i.e. there is no demonstrable organic etiology. Common functional somatic symptoms (FSS) are irritable bowel, tension headache and chronic fatigue. FSS patients are generally frustrated with the inability of health care to alleviate their illness, and health care staff often feel frustration as well. The communication between patient and care giver is the key to coming to terms with the problem. Objective: to investigate how complex, vague and long-standing symptoms with no identified organic cause are put into context, interpreted and acted upon in primary health-care interactions. Two types of interventions are envisaged: (i) methods for early identification of patients at risk of entering a vicious circle of functional symptoms, and (ii) methods for re-interpreting symptoms in alternative and more purposeful ways. Methods: the project studies interactions between patients and nurses giving advice over the telephone, consultations between patients and physicians, interviews, and patients' medical case notes. Eligible patients (18-65 y.o.) have contacted their primary health care centre by telephone, have had at least eight physical consultations with nurses or physicians in the last 12 months, and a majority of their symptoms within this time span had no clear organic or psychiatric cause. The project contains a number of subprojects, according to the type of data collected. Several methods of analysis will be used, mainly critical discourse analysis, phenomenological-hermeneutic analysis and computational linguistic analyses. (Expected) Results: using the collected data, we describe characteristics of the communication that takes place in these settings and the way symptoms and diseases are represented. This will facilitate the development of future interventions aimed at decreasing the morbidity due to FSS and give further insights into the problem.
},
	booktitle    = {The 5th GENEVA Conference on Person-Centered Medicine. Geneva, Switzerland. },
	author       = {Kokkinakis, Dimitrios and Lidén, Eva and Svensson, Staffan and Björk Brämberg, Elisabeth and Määttä, Sylvia},
	year         = {2012},
}

@inProceedings{kokkinakis-lundholmfors-2019-"hund-279384,
	title        = {"hund, katt, ko...": Semantiskt ordflödestest som indikator på kognitiv nedsättning hos äldre.},
	abstract     = {Verbal fluency tests are a type of test often included in linguistic and neuropsychological assessments. They are used to assess language abilities, such as word retrieval, and executive functions, such as verbal working memory and processing speed. In a phonemic fluency test, the person is asked to produce, within a limited time (usually 60 seconds), as many words as possible beginning with a given letter (often F, A and S), whereas in a semantic fluency test the person is instead asked to produce words belonging to a given category (e.g. animals or vegetables). These tests take little time to carry out, are easy to administer, and provide valuable information about cognitive abilities and limitations. Previous research has shown that verbal fluency tests have high reliability and are sensitive to cognitive impairment. Traditionally, only the number of correctly produced words is measured in the analysis of the tests, but with digital audio recording and the developments in language technology, more detailed analyses can now be made, yielding new information about the strategies used during word generation: namely clustering (producing a group of related words within an already identified subcategory) and switching (searching for and switching to new subcategories). In our research we study, among other things, the semantic fluency test as a nuanced indicator of various aspects of executive and language abilities in persons with mild forms of degenerative cognitive impairment, as well as in a control group of cognitively healthy individuals. The study will present details of our language technology analysis, show the differences that exist between the groups, and discuss any relationships with other, already administered, neuropsychiatric tests for the same population.},
	booktitle    = {Svenskans beskrivning 37, 8–10.5.2019, Åbo, Finland.},
	author       = {Kokkinakis, Dimitrios and Lundholm Fors, Kristina},
	year         = {2019},
}
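As a rough illustration of the clustering-and-switching analysis described above, a minimal sketch that counts clusters and switches in a fluency transcript using a toy subcategory lexicon; real analyses use richer lexical resources or embedding-based similarity:

```python
# Minimal sketch of cluster/switch counting for a semantic fluency
# transcript. The subcategory lexicon is a toy assumption.
SUBCATEGORY = {
    "hund": "pets", "katt": "pets",
    "ko": "farm", "gris": "farm", "häst": "farm",
    "lejon": "wild", "tiger": "wild",
}

def clusters_and_switches(words: list[str]) -> tuple[int, int]:
    cats = [SUBCATEGORY.get(w, "other") for w in words]
    # a switch is any change of subcategory between adjacent words
    switches = sum(1 for a, b in zip(cats, cats[1:]) if a != b)
    clusters = switches + 1 if words else 0
    return clusters, switches

print(clusters_and_switches(["hund", "katt", "ko", "gris", "lejon"]))
# -> (3, 2): two pets, two farm animals, one wild animal
```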

@inProceedings{kokkinakis-etal-2016-specifications-243183,
	title        = {Specifications and Methodology for Language-Related Data Acquisition and Analysis in the Domain of Dementia Diagnostics},
	abstract     = {This paper outlines the initial stages of a project that aims to build and use a corpus with data samples acquired from people diagnosed with subjective or mild cognitive impairment and healthy, age-matched controls. The data we are currently collecting consists of audio-recorded spoken language samples; transcripts of the audio recordings and eye tracking measurements. From these data we plan to extract, evaluate and model features to be used for learning classification models in order to test how well a differentiation between the aforementioned subject groups can be made. Features will be also correlated with outcomes from e.g. other language-related scores, such as word fluency, in order to investigate whether there are relationships between various variables.},
	booktitle    = { The Sixth Swedish Language Technology Conference (SLTC) Umeå University, 17-18 November, 2016},
	author       = {Kokkinakis, Dimitrios and Lundholm Fors, Kristina and Björkner, Eva and Nordlund, Arto},
	year         = {2016},
}

@inProceedings{kokkinakis-etal-2017-data-256955,
	title        = {Data Collection from Persons with Mild Forms of Cognitive Impairment and Healthy Controls - Infrastructure for Classification and Prediction of Dementia},
	abstract     = {Cognitive and mental deterioration, such as difficulties with memory and language, are some of the typical phenotypes for most neurodegenerative diseases including Alzheimer’s disease and other dementia forms. This paper describes the first phases of a project that aims at collecting various types of cognitive data, acquired from human subjects in order to study relationships among linguistic and extra-linguistic observations. The project’s aim is to identify, extract, process, correlate, evaluate, and disseminate various linguistic phenotypes and measurements and thus contribute with complementary knowledge in early diagnosis, monitor progression, or predict individuals at risk. In the near future, automatic analysis of these data will be used to extract various types of features for training, testing and evaluating automatic classifiers that could be used to differentiate individuals with mild symptoms of cognitive impairment from healthy, age-matched controls and identify possible indicators for the early detection of mild forms of cognitive impairment. Features will be extracted from audio recordings (speech signal), the transcription of the audio signals (text) and the raw eye-tracking data.},
	booktitle    = {Proceedings of the 21st Nordic Conference on Computational Linguistics, NoDaLiDa, 22-24 May 2017, Gothenburg, Sweden},
	author       = {Kokkinakis, Dimitrios and Lundholm Fors, Kristina and Björkner, Eva and Nordlund, Arto},
	year         = {2017},
	publisher    = {Linköping University Electronic Press, Linköpings universitet},
	address      = {Linköping},
	ISBN         = {978-91-7685-601-7},
}

@inProceedings{kokkinakis-etal-2018-textforskning-265113,
	title        = {Kan textforskning bidra till tidigare och säkrare demensdiagnostik?},
	abstract     = {Previous research has shown that subtle language impairments can be present in the earliest preclinical stages of dementia, several years before a clinical diagnosis can be made. Within the project ”Språkliga och extra-lingvistiska parametrar för tidig upptäckt av kognitiv svikt” (Linguistic and extra-linguistic parameters for early detection of cognitive impairment; funded by Riksbankens Jubileumsutlysning, 2016-19), we use language technology and language analysis studies to investigate how these language impairments manifest themselves. Can language technology be used to detect these early language-related symptoms and thereby contribute nuanced, complementary and useful knowledge? Can language technology distinguish persons with the very earliest cognitive deviations from persons with more benign, age-related cognitive decline? Which linguistic abilities are affected? How do these changes manifest themselves, and what kinds of empirical material are available? These are some of the questions we seek to answer. We make recordings which we analyze in order to produce new knowledge about subtle linguistic markers that may precede the development of dementia. This knowledge can potentially be used to predict which individuals are at risk of developing dementia, and can serve as complementary decision support for domain experts. We extract, analyze and investigate whether there are correlations between various language-related parameters from spontaneous spoken interaction and transcriptions, as well as eye movements and neuropsychological tests, from persons with subjective or mild cognitive impairment and healthy controls. It is often difficult to determine whether mild cognitive symptoms are part of normal ageing or the beginning of a neurodegenerative process. Nor do we expect every single person with cognitive impairment to express themselves or read in the same way; rather, early in the course of the disease these persons will begin to show various kinds of deviant reading patterns, or make phonological, lexical, syntactic or semantic errors. In the study we develop tools for automatically finding these deviations, and the goal is for these to be usable as a complement to early diagnostics and as a prognostic or screening tool. The participants in our study have been recruited from an ongoing longitudinal study, ”Demens i Tidigt Skede” (Eng. ”The Gothenburg MCI study”), at the Memory Clinic (Minnesmottagningen) in Gothenburg, and our project has been approved by the local ethics committee. All participants in the study (the control group [HC], persons with subjective cognitive impairment [SCI] and persons with mild cognitive impairment [MCI]) have undergone baseline examination and given informed written consent (demographic information is provided in Table 1). Our project is currently ongoing, and we will present results based on the first recording round (Aug. 2016-March 2017). A new recording round, with the same participants, began in February 2018 and is expected to be completed in December 2018. During the presentation we will give examples of the various speech, text and eye-movement analyses we have carried out, and discuss methodological choices and results based on the first phase of the study. We will also give a brief glimpse of the new, ongoing recording round and the new test components we use. With our work we want to show how language technology analysis can help extend our knowledge in this area so that it can be useful for early diagnostics and optimal care. According to the National Board of Health and Welfare (Socialstyrelsen, 2017), there are more than 160,000 persons with some form of dementia in Sweden.
Our results may be of importance for health care professionals who want to diagnose and identify individuals with various forms of cognitive impairment more quickly, before serious symptoms become apparent. The development possibilities are many: new or improved cognitive screening tests that could be used in primary and specialist care, as well as the development and application of interventions that can influence behavioural patterns and train the individual's communicative ability, may in the long run lead to positive consequences such as shorter care queues and treatment that is more effective in terms of both costs and outcomes.},
	booktitle    = {Forum för textforskning 13, Lund, 7-8 juni 2018},
	author       = {Kokkinakis, Dimitrios and Lundholm Fors, Kristina and Eckerström, Marie and Themistocleous, Charalambos},
	year         = {2018},
}

@inProceedings{kokkinakis-etal-2019-multifaceted-278217,
	title        = {A Multifaceted Corpus for the Study of Cognitive Decline in a Swedish Population},
	abstract     = {A potential, early-stage diagnostic marker for neurodegenerative diseases, such as Alzheimer’s disease, is the onset of language disturbances, which is often characterized by subtle word-finding difficulties, impaired spontaneous speech, slight speech hesitancy, object naming difficulties and phonemic errors. Connected speech provides valuable information in a non-invasive and easy-to-assess way for determining aspects of the severity of language impairment. Data elicitation is an established method of obtaining highly constrained samples of connected speech that allows us to study the intricate interactions between various linguistic levels and cognition. In the paper, we describe the collection and content of a corpus consisting of spontaneous Swedish speech from individuals with Mild Cognitive Impairment (MCI), with Subjective Cognitive Impairment (SCI) and healthy, age-matched controls (HC). The subjects were pooled across homogeneous subgroups for age and education, a sub-cohort from the Gothenburg MCI study. The corpus consists of high-quality audio recordings (including transcriptions) of several tasks, namely:
(i)	a picture description task – the Cookie-theft picture, an ecologically valid approximation to spontaneous discourse that has been widely used to elicit speech from speakers with different types of language and communication disorders; 
(ii)	a read aloud task (including registration of eye movements) – where participants read a text from the IREST collection twice, both on a computer screen (while eye movements are registered), and the same text on paper;
(iii)	a complex planning task – a subset of executive functioning that tests the ability to identify, organize and carry out (complex) steps and elements that are required to achieve a goal;
(iv)	a map task – a spontaneous speech production/semi-structured conversation in which the participants are encouraged to talk about a predefined, cooperative task-oriented topic;
(v)	a semantic verbal fluency task – category animals: where participants have to produce as many words as possible from a category in a given time (60 seconds). The fluency tests require an elaborate retrieval of words from conceptual (semantic) and lexical (phonetic) memory involving specific areas of the brain in a restricted timeframe. 
All samples are produced by Swedish speakers, after obtaining written consent approved by the local ethics committee. Tasks (i) and (ii) have been collected twice, 18 months apart, between 2016 and 2018.
The corpus represents an approximation to speech in a natural setting: the material for elicitation is controlled in the sense that the speakers are given specific tasks to talk about, and they do so in front of a microphone. The corpus may serve as a basis for many linguistic and/or speech technological investigations and has already been used for various investigations of language features.},
	booktitle    = {CLARe4 : Corpora for Language and Aging Research, 27 February – 1 March 2019, Helsinki, Finland},
	author       = {Kokkinakis, Dimitrios and Lundholm Fors, Kristina and Fraser, Kathleen and Eckerström, Marie and Horn, Greta and Themistocleous, Charalambos},
	year         = {2019},
}

@inProceedings{kokkinakis-etal-2018-swedish-262851,
	title        = {A Swedish Cookie-Theft Corpus},
	abstract     = {Language disturbances can be a diagnostic marker for neurodegenerative diseases, such as Alzheimer’s disease, at earlier stages, and connected speech analysis provides a non-invasive and easy-to-assess measure for determining aspects of the severity of language impairment. In this paper we focus on the development of a corpus consisting of audio recordings of picture descriptions of the Cookie-theft, produced by Swedish speakers, and accompanying transcriptions. The speech elicitation procedure provides an established method of obtaining highly constrained samples of connected speech that can allow us to study the intricate interactions between various linguistic levels and cognition. We chose the Cookie-theft picture since it is a standardized test that has been used in various studies in the past, and therefore comparisons can be made based on previous results. This type of picture description task might be useful for detecting subtle language deficits in patients with subjective and mild cognitive impairment. The resulting corpus is a new, rich and multi-faceted resource for the investigation of linguistic characteristics of connected speech and a unique data set that provides a rich resource for (future) research and experimentation in many areas, and of language impairment in particular. The information in the corpus can also be combined and correlated with other collected data about the speakers, such as neuropsychological tests, imaging and brain physiology markers and cerebrospinal fluid markers.},
	booktitle    = {LREC 2018, 11th edition of the Language Resources and Evaluation Conference, 7-12 May 2018, Miyazaki (Japan) / Editors:  Nicoletta Calzolari (Conference chair), Khalid Choukri, Christopher Cieri, Thierry Declerck, Sara Goggi, Koiti Hasida, Hitoshi Isahara, Bente Maegaard, Joseph Mariani, Hélène Mazo, Asuncion Moreno, Jan Odijk, Stelios Piperidis, Takenobu Tokunaga},
	author       = {Kokkinakis, Dimitrios and Lundholm Fors, Kristina and Fraser, Kathleen and Nordlund, Arto},
	year         = {2018},
	publisher    = {European Language Resources Association},
	ISBN         = {979-10-95546-00-9},
}

@inProceedings{kokkinakis-etal-2016-data-243069,
	title        = {Data Resource Acquisition from People at Various Stages of Cognitive Decline – Design and Exploration Considerations},
	abstract     = {In this paper we introduce work in progress towards the development of an infrastructure (i.e., design, methodology, creation and description) of linguistic and extra-linguistic data samples acquired from people diagnosed with subjective or mild cognitive impairment and healthy, age-matched controls. The data we are currently collecting consists of several modalities: audio-recorded spoken language samples, transcripts of the audio recordings (text) and eye tracking measurements. The integration of the extra-linguistic information with the linguistic phenotypes and measurements elicited from audio and text will be used to extract, evaluate and model features for machine learning experiments. In these experiments, classification models will be trained on the whole data set or a subset of it to make predictions on new data, in order to test how well the aforementioned groups can be differentiated. Features will also be correlated with measured outcomes from e.g. language-related scores, such as word fluency, in order to investigate whether there are relationships between various variables.},
	booktitle    = {The Seventh International Workshop on Health Text Mining and Information Analysis (Louhi). November 5, 2016, Austin, Texas, USA},
	author       = {Kokkinakis, Dimitrios and Lundholm Fors, Kristina and Nordlund, Arto},
	year         = {2016},
}
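
The classification setup this entry outlines (features extracted per subject, then supervised learning to separate the groups) can be sketched as follows; the feature columns and values are synthetic placeholders, not the project's data.

# Sketch: training a classifier to separate two subject groups from extracted
# features. All data below is synthetic and purely illustrative.
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score

rng = np.random.default_rng(0)
# invented feature columns: speech rate, pause ratio, word-fluency score
X_hc  = rng.normal([5.0, 0.2, 25.0], [0.5, 0.05, 4.0], size=(40, 3))
X_mci = rng.normal([4.3, 0.3, 18.0], [0.5, 0.05, 4.0], size=(40, 3))
X = np.vstack([X_hc, X_mci])
y = np.array([0] * 40 + [1] * 40)      # 0 = healthy control, 1 = MCI

clf = LogisticRegression(max_iter=1000)
print(cross_val_score(clf, X, y, cv=5).mean())  # rough separability estimate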

@inProceedings{kokkinakis-malm-2011-character-143875,
	title        = {Character Profiling in 19th Century Fiction},
	abstract     = {This paper describes how personal relationships between main characters in 19th-century Swedish prose fiction can be identified using information guided by named entities, provided by an entity recognition system adapted to the characteristics of 19th-century Swedish. Interpersonal relation extraction is based on the context between two relevant, identified person entities. The extraction process also utilizes the content of lexical semantic resources available online (suitable vocabularies) and fairly standard context matching methods, which provide a basic mechanism for identifying a wealth of interpersonal relations that can hopefully aid the reader of a 19th-century Swedish literary work to better understand its content and plot, and to get a bird’s-eye view of the landscape of the core story.},
	booktitle    = {Workshop: Language Technologies for Digital Humanities and Cultural Heritage in conjunction with the Recent Advances in Natural Language Processing (RANLP). Hissar, Bulgaria.},
	author       = {Kokkinakis, Dimitrios and Malm, Mats},
	year         = {2011},
}

@inProceedings{kokkinakis-malm-2013-macroanalytic-188518,
	title        = {A Macroanalytic View of Swedish Literature using Topic Modeling.},
	abstract     = {New research opportunities are plentiful for digital and literature scholars, who are currently faced with increasingly large digitized archives produced during the last decades. Conventional methods of analysis involving a so-called close reading view are not enough. Distant reading, or macroanalysis, is proposed instead as a better, viable and more pragmatic alternative to the traditional methods of analyzing e.g. literature. According to this view, understanding literature is not accomplished by studying individual texts, but by aggregating and analyzing massive amounts of data. Therefore, applying macroanalytic methods and technologies is a priority among many research groups in the humanities worldwide. In this paper we explore topic modeling, an increasingly popular statistical method used for uncovering themes, topics and patterns in large amounts of text. We use available topic modeling software and, as empirical data, the content of the Swedish Literature Bank, a constantly growing corpus of Swedish fiction from the 18th and 19th centuries. We present preliminary results on a sample of this corpus and discuss how humanistic research can be conducted through this type of computation, as a means to identify potential issues of interest, e.g. for historians.},
	booktitle    = {Corpus Linguistics 2013 : abstract book (Lancaster)},
	editor       = {Andrew Hardie and Robbie Love},
	author       = {Kokkinakis, Dimitrios and Malm, Mats},
	year         = {2013},
}
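
For readers unfamiliar with topic modeling software, a minimal sketch of the kind of analysis described above, using gensim's LDA implementation on toy documents standing in for Swedish Literature Bank texts:

# Sketch: LDA topic modeling over a small document collection. The texts are
# placeholders; the entry does not specify which software the authors used.
from gensim import corpora, models

texts = [
    ["ship", "sea", "captain", "storm"],
    ["love", "letter", "heart", "marriage"],
    ["sea", "storm", "sailor", "wave"],
]  # pre-tokenized, stopword-filtered documents (toy data)

dictionary = corpora.Dictionary(texts)
bow_corpus = [dictionary.doc2bow(t) for t in texts]
lda = models.LdaModel(bow_corpus, num_topics=2, id2word=dictionary,
                      passes=10, random_state=0)
for topic_id, words in lda.print_topics():
    print(topic_id, words)   # each topic as a weighted word mixture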

@inProceedings{kokkinakis-malm-2015-detecting-225762,
	title        = {Detecting Reuse of Biblical Quotes in Swedish 19th Century Fiction using Sequence Alignment},
	abstract     = {Text reuse, a form of text repetition, recycling or borrowing, is a theoretically and practically interesting problem that has attracted considerable attention in recent years, e.g. in the cultural heritage context (historical and comparative linguistics), in the study of how ideas propagate through social networks, and in the measuring of journalistic reuse. In this paper we briefly outline and experiment with a method used for biological sequence alignment that has also been used in humanities research, e.g. for detecting similar passages in the complete works of Voltaire and 18th-century French encyclopedias, or for tracing how and which ideas spread in 19th-century US newspaper collections. We use available software (text-PAIR: Pairwise Alignment for Intertextual Relations) and match the Charles XII Bible translation into Swedish, completed in 1703, against the Swedish prose fiction of 1800-1900, in order to automatically detect passages taken from this particular Bible translation in the prose fiction corpus.},
	booktitle    = {Corpus-based Research in the Humanities workshop (CRH), 10 December 2015  Warsaw, Poland },
	author       = {Kokkinakis, Dimitrios and Malm, Mats},
	year         = {2015},
	ISBN         = {978-83-63159-19-1},
	pages        = {79--86},
}
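
The alignment idea can be illustrated with a far simpler stand-in for text-PAIR: Python's standard-library SequenceMatcher finding shared word sequences between two toy texts (the snippets below are invented, not drawn from the actual corpora).

# Sketch: detecting reused passages as long common word sequences.
from difflib import SequenceMatcher

bible = "i begynnelsen skapade gud himmel och jord".split()
novel = "han mindes orden i begynnelsen skapade gud allting".split()

matcher = SequenceMatcher(a=bible, b=novel, autojunk=False)
for block in matcher.get_matching_blocks():
    if block.size >= 4:                     # report only longer shared passages
        print(" ".join(bible[block.a:block.a + block.size]))
# -> "i begynnelsen skapade gud"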

@inProceedings{kokkinakis-etal-2014-semantics-209802,
	title        = {Semantics in Storytelling in Swedish Fiction},
	abstract     = {In this paper, we aim to define foundations and research questions for future large-scale exploration of various types of semantic relationships in literature, namely Swedish prose fiction. More specifically, we are interested in getting an in-depth understanding of storytelling in Swedish fiction by analyzing and mining the narrative discourse in a small sample of such data, focusing on interpersonal relationships and answering various questions, such as how to recognize and assess gender patterns. Our intention is to apply our findings on a much larger scale in the near future, in order to obtain useful insights about the social relations, structures, behavior and everyday life of characters found in literary works, thus enhancing the use of prose fiction as a source for research within the humanities and social sciences. Our work is inspired by the notions of distant reading and macroanalysis, a relatively new and often contested paradigm of literary research. In order to achieve our goal we strive for a combination of natural language processing techniques and simple visualizations that allow the user to rapidly focus on key areas of interest and provide the ability to discover latent semantic patterns and structures.},
	booktitle    = {Proceedings of the Digital Access to textual Cultural Heritage (DATeCH).},
	author       = {Kokkinakis, Dimitrios and Malm, Mats and Bergenmar, Jenny and Ighe, Ann},
	year         = {2014},
	ISBN         = {978-1-4503-2588-2},
	pages        = {6},
}

@inProceedings{kokkinakis-etal-2014-hfst-209800,
	title        = {HFST-SweNER – A New NER Resource for Swedish},
	abstract     = {Named entity recognition (NER) is a knowledge-intensive information extraction task that is used for recognizing textual mentions of entities that belong to a predefined set of categories, such as locations, organizations and time expressions. NER is a challenging, difficult, yet essential preprocessing technology for many natural language processing applications, and particularly crucial for language understanding. NER has been actively explored in academia and in industry especially during the last years due to the advent of social media data. This paper describes the conversion, modeling and adaptation of a Swedish NER system from a hybrid environment, with integrated functionality from various processing components, to the Helsinki Finite-State Transducer Technology (HFST) platform. This new HFST-based NER (HFST-SweNER) is a full-fledged open source implementation that supports a variety of generic named entity types and consists of multiple, reusable resource layers, e.g., various n-gram-based named entity lists (gazetteers).},
	booktitle    = {Proceedings of the 9th edition of the Language Resources and Evaluation Conference (LREC), Reykjavik 26 - 31 May 2014.},
	author       = {Kokkinakis, Dimitrios and Niemi, Jyrki and Hardwick, Sam and Lindén, Krister and Borin, Lars},
	year         = {2014},
	ISBN         = {978-2-9517408-8-4},
	pages        = {2537--2543},
}

@inProceedings{kokkinakis-oelke-2012-women-155537,
	title        = {Men, Women and Gods: Distant Reading in Literary Collections - Combining Visual Analytics with Language Technology},
	abstract     = {The volumes of digitized literary collections in various languages increase at a rapid pace, and so does the need for computational support in analyzing such data. Literature can be studied in a number of different ways and from many different perspectives, and text analysis makes up a central component of literature studies. If such analysis can be integrated with advanced visual methods and fed back into the daily work of the literature researcher, it is likely to reveal useful and nuanced insights into the complex daily lives, ideas and beliefs of the main characters found in many literary works. In this paper we describe the combination of robust text analysis with visual analytics, bringing a new set of tools to literary analysis. As a showcase, we analyzed a small subset (13 novels by a single author) taken from a large literary collection, the Swedish Literature Bank <http://litteraturbanken.se/#!om/inenglish>. The analysis is based upon two levels of inquiry, namely mentions of theistic beings (e.g. Gods' names) and mentions of persons' names, including their gender and their normalized, linked variant forms, and examines their appearance in sentences, paragraphs and chapters. The case study shows several successful applications of visual analytics methods to various literature problems and demonstrates the advantages of implementing visual literature fingerprinting. Our work is inspired by the notion of distant reading, or macroanalysis, for the analysis of literature collections. We start by recognizing all characters in the novels using a mature language technology (named entity recognition), which can be turned into a tool in aid of text analysis in this field. We apply context cues and lists of animacy and gender markers, inspired by the document-centered approach and the labelled consistency principle, a form of on-line learning from documents under processing which looks at unambiguous usages of words or names in order to assign annotations to ambiguous ones. For instance, if in an unambiguous context with a strong gender indicator, such as 'Mrs Alexander', the name 'Alexander' is assigned feminine gender, then subsequent mentions of the same name in the same discourse will also be assigned feminine gender, unless there is a conflict with another person of the same name. We argue that the integration of text analysis such as the one briefly outlined here with visualization techniques, such as high-resolution pixel-based fingerprinting, could be put to effective use in literature studies as well. We also see an opportunity to devise new ways of exploring the large volumes of literary texts being made available through national cultural heritage digitization projects, for instance by exploring the possibility of showing several literary texts (novels) at once. We illustrate some of the applied techniques using examples from our case study, such as summary plots based on all the characters in these novels, as well as fingerprints based on the distribution of characters across the novels.},
	booktitle    = {Proceedings of the Advances in Visual Methods for Linguistics (AVML)},
	author       = {Kokkinakis, Dimitrios and Oelke, Daniela},
	year         = {2012},
	volume       = {Accepted},
}

@inProceedings{kokkinakis-etal-2004-intelligent-33932,
	title        = {Intelligent Building of Language Resources for HLT Applications},
	booktitle    = {Proceedings of the LREC Workshop: Amazing Utility of Parallel and Comparable Corpora. Fourth Language Resources and Evaluation Conference (LREC)},
	author       = {Kokkinakis, Dimitrios and Samiotou, Anna and Kranias, Lambros},
	year         = {2004},
}

@article{kokkinakis-thurin-2007-anonymisation-45193,
	title        = {Anonymisation of Swedish Clinical Data},
	abstract     = {There is a constantly growing demand for exchanging clinical and health-related information electronically. In the era of the Electronic Health Record the release of individual data for research, health care statistics, monitoring of new diagnostic tests and tracking disease outbreak alerts are some of the areas in which the protection of (patient) privacy has become an important concern. In this paper we present a system for automatic anonymisation of Swedish clinical free text, in the form of discharge letters, by applying generic named entity recognition technology.},
	journal      = {Lecture Notes in Computer Science},
	author       = {Kokkinakis, Dimitrios and Thurin, Anders},
	year         = {2007},
	volume       = {4594},
	pages        = {237--241},
}
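
The anonymisation step itself, once an NER system has produced entity spans, is a simple span-replacement operation. A minimal sketch, with hand-written spans standing in for NER output and an assumed placeholder scheme:

# Sketch: pseudonymising a text given named-entity character spans. The spans
# and the bracketed placeholder format are illustrative assumptions.
def anonymise(text, entities):
    """entities: list of (start, end, label) character spans, e.g. from NER."""
    out, prev = [], 0
    for start, end, label in sorted(entities):
        out.append(text[prev:start])
        out.append(f"[{label}]")
        prev = end
    out.append(text[prev:])
    return "".join(out)

letter = "Anna Svensson, 1954-03-12, was discharged from Sahlgrenska."
spans = [(0, 13, "PERSON"), (15, 25, "DATE"), (47, 58, "LOCATION")]
print(anonymise(letter, spans))
# -> "[PERSON], [DATE], was discharged from [LOCATION]."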

@article{kokkinakis-thurin-2007-identification-45195,
	title        = {Identification of Entity References in Hospital Discharge Letters},
	abstract     = {In the era of the Electronic Health Record the release of medical narrative textual data for research, for health care statistics, for monitoring of new diagnostic tests and for tracking disease outbreak alerts imposes tough restrictions by various public authority bodies for the protection of (patient) privacy. In this paper we present a system for automatic identification of named entities in Swedish clinical free text, in the form of discharge letters, by applying generic named entity recognition technology with minor adaptations},
	journal      = {Proceedings of the 16th Nordic Conference of Computational Linguistics (NODALIDA)},
	author       = {Kokkinakis, Dimitrios and Thurin, Anders},
	year         = {2007},
}

@inProceedings{kokkinakis-thurin-2008-applying-73972,
	title        = {Applying MeSH® to the (Swedish) Clinical Domain - Evaluation and Lessons learned},
	abstract     = {Medical discharge summaries and clinical notes provide an information-rich, nearly unexplored corpus of evidential knowledge that is considered a potential goldmine for medical scientists as well as practitioners in the language technology field. The capability to extract the key concepts and their relationships from such data can be of great value for knowledge management tasks such as indexing, data interchange, data aggregation and clinical decision support. The purpose of this work is to gain insights into the feasibility of applying the content of a controlled vocabulary, the Medical Subject Headings (MeSH), to a sample of electronic discharge letters (i.e. free-text clinical notes). We explore the application of natural language processing (NLP) techniques to the challenge of efficiently detecting the terminology encoded in MeSH, and we evaluate MeSH in this setting, showing that a lot of work remains to be done in order to increase the coverage of the resource, both in terms of its breadth and its depth.
},
	booktitle    = {Proceedings of the 6th Scandinavian Health Informatics and the 12th Swedish National Term Conference},
	author       = {Kokkinakis, Dimitrios and Thurin, Anders},
	year         = {2008},
}

@article{kokkinakis-toporowskagronostaj-2006-comparing-34032,
	title        = {Comparing Lay and Professional Language in Cardiovascular Disorders Corpora.},
	abstract     = {This paper reports on a corpus-based, contrastive study of Swedish medical language. It is focused on the vocabulary used in two types of medical textual material: professional portals and web-based consumer sites within the domain of cardiovascular disorders. Linguistic, statistical and quantitatively based readability studies are considered in order to find the typical language-dependent and, possibly, language independent characteristics of the material examined and suggest concrete measures that might bridge the gap in medical vocabulary as used by laypersons/consumers and professionals.
},
	journal      = {WSEAS Transactions on BIOLOGY and BIOMEDICINE},
	author       = {Kokkinakis, Dimitrios and Toporowska Gronostaj, Maria},
	year         = {2006},
	volume       = {3},
	number       = {6},
	pages        = {429--437},
}

@inProceedings{kokkinakis-toporowskagronostaj-2006-language-33938,
	title        = {Lay Language versus Professional Language within the Cardiovascular Subdomain - a Contrastive Study},
	abstract     = {This paper reports on a corpus-based, contrastive study of Swedish medical language. It is focused on the vocabulary used in two types of medical textual material: professional portals and web-based consumer sites within the domain of cardiovascular disorders. Linguistic, statistical and quantitatively based readability studies are considered in order to find the typical language-dependent and, possibly, language-independent characteristics of the material examined and suggest concrete measures that might bridge the gap in medical vocabulary as used by laypersons/consumers and professionals.},
	booktitle    = {Proceedings of the 2006 WSEAS Int. Conf. on Cellular & Molecular Biology, Biophysics & Bioengineering},
	author       = {Kokkinakis, Dimitrios and Toporowska Gronostaj, Maria},
	year         = {2006},
}

@inProceedings{kokkinakis-toporowskagronostaj-2008-medlex+-73976,
	title        = {MEDLEX+: An Integrated Corpus-Lexicon Medical Workbench for Swedish},
	abstract     = {This paper reports on ongoing work on developing a medical corpus-lexicon workbench for Swedish, MedLex+. At the moment the workbench incorporates: (i) an annotated collection of medical texts, 25 million tokens, 50,000 documents; (ii) a number of language processing components, including tools for collocation extraction, compound segmentation and thesaurus-based semantic annotation; and (iii) a lexical database of medical terms (5,000 entries). MedLex+ is a multifunctional lexical resource thanks to its structural design and easily queried content. The workbench is intended to support lexicographers in compiling lexicons, as well as lexicon users with varying degrees of familiarity with the medical domain. It can also assist researchers working in the fields of lexical semantics and natural language processing (NLP) with a focus on medical language. The linguistically and semantically annotated medical texts, in combination with a set of queries, turn the corpus into a rich repository of semasiological and onomasiological knowledge about medical terminology and its linguistic, lexical and pragmatic properties. These properties are recorded in the lexical database with a cognitive profile. The MedLex+ workbench seems to offer constructive help in many different lexical tasks.},
	booktitle    = {Proceedings of the 13th EURALEX},
	author       = {Kokkinakis, Dimitrios and Toporowska Gronostaj, Maria},
	year         = {2008},
}
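
One of the workbench components mentioned above, collocation extraction, is commonly done with pointwise mutual information. A toy sketch (the corpus and the PMI formulation are illustrative; the abstract does not specify the actual method used in MedLex+):

# Sketch: bigram collocation scoring with pointwise mutual information (PMI).
import math
from collections import Counter

tokens = ("akut hjärtinfarkt diagnos akut hjärtinfarkt behandling "
          "kronisk smärta akut hjärtinfarkt").split()

unigrams = Counter(tokens)
bigrams = Counter(zip(tokens, tokens[1:]))
n = len(tokens)

def pmi(w1, w2):
    p_xy = bigrams[(w1, w2)] / (n - 1)
    p_x, p_y = unigrams[w1] / n, unigrams[w2] / n
    return math.log2(p_xy / (p_x * p_y))

print(pmi("akut", "hjärtinfarkt"))   # high PMI suggests a collocation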

@inProceedings{kokkinakis-toporowskagronostaj-2010-linking-119441,
	title        = {Linking SweFN++ with Medical Resources, towards a MedFrameNet for Swedish},
	abstract     = {In this pilot study we define and apply a methodology for building an event extraction system for the Swedish scientific medical and clinical language. Our aim is to find and describe linguistic expressions which refer to medical events, such as events related to diseases, symptoms and drug effects. In order to achieve this goal we have initiated actions that aim to extend and refine parts of the ongoing compilation of the Swedish FrameNet++ (SFN++), which, as its English original predecessor, is grounded in Frame Semantics which provides a sound theoretical ground for modeling and linking linguistic structures encountered in general language and in specific domains (after specialization). Using such resource we manually annotate domain texts to be used as training data for automatic event extraction by automated techniques.},
	booktitle    = {Proceedings of the Second Louhi Workshop on Text and Data Mining of Health Documents. A NAACL-HTL Workshop},
	author       = {Kokkinakis, Dimitrios and Toporowska Gronostaj, Maria},
	year         = {2010},
}

@article{kokkinakis-etal-2007-lexical-45194,
	title        = {Lexical Parameters, Based on Corpus Analysis of English and Swedish Cancer Data, of Relevance for NLG},
	abstract     = {This paper reports on a corpus-based, contrastive study of the Swedish and English medical language in the cancer sub-domain. It is focused on the examination of a number of linguistic parameters differentiating two types of cancer-related textual material, one intended for medical experts and one for laymen. Language-dependent and language independent characteristics of the textual data between the two languages and the two registers are examined and compared. The aim of the work is to gain insights into the differences between lay and expert texts in order to support natural language generation (NLG) systems.},
	journal      = {Proceedings of the 16th Nordic Conference of Computational Linguistics (NODALIDA)},
	author       = {Kokkinakis, Dimitrios and Toporowska Gronostaj, Maria and Hallett, Catalina and Hardcastle, David},
	year         = {2007},
}

@incollection{kokkinakis-etal-2006-bygga-56225,
	title        = {Att bygga en språkbro mellan allmänhet och vårdpersonal - språket i texter om hjärt-kärlsjukdomar},
	booktitle    = {Humanistdag-boken},
	author       = {Kokkinakis, Dimitrios and Toporowska Gronostaj, Maria and Johansson Kokkinakis, Sofie},
	year         = {2006},
	publisher    = {Göteborgs universitet},
	address      = {Göteborg},
}

@article{kosem-etal-2019-image-275354,
	title        = {The image of the monolingual dictionary across Europe. Results of the European survey of dictionary use and culture},
	abstract     = {The article presents the results of a survey on dictionary use in Europe, focusing on general monolingual dictionaries. The survey is the broadest survey of dictionary use to date, covering close to 10,000 dictionary users (and non-users) in nearly thirty countries. Our survey covers varied user groups, going beyond the students and translators who have tended to dominate such studies thus far. The survey was delivered via an online survey platform, in language versions specific to each target country. It was completed by 9,562 respondents, over 300 respondents per country on average. The survey consisted of a general section, which was translated and presented to all participants, as well as country-specific sections for a subset of 11 countries, which were drafted by collaborators at the national level. The present report covers the general section.},
	journal      = {International Journal of Lexicography},
	author       = {Kosem, Iztok and Lew, Robert and Müller-Spitzer, Carolin and Ribeiro Silveira, Maria and Wolfer, Sascha and Volodina, Elena and Pilán, Ildikó and Sköldberg, Emma and Holmer, Louise and Dorn, Amelie and Gurrutxaga, Antton and Lorentzen, Henrik and Kallas, Jelena and Abel, Andrea and Tiberius, Carole and Partners, Local},
	year         = {2019},
	volume       = {32},
	number       = {1},
	pages        = {92--114},
}

@inProceedings{kageback-etal-2015-neural-217864,
	title        = {Neural context embeddings for automatic discovery of word senses},
	abstract     = {Word sense induction (WSI) is the problem of automatically building an inventory of senses for a set of target words using only a text corpus. We introduce a new method for embedding word instances and their context, for use in WSI. The method, Instance-context embedding (ICE), leverages neural word embeddings, and the correlation statistics they capture, to compute high-quality embeddings of word contexts. In WSI, these context embeddings are clustered to find the word senses present in the text. ICE is based on a novel method for combining word embeddings using continuous skip-gram, based on both semantic and temporal aspects of context words. ICE is evaluated both in a new system and in an extension to a previous system for WSI. In both cases, we surpass the previous state of the art on the WSI task of SemEval-2013, which highlights the generality of ICE. Our proposed system achieves a 33% relative improvement.},
	booktitle    = {Proceedings of the 1st Workshop on Vector Space Modeling for Natural Language Processing. Denver, United States},
	author       = {Kågebäck, Mikael and Johansson, Fredrik and Johansson, Richard and Dubhashi, Devdatt},
	year         = {2015},
	pages        = {25--32},
}
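
The core idea, clustering context embeddings to induce senses, can be sketched as below. This is a simplification: contexts are represented by plain averages of toy word vectors, whereas ICE combines embeddings in a more sophisticated way.

# Sketch: word sense induction by clustering context embeddings (simplified).
import numpy as np
from sklearn.cluster import KMeans

rng = np.random.default_rng(0)
vocab = ["river", "water", "fish", "money", "loan", "cash"]
vecs = {w: rng.normal(size=8) for w in vocab}       # toy word vectors

# contexts of an ambiguous target word (e.g. "bank"), toy data
contexts = [["river", "water"], ["water", "fish"],
            ["money", "loan"], ["loan", "cash"]]

X = np.array([np.mean([vecs[w] for w in ctx], axis=0) for ctx in contexts])
labels = KMeans(n_clusters=2, n_init=10, random_state=0).fit_predict(X)
print(labels)   # cluster id per occurrence = induced sense (illustrative only)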

@inProceedings{kageback-etal-2014-extractive-210878,
	title        = {Extractive Summarization using Continuous Vector Space Models},
	abstract     = {Automatic summarization can help users extract the most important pieces of information from the vast amounts of text digitized into electronic form every day. Central to automatic summarization is the notion of similarity between sentences in text. In this paper we propose the use of continuous vector representations as a basis for semantically aware representations of sentences for measuring similarity. We evaluate different compositions for sentence representation on a standard dataset using the ROUGE evaluation measures. Our experiments show that the evaluated methods improve the performance of a state-of-the-art summarization framework and strongly indicate the benefits of continuous word vector representations for automatic summarization.},
	booktitle    = {Proceedings of the 2nd Workshop on Continuous Vector Space Models and their Compositionality (CVSC) EACL, April 26-30, 2014 Gothenburg, Sweden},
	author       = {Kågebäck, Mikael and Mogren, Olof and Tahmasebi, Nina and Dubhashi, Devdatt},
	year         = {2014},
	ISBN         = {978-1-937284-94-7},
	pages        = {31--39},
}
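
A minimal sketch of the underlying idea: sentences embedded as averaged word vectors and ranked by similarity to the document centroid. The vectors are random toy data and the centroid heuristic is an assumption, not the paper's exact pipeline.

# Sketch: extractive summarization via continuous sentence representations.
import numpy as np

rng = np.random.default_rng(1)
vecs = {w: rng.normal(size=16)
        for w in "the cat sat mat dog ran park storm hit coast".split()}

def embed(sentence):
    # sentence vector = average of its word vectors (one simple composition)
    return np.mean([vecs[w] for w in sentence.split() if w in vecs], axis=0)

def cos(a, b):
    return a @ b / (np.linalg.norm(a) * np.linalg.norm(b))

sentences = ["the cat sat", "the dog ran", "storm hit coast"]
centroid = np.mean([embed(s) for s in sentences], axis=0)
ranked = sorted(sentences, key=lambda s: -cos(embed(s), centroid))
print(ranked[0])    # most "central" sentence = one-sentence extractive summary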

@inProceedings{laakso-etal-2012-swedish-162454,
	title        = {Swedish Test of Intelligibility (STI) – Development of computerized assessment of word and sentence intelligibility and the performance of adult control speakers},
	abstract     = {Without abstract},
	booktitle    = {ICPLA2012},
	author       = {Laakso, Katja and Lindh, Jonas and Hartelius, Lena},
	year         = {2012},
	volume       = {14},
}

@inProceedings{lange-ljunglof-2018-demonstrating-274016,
	title        = {Demonstrating the MUSTE Language Learning Environment},
	abstract     = {We present a language learning application that relies on grammars to model the learning outcome. Based on this concept we can provide a powerful framework for language learning exercises with an intuitive user interface and a high reliability. Currently the application aims to augment existing language classes and support students by improving the learner attitude and the general learning outcome. Extensions beyond that scope are promising and likely to be added in the future.},
	booktitle    = {NLP4CALL 2018, the 7th Workshop on NLP for Computer Assisted Language Learning, Stockholm, 7th November 2018; published as issue 152 of Linköping Electronic Conference Proceedings},
	author       = {Lange, Herbert and Ljunglöf, Peter},
	year         = {2018},
	publisher    = {Linköping University Electronic Press, Linköpings universitet},
	address      = {Linköping},
	ISBN         = {978-91-7685-173-9},
}

@inProceedings{lange-ljunglof-2018-mulle-274014,
	title        = {MULLE: A grammar-based Latin language learning tool to supplement the classroom setting},
	abstract     = {MULLE is a tool for language learning that focuses on teaching Latin as a foreign language. It is aimed at easy integration into the traditional classroom setting and syllabus, which makes it distinct from other language learning tools that provide a standalone learning experience. It uses grammar-based lessons and embraces methods of gamification to improve learner motivation. The main type of exercise provided by our application is translation practice, but it is also possible to shift the focus to vocabulary or morphology training.},
	booktitle    = {NLPTEA 2018, the 5th Workshop on Natural Language Processing Techniques for Educational Applications, Melbourne, Australia, 19th July 2018},
	author       = {Lange, Herbert and Ljunglöf, Peter},
	year         = {2018},
	publisher    = {Association for Computational Linguistics},
	address      = {Melbourne, Australia},
}

@inProceedings{lange-ljunglof-2018-putting-274013,
	title        = {Putting Control into Language Learning},
	abstract     = {Controlled Natural Languages (CNLs) have many applications, including document authoring, automatic reasoning on texts and reliable machine translation, but their use is not limited to these areas. We explore a new application area of CNLs: their use in computer-assisted language learning. In this paper we present a web application for language learning using CNLs, as well as a detailed description of the properties of the family of CNLs it uses.},
	booktitle    = {CNL 2018, the 6th International Workshop on Controlled Natural Language, Maynooth, Co Kildare, 27-28th August 2018; published as volume 304 of Frontiers in Artificial Intelligence and Applications},
	author       = {Lange, Herbert and Ljunglöf, Peter},
	year         = {2018},
	publisher    = {IOS Press},
	address      = {Amsterdam},
	ISBN         = {978-1-61499-904-1},
}

@inProceedings{lange-ljunglof-2020-learning-291243,
	title        = {Learning Domain-specific Grammars from a Small Number of Examples},
	abstract     = {In this paper we investigate the problem of grammar inference from a different perspective. The common approach is to try to infer a grammar directly from example sentences, which either requires a large training set or suffers from poor accuracy. We instead view it as a problem of grammar restriction or sub-grammar extraction. We start from a large-scale resource grammar and a small number of examples, and find a sub-grammar that still covers all the examples. To do this we formulate the problem as a constraint satisfaction problem, and use an existing constraint solver to find the optimal grammar. Our experiments with English, Finnish, German, Swedish and Spanish show that 10–20 examples are often sufficient to learn an interesting domain grammar. Possible applications include computer-assisted language learning, domain-specific dialogue systems, computer games, Q/A systems, and others.},
	booktitle    = {12th International Conference on Agents and Artificial Intelligence - Volume 1: NLPinAI},
	author       = {Lange, Herbert and Ljunglöf, Peter},
	year         = {2020},
	publisher    = {SciTePress},
	ISBN         = {978-989-758-395-7},
}
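
The grammar-restriction idea can be sketched as a covering problem: each example sentence has one or more parses, each parse uses a set of grammar rules, and we want the smallest rule subset licensing at least one parse per example. The brute-force version below is only illustrative (the paper uses a real constraint solver, and the rule names are invented):

# Sketch: sub-grammar extraction as a minimal covering problem.
from itertools import combinations

parses_per_example = [                       # rule sets per alternative parse
    [{"S_np_vp", "NP_det_noun"}, {"S_np_vp", "NP_pron"}],
    [{"S_np_vp", "NP_pron", "VP_verb_np"}],
    [{"S_np_vp", "NP_det_noun", "VP_verb_np"},
     {"S_np_vp", "NP_pron", "VP_verb_np"}],
]
all_rules = sorted(set().union(*(p for ps in parses_per_example for p in ps)))

def covers(subset):
    # every example must have at least one parse fully licensed by the subset
    return all(any(parse <= subset for parse in parses)
               for parses in parses_per_example)

for k in range(1, len(all_rules) + 1):       # smallest subset first
    hit = next((set(c) for c in combinations(all_rules, k) if covers(set(c))),
               None)
    if hit:
        print(sorted(hit))   # -> ['NP_pron', 'S_np_vp', 'VP_verb_np']
        break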

@edited_book{larsson-borin-2012-from-167661,
	title        = {From Quantification to Conversation},
	editor       = {Larsson, Staffan and Borin, Lars},
	year         = {2012},
	publisher    = {College Publications},
	address      = {London},
	ISBN         = {978-1-84890-091-2},
}

@edited_book{lendvai-borin-2009-proceedings-91853,
	title        = {Proceedings of the EACL 2009 Workshop on Language Technology and Resources for Cultural Heritage, Social Sciences, Humanities, and Education (LaTeCH -- SHELT&R 2009)},
	editor       = {Lendvai, Piroska and Borin, Lars},
	year         = {2009},
	publisher    = {ACL},
	address      = {Athens},
	ISBN         = {1-932432-21-3},
}

@inProceedings{lenkiewicz-etal-2014-dwan-216695,
	title        = {The DWAN framework: Application of a web annotation framework for the general humanities to the domain of language resources},
	abstract     = {Researchers share large amounts of digital resources, which offer new chances for cooperation. Collaborative annotation systems are meant to support this. Often, these systems are targeted at a specific task or domain, e.g., annotation of a corpus. The DWAN framework for web annotation is generic and can support a wide range of tasks and domains. A key feature of the framework is its support for caching representations of the annotated resource. This allows showing the context of the annotation even if the resource has changed or has been removed. The paper describes the design and implementation of the framework. Use cases provided by researchers are well in line with the key characteristics of the DWAN annotation framework.},
	booktitle    = {LREC 2014, Reykjavik, Iceland; http://lrec2014.lrec-conf.org/en/conference-programme/list-accepted-papers/},
	author       = {Lenkiewicz, Przemyslaw and Shkaravska, Olha and Goosen, Twan and Windhouwer, Menzo and Broeder, Daan and Roth, Stephanie S. and Olsson, Olof},
	year         = {2014},
}

@inProceedings{lindahl-etal-2019-towards-286588,
	title        = {Towards Assessing Argumentation Annotation - A First Step},
	abstract     = {This paper presents a first attempt at using Walton’s argumentation schemes for annotating arguments in Swedish political text and assessing the feasibility of using this particular set of schemes with two linguistically trained annotators. The texts are not pre-annotated with argumentation structure beforehand. The results show that the annotators differ both in number of annotated arguments and selection of the conclusion and premises which make up the arguments. They also differ in their labeling of the schemes, but grouping the schemes increases their agreement. The outcome from this will be used to develop guidelines for future annotations.},
	booktitle    = {Proceedings of the 6th Workshop on Argument Mining, August 1, 2019, Florence, Italy / Benno Stein, Henning Wachsmuth (Editors)},
	author       = {Lindahl, Anna and Borin, Lars and Rouces, Jacobo},
	year         = {2019},
	publisher    = {Association for Computational Linguistics},
	address      = {Stroudsburg, PA},
	ISBN         = {978-1-950737-33-8},
}
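
Agreement of the kind discussed above is typically quantified with chance-corrected measures such as Cohen's kappa. A sketch with invented scheme labels, also showing how grouping schemes into coarser classes raises agreement:

# Sketch: inter-annotator agreement before and after grouping scheme labels.
from sklearn.metrics import cohen_kappa_score

annotator_a = ["expert_opinion", "example", "cause_effect", "example"]
annotator_b = ["expert_opinion", "cause_effect", "cause_effect", "example"]
print(cohen_kappa_score(annotator_a, annotator_b))      # fine-grained kappa

# grouping related schemes into coarser classes (an illustrative grouping)
group = {"expert_opinion": "source", "example": "support",
         "cause_effect": "support"}
print(cohen_kappa_score([group[x] for x in annotator_a],
                        [group[x] for x in annotator_b]))  # higher agreement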

@inProceedings{lindh-2002-preliminary-47286,
	title        = {Preliminary Observations on Discontinuities in Two TTS Concatenation Systems.},
	booktitle    = {Proceedings of Fonetik 2002, TMH-QPSR, KTH, Stockholm},
	author       = {Lindh, Jonas},
	year         = {2002},
	volume       = {44(1)},
	pages        = {113--116},
}

@inProceedings{lindh-2004-acoustic-47302,
	title        = {Acoustic and Perceptual Analysis of Discontinuities in Two TTS Concatenation Systems},
	booktitle    = {Proceedings of the XVIIth Swedish Phonetics Conference, Department of Linguistics, Stockholm University},
	author       = {Lindh, Jonas},
	year         = {2004},
}

@inProceedings{lindh-2004-handling-47298,
	title        = {Handling the "Voiceprint" Issue},
	booktitle    = {Proceedings of the XVIIth Swedish Phonetics Conference},
	author       = {Lindh, Jonas},
	year         = {2004},
}

@inProceedings{lindh-2004-preliminary-47289,
	title        = {Preliminary Observations on Speaker Identification in a Closed Set Using Graphic Representations of LTAS},
	booktitle    = {Annual conference of IAFPA, Helsinki 2004},
	author       = {Lindh, Jonas},
	year         = {2004},
}

@inProceedings{lindh-2005-visual-47310,
	title        = {Visual Acoustic vs. Aural Perceptual Speaker Identification in a Closed Set of Disguised Voices},
	booktitle    = {Annual conference of IAFPA, in Marrakech 2005},
	author       = {Lindh, Jonas},
	year         = {2005},
}

@inProceedings{lindh-2005-visual-47308,
	title        = {Visual Acoustic vs. Aural Perceptual Speaker Identification in a Closed Set of Disguised Voices},
	booktitle    = {Proceedings of the XVIIIth Swedish Phonetics Conference, Department of Linguistics, Göteborg University, eds Jonas Lindh & Anders Eriksson},
	author       = {Lindh, Jonas},
	year         = {2005},
}

@inProceedings{lindh-2005-model-47305,
	title        = {A Model-Based Experiment Towards an Emotional Synthesis},
	booktitle    = {Proceedings of the XVIIIth Swedish Phonetics Conference, Department of Linguistics, Göteborg University, eds Jonas Lindh & Anders Eriksson},
	author       = {Lindh, Jonas},
	year         = {2005},
}

@inProceedings{lindh-2006-preliminary-47318,
	title        = {Preliminary F0 Statistics and Forensic Phonetics},
	booktitle    = {Annual conference of IAFPA, Department of Linguistics, Göteborg University, 2006. Eds. Jonas Lindh and Anders Eriksson},
	author       = {Lindh, Jonas},
	year         = {2006},
}

@inProceedings{lindh-2006-case-47316,
	title        = {A Case Study of /r/ in the Västgöta Dialect},
	booktitle    = {Papers from FONETIK 2006, Working Papers, 52, Department of Linguistics and Phonetics, Lund University},
	author       = {Lindh, Jonas},
	year         = {2006},
	volume       = {52},
	pages        = {85--88},
}

@inProceedings{lindh-2006-preliminary-47314,
	title        = {Preliminary Descriptive F0-statistics for Young Male Speakers},
	booktitle    = {Papers from FONETIK 2006, Working Papers, 52, Department of Linguistics and Phonetics, Lund University},
	author       = {Lindh, Jonas},
	year         = {2006},
	volume       = {52},
	pages        = {89--92},
}

@inProceedings{lindh-2006-statistics-99168,
	title        = {F0 Statistics, Robustness and Measures - Implications for Forensic Speaker Identification},
	booktitle    = {Proceedings of The Swedish Language Technology Conference 2006},
	author       = {Lindh, Jonas},
	year         = {2006},
}

@inProceedings{lindh-2007-voxalys-47320,
	title        = {Voxalys – a Pedagogical Praat Plugin for Voice Analysis},
	booktitle    = {Proceedings of Fonetik 2007, TMH-QPSR, KTH, Stockholm},
	author       = {Lindh, Jonas},
	year         = {2007},
	volume       = {50},
	pages        = {73--77},
}

@inProceedings{lindh-2008-robustness-99174,
	title        = {Robustness of Forced Alignment in a Forensic Context},
	booktitle    = {Proceedings of IAFPA2008, Lausanne, Switzerland},
	author       = {Lindh, Jonas},
	year         = {2008},
}

@inProceedings{lindh-2009-perception-99180,
	title        = {Perception of voice similarity and the results of a voice line-up},
	booktitle    = {The XXIInd Swedish Phonetics Conference, Department of Linguistics, Stockholm University, 2009.},
	author       = {Lindh, Jonas},
	year         = {2009},
	ISBN         = {978-91-633-4892-1},
	pages        = {186--189},
}

@inProceedings{lindh-2009-first-99189,
	title        = {A first step towards a text-independent speaker verification Praat  plug-in using Mistral/Alize tools},
	booktitle    = {The XXIInd Swedish Phonetics Conference, Department of Linguistics, Stockholm University, 2009.},
	author       = {Lindh, Jonas},
	year         = {2009},
	ISBN         = {978-91-633-4892-1},
	pages        = {194--197},
}

@inProceedings{lindh-2009-pick-123922,
	title        = {Pick a Voice among Wolves, Goats and Lambs},
	booktitle    = {Proceedings of the 18th Annual Conference of the International Association for Forensic Phonetics and Acoustics, Cambridge, UK},
	author       = {Lindh, Jonas},
	year         = {2009},
	number       = {18},
}

@inProceedings{lindh-2010-preliminary-123920,
	title        = {Preliminary Formant Data of the Swedia Dialect Database in a Forensic Phonetic Perspective},
	booktitle    = {Proceedings of the 19th Annual Conference of the International Association for Forensic Phonetics and Acoustics, Trier, Germany},
	author       = {Lindh, Jonas},
	year         = {2010},
	number       = {19},
}

@article{lindh-2011-peter-142484,
	title        = {Peter French},
	abstract     = {The Encyclopedia of Applied Linguistics is a ground-breaking resource, spanning the entire field. Truly international in scope, it brings together contributions from the world’s most respected scholars in applied linguistics. Available online or as a 10-volume print set, this comprehensive print and electronic resource provides an overview of all the key areas in applied linguistics, from language learning and language policy to qualitative methods in applied linguistics, and technology and language. Comprising over 3.5 million words across 1,200 entries, it spans key developments and ideas in applied linguistics, historic and emerging areas of research, and includes 250 biographies of prominent figures who have helped shape this diverse and ever-growing field.},
	journal      = {The Encyclopedia of Applied Linguistics},
	author       = {Lindh, Jonas},
	year         = {2011},
	pages        = {2},
}

@article{lindh-2011-francis-142483,
	title        = {Francis Nolan},
	abstract     = {The Encyclopedia of Applied Linguistics is a ground-breaking resource, spanning the entire field. Truly international in scope, it brings together contributions from the world’s most respected scholars in applied linguistics. Available online or as a 10-volume print set, this comprehensive print and electronic resource provides an overview of all the key areas in applied linguistics, from language learning and language policy to qualitative methods in applied linguistics, and technology and language. Comprising over 3.5 million words across 1,200 entries, it spans key developments and ideas in applied linguistics, historic and emerging areas of research, and includes 250 biographies of prominent figures who have helped shape this diverse and ever-growing field.},
	journal      = {The Encyclopedia of Applied Linguistics},
	author       = {Lindh, Jonas},
	year         = {2011},
	pages        = {2},
}

@inProceedings{lindh-2015-forensic-222514,
	title        = {Forensic speaker comparison using machine and mind},
	booktitle    = {24th Annual Conference of the International Association for Forensic Phonetics and Acoustics, 8 - 10 July 2015, Leiden, Netherlands},
	author       = {Lindh, Jonas},
	year         = {2015},
}

@inProceedings{lindh-2015-forensic-222517,
	title        = {Forensic speaker comparison evaluations},
	booktitle    = {Proceedings of Roundtable in Forensic Linguistics 2015, September 4th- 6th, Mainz, Germany},
	author       = {Lindh, Jonas},
	year         = {2015},
}

@book{lindh-2017-forensic-261214,
	title        = {Forensic comparison of voices, speech and speakers : tools and methods in forensic phonetics},
	abstract     = {This thesis has three main objectives. The first objective (A) comprises Study I, which investigates the parameter fundamental frequency (F0) and its robustness in different acoustic contexts, using different measures. The study concludes that using the alternative baseline as a measure diminishes the effect of low-quality recordings or varying speaking liveliness. However, both creaky voice and raised vocal effort induce intra-speaker variation problems that are yet to be solved.
The second objective (B) comprises Studies II, III and IV. Study II investigates the differences between the results of an ear-witness line-up experiment and pairwise perceptual judgments of voice similarity performed by a large group of listeners. The study shows that humans seem to focus much more on similarities of speech style than on features connected to voice quality, even when recordings are played backwards. Study III investigates the differences between an automatic voice comparison system and humans’ perceptual judgments of voice similarity. The results show a correlation between how speakers were judged as more or less different, using multidimensional scaling of similarity ranks, for both the automatic system and the listeners. However, there are also differences, due to the fact that human listeners include information about speech style and have difficulties weighting the parameters, i.e. ignoring them when they are contradictory. Study IV investigates a new functional method for converting the perceptual similarity judgments made by humans and comparing them to the automatic system results within the likelihood ratio framework. The automatic system was found to outperform the naïve human listeners in this task (using a very small dataset).
The third objective (C) comprises Study V, which investigates several statistical modelling techniques to calculate relevant likelihood ratios, using simulations based on existing reference data in an authentic forensic case of a disputed utterance. The study presents several problems with modelling small datasets and develops methods to take the lack of data into account within the likelihood ratio framework.
In summary, the thesis provides a broader historical background to forensic speaker comparison, to guide the reader into the current research situation within forensic phonetics. The work further seeks to build a bridge between forensic phonetics and automatic voice recognition. Practical casework implications have been considered throughout the work, on the basis of the author's own experience as a forensic caseworker and through collaborative interaction with other parties working in the field, both in research and in forensic practice and law enforcement. Since 2005, the author has been involved in over 400 forensic cases and has given testimony in several countries.},
	author       = {Lindh, Jonas},
	year         = {2017},
	publisher    = {Department of Philosophy, Linguistics, and Theory of Science, University of Gothenburg},
	address      = {Gothenburg},
	ISBN         = {978-91-629-0141-7},
}

@inProceedings{lindh-eriksson-2007-robustness-47321,
	title        = {Robustness of Long Time Measures of Fundamental Frequency},
	abstract     = {In many speech technology based applications as well as in forensic phonetics it is desirable to obtain reliable estimates of a speaker’s fundamental frequency. We would like the measures to be accurate and reliable enough in order to be used meaningfully as a parameter in speaker identification or verification. Under optimal conditions such as when high quality studio recordings and normal speech styles are used this is often possible. In real life applications such conditions are the exception rather than the rule. The study presented here reports the result from an investigation where different measures were tested on speech material that varied with respect to speaking style, vocal effort and recording quality. Based on the results from these tests we would like to suggest a measure we call the alternative fundamental frequency baseline as the measure that is most robust with respect to the above-mentioned sources of variation. Index Terms: speaker recognition, speaker identification, fundamental frequency, F0.},
	booktitle    = {Proceedings of Interspeech 2007, Antwerp, Belgium},
	author       = {Lindh, Jonas and Eriksson, Anders},
	year         = {2007},
	pages        = {2025–2028},
}

@article{lindh-eriksson-2009-swedat-118616,
	title        = {The SweDat Project and Swedia Database for Phonetic and Acoustic Research},
	abstract     = {The project described here may be seen as a continuation of an earlier project, SweDia 2000, aimed at transforming the database collected in that project into a full-fledged e-science database. The database consists of recordings of Swedish dialects from 107 locations in Sweden and Swedish-speaking parts of Finland. The goal of the present project is to make the material searchable in a flexible and simple way in order to make it available to a much wider sector of the research community than is the case at present. The database will be accessible over the Internet via user-friendly interfaces specifically designed for this type of data. Other more specialized research interfaces will also be designed to facilitate phonetic acoustic research and orientation of the database.},
	journal      = {Proceedings of the 2009 Fifth IEEE International Conference on e-Science (E-SCIENCE '09)},
	author       = {Lindh, Jonas and Eriksson, Anders},
	year         = {2009},
	pages        = {45--49},
}

@inProceedings{lindh-eriksson-2010-voice-122326,
	title        = {Voice similarity — a comparison between judgements by human listeners and automatic voice comparison},
	abstract     = {Comparison between the way human listeners judge voice similarity and how state-of-the-art GMM-UBM systems for voice recognition compare voices is a little explored area of research. In this study, groups of informants judged the similarity between voice samples taken from a set of fairly similar male voices that had previously been used in a voice line-up experiment. The result from the listening tests was then compared to the scores from a GMM-UBM automatic voice comparison system, built on the Mistral LIA_RAL open source platform. The results show a correlation between scores obtained from the automatic system and the judgements by the listeners. Listeners are, however, more sensitive to language dependent parameters or idiosyncratic phonetic features such as speaking tempo, while the system only bases its likelihood ratios on spectral similarities, i.e. timbre.},
	booktitle    = {Proceedings from FONETIK 2010, Working Papers},
	author       = {Lindh, Jonas and Eriksson, Anders},
	year         = {2010},
	volume       = {54},
	pages        = {63--69},
}

@inProceedings{lindh-etal-2010-methodological-123919,
	title        = {Methodological Issues in the Presentation and Evaluation of Speech Evidence in Sweden},
	booktitle    = {Proceedings of the 19th Annual Conference of the International Association for Forensic Phonetics and Acoustics, Trier, Germany},
	author       = {Lindh, Jonas and Eriksson, Anders and Nelhans, Gustaf},
	year         = {2010},
	number       = {19},
}

@inProceedings{lindh-morrison-2011-humans-146100,
	title        = {Humans versus machine: forensic voice comparison on a small database of Swedish voice recordings},
	abstract     = {A procedure for comparing the performance of humans and machines on speaker recognition and on forensic voice comparison is proposed and demonstrated. The procedure is consistent with the new paradigm for forensic-comparison science (use of the likelihood-ratio framework and testing of the validity and reliability of the results). The use of the procedure is demonstrated using a small database of Swedish voice recordings.},
	booktitle    = {Proceedings of ICPhS2011},
	author       = {Lindh, Jonas and Morrison, Geoffrey Stewart},
	year         = {2011},
	volume       = {17},
	pages        = {4},
}

@inProceedings{lindh-etal-2012-calculating-162456,
	title        = {Calculating the reliability of a likelihood ratio from a disputed utterance},
	booktitle    = {Proceedings of IAFPA2012},
	author       = {Lindh, Jonas and Ochoa, Felipe and Morrison, Geoffrey Stewart},
	year         = {2012},
	volume       = {21},
}

@inProceedings{lindh-akesson-2013-pilot-188837,
	title        = {A pilot study on the effect of different phonetic acoustic input to a GMM-UBM system for voice comparison},
	booktitle    = {22nd conference of the International Association for Forensic Phonetics and Acoustics (IAFPA). July 21st-24th, 2013, Tampa, Florida, USA},
	author       = {Lindh, Jonas and Åkesson, Joel},
	year         = {2013},
}

@inProceedings{lindh-akesson-2014-effect-218075,
	title        = {Effect of the Double-Filtering effect on Automatic Voice Comparison},
	abstract     = {In forensic casework today it is not uncommon to receive material recorded with mobile phones or other handheld recording devices. From experience we know that most people do not treat recordings with as much care as a person well versed in audio technology, especially given the varying circumstances under which the material can be recorded. Thus it is important that we learn more about what sort of acoustic effects take place under particular conditions and how these effects can influence Automatic Voice Comparison (AVC). The current study aims at evaluating the effects of recording material consisting of what could be described as ‘double-filtered’ sound, henceforth referred to as DF, e.g. when a phone call is recorded using a handheld recorder placed in the vicinity of the mobile device. This filtering effect constitutes sound transmitted via GSM communication (1st filter) which then passes an indeterminable distance through the air before being captured by another recording device, such as a mobile phone or handheld recorder’s microphone (2nd filter). This effect affects the energy in the signal: the energy decreases in both the low and the high frequencies, while the middle frequencies are boosted.
In this study we have used a database consisting of 150 female speakers of Swedish, all students of speech and language pathology. The recordings were made in a sound-treated recording booth using a setup of one computer equipped with an internal M-Audio soundcard and a high-quality headset microphone. Each recording consists of solicited spontaneous speech together with read speech material (the Swedish standard reading passage ‘Ett svårt fall’). Each speaker was informed and encouraged to finish the task at their own pace. The mean duration of the full recording among the speakers was 69.3 seconds (SD 16 seconds).},
	booktitle    = {Proceedings of IAFPA 2014. International Association for Forensic Phonetics and Acoustics Annual Conference 31 August - 3 September 2014},
	author       = {Lindh, Jonas and Åkesson, Joel},
	year         = {2014},
	pages        = {2},
}

@inProceedings{lindh-akesson-2016-evaluation-242811,
	title        = {Evaluation of Software ‘Error checks’ on the SweEval2016 Corpus for Forensic Speaker Comparison},
	booktitle    = {Proceedings of IAFPA25. 25th Annual Conference of the International Association for Forensic Phonetics and Acoustics. York, UK, 24th–27th July 2016},
	author       = {Lindh, Jonas and Åkesson, Joel},
	year         = {2016},
	pages        = {57--58},
}

@inProceedings{lindh-etal-2016-comparison-242808,
	title        = {Comparison of Perceptual and ASR Results on the SweEval2016 Corpus},
	booktitle    = {Proceedings of IAFPA25. 25th Annual Conference of the International Association for Forensic Phonetics and Acoustics. York, UK, 24th–27th July 2016},
	author       = {Lindh, Jonas and Åkesson, Joel and Sundqvist, Maria},
	year         = {2016},
	pages        = {110--111},
}

@inProceedings{linz-etal-2019-temporal-279131,
	title        = {Temporal Analysis of Semantic Verbal Fluency Tasks in Persons with Subjective and Mild Cognitive Impairment},
	abstract     = {The Semantic Verbal Fluency (SVF) task is a classical neuropsychological assessment where persons are asked to produce words belonging to a semantic category (e.g., animals) in a given time. This paper introduces a novel method of temporal analysis for SVF tasks utilizing time intervals and applies it to a corpus of elderly Swedish subjects (mild cognitive impairment, subjective cognitive impairment and healthy controls). A general decline in word count and lexical frequency over the course of the task is revealed, as well as an increase in word transition times. Persons with subjective cognitive impairment had a higher word count during the last intervals, but produced words of the same lexical frequencies. Persons with MCI had a steeper decline in both word count and lexical frequencies during the third interval. Additional correlations with neuropsychological scores suggest these findings are linked to a person’s overall vocabulary size and processing speed, respectively. Classification results improved when adding the novel features (AUC = 0.72), supporting their diagnostic value.},
	booktitle    = {Sixth Workshop on Computational Linguistics and Clinical Psychology: Reconciling Outcomes. Minneapolis, Minnesota,  USA,  June 6, 2019 / Kate Niederhoffer, Kristy Hollingshead, Philip Resnik, Rebecca Resnik, Kate Loveys (Editors)},
	author       = {Linz, Nicklas and Lundholm Fors, Kristina and Lindsay, Hali and Eckerström, Marie and Alexandersson, Jan and Kokkinakis, Dimitrios},
	year         = {2019},
	publisher    = {Association for Computational Linguistics },
	address      = {Stroudsburg, PA },
	ISBN         = {978-1-948087-95-7},
}

@inProceedings{ljunglof-2009-trik-99886,
	title        = {TRIK: en talande och ritande robot för barn med kommunikativa funktionshinder},
	booktitle    = {Presentation på ID-dagarna, 7–9 oktober 2009, Stockholm},
	author       = {Ljunglöf, Peter},
	year         = {2009},
}

@inProceedings{ljunglof-2009-dialogue-95890,
	title        = {Dialogue Management as Interactive Tree Building},
	abstract     = {We introduce a new dialogue model and a formalism for limited-domain dialogue systems, which works by interactively building dialogue trees. The model borrows its fundamental ideas from type theoretical grammars and Dynamic Syntax. The resulting dialogue theory is a simple and light-weight formalism, which is still capable of advanced dialogue behaviour.},
	booktitle    = {DiaHolmia'09, 13th Workshop on the Semantics and Pragmatics of Dialogue},
	author       = {Ljunglöf, Peter},
	year         = {2009},
}

@inProceedings{ljunglof-2009-trindikit-99883,
	title        = {trindikit.py: An open-source Python library for developing ISU-based dialogue systems},
	abstract     = {TrindiKit is one of the main tools for developing ISU-based dialogue systems, but it is implemented in a non-free dialect of the programming language Prolog. Therefore we have translated the TrindiKit toolkit into an open-source Python package. We have tried to remain close to the original TrindiKit formulation, while making the most of Python classes and objects.},
	booktitle    = {IWSDS'09, 1st International Workshop on Spoken Dialogue Systems Technology},
	author       = {Ljunglöf, Peter},
	year         = {2009},
}

@inProceedings{ljunglof-2010-grasp-130137,
	title        = {GRASP: Grammar-based Language Learning},
	abstract     = {We are developing a pedagogical tool to support language learning and training for children with communicative disabilities. The system has a graphical interface, where the user can move, replace, add, and in other ways modify words or phrases. The system keeps the sentence grammatical, by automatically rearranging the words and changing inflection, if necessary. In this way we hope that the system stimulates the child to explore the possibilities of language.},
	booktitle    = {SLTC-2010, 3rd Swedish Language Technology Conference},
	author       = {Ljunglöf, Peter},
	year         = {2010},
}

@inProceedings{ljunglof-2010-trik-130134,
	title        = {TRIK: A Talking and Drawing Robot for Children with Communication Disabilities},
	abstract     = {In this project we have developed and evaluated a setup involving a touch-screen computer with dynamic screen software, and a drawing robot, which can communicate with each other via spoken language. The purpose is to help children with severe communication disabilities to learn language, language use and cooperation, in a playful and inspiring way.

The communication board speaks and the robot is able to understand and talk back. This encourages the child to use language and learn to cooperate to reach a common goal, which in this case is to get the robot to draw figures on a paper.  

The robot has been tested with three children, two with cerebral palsy and one with autism spectrum disorder. During this session we present the preliminary results.},
	booktitle    = {ISAAC-2010, 14th Biennial Conference for Augmentative and Alternative Communication},
	author       = {Ljunglöf, Peter},
	year         = {2010},
}

@inProceedings{ljunglof-kjellberg-2018-interactive-274247,
	title        = {Interactive correction of speech recognition errors: implementation and evaluation for English and Swedish},
	booktitle    = {SLTC 2018, the 7th Swedish Language Technology Conference, Stockholm, 7-9th November 2018},
	author       = {Ljunglöf, Peter and Kjellberg, J. Magnus},
	year         = {2018},
}

@inProceedings{ljunglof-etal-2009-trik-91892,
	title        = {TRIK: A talking and drawing robot for children with communication disabilities},
	abstract     = {This paper describes an ongoing project where we develop and evaluate a setup involving a communication board (for manual sign communication) and a drawing robot, which can communicate with each other via spoken language. The purpose is to help children with severe communication disabilities to learn language, language use and cooperation, in a playful and inspiring way. The communication board speaks and the robot is able to understand and talk back. This encourages the child to use the language and learn to cooperate to reach a common goal, which in this case is to get the robot to draw figures on a paper.},
	booktitle    = {Proceedings of the 17th Nordic Conference of Computational Linguistics NODALIDA 2009},
	author       = {Ljunglöf, Peter and Larsson, Staffan and Thunberg, Gunilla and Mühlenbock, Katarina},
	year         = {2009},
	volume       = {4},
}

@inProceedings{ljunglof-olsson-2009-trik-99885,
	title        = {TRIK: en talande och ritande robot för barn med kommunikativa funktionshinder},
	booktitle    = {Presentation vid 8:e Västsvenska Kommunikationskarnevalen, 1–2 juni 2009},
	author       = {Ljunglöf, Peter and Olsson, Maria},
	year         = {2009},
}

@incollection{ljunglof-wiren-2010-syntactic-99884,
	title        = {Syntactic parsing},
	abstract     = {This chapter presents basic techniques for grammar-driven natural language parsing, that is, analysing a string of words (typically a sentence) to determine its structural description according to a formal grammar. Basic parsing concepts are explained after which a number of well-known parsing techniques are described.},
	booktitle    = {Handbook of Natural Language Processing, 2nd edition},
	author       = {Ljunglöf, Peter and Wirén, Mats},
	year         = {2010},
	publisher    = {CRC Press, Taylor and Francis},
	ISBN         = {978-1420085921},
}

@techreport{ljunglof-etal-2019-assessing-281222,
	title        = {Assessing the quality of Språkbanken’s annotations},
	abstract     = {Most of the corpora in Språkbanken Text consist of unannotated plain text, such as almost all newspaper texts, social media texts, novels and official documents. We also have some corpora that are manually annotated in different ways, such as Talbanken (annotated for part-of-speech and syntactic structure), and the Stockholm Umeå Corpus (annotated for part-of-speech). Språkbanken’s annotation pipeline Sparv aims to automate the work of annotating all our corpora, while still keeping the manual annotations intact. When all corpora are annotated, they can be made available, e.g., in the corpus search tools Korp and Strix. Until now there has not been any comprehensive overview of the annotation tools and models that Sparv has been using for the last eight years. Some of them have not been updated since the start, such as the part-of-speech tagger Hunpos and the dependency parser MaltParser. There are also annotation tools that we still have not included, such as a constituency-based parser.
Therefore Språkbanken initiated a project with the aim of conducting such an overview. This document is the outcome of that project, and it contains descriptions of the types of manual and automatic annotations that we currently have in Språkbanken, as well as an incomplete overview of the state of the art with regard to annotation tools and models.},
	author       = {Ljunglöf, Peter and Zechner, Niklas and Nieto Piña, Luis and Adesam, Yvonne and Borin, Lars},
	year         = {2019},
}

@inProceedings{lundholmfors-etal-2019-reading-284036,
	title        = {Reading and mild cognitive impairment},
	abstract     = {In the present study, we investigated the discriminatory power of eye-tracking features in distinguishing between individuals with mild cognitive impairment (MCI) and healthy controls (HC). The eye movements of the study participants were recorded at two different time points, 18 months apart. Using a machine learning approach with leave-one-out cross-validation, we were able to discriminate between the groups with an AUC of 73.6. However, somewhat surprisingly, the classification was less successful using data from the second recording session, which might be attributed to the non-static nature of cognitive status. Still, the outcome suggests that eye-tracking measures can be exploited as useful markers of MCI.},
	booktitle    = {Proceedings of the 10th International Conference of Experimental Linguistics, 25-27 September 2019, Lisbon, Portugal},
	editor       = {Antonis Botinis},
	author       = {Lundholm Fors, Kristina and Antonsson, Malin and Kokkinakis, Dimitrios and Fraser, Kathleen},
	year         = {2019},
	ISBN         = {978-618-84585-0-5},
}

@inProceedings{lundholmfors-breitholtz-2016-mocking-240344,
	title        = {Are you mocking me or are you laughing with me?},
	booktitle    = {SEMDIAL 2016, JerSem, Proceedings of the 20th Workshop on the Semantics and Pragmatics of Dialogue, 16-18 July 2016, Rutgers, New Brunswick, NJ, USA / Julie Hunter, Mandy Simons, and Matthew Stone (eds.)},
	author       = {Lundholm Fors, Kristina and Breitholtz, Ellen},
	year         = {2016},
}

@inProceedings{lundholmfors-etal-2018-automated-263790,
	title        = {Automated Syntactic Analysis of Language Abilities in Persons with Mild and Subjective Cognitive Impairment},
	abstract     = {In this work we analyze the syntactic complexity of transcribed picture descriptions using a variety of automated syntactic features, and investigate the features’ predictive power in classifying narratives from people with subjective and mild cognitive impairment and healthy controls. Our results indicate that while there are no statistically significant differences, syntactic features can still be moderately successful at distinguishing the participant groups when used in a machine learning framework.},
	booktitle    = {Building continents of knowledge in oceans of data : the future of co-created eHealth: proceedings of MIE2018, 24-26 April 2018, Gothenburg, Sweden},
	editor       = {Adrien Ugon and Daniel Karlsson and Gunnar O. Klein and Anne Moen},
	author       = {Lundholm Fors, Kristina and Fraser, Kathleen and Kokkinakis, Dimitrios},
	year         = {2018},
	publisher    = {IOS Press},
	address      = {Amsterdam},
	ISBN         = {978-1-61499-851-8},
}

@inProceedings{lundholmfors-etal-2018-voice-264400,
	title        = {Eye-voice span in adults with mild cognitive impairment (MCI) and healthy controls},
	abstract     = {Objectives: This study is part of a larger project focused on developing new techniques for identification of early linguistic and extra-linguistic signs of cognitive impairment, with the overall goal of identifying dementia in the preclinical stage. In a previous study, we found that eye movements during reading can be used to distinguish between subjects with mild cognitive impairment (MCI) and healthy controls with up to 86% accuracy. In this study, we are investigating the process of reading aloud, by exploring the eye-voice span in subjects with and without cognitive impairment. The aim of the study is to identify differences in the reading processes and evaluate whether these differences can be used to discriminate between the two groups.
Methods: The eye-voice span is a measurement of the temporal and spatial organization between the eye and the voice, and is affected by, for example, working memory and automaticity, but also by the familiarity and length of words. In previous work, differences in eye movements during reading between healthy controls and subjects with cognitive impairments have been identified, and it has been shown that subjects with Alzheimer’s disease show impairments when reading aloud, specifically with regard to speech and articulation rate.
Results: We present a quantitative and qualitative analysis of the reading process in the subjects, focusing both on general measures of eye-voice span, but also specifically on instances of hesitation and mistakes in the speech, and the correlated eye movements.
Conclusions/Take home message: Early detection of dementia is important for a number of reasons, such as giving the person access to interventions and medications, and allowing the individual and families time to prepare. By expanding the knowledge about reading processes in subjects with MCI, we are adding to the potential of using reading analysis as an avenue of detecting early signs of dementia.},
	booktitle    = {Book of Abstracts, 10th CPLOL Congress, 10-12 May 2018, Cascais, Portugal / editor: Trinite, Baiba},
	author       = {Lundholm Fors, Kristina and Fraser, Kathleen and Kokkinakis, Dimitrios},
	year         = {2018},
}

@inProceedings{lyngfelt-etal-2014-svenskt-208457,
	title        = {Ett svenskt konstruktikon. Grammatik möter lexikon},
	booktitle    = {Svenskans beskrivning : Förhandlingar vid Trettiotredje sammankomsten för svenskans beskrivning. Helsingfors den 15–17 maj 2013},
	author       = {Lyngfelt, Benjamin and Borin, Lars and Bäckström, Linnéa and Forsberg, Markus and Olsson, Leif-Jöran and Prentice, Julia and Rydstedt, Rudolf and Sköldberg, Emma and Tingsell, Sofia and Uppström, Jonatan},
	year         = {2014},
	volume       = {33},
	ISBN         = {978-951-51-0120-4},
	pages        = {268--279},
}

@inProceedings{lyngfelt-etal-2012-adding-163582,
	title        = {Adding a constructicon to the Swedish resource network of Språkbanken},
	abstract     = {This paper presents the integrated Swedish resource network of Språkbanken in general, and its latest addition – a constructicon – in particular. The constructicon, which is still in its early stages, is a collection of (partially) schematic multi-word units, constructions, developed as an addition to the Swedish FrameNet (SweFN). SweFN and the constructicon are integrated with other parts of Språkbanken, both lexical resources and corpora, through the lexical resource SALDO. In most respects, the constructicon is modeled on its English counterpart in Berkeley, and thus follows the FrameNet format. The most striking differences are the inclusion of so-called collostructional elements and the treatment of semantic roles, which are defined globally instead of locally as in FrameNet. Incorporating subprojects such as developing methods for automatic identification of constructions in authentic text on the one hand, and accounting for constructions problematic for L2 acquisition on the other, the approach is highly cross-disciplinary in nature, combining various theoretical linguistic perspectives on construction grammar with language technology, lexicography, and L2 research.},
	booktitle    = {11th Conference on Natural Language Processing (KONVENS) Proceedings},
	author       = {Lyngfelt, Benjamin and Borin, Lars and Forsberg, Markus and Prentice, Julia and Rydstedt, Rudolf and Sköldberg, Emma and Tingsell, Sofia},
	year         = {2012},
	ISBN         = {3-85027-005-X},
	pages        = {452--461},
}

@edited_book{lyngfelt-etal-2018-constructicography-269082,
	title        = {Constructicography: Constructicon development across languages},
	abstract     = {In constructionist theory, a constructicon is an inventory of constructions making up the full set of linguistic units in a language. In applied practice, it is a set of construction descriptions – a “dictionary of constructions”. The development of constructicons in the latter sense typically means combining principles of both construction grammar and lexicography, and is probably best characterized as a blend between the two traditions. We call this blend constructicography.

The present volume is a comprehensive introduction to the emerging field of constructicography. After a general introduction follow six chapters presenting constructicon projects for English, German, Japanese, Brazilian Portuguese, Russian, and Swedish, respectively, often in relation to a framenet of the language. In addition, there is a chapter addressing the interplay between linguistics and language technology in constructicon development, and a final chapter exploring the prospects for interlingual constructicography.

This is the first major publication devoted to constructicon development and it should be particularly relevant for those interested in construction grammar, frame semantics, lexicography, the relation between grammar and lexicon, or linguistically informed language technology.
},
	editor       = {Lyngfelt, Benjamin and Borin, Lars and Ohara, Kyoko and Torrent, Tiago Timponi},
	year         = {2018},
	publisher    = {John Benjamins},
	address      = {Amsterdam},
	ISBN         = {9789027263865},
}

@incollection{lyngfelt-etal-2018-constructicography-269085,
	title        = {Constructicography at work: Theory meets practice in the Swedish constructicon},
	abstract     = {This chapter addresses central topics in constructicography from the viewpoint of the Swedish constructicon project (SweCcn), focusing on practical constructicon development. The full process of construction description is described and discussed, from selection via corpus analysis to finished constructicon entry and beyond, towards structuring the set of entries into a network. Particular attention is given to the description format and the treatment of constructional variation. A main theme in the chapter is the interdependence and alignment of SweCcn and related resources, on the one hand in the local context, notably the infrastructure of Språkbanken (the Swedish language bank), and on the other hand with respect to corresponding resources for other languages. Of key concern is the relation to FrameNet, both the Swedish and other framenets, and a major section is devoted to conditions for linking constructions and frames.},
	booktitle    = {Constructicography: Constructicon development across languages},
	editor       = {Benjamin Lyngfelt and Lars Borin and Kyoko Ohara and Tiago Timponi Torrent},
	author       = {Lyngfelt, Benjamin and Bäckström, Linnéa and Borin, Lars and Ehrlemark, Anna and Rydstedt, Rudolf},
	year         = {2018},
	publisher    = {John Benjamins},
	address      = {Amsterdam},
	ISBN         = {9789027263865},
	pages        = {41--106},
}

@techreport{lyngfelt-forsberg-2012-svenskt-158226,
	title        = {Ett svenskt konstruktikon. Utgångspunkter och preliminära ramar},
	author       = {Lyngfelt, Benjamin and Forsberg, Markus},
	year         = {2012},
	publisher    = {University of Gothenburg},
	address      = {Göteborg},
}

@inProceedings{malm-etal-2018-uneek-267351,
	title        = {Uneek: a Web Tool for Comparative Analysis of Annotated Texts},
	abstract     = {In this paper, we present Uneek, a web based linguistic tool that performs set operations on raw or annotated texts. The tool may be used for automatic distributional analysis, and for disambiguating polysemy with a method that we refer to as semi-automatic uniqueness differentiation (SUDi). Uneek outputs the intersection of and differences between the two texts’ listed attributes, e.g. POS, dependencies, word forms, frame elements. This makes it an ideal supplement to methods for lumping or splitting in frame development processes. In order to make some of Uneek’s functions more clear, we employ SUDi on a small data set containing the polysemous verb "bake". As of now, Uneek can only process two files at a time, but there are plans to develop the tool so that it may simultaneously operate on multiple files. Finally, we relate the developmental plans for added functionality to how such functions may support FrameNet work in the future.},
	booktitle    = {Proceedings of the LREC 2018 Workshop International FrameNet Workshop 2018: Multilingual Framenets and Constructicons, 7-12 May 2018, Miyazaki (Japan) / [ed] Tiago Timponi Torrent, Lars Borin & Collin F. Baker, 2018},
	author       = {Malm, Per and Ahlberg, Malin and Rosén, Dan},
	year         = {2018},
	ISBN         = {979-10-95546-04-7},
}

@inProceedings{malm-etal-2018-lingfn-267404,
	title        = {LingFN: Towards a framenet for the linguistics domain},
	abstract     = {Framenets and frame semantics have proved useful for a number of natural language processing (NLP) tasks. However, in this connection framenets have often been criticized for limited coverage. A proposed reasonable-effort solution to this problem is to develop domain-specific (sublanguage) framenets to complement the corresponding general-language framenets for particular NLP tasks, and in the literature we find such initiatives covering, e.g., medicine, soccer, and tourism. In this paper, we report on our experiments and first results on building a framenet to cover the terms and concepts encountered in descriptive linguistic grammars. A contextual statistics based approach is used to judge the polysemous nature of domain-specific terms, and to design new domain-specific frames. The work is part of a more extensive research undertaking where we are developing NLP methodologies for automatic extraction of linguistic information from traditional linguistic descriptions to build typological databases, which otherwise are populated using a labor-intensive manual process.},
	booktitle    = {Proceedings : LREC 2018 Workshop, International FrameNet Workshop 2018. Multilingual Framenets and Constructicons, May 12, 2018, Miyazaki, Japan / Edited by Tiago Timponi Torrent, Lars Borin and Collin F. Baker},
	author       = {Malm, Per and Virk, Shafqat and Borin, Lars and Saxena, Anju},
	year         = {2018},
	publisher    = {ELRA},
	address      = {Miyazaki},
	ISBN         = {979-10-95546-04-7},
}

@edited_book{malmgren-etal-2011-lexins-174145,
	title        = {Lexins svenska lexikon (4 uppl.)},
	editor       = {Malmgren, Sven-Göran and Berg, Daniel and Berg, Sture and Hult, Ann-Kristin and Holmer, Louise and Sjögreen, Christian and Sköldberg, Emma and Toporowska Gronostaj, Maria},
	year         = {2011},
	publisher    = {Internetpublikation},
	address      = {Stockholm},
}

@article{malmgren-toporowskagronostaj-2009-valensbeskrivning-109243,
	title        = {Valensbeskrivning i svenska ordböcker — och några andra},
	journal      = {LexicoNordica},
	author       = {Malmgren, Sven-Göran and Toporowska Gronostaj, Maria},
	year         = {2009},
	volume       = {2009},
	number       = {16},
	pages        = {181--196},
}

@book{marinov-2008-dependency-88750,
	title        = {Dependency-Based Syntactic Analysis of Bulgarian},
	author       = {Marinov, Svetoslav},
	year         = {2008},
	ISBN         = {978-91-977196-2-9},
}

@inProceedings{marko-etal-2006-towards-40540,
	title        = {Towards a multilingual medical lexicon},
	booktitle    = {Proceedings of the American Medical Informatics Association Symposium (AMIA '06)},
	author       = {Markó, Kornél and Baud, Robert and Zweigenbaum, Pierre and Borin, Lars and Merkel, Magnus and Schulz, Stefan},
	year         = {2006},
	pages        = {534--538},
}

@inProceedings{marko-etal-2006-cross-34049,
	title        = {Cross-Lingual Alignment of Medical Lexicons},
	abstract     = {We present an approach for the creation of a multilingual medical dictionary for the biomedical domain. In a first step, available monolingual lexical resources are compiled into a common interchange format. Secondly, according to a linking format decided by the authors, the cross-lingual mappings of lexical entries are added. We show how these mappings can be generated using a morpho-semantic term normalization engine, which captures intra- as well as interlingual synonymy relationships on the level of subwords.},
	booktitle    = {Language Resources and Evaluation},
	author       = {Marko, Kornel and Baud, Robert and Zweigenbaum, Pierre and Merkel, Magnus and Toporowska Gronostaj, Maria and Kokkinakis, Dimitrios and Schulz, Stefan},
	year         = {2006},
	volume       = {2006},
	pages        = {5--8},
}

@inProceedings{matsson-etal-2019-imagettr-284011,
	title        = {ImageTTR: Grounding Type Theory with Records in Image Classification for Visual Question Answering},
	abstract     = {We present ImageTTR, an extension to the Python implementation of Type Theory with Records (pyTTR) which connects formal record type representation with image classifiers implemented as deep neural networks. The Type Theory with Records framework serves as a knowledge representation system for natural language the representations of which are grounded in perceptual information of neural networks. We demonstrate the benefits of this symbolic and data-driven hybrid approach on the task of visual question answering.},
	booktitle    = {Proceedings of the IWCS 2019 Workshop on Computing Semantics with Types, Frames and Related Structures, May 24, 2019, Gothenburg, Sweden / Rainer Osswald, Christian Retoré, Peter Sutton (Editors)},
	author       = {Matsson, Arild and Dobnik, Simon and Larsson, Staffan},
	year         = {2019},
	publisher    = {Association for Computational Linguistics},
	address      = {Stroudsburg, PA },
	ISBN         = {978-1-950737-25-3},
}

@inProceedings{megyesi-etal-2018-learner-275359,
	title        = {Learner Corpus Anonymization in the Age of GDPR: Insights from the Creation of a Learner Corpus of Swedish},
	abstract     = {This paper reports on the status of learner corpus anonymization for the ongoing research infrastructure project SweLL. The main project aim is to deliver and make available for research a well-annotated corpus of essays written by second language (L2) learners of Swedish. As practice shows, annotation of learner texts is a sensitive process demanding a lot of compromises between ethical and legal demands on the one hand, and research and technical demands on the other. Below is a concise description of the current status of pseudonymization of language learner data to ensure anonymity of the learners, with numerous examples of the above-mentioned compromises.},
	booktitle    = {Proceedings of the 7th Workshop on NLP for Computer Assisted Language Learning (NLP4CALL 2018) at SLTC, Stockholm, 7th November 2018},
	editor       = {Ildikó Pilán and Elena Volodina and David Alfter and Lars Borin},
	author       = {Megyesi, Beata and Granstedt, Lena and Johansson, Sofia and Prentice, Julia and Rosén, Dan and Schenström, Carl-Johan and Sundberg, Gunlög and Wirén, Mats and Volodina, Elena},
	year         = {2018},
	publisher    = {Linköping University Electronic Press},
	address      = {Linköpings universitet},
	ISBN         = {978-91-7685-173-9},
}

@inProceedings{moradi-etal-2014-graph-197533,
	title        = {A Graph-Based Analysis of Medical Queries of a Swedish Health Care Portal},
	abstract     = {Today web portals play an increasingly important role in health care, allowing information seekers to learn about diseases and treatments, and to administrate their care. Therefore, it is important that the portals are able to support this process as well as possible. In this paper, we study the search logs of a public Swedish health portal to address the questions of whether health information seeking differs from other types of Internet search, and whether there is a potential for utilizing network analysis methods in combination with semantic annotation to gain insights into search behaviors. Using a semantic-based method and a graph-based analysis of word co-occurrences in queries, we show there is an overlap among the results, indicating a potential role for these types of methods to gain insights and facilitate improved information search. In addition, we show that samples, windows of a month, of search logs may be sufficient to obtain similar results as using larger windows. We also show that medical queries share the same structural properties found for other types of information searches, thereby indicating an ability to reuse existing analysis methods for this type of search data.},
	booktitle    = {The Fifth International Workshop on Health Text Mining and Information Analysis (Louhi)},
	author       = {Moradi, Farnaz and Eklund, Ann-Marie and Kokkinakis, Dimitrios and Olovsson, Tomas and Tsigas, Philippas},
	year         = {2014},
	ISBN         = {978-1-937284-90-9},
	pages        = {2--10},
}

@article{morrison-etal-2014-likelihood-188784,
	title        = {Likelihood ratio calculation for a disputed-utterance analysis with limited available data},
	abstract     = {We present a disputed-utterance analysis using relevant data, quantitative measurements and statistical models to calculate likelihood ratios. The acoustic data were taken from an actual forensic case in which the amount of data available to train the statistical models was small and the data point from the disputed word was far out on the tail of one of the modelled distributions. A procedure based on single multivariate Gaussian models for each hypothesis led to an unrealistically high likelihood ratio value with extremely poor reliability, but a procedure based on Hotelling’s T2 statistic and a procedure based on calculating a posterior predictive density produced more acceptable results. The Hotelling’s T2 procedure attempts to take account of the sampling uncertainty of the mean vectors and covariance matrices due to the small number of tokens used to train the models, and the posterior-predictive-density analysis integrates out the values of the mean vectors and covariance matrices as nuisance parameters. Data scarcity is common in forensic speech science and we argue that it is important not to accept extremely large calculated likelihood ratios at face value, but to consider whether such values can be supported given the size of the available data and modelling constraints.},
	journal      = {Speech Communication},
	author       = {Morrison, Geoffrey Stewart and Lindh, Jonas and Curran, James M},
	year         = {2014},
	volume       = {58},
	pages        = {81--90},
}

@inProceedings{morrison-etal-2012-calculating-167148,
	title        = {Calculating the reliability of likelihood ratios: Addressing modelling problems related to small n and tails},
	abstract     = {In forensic speech science we are often faced with the problem of having a relatively small amount of data which is also multivariate and distributionally complex. This results in a serious problem exactly in the scenario where potentially large strengths of evidence could be obtained, i.e., when the trace data are on a tail of the distribution which models either the prosecution or defence hypothesis and a large magnitude log likelihood ratio is calculated. By definition the sampling of a distribution is sparse on its tails, and this problem is compounded if the model is trained on a small amount of data – small fluctuations in the training data can lead to large changes in the calculated likelihoods on the tails, and thus large changes in the calculated likelihood ratios for trace data on the tails. Large-magnitude calculated log likelihood ratios are therefore inherently unreliable.},
	booktitle    = {Proceedings of 14th Australasian International Conference on Speech Science and Technology},
	author       = {Morrison, Geoffrey Stewart and Ochoa, Felipe and Lindh, Jonas},
	year         = {2012},
	volume       = {14},
}

@inProceedings{moschitti-etal-2012-modeling-156401,
	title        = {Modeling Topic Dependencies in Hierarchical Text Categorization},
	abstract     = {In this paper, we encode topic dependencies in hierarchical multi-label Text Categorization (TC) by means of rerankers. We represent reranking hypotheses with several innovative kernels considering both the structure of the hierarchy and the probability of nodes. Additionally, to better investigate the role of category relationships, we consider two interesting cases: (i) traditional schemes in which node-fathers include all the documents of their child-categories; and (ii) more general schemes, in which children can include documents not belonging to their fathers. The extensive experimentation on Reuters Corpus Volume 1 shows that our rerankers inject effective structural semantic dependencies in multi-classifiers and significantly outperform the state of the art.},
	booktitle    = {Proceedings of the 50th Annual Meeting of the Association for Computational Linguistics (ACL 2012); Jeju, Korea; July 8-14},
	author       = {Moschitti, Alessandro and Ju, Qi and Johansson, Richard},
	year         = {2012},
	pages        = {759--767},
}

@article{nautsch-etal-2017-making-258734,
	title        = {Making Likelihood Ratios Digestible for Cross-Application Performance Assessment},
	abstract     = {Performance estimation is crucial to the assessment of novel algorithms and systems. In detection error tradeoff (DET) diagrams, discrimination performance is solely assessed targeting one application, whereas cross-application performance considers risks resulting from decisions, depending on application constraints. For the purpose of interchangeability of research results across different application constraints, we propose to augment DET curves by depicting systems regarding their support of security and convenience levels. Therefore, application policies are aggregated into levels based on verbal likelihood ratio scales, providing an easy-to-use concept for business-to-business communication to denote operative thresholds. We supply a reference implementation in Python, an exemplary performance assessment on synthetic score distributions, and a fine-tuning scheme for Bayes decision thresholds, when decision policies are bounded rather than fixed.},
	journal      = {IEEE Signal Processing Letters},
	author       = {Nautsch, A. and Meuwly, D. and Ramos, D. and Lindh, Jonas and Busch, C.},
	year         = {2017},
	volume       = {24},
	number       = {10},
	pages        = {1552--1556},
}

@book{nietopina-2019-splitting-282680,
	title        = {Splitting rocks: Learning word sense representations from corpora and lexica},
	abstract     = {The representation of written language semantics is a central problem of language technology and a crucial component of many natural language processing applications, from part-of-speech tagging to text summarization. These representations of linguistic units, such as words or sentences, allow computer applications that work with language to process and manipulate the meaning of text. In particular, a family of models has been successfully developed based on automatically learning semantics from large collections of text and embedding them into a vector space, where semantic or lexical similarity is a function of geometric distance. Co-occurrence information of words in context is the main source of data used to learn these representations.

Such models have typically been applied to learning representations for word forms, which have been widely applied, and proven to be highly successful, as characterizations of semantics at the word level. However, a word-level approach to meaning representation implies that the different meanings, or senses, of any polysemic word share one single representation. This might be problematic when individual word senses are of interest and explicit access to their specific representations is required: for instance, when an application needs to deal with word senses rather than word forms, or when a digital lexicon’s sense inventory has to be mapped to a set of learned semantic representations.

In this thesis, we present a number of models that try to tackle this problem by automatically learning representations for word senses instead of for words. In particular, we try to achieve this by using two separate sources of information: corpora and lexica for the Swedish language. Throughout the five publications compiled in this thesis, we demonstrate that it is possible to generate word sense representations from these sources of data individually and in conjunction, and we observe that combining them yields superior results in terms of accuracy and sense inventory coverage. Furthermore, in our evaluation of the different representational models proposed here, we showcase the applicability of word sense representations both to downstream natural language processing applications and to the development of existing linguistic resources.},
	author       = {Nieto Piña, Luis},
	year         = {2019},
	publisher    = {University of Gothenburg},
	address      = {Gothenburg},
	ISBN         = {978-91-87850-75-2},
}

@inProceedings{nietopina-johansson-2015-simple-222611,
	title        = {A Simple and Efficient Method to Generate Word Sense Representations},
	abstract     = {Distributed representations of words have boosted the performance of many Natural Language Processing tasks. However, usually only one representation per word is obtained, not acknowledging the fact that some words have multiple meanings. This has a negative effect on the individual word representations and the language model as a whole. In this paper we present a simple model that enables recent techniques for building word vectors to represent distinct senses of polysemic words. In our assessment of this model we show that it is able to effectively discriminate between words’ senses and to do so in a computationally efficient manner.},
	booktitle    = {Proceedings of the International Conference Recent Advances in Natural Language Processing, Hissar, Bulgaria, 7–9 September 2015},
	editor       = {Galia Angelova and Kalina Bontcheva and Ruslan Mitkov},
	author       = {Nieto Piña, Luis and Johansson, Richard},
	year         = {2015},
	pages        = {465--472},
}

@inProceedings{nietopina-johansson-2016-embedding-241139,
	title        = {Embedding Senses for Efficient Graph-based Word Sense Disambiguation},
	abstract     = {We propose a simple graph-based method for word sense disambiguation (WSD) where sense and context embeddings are constructed by applying the Skip-gram method to random walks over the sense graph. We used this method to build a WSD system for Swedish using the SALDO lexicon, and evaluated it on six different annotated test sets. In all cases, our system was several orders of magnitude faster than a state-of-the-art PageRank-based system, while outperforming a random baseline soundly.},
	booktitle    = {Proceedings of TextGraphs-10: the Workshop on Graph-based Methods for Natural Language Processing},
	author       = {Nieto Piña, Luis and Johansson, Richard},
	year         = {2016},
	publisher    = {Association for Computational Linguistics},
}

@article{nietopina-johansson-2016-benchmarking-251412,
	title        = {Benchmarking Word Sense Disambiguation Systems for Swedish},
	abstract     = {We compare several word sense disambiguation systems for Swedish and evaluate them on seven different sense-annotated corpora. Our results show that unsupervised systems beat a random baseline, but generally do not outperform a first-sense baseline considerably. On a lexical-sample dataset that allows us to train a supervised system, the unsupervised disambiguators are strongly outperformed by the supervised one.},
	journal      = {The Sixth Swedish Language Technology Conference},
	author       = {Nieto Piña, Luis and Johansson, Richard},
	year         = {2016},
}

@inProceedings{nietopina-johansson-2017-training-261938,
	title        = {Training Word Sense Embeddings With Lexicon-based Regularization},
	abstract     = {We propose to improve word sense embeddings by enriching an automatic corpus-based method with lexicographic data. Information from a lexicon is introduced into the learning algorithm’s objective function through a regularizer. The incorporation of lexicographic data yields embeddings that are able to reflect expert-defined word senses, while retaining the robustness, high quality, and coverage of automatic corpus-based methods. These properties are observed in a manual inspection of the semantic clusters that different degrees of regularizer strength create in the vector space. Moreover, we evaluate the sense embeddings in two downstream applications: word sense disambiguation and semantic frame prediction, where they outperform simpler approaches. Our results show that a corpus-based model balanced with lexicographic data learns better representations and improves their performance in downstream tasks.},
	booktitle    = {Proceedings of the Eighth International Joint Conference on Natural Language Processing (Volume 1: Long Papers), Taipei, Taiwan, November 27 – December 1, 2017},
	author       = {Nieto Piña, Luis and Johansson, Richard},
	year         = {2017},
	publisher    = {Asian Federation of Natural Language Processing },
	ISBN         = {978-1-948087-00-1},
}

@inProceedings{nietopina-johansson-2018-automatically-270261,
	title        = {Automatically Linking Lexical Resources with Word Sense Embedding Models},
	abstract     = {Automatically learnt word sense embeddings are developed as an attempt to refine the capabilities of coarse word embeddings. The word sense representations obtained this way are, however, sensitive to underlying corpora and parameterizations, and they might be difficult to relate to word senses as formally defined by linguists. We propose to tackle this problem by devising a mechanism to establish links between word sense embeddings and lexical resources created by experts. We evaluate the applicability of these links in a task to retrieve instances of Swedish word senses not present in the lexicon.},
	booktitle    = {The Third Workshop on Semantic Deep Learning (SemDeep-3), August 20th, 2018, Santa Fe, New Mexico, USA / Luis Espinosa Anke, Thierry Declerck, Dagmar Gromann (eds.)},
	author       = {Nieto Piña, Luis and Johansson, Richard},
	year         = {2018},
	ISBN         = {978-1-948087-56-8},
}

@article{nimb-etal-2006-leksikalisk-35157,
	title        = {Leksikalisk beskrivelse af adverbiers semantik i norsk, svensk og dansk  - LEXADV},
	journal      = {Skrifter / Nordisk forening for leksikografi},
	author       = {Nimb, Sanni and Fjeld, Ruth V. and Toporowska Gronostaj, Maria},
	year         = {2006},
	volume       = {9},
	pages        = {301--314},
}

@inProceedings{nord-forsberg-2017-enklare-259902,
	title        = {Enklare efter klarspråk? Myndighetstexter före och efter ett klarspråksprojekt},
	booktitle    = {Saga Bendegard, Ulla Melander Marttala & Maria Westman (red.), Språk och norm: Rapport från ASLA:s symposium, Uppsala universitet 21–22 april 2016},
	author       = {Nord, Andreas and Forsberg, Markus},
	year         = {2017},
	publisher    = {ASLA},
	address      = {Uppsala},
	ISBN         = {978-91-87884-26-9},
}

@inProceedings{nusko-etal-2016-building-238135,
	title        = {Building a Sentiment Lexicon for Swedish},
	abstract     = {In this paper we will present our ongoing project to build and evaluate a sentiment lexicon for Swedish. Our main resource is SALDO, a lexical resource of modern Swedish developed at Språkbanken, University of Gothenburg. Using a semi-supervised approach, we expand a manually chosen set of six core words using parent-child relations based on the semantic network structure of SALDO. At its current stage the lexicon consists of 175 seeds, 633 children, and 1319 grandchildren.},
	booktitle    = {Linköping Electronic Conference Proceedings},
	author       = {Nusko, Bianka and Tahmasebi, Nina and Mogren, Olof},
	year         = {2016},
	volume       = {126},
	number       = {006},
	ISBN         = {978-91-7685-733-5},
	pages        = {32--37},
}

@article{oelke-etal-2013-fingerprint-181484,
	title        = {Fingerprint Matrices: Uncovering the dynamics of social networks in prose literature},
	abstract     = {In prose literature, complex dynamics of interpersonal relationships can often be observed between the different characters. Traditionally, node-link diagrams are used to depict the social network of a novel. However, static graphs can only visualize the overall social network structure but not the development of the networks over the course of the story, while dynamic graphs have the serious problem that there are many sudden changes between different portions of the overall social network. In this paper we explore means to show the relationships between the characters of a plot and at the same time their development over the course of a novel. Based on a careful exploration of the design space, we suggest a new visualization technique called Fingerprint Matrices. A case study exemplifies the usage of Fingerprint Matrices and shows that they are an effective means to analyze prose literature with respect to the development of relationships between the different characters.},
	journal      = {Computer Graphics Forum},
	author       = {Oelke, D. and Kokkinakis, Dimitrios and Keim, D. A.},
	year         = {2013},
	volume       = {32},
	number       = {3},
	pages        = {371--380},
}

@inProceedings{oelke-etal-2012-visual-155495,
	title        = {Visual Analytics and the Language of Web Query Logs - A Terminology Perspective},
	abstract     = {This paper explores means to integrate natural language processing methods for terminology and entity identification in medical web session logs with visual analytics techniques. The aim of the study is to examine whether the vocabulary used in queries posted to a Swedish regional health web site can be assessed in a way that will enable a terminologist or medical data analysts to instantly identify new term candidates and their relations based on significant co-occurrence patterns. We provide an example application in order to illustrate how co-occurrence relationships between medical and general entities occurring in such logs can be visualized, accessed and explored. To enable a visual exploration of the generated co-occurrence graphs, we employ a general purpose social network analysis tool, Visone (http://visone.info), which permits the visualization and analysis of various types of graph structures. Our examples show that visual analytics based on co-occurrence analysis provides insights into the use of layman language in relation to established (professional) terminologies, which may help terminologists decide which terms to include in future terminologies. Increased understanding of the querying language used is also of interest in the context of public health web sites. The query results should reflect the intentions of the information seekers, who may express themselves in layman language that differs from the one used on the available web sites provided by medical professionals.},
	booktitle    = {The 15th EURALEX International Congress (European Association of Lexicography). Oslo, Norway.},
	author       = {Oelke, Daniela and Eklund, Ann-Marie and Marinov, Svetoslav and Kokkinakis, Dimitrios},
	year         = {2012},
	pages        = {8},
}

@inProceedings{oelke-etal-2012-advanced-155493,
	title        = {Advanced Visual Analytics Methods for Literature Analysis},
	abstract     = {The volumes of digitized literary collections in various languages increase at a rapid pace, which also results in a growing demand for computational support to analyze such linguistic data. This paper combines robust text analysis with advanced visual analytics and brings a new set of tools to literature analysis. Visual analytics techniques can offer new and unexpected insights and knowledge to the literary scholar. We analyzed a small subset of a large literary collection, the Swedish Literature Bank, by focusing on the extraction of persons’ names, their gender and their normalized, linked form, including mentions of theistic beings (e.g., gods’ names and mythological figures), and examined their appearance over the course of the novel. A case study based on 13 novels from the aforementioned collection shows a number of interesting applications of visual analytics methods to literature problems, where named entities can play a prominent role, demonstrating the advantage of visual literature analysis. Our work is inspired by the notion of distant reading or macroanalysis for the analysis of large literature collections.},
	booktitle    = {Language Technology for Cultural Heritage, Social Sciences, and Humanities (LaTeCH). An EACL 2012 workshop. Avignon, France.},
	author       = {Oelke, Daniela and Kokkinakis, Dimitrios and Malm, Mats},
	year         = {2012},
	pages        = {10},
}

@inProceedings{olsson-2006-does-116086,
	title        = {Does the ITG platform eXist?},
	abstract     = {An overview of the ITG platform and its backend eXist (http://exist-db.org). },
	booktitle    = {XML Prague 2006 (http://xmlprague.cz), 17–18 June 2006, Prague},
	author       = {Olsson, Leif-Jöran},
	year         = {2006},
}

@inProceedings{olsson-2007-exist-66850,
	title        = {How do you do eXist},
	booktitle    = {Javaforum 2007-05-23},
	author       = {Olsson, Leif-Jöran},
	year         = {2007},
}

@inProceedings{olsson-2008-valkommen-116044,
	title        = {Välkommen till eXist},
	abstract     = {A walkthrough of the versatility of the native XML database eXist (http://exist-db.org), combined with overviews of XPath and XQuery.},
	booktitle    = {FSCONS (http://fscons.org), 24–26 October 2008, Göteborg},
	author       = {Olsson, Leif-Jöran},
	year         = {2008},
}

@inProceedings{pedersen-etal-2012-linking-155599,
	title        = {Linking and validating Nordic and Baltic wordnets},
	booktitle    = {Proceedings of the 6th International Global Wordnet Conference},
	author       = {Pedersen, Bolette Sandford and Borin, Lars and Forsberg, Markus and Lindén, Krister and Orav, Heili and Rögnvaldsson, Eiríkur},
	year         = {2012},
	pages        = {254--260},
}

@inProceedings{pedersen-etal-2013-nordic-178357,
	title        = {Nordic and Baltic wordnets aligned and compared through “WordTies”},
	abstract     = {During the last few years, extensive wordnets have been built locally for the Nordic and Baltic languages applying very different compilation strategies. The aim of the present investigation is to consolidate and examine these wordnets through an alignment via Princeton Core WordNet and thereby compare them along the measures of taxonomical structure, synonym structure, and assigned relations, in order to approximate a best practice. A common web interface and visualizer, “WordTies”, is developed to facilitate this purpose. Four bilingual wordnets are automatically processed and evaluated, exposing interesting differences between the wordnets. Even if the alignments are judged to be of good quality, the precision of the translations varies due to considerable differences in hyponymy depth and interpretation of the synsets. All seven monolingual and four bilingual wordnets as well as WordTies have been made available via META-SHARE through the META-NORD project.},
	booktitle    = {Proceedings of the 19th Nordic Conference of Computational Linguistics (NODALIDA 2013), May 22–24, 2013, Oslo University, Norway. NEALT Proceedings Series 16},
	author       = {Pedersen, Bolette and Borin, Lars and Forsberg, Markus and Kahusk, Neeme and Lindén, Krister and Niemi, Jyrki and Nisbeth, Niklas and Nygaard, Lars and Orav, Heili and Rögnvaldsson, Eiríkur and Seaton, Mitchel and Vider, Kadri and Voionmaa, Kaarlo},
	year         = {2013},
	number       = {16},
	pages        = {147--162},
}

@inProceedings{pilan-2015-helping-227313,
	title        = {Helping Swedish words come to their senses: word-sense disambiguation based on sense associations from the SALDO lexicon},
	abstract     = {This paper describes a knowledge-based approach to word-sense disambiguation using a lexical-semantic resource, SALDO. This hierarchically organized lexicon, which defines senses in terms of other related senses, has not previously been explored for this purpose. The proposed method is based on maximizing the overlap between associated word senses of nouns and verbs co-occurring within a sentence. The results of a small-scale experiment using this method are also reported. Overall, the approach proved more efficient for nouns: not only was the accuracy score higher for this category (56%) than for verbs (46%), but a sense overlap was also found for nouns in 22% more of the cases. As a result of an in-depth analysis of the predictions, we identified a number of ways the system could be modified or extended for improved performance.},
	booktitle    = {Proceedings of the 20th Nordic Conference of Computational Linguistics (NODALIDA 2015). May 11–13, 2015, Vilnius, Lithuania},
	editor       = {Beáta Megyesi},
	author       = {Pilán, Ildikó},
	year         = {2015},
	number       = {109},
	ISBN         = {9789175190983},
	pages        = {275--279},
}

@inProceedings{pilan-2016-detecting-243715,
	title        = {Detecting Context Dependence in Exercise Item Candidates Selected from Corpora},
	abstract     = {We explore the factors influencing the dependence of single sentences on their larger textual context in order to automatically identify candidate sentences for language learning exercises from corpora which are presentable in isolation. An in-depth investigation of this question has not been previously carried out. Understanding this aspect can contribute to a more efficient selection of candidate sentences which, besides reducing the time required for item writing, can also ensure a higher degree of variability and authenticity. We present a set of relevant aspects collected based on the qualitative analysis of a smaller set of context-dependent corpus example sentences. Furthermore, we implemented a rule-based algorithm using these criteria which achieved an average precision of 0.76 for the identification of different issues related to context dependence. The method has also been evaluated empirically: 80% of the sentences in which our system did not detect context-dependent elements were also considered context-independent by human raters.},
	booktitle    = {Proceedings of the 11th Workshop on Innovative Use of NLP for Building Educational Applications, June 12 to June 17, 2016, San Diego, USA},
	author       = {Pilán, Ildikó},
	year         = {2016},
}

@inProceedings{pilan-etal-2016-coursebook-246349,
	title        = {Coursebook texts as a helping hand for classifying linguistic complexity in language learners' writings},
	abstract     = {We bring together knowledge from two different types of language learning data, texts learners read and texts they write, to improve linguistic complexity classification in the latter. Linguistic complexity in the foreign and second language learning context can be expressed in terms of proficiency levels. We show that incorporating features capturing lexical complexity information from reading passages can significantly boost the machine learning based classification of learner-written texts into proficiency levels. With an F1 score of .8 our system rivals state-of-the-art results reported for other languages for this task. Finally, we present a freely available web-based tool for proficiency level classification and lexical complexity visualization for both learner writings and reading texts.},
	booktitle    = {Proceedings of the workshop on Computational Linguistics for Linguistic Complexity},
	author       = {Pilán, Ildikó and Alfter, David and Volodina, Elena},
	year         = {2016},
	ISBN         = {978-4-87974-709-9},
}

@inProceedings{pilan-etal-2017-larka-289884,
	title        = {Lärka: an online platform where language learning meets natural language processing},
	booktitle    = {7th ISCA Workshop on Speech and Language Technology in Education, 25-26 August 2017, Stockholm, Sweden},
	author       = {Pilán, Ildikó and Alfter, David and Volodina, Elena},
	year         = {2017},
}

@article{pilan-etal-2016-readable-226565,
	title        = {A readable read: Automatic Assessment of Language Learning Materials based on Linguistic Complexity.},
	abstract     = {Corpora and web texts can become a rich language learning resource if we have a means of assessing whether they are linguistically appropriate for learners at a given proficiency level. In this paper, we aim at addressing this issue by presenting the first approach for predicting linguistic complexity for Swedish second language learning material on a 5-point scale. After showing that the traditional Swedish readability measure, Läsbarhetsindex (LIX), is not suitable for this task, we propose a supervised machine learning model, based on a range of linguistic features, that can reliably classify texts according to their difficulty level. Our model obtained an accuracy of 81.3% and an F-score of 0.8, which is comparable to the state of the art in English and is considerably higher than previously reported results for other languages. We further studied the utility of our features with single sentences instead of full texts since sentences are a common linguistic unit in language learning exercises. We trained a separate model on sentence-level data with five classes, which yielded 63.4% accuracy. Although this is lower than the document level performance, we achieved an adjacent accuracy of 92%. Furthermore, we found that using a combination of different features, compared to using lexical features alone, resulted in 7% improvement in classification accuracy at the sentence level, whereas at the document level, lexical features were more dominant. Our models are intended for use in a freely accessible web-based language learning platform for the automatic generation of exercises, and they will be available also in the form of web-services.},
	journal      = {Computational Linguistics and Applications},
	author       = {Pilán, Ildikó and Vajjala, Sowmya and Volodina, Elena},
	year         = {2016},
	volume       = {7},
	number       = {1},
	pages        = {143--159},
}

@inProceedings{pilan-volodina-2014-reusing-200967,
	title        = {Reusing Swedish FrameNet for training semantic roles},
	abstract     = {In this article we present the first experiences of reusing the Swedish FrameNet (SweFN) as a resource for training semantic roles. We give an account of the procedure we used to adapt SweFN to the needs of students of Linguistics in the form of an automatically generated exercise. During this adaptation, the mapping of the fine-grained distinction of roles from SweFN into learner-friendlier coarse-grained roles presented a major challenge. Besides discussing the details of this mapping, we describe the resulting multiple-choice exercise and its graphical user interface. The exercise was made available through Lärka, an online platform for students of Linguistics and learners of Swedish as a second language. We also outline aspects underlying the selection of the incorrect answer options, which include semantic as well as frequency-based criteria. Finally, we present our own observations and initial user feedback about the applicability of such a resource in the pedagogical domain. Students' answers indicated an overall positive experience; the majority found the exercise useful for learning semantic roles.},
	booktitle    = {Proceedings of LREC 2014, May 26-31, 2014, Reykjavik, Iceland},
	author       = {Pilán, Ildikó and Volodina, Elena},
	year         = {2014},
	ISBN         = { 978-2-9517408-8-4},
	pages        = {1359--1363},
}

@inProceedings{pilan-volodina-2016-classification-248099,
	title        = {Classification of Language Proficiency Levels in Swedish Learners' Texts},
	abstract     = {We evaluate a system for the automatic classification of texts written by learners of Swedish as a second language into levels of language proficiency. Since the amount of available annotated learner essay data for our target language is rather small, we also explore the potential of domain adaptation for this task. The additional domain consists of coursebook texts written by experts for learners. We find that already with a smaller amount of in-domain Swedish learner essay data it is possible to obtain results that compare well to state-of-the-art systems for other languages, with domain adaptation methods yielding a slight improvement.},
	booktitle    = {The Sixth Swedish Language Technology Conference (SLTC), Umeå University, 17-18 November, 2016},
	author       = {Pilán, Ildikó and Volodina, Elena},
	year         = {2016},
}

@inProceedings{pilan-volodina-2018-exploring-275366,
	title        = {Exploring word embeddings and phonological similarity for the unsupervised correction of language learner errors},
	abstract     = {The presence of misspellings and other errors or non-standard word forms poses a considerable challenge for NLP systems. Although several supervised approaches have been proposed previously to normalize these, annotated training data is scarce for many languages. We investigate, therefore, an unsupervised method where correction candidates for Swedish language learners’ errors are retrieved from word embeddings. Furthermore, we compare the usefulness of combining cosine similarity with orthographic and phonological similarity based on a neural grapheme-to-phoneme conversion system we train for this purpose. Although combinations of similarity measures have been explored for finding correction candidates, it remains unclear how these measures relate to each other and how much they contribute individually to identifying the correct alternative. We experiment with different combinations of these and find that integrating phonological information is especially useful when the majority of learner errors are related to misspellings, but less so when errors are of a variety of types including, e.g., grammatical errors.},
	booktitle    = {Proceedings of the Second Joint SIGHUM Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature, COLING, Santa Fe, New Mexico, USA, August 25, 2018.},
	author       = {Pilán, Ildikó and Volodina, Elena},
	year         = {2018},
	publisher    = {Association for Computational Linguistics},
	ISBN         = {978-1-948087-61-2},
}

@inProceedings{pilan-volodina-2018-investigating-275367,
	title        = {Investigating the importance of linguistic complexity features across different datasets related to language learning},
	abstract     = {We present the results of our investigations aiming at identifying the most informative linguistic complexity features for classifying language learning levels in three different datasets. The datasets vary across two dimensions: the size of the instances (texts vs. sentences) and the language learning skill they involve (reading comprehension texts vs. texts written by learners themselves). We present a subset of the most predictive features for each dataset, taking into consideration significant differences in their per-class mean values, and show that these subsets lead not only to simpler models, but also to an improved classification performance. Furthermore, we pinpoint fourteen central features that are good predictors regardless of the size of the linguistic unit analyzed or the skills involved, which include both morpho-syntactic and lexical dimensions.},
	booktitle    = {Proceedings of the Workshop on Linguistic Complexity and Natural Language Processing, COLING, Santa Fe, New Mexico, USA, August 25, 2018.},
	author       = {Pilán, Ildikó and Volodina, Elena},
	year         = {2018},
	publisher    = {Association for Computational Linguistics},
	ISBN         = {978-1-948087-62-9},
}

@misc{pilan-etal-2018-proceedings-275358,
	title        = {Proceedings of the 7th Workshop on NLP for Computer Assisted Language Learning (NLP4CALL 2018), SLTC, Stockholm, 7th November 2018 },
	abstract     = {The primary goal of the workshop series on Natural Language Processing for Computer-Assisted Language Learning (NLP4CALL) is to create a meeting place for researchers working on the integration of Natural Language Processing and Speech Technologies in CALL systems and exploring the theoretical and methodological issues arising in this connection. The latter includes, among others, insights from Second Language Acquisition (SLA) research, on the one hand, and promoting the development of “Computational SLA” through setting up Second Language research infrastructure(s), on the other.

The intersection of Natural Language Processing (or Language Technology / Computational Linguistics) and Speech Technology with Computer-Assisted Language Learning (CALL) brings “understanding” of language to CALL tools, thus making CALL intelligent. This has given this area of research its name: Intelligent CALL, ICALL. As the definition suggests, apart from having excellent knowledge of Natural Language Processing and/or Speech Technology, ICALL researchers need good insights into second language acquisition theories and practices, as well as knowledge of second language pedagogy and didactics. This workshop therefore invites a wide range of ICALL-relevant research, including studies where NLP-enriched tools are used for testing SLA and pedagogical theories, and vice versa, where SLA theories, pedagogical practices or empirical data are modeled in ICALL tools. The NLP4CALL workshop series is aimed at bringing together competencies from these areas for sharing experiences and brainstorming around the future of the field.},
	author       = {Pilán, Ildikó and Volodina, Elena and Alfter, David and Borin, Lars},
	year         = {2018},
	publisher    = {Linköping University Electronic Press},
	address      = {Linköpings universitet},
	ISBN         = {978-91-7685-173-9},
}

@article{pilan-etal-2017-candidate-260382,
	title        = {Candidate sentence selection for language learning exercises: From a comprehensive framework to an empirical evaluation},
	abstract     = {We present a framework and its implementation relying on Natural Language Processing methods, which aims at the identification of exercise item candidates from corpora. The hybrid system combining heuristics and machine learning methods includes a number of relevant selection criteria. We focus on two fundamental aspects: linguistic complexity and the dependence of the extracted sentences on their original context. Previous work on exercise generation addressed these two criteria only to a limited extent, and a refined overall candidate sentence selection framework also appears to be lacking. In addition to a detailed description of the system, we present the results of an empirical evaluation conducted with language teachers and learners which indicate the usefulness of the system for educational purposes. We have integrated our system into a freely available online learning platform.},
	journal      = {Revue Traitement Automatique des Langues. Special issue on NLP for Learning and Teaching},
	author       = {Pilán, Ildikó and Volodina, Elena and Borin, Lars},
	year         = {2017},
	volume       = {57},
	number       = {3},
	pages        = {67--91},
}

@inProceedings{pilan-etal-2013-automatic-188465,
	title        = {Automatic Selection of Suitable Sentences  for Language Learning Exercises},
	abstract     = {In this study we investigated second and foreign language (L2) sentence readability, an area little explored so far in the case of several languages, including Swedish. The outcome of our research consists of two methods for sentence selection from native language corpora based on Natural Language Processing (NLP) and machine learning (ML) techniques. The two approaches have been made available online within Lärka, an Intelligent CALL (ICALL) platform offering activities for language learners and students of linguistics. Such an automatic selection of suitable sentences can be valuable for L2 teachers during the creation of new teaching materials, for L2 students who look for additional self-study exercises as well as for lexicographers in search of example sentences to illustrate the meaning of a vocabulary item. Members from all these potential user groups evaluated our methods and found the majority of the sentences selected suitable for L2 learning purposes.},
	booktitle    = {20 Years of EUROCALL: Learning from the Past, Looking to the Future. 2013 EUROCALL Conference, 11th  to 14th September 2013 Évora, Portugal, Proceedings.},
	author       = {Pilán, Ildikó and Volodina, Elena and Johansson, Richard},
	year         = {2013},
	ISBN         = {978-1-908416-12-4},
	pages        = {218--225},
}

@inProceedings{pilan-etal-2014-rule-210940,
	title        = {Rule-based and machine learning approaches for second language sentence-level readability},
	abstract     = {We present approaches for the identification of sentences understandable by second language learners of Swedish, which can be used in automatically generated exercises based on corpora. In this work we merged methods and knowledge from machine learning-based readability research, from rule-based studies of Good Dictionary Examples and from second language learning syllabuses. The proposed selection methods have also been implemented as a module in a free web-based language learning platform. Users can use different parameters and linguistic filters to personalize their sentence search with or without a machine learning component assessing readability. The sentences selected have already found practical use as multiple-choice exercise items within the same platform. Out of a number of deep linguistic indicators explored, we found mainly lexical-morphological and semantic features informative for second language sentence-level readability. We obtained a readability classification accuracy result of 71%, which approaches the performance of other models used in similar tasks. Furthermore, during an empirical evaluation with teachers and students, about seven out of ten sentences selected were considered understandable, the rule-based approach slightly outperforming the method incorporating the machine learning model.},
	booktitle    = {Proceedings of the Ninth Workshop on Innovative Use of NLP for Building Educational Applications, June 26, 2014 Baltimore, Maryland, USA},
	author       = {Pilán, Ildikó and Volodina, Elena and Johansson, Richard},
	year         = {2014},
	ISBN         = {978-1-941643-03-7},
	pages        = {174--184},
}

@inProceedings{pilan-etal-2016-predicting-247240,
	title        = {Predicting proficiency levels in learner writings by transferring a linguistic complexity model from expert-written coursebooks},
	abstract     = {The lack of a sufficient amount of data tailored for a task is a well-recognized problem for many statistical NLP methods. In this paper, we explore whether data sparsity can be successfully tackled when classifying language proficiency levels in the domain of learner-written output texts. We aim at overcoming data sparsity by incorporating knowledge in the trained model from another domain consisting of input texts written by teaching professionals for learners. We compare different domain adaptation techniques and find that a weighted combination of the two types of data performs best, which can even rival systems based on considerably larger amounts of in-domain data. Moreover, we show that normalizing errors in learners’ texts can substantially improve classification when in-domain data with annotated proficiency levels is not available.},
	booktitle    = {Proceedings of the 26th International Conference on Computational Linguistics (COLING), December 13-16, 2016, Osaka},
	author       = {Pilán, Ildikó and Volodina, Elena and Zesch, Torsten},
	year         = {2016},
	ISBN         = {978-4-87974-702-0},
}

@inProceedings{rama-borin-2011-estimating-140688,
	title        = {Estimating Language Relationships from a Parallel Corpus. A Study of the Europarl Corpus},
	abstract     = {Since the 1950s, linguists have been using short lists (40–200 items) of basic vocabulary as the central component in a methodology which is claimed to make it possible to automatically calculate genetic relationships among languages. In the last few years these methods have experienced something of a revival, in that more languages are involved, different distance measures are systematically compared and evaluated, and methods from computational biology are used for calculating language family trees. In this paper, we explore how this methodology can be extended in another direction, by using larger word lists automatically extracted from a parallel corpus using word alignment software. We present preliminary results from using the Europarl parallel corpus in this way for estimating the distances between some languages in the Indo-European language family.},
	booktitle    = {NEALT Proceedings Series (NODALIDA 2011 Conference Proceedings)},
	author       = {Rama, Taraka and Borin, Lars},
	year         = {2011},
	volume       = {11},
	pages        = {161--167},
}

@inProceedings{rama-borin-2012-properties-164449,
	title        = {Properties of phoneme N-grams across the world’s language families},
	abstract     = {In this article, we investigate the properties of phoneme N-grams across half of the world’s languages. The sizes of three different N-gram distributions of the world’s language families obey a power law. Further, the N-gram distributions of language families parallel the sizes of the families, which also follow a power law distribution. The correlation between N-gram distributions and language family sizes improves with increasing values of N. The study also raises some new questions about the use of N-gram distributions in linguistic research, which we hope to be able to investigate in the future.},
	booktitle    = {Proceedings of the Fourth Swedish Language Technology Conference (SLTC)},
	author       = {Rama, Taraka and Borin, Lars},
	year         = {2012},
}

@article{rama-borin-2014-gram-187121,
	title        = {N-Gram Approaches to the Historical Dynamics of Basic Vocabulary},
	journal      = {Journal of Quantitative Linguistics},
	author       = {Rama, Taraka and Borin, Lars},
	year         = {2014},
	volume       = {21},
	number       = {1},
	pages        = {50--64},
}

@incollection{rama-borin-2015-comparative-197484,
	title        = {Comparative evaluation of string similarity measures for automatic language classification},
	booktitle    = {Sequences in Language and Text},
	author       = {Rama, Taraka and Borin, Lars},
	year         = {2015},
	publisher    = {De Gruyter Mouton},
	ISBN         = {978-3-11-036287-9},
}

@misc{ranta-forsberg-2012-implementing-168685,
	title        = {Implementing Programming Languages},
	author       = {Ranta, Aarne and Forsberg, Markus},
	year         = {2012},
	publisher    = {College Publications},
	address      = {London},
	ISBN         = {978-1-84890-064-6},
}

@inProceedings{rehm-etal-2014-strategic-198556,
	title        = {The strategic impact of META-NET on the regional, national and international level},
	booktitle    = {Proceedings of LREC 2014,  26-31 May, Reykjavik, Iceland },
	author       = {Rehm, Georg and Uszkoreit, Hans and Ananiadou, Sophia and Bel, Núria and Bieleviciene, Audrone and Borin, Lars and Branco, António and Budin, Gerhard and Calzolari, Nicoletta and Daelemans, Walter and Garabík, Radovan and Grobelnik, Marko and Garcia-Mateo, Carmen and Genabith, Josef Van and Hajic, Jan and Hernaez, Inma and Judge, John and Koeva, Svetla and Krek, Simon and Krstev, Cvetana and Lindén, Krister and Magnini, Bernardo and Mariani, Joseph and Mcnaught, John and Melero, Maite and Monachini, Monica and Moreno, Asuncion and Odijk, Jan and Ogrodniczuk, Maciej and Pezik, Piotr and Piperidis, Stelios and Przepiórkowski, Adam and Rögnvaldsson, Eiríkur and Rosner, Michael and Pedersen, Bolette Sandford and Skadina, Inguna and De Smedt, Koenraad and Tadić, Marko and Thompson, Paul and Tufiș, Dan and Váradi, Tamás and Vasiljevs, Andrejs and Vider, Kadri and Zabarskaite, Jolanta},
	year         = {2014},
	ISBN         = {978-2-9517408-8-4},
	pages        = {1517--1524},
}

@article{rehm-etal-2016-strategic-237609,
	title        = {The strategic impact of META-NET on the regional, national and international level},
	abstract     = {This article provides an overview of the dissemination work carried out in META-NET from 2010 until 2015; we describe its impact on the regional, national and international level, mainly with regard to politics and the funding situation for LT topics. The article documents the initiative’s work throughout Europe in order to boost progress and innovation in our field.},
	journal      = {Language resources and evaluation},
	author       = {Rehm, Georg and Uszkoreit, Hans and Ananiadou, Sophia and Bel, Núria and Bielevičienė, Audronė and Borin, Lars and Branco, António and Budin, Gerhard and Calzolari, Nicoletta and Daelemans, Walter and Garabík, Radovan and Grobelnik, Marko and García-Mateo, Carmen and Genabith, Josef Van and Hajič, Jan and Hernáez, Inma and Judge, John and Koeva, Svetla and Krek, Simon and Krstev, Cvetana and Lindén, Krister and Magnini, Bernardo and Mariani, Joseph and Mcnaught, John and Melero, Maite and Monachini, Monica and Moreno, Asunción and Odijk, Jan and Ogrodniczuk, Maciej and Pęzik, Piotr and Piperidis, Stelios and Przepiórkowski, Adam and Rögnvaldsson, Eiríkur and Rosner, Mike and Pedersen, Bolette Sandford and Skadiņa, Inguna and De Smedt, Koenraad and Tadić, Marko and Thompson, Paul and Tufiş, Dan and Váradi, Tamás and Vasiļjevs, Andrejs and Vider, Kadri and Zabarskaitė, Jolanta},
	year         = {2016},
	volume       = {50},
	number       = {2},
	pages        = {351--374},
}

@incollection{ribeck-borin-2014-lexical-201965,
	title        = {Lexical Bundles in Swedish Secondary School Textbooks},
	abstract     = {The present paper describes the process of identifying lexical bundles, i.e., frequently recurring word sequences such as by means of and in the end of, in secondary school history and physics textbooks. To determine genuine lexical bundles, i.e., the word boundaries between lexical bundles and surrounding arbitrary words, it proposes a new approach to the problem of extracting overlapping bundles of different lengths. The results of the structural classification indicate that history uses more NP/PP-based and fewer dependent-clause-based bundles than physics. The comparative analysis manages to restrict this difference to the referential function. History almost exclusively refers to phrases, i.e. within clauses, while physics tends much more to make references across clauses. The article also includes a report on an extension of the study, ongoing work where the automatic identification of multi-word expressions in general is in focus.},
	booktitle    = {Human Language Technology Challenges for Computer Science and Linguistics 5th Language and Technology Conference, LTC 2011, Poznań, Poland, November 25--27, 2011, Revised Selected Papers},
	editor       = {Zygmunt Vetulani and Joseph Mariani},
	author       = {Ribeck, Judy Carola and Borin, Lars},
	year         = {2014},
	publisher    = {Springer International Publishing},
	volume       = {2014},
	number       = {XVI},
	address      = {Cham},
	ISBN         = {978-3-319-08958-4},
	pages        = {238--249},
}

@book{rosen-2016-theory-231969,
	title        = {Theory Exploration and Inductive Theorem Proving},
	abstract     = {We have built two state-of-the-art inductive theorem provers named HipSpec and Hipster. The main issue when automating proofs by induction is to discover essential helper lemmas. Our theorem provers use the technique of theory exploration, a method to systematically discover interesting conclusions about a mathematical theory. We use the existing theory exploration system QuickSpec, which conjectures properties of a program that seem to hold based on testing. The idea is to try to prove these explored conjectures together with the user-stated goal conjecture. By using this idea and connecting it with our previous work on Hip, the Haskell Inductive Prover, we were able to take new leaps in the field of inductive theorem proving.

Additionally, we have developed a benchmark suite named TIP, short for Tons of Inductive Problems, with benchmark problems for inductive theorem provers, and a toolbox for converting and manipulating problems expressed in the TIP format. There were two main reasons for this initiative. Firstly, the inductive theorem proving field lacked a shared benchmark suite as well as a common format. Secondly, the benchmarks that had been used were outdated: all contemporary provers would solve almost every problem. We have so far added hundreds of new challenges to the TIP suite to encourage further research.},
	author       = {Rosén, Dan},
	year         = {2016},
	publisher    = {Chalmers University of Technology},
	address      = {Göteborg},
}

@inProceedings{rosen-etal-2018-error-275363,
	title        = {Error Coding of Second-Language Learner Texts Based on Mostly Automatic Alignment of Parallel Corpora},
	abstract     = {Error coding of second-language learner text, that is, detecting, correcting and annotating errors, is a cumbersome task which requires interpretation of the text to decide what the errors are. This paper describes a system with which the annotator corrects the learner text by editing it prior to the actual error annotation. During the editing, the system automatically generates a parallel corpus of the learner and corrected texts. Based on this, the work of the annotator consists of three independent tasks that are otherwise often conflated: correcting the learner text, repairing inconsistent alignments, and performing the actual error annotation.},
	booktitle    = {Proceedings of CLARIN-2018 conference,  8-10 October 2018, Pisa, Italy},
	author       = {Rosén, Dan and Wirén, Mats  and Volodina, Elena},
	year         = {2018},
}

@inProceedings{rouces-etal-2019-tracking-281308,
	title        = {Tracking Attitudes Towards Immigration in Swedish Media},
	abstract     = {We use a gold standard under construction for sentiment analysis in Swedish to explore how attitudes towards immigration change across time and media. We track the evolution of attitudes starting from the year 2000 for three different Swedish media: the national newspapers Aftonbladet and Svenska Dagbladet, representing different halves of the left–right political spectrum, and the online forum Flashback.},
	booktitle    = {CEUR Workshop Proceedings (Vol. 2364).  Digital Humanities in the Nordic Countries 4th Conference, Copenhagen, Denmark, March 5-8, 2019. },
	author       = {Rouces, Jacobo and Borin, Lars and Tahmasebi, Nina},
	year         = {2019},
	publisher    = {CEUR Workshop Proceedings},
	address      = {Aachen },
}

@inProceedings{rouces-etal-2019-political-281307,
	title        = {Political Stance Analysis Using Swedish Parliamentary Data},
	abstract     = {We process and visualize Swedish parliamentary data using methods from statistics and machine learning, which allows us to obtain insight into the political processes behind the data. We produce plots that let us infer the relative stance of political parties and their members on different topics. In addition, we can infer the degree of homogeneity of individual votes within different parties, as well as the degree of multi-dimensionality of Swedish politics.},
	booktitle    = {CEUR Workshop Proceedings (Vol. 2364).  Digital Humanities in the Nordic Countries 4th Conference, Copenhagen, Denmark, March 5-8, 2019.},
	author       = {Rouces, Jacobo and Borin, Lars and Tahmasebi, Nina},
	year         = {2019},
	publisher    = {CEUR Workshop Proceedings},
	address      = {Aachen},
}

@inProceedings{rouces-etal-2020-creating-290695,
	title        = {Creating an Annotated Corpus for Aspect-Based Sentiment Analysis in Swedish},
	abstract     = {Aspect-Based Sentiment Analysis constitutes a more fine-grained alternative to traditional sentiment analysis at sentence level. In addition to a sentiment value denoting how positive or negative a particular opinion or sentiment expression is, it identifies additional aspects or 'slots' that characterize the opinion. Some typical aspects are target and source, i.e. who holds the opinion and which entity or aspect the opinion is about. We present a large Swedish corpus annotated for Aspect-Based Sentiment Analysis. Each sentiment expression is annotated as a tuple that contains the following fields: one among 5 possible sentiment values, the target, the source, and whether the sentiment expressed is ironic. In addition, the linguistic element that conveys the sentiment is identified too. Sentiment for a particular topic is also annotated at title, paragraph and document level. The documents are articles obtained from two Swedish media (Svenska Dagbladet and Aftonbladet) and one online forum (Flashback), totalling around 4000 documents. The corpus is freely available and we plan to use it for training and testing an Aspect-Based Sentiment Analysis system.},
	booktitle    = {Proceedings of the 5th conference in Digital Humanities in the Nordic Countries, Riga, Latvia, October 21-23, 2020.},
	author       = {Rouces, Jacobo and Borin, Lars and Tahmasebi, Nina},
	year         = {2020},
	publisher    = {CEUR Workshop Proceedings},
}

@inProceedings{rouces-etal-2018-defining-264721,
	title        = {Defining a gold standard for a Swedish sentiment lexicon: Towards higher-yield text mining in the digital humanities},
	abstract     = {There is an increasing demand for multilingual sentiment analysis, and most work on sentiment lexicons is still carried out based on English lexicons like WordNet. In addition, many of the non-English sentiment lexicons that do exist have been compiled by (machine) translation from English resources, thereby arguably obscuring possible language-specific characteristics of sentiment-loaded vocabulary. In this paper we describe the creation from scratch of a gold standard for the sentiment annotation of Swedish terms as a first step towards the creation of a full-fledged sentiment lexicon for Swedish.},
	booktitle    = {CEUR Workshop Proceedings vol. 2084. Proceedings of the Digital Humanities in the Nordic Countries 3rd Conference, Helsinki, Finland, March 7-9, 2018. Edited by Eetu Mäkelä, Mikko Tolonen and Jouni Tuominen},
	author       = {Rouces, Jacobo and Borin, Lars and Tahmasebi, Nina and Rødven-Eide, Stian },
	year         = {2018},
	publisher    = {University of Helsinki, Faculty of Arts},
	address      = {Helsinki},
}

@inProceedings{rouces-etal-2018-generating-264719,
	title        = {Generating a Gold Standard for a Swedish Sentiment Lexicon},
	abstract     = {We create a gold standard for sentiment annotation of Swedish terms, using the freely available SALDO lexicon and the Gigaword corpus. For this purpose, we employ a multi-stage approach combining corpus-based frequency sampling, direct score annotation and Best-Worst Scaling. In addition to obtaining a gold standard, we analyze the data from our process and we draw conclusions about the optimal sentiment model.},
	booktitle    = {LREC 2018, Eleventh International Conference on Language Resources and Evaluation, May 7-12, 2018, Miyazaki (Japan)},
	author       = {Rouces, Jacobo and Tahmasebi, Nina and Borin, Lars and Rødven-Eide, Stian },
	year         = {2018},
	publisher    = {ELRA},
	address      = {Miyazaki},
	ISBN         = {979-10-95546-00-9},
}

@inProceedings{rouces-etal-2018-sensaldo-264720,
	title        = {SenSALDO: Creating a Sentiment Lexicon for Swedish},
	abstract     = {The natural language processing subfield known as sentiment analysis or opinion mining has seen an explosive expansion over the last decade or so, and sentiment analysis has become a standard item in the NLP toolbox. Still, many theoretical and methodological questions remain unanswered and resource gaps unfilled. Most work on automated sentiment analysis has been done on English and a few other languages; for most written languages of the world, this tool is not available. This paper describes the development of an extensive sentiment lexicon for written (standard) Swedish. We investigate different methods for developing a sentiment lexicon for Swedish. We use an existing gold standard dataset for training and testing. For each word sense from the SALDO Swedish lexicon, we assign a real-valued sentiment score in the range [-1,1] and produce a sentiment label. We implement and evaluate three methods: a graph-based method that iterates over the SALDO structure, a method based on random paths over the SALDO structure and a corpus-driven method based on word embeddings. The resulting sense-disambiguated sentiment lexicon (SenSALDO) is an open source resource and freely available from Språkbanken, The Swedish Language Bank at the University of Gothenburg.},
	booktitle    = {LREC 2018, Eleventh International Conference on Language Resources and Evaluation, 7-12 May 2018, Miyazaki (Japan)},
	author       = {Rouces, Jacobo and Tahmasebi, Nina and Borin, Lars and Rødven-Eide, Stian },
	year         = {2018},
	publisher    = {ELRA},
	address      = {Miyazaki},
	ISBN         = {979-10-95546-00-9},
}

@techreport{roxendal-2013-state-189377,
	title        = {State Chart XML (SCXML): State Machine Notation for Control Abstraction – W3C Working Draft 6 December 2012},
	author       = {Roxendal, Johan},
	year         = {2013},
	publisher    = {MIT},
	address      = {Cambridge, USA},
}

@techreport{roxendal-2013-state-189376,
	title        = {State Chart XML (SCXML): State Machine Notation for Control Abstraction – W3C Last Call Working Draft 1 August 2013},
	author       = {Roxendal, Johan},
	year         = {2013},
	publisher    = {MIT},
	address      = {Cambridge, USA},
}

@article{sandberg-etal-2019-issue-285614,
	title        = {Issue Salience on Twitter During Swedish Party Leaders’ Debates },
	abstract     = {The objective of this study is to contribute knowledge about the formation of political agendas on Twitter during mediated political events, using the party leaders’ debates in Sweden before the general election of 2014 as a case study. Our findings show that issues brought up during the debates were largely mirrored on Twitter, with one striking discrepancy. Contrary to our expectations, issues on the left-right policy dimension were more salient on Twitter than in the debates, whereas issues such as the environment, immigration and refugees, all tied to a liberal-authoritarian value axis, were less salient on Twitter.},
	journal      = {Nordicom Review},
	author       = {Sandberg, Linn and Bjereld, Ulf and Bunyik, Karina and Forsberg, Markus and Johansson, Richard},
	year         = {2019},
	volume       = {40},
	number       = {2},
	pages        = {49--61},
}

@article{sandfordpedersen-etal-2005-sprogteknologiske-35156,
	title        = {Sprogteknologiske ordbaser for nordiske sprog - rapport fra et forskning-netværk},
	journal      = {Nordiske studiar i leksikografi},
	author       = {Sandford Pedersen, Bolette and Fjeld, Ruth V. and Toporowska Gronostaj, Maria},
	year         = {2005},
	volume       = {7},
}

@edited_book{saxena-borin-2006-lesser-33862,
	title        = {Lesser-known languages of South Asia. Status and policies, case studies and applications of information technology},
	editor       = {Saxena, Anju and Borin, Lars},
	year         = {2006},
	publisher    = {Mouton de Gruyter},
	address      = {Berlin},
	ISBN         = {3-11-018976-3},
}

@inProceedings{saxena-borin-2011-dialect-140689,
	title        = {Dialect Classification in the Himalayas: a Computational Approach},
	abstract     = {Linguistic fieldwork data – in the form of basic vocabulary lists – for nine closely related language varieties are compared using an automatic procedure with manual feedback, whose major advantage is its complete consistency. The results of the vocabulary comparison turn out to be in accord with other linguistic features, making this methodology a promising addition to the toolbox of genetic linguistics.},
	booktitle    = {NEALT Proceedings Series (NODALIDA 2011 Conference Proceedings)},
	author       = {Saxena, Anju and Borin, Lars},
	year         = {2011},
	volume       = {11},
	pages        = {307--310},
}

@incollection{saxena-borin-2013-carving-184759,
	title        = {Carving Tibeto-Kanauri by its joints: Using basic vocabulary lists for genetic grouping of languages},
	booktitle    = {Approaches to Measuring Linguistic Differences},
	author       = {Saxena, Anju and Borin, Lars},
	year         = {2013},
	publisher    = {De Gruyter Mouton},
	address      = {Berlin},
	ISBN         = {978-3-11-030525-8},
	pages        = {175--198},
}

@inProceedings{skadina-etal-2011-meta-148648,
	title        = {META-NORD: Towards sharing of language resources in Nordic and Baltic countries},
	abstract     = {This paper introduces the META-NORD project, which develops the Nordic and Baltic part of the European open language resource infrastructure. META-NORD works on assembling, linking across languages, and making widely available the basic language resources used by developers, professionals and researchers to build specific products and applications. The goals of the project, the overall approach and specific action lines on wordnets, terminology resources and treebanks are described. Moreover, results achieved in the first five months of the project, i.e. language whitepapers, metadata specification and IPR management, are presented.},
	booktitle    = {Proceedings of the Workshop on Language Resources, Technology and Services in the Sharing Paradigm},
	author       = {Skadina, Inguna and Vasiljevs, Andrejs and Borin, Lars and De Smedt, Koenraad and Lindén, Krister and Rögnvaldsson, Eiríkur},
	year         = {2011},
	pages        = {107--114},
}

@inProceedings{skadina-etal-2013-baltic-194532,
	title        = {Baltic and Nordic parts of the European linguistic infrastructure},
	booktitle    = {Proceedings of the 19th Nordic Conference of Computational Linguistics (NODALIDA 2013), 22-24 May 2013, Oslo, Norway},
	author       = {Skadina, Inguna and Vasiljevs, Andrejs and Borin, Lars and Lindén, Krister and Losnegaard, Gyri and Pedersen, Bolette Sandford and Rozis, Roberts and De Smedt, Koenraad},
	year         = {2013},
	ISBN         = {978-91-7519-589-6},
	pages        = {195--211},
}

@inProceedings{skoldberg-etal-2013-between-186041,
	title        = {Between Grammars and Dictionaries: a Swedish Constructicon },
	abstract     = {This paper introduces the Swedish Constructicon (SweCxn), a database of Swedish constructions currently under development. We also present a small study of the treatment of constructions in Swedish (paper) dictionaries, thus illustrating the need for a constructionist approach, and discuss three different methods used to identify potential constructions for inclusion in the constructicon. SweCxn is a freely available electronic resource, with a particular focus on semi-general linguistic patterns of the type that are difficult to account for from a purely lexicographic or a purely grammatical perspective, and which therefore have tended to be neglected in both dictionaries and grammars. Far from being a small set of borderline cases, such constructions are both numerous and common. They are also quite problematic for second language acquisition as well as LT applications. Accordingly, various kinds of multi-word units have received more attention in recent years, not least from a lexicographic perspective. The coverage, however, is only partial, and the productivity of many constructions is hard to capture from a lexical viewpoint. To identify constructions for SweCxn, we use a combination of methods, such as working from existing construction descriptions for Swedish and other languages, applying LT tools to discover recurring patterns in texts, and extrapolating constructional information from dictionaries.},
	booktitle    = {Kosem, I., Kallas, J., Gantar, P., Krek, S., Langemets, M., Tuulik, M. (eds.) 2013. Electronic lexicography in the 21st century: thinking outside the paper. Proceedings of the eLex 2013 conference, 17-19 October 2013, Tallinn, Estonia. Ljubljana/Tallinn: Trojina, Institute for Applied Slovene Studies/Eesti Keele Instituut.},
	author       = {Sköldberg, Emma and Bäckström, Linnéa and Borin, Lars and Forsberg, Markus and Lyngfelt, Benjamin and Olsson, Leif-Jöran and Prentice, Julia and Rydstedt, Rudolf and Tingsell, Sofia and Uppström, Jonatan},
	year         = {2013},
	pages        = {310--327},
}

@inProceedings{skoldberg-toporowskagronostaj-2006-swedish-33612,
	title        = {Swedish Appellativized Forenames in Compounds - A Lexicographic Approach},
	booktitle    = {Proceedings XII Euralex International Congress. Torino, Italia, September 6-9, 2006},
	author       = {Sköldberg, Emma and Toporowska Gronostaj, Maria},
	year         = {2006},
	volume       = {2},
	ISBN         = {88-7694-918-6},
	pages        = {1193--1199},
}

@inProceedings{skoldberg-toporowskagronostaj-2008-from-73087,
	title        = {From Subdomains and Parameters to Collocational Patterns: On the Analysis of Swedish Medical Collocations},
	booktitle    = {Proceedings of the XIII EURALEX International Congress July 15 - 19 2008},
	author       = {Sköldberg, Emma and Toporowska Gronostaj, Maria},
	year         = {2008},
	ISBN         = {978-84-96742-67-3},
	pages        = {1421--1432},
}

@inProceedings{skoldberg-toporowskagronostaj-2008-modell-91906,
	title        = {Modell för beskrivning av kollokationer i ett medicinskt lexikon (MedLex)},
	booktitle    = {Nordiske studier i leksikografi 9. Rapport fra konference om lexikografi i Norden, Akureyri 22.-26. maj 2007},
	author       = {Sköldberg, Emma and Toporowska Gronostaj, Maria},
	year         = {2008},
	ISBN         = {978-9979-654-05-6},
	pages        = {433--445},
}

@article{skoldberg-toporowskagronostaj-2009-charmknutte-101453,
	title        = {Charmknutte, viktigpetter och kladdmaja. Substantiverade förnamn i sammansättningar ur ett lexikografiskt perspektiv},
	journal      = {Studia anthroponymica Scandinavica},
	author       = {Sköldberg, Emma and Toporowska Gronostaj, Maria},
	year         = {2009},
	volume       = {27},
	pages        = {73--96},
}

@inProceedings{smith-etal-2011-developing-152723,
	title        = {Developing a toolkit for written information materials for patients with colorectal cancer undergoing elective surgery},
	abstract     = {This study examines language complexity, readability and suitability of written health information materials given to patients undergoing colorectal cancer (CRC) surgery. The overall aim is to investigate whether the implementation of adapted, person-centred information and communication for patients with CRC undergoing elective surgery can enhance the patients’ self-care beliefs and well-being during recovery in the phase following diagnosis and initial treatment. Several explorative, qualitative studies are planned and will function both as a basis for the proposed interventions and provide explanations for the actual processes leading to the desired outcomes. Patients’ knowledge enablement will be reached by several interrelated intervention strategies and specific activities. One of these strategies deals with means to facilitate patients’ information seeking patterns, and the goal is to provide patients with written information materials according to preferences for complex and detailed or legible texts. Thus, the interventions planned aim to enhance a movement from receiving information and instructions to participating in knowing. Written and printed patient information material from 28 Swedish clinics for patients diagnosed with CRC undergoing elective surgery was selected for analysis by means of standard metrics and more elaborate language technology techniques. Various text parameters such as lexical variation, frequency bands and the use of terminology were examined. The material was also analysed using a Suitability Assessment Instrument in order to examine content, literacy demand, graphic illustrations, layout and typography, learning stimulation and finally cultural appropriateness. In addition, five focus groups were conducted where patients were asked to give their experiences of using written information. Results from the language technology analysis showed variety in the materials, which could be divided into easy, medium and difficult to read and comprehend. Patients in the focus groups said they would like written materials to be levelled in order to gain information stepwise, but also stressed the importance of information given both orally and in writing, and that the two must correspond. Using the SAM instrument was a good complement for deeper understanding, and taking all three analyses into account, we aim to design a balanced toolkit for how best to design written information materials where a person-tailored approach can be offered.},
	booktitle    = {Svenska Läkaresällskapets Riksstämman},
	author       = {Smith, Frida and Carlsson, Eva and Friberg, Febe and Kokkinakis, Dimitrios and Forsberg, Markus and Öhrn, Matilda and Öhlén, Joakim},
	year         = {2011},
}

@article{smith-etal-2014-readability-188146,
	title        = {Readability, suitability and comprehensibility in patient education materials for Swedish patients with colorectal cancer undergoing elective surgery: A mixed method design.},
	abstract     = {To characterize education materials provided to patients undergoing colorectal cancer surgery to gain a better understanding of how to design readable, suitable, comprehensible materials.},
	journal      = {Patient education and counseling},
	author       = {Smith, Frida and Carlsson, Eva and Kokkinakis, Dimitrios and Forsberg, Markus and Kodeda, Karl and Sawatzky, Richard and Friberg, Febe and Öhlén, Joakim},
	year         = {2014},
	volume       = {94},
	number       = {2},
	pages        = {202--209},
}

@inProceedings{smith-etal-2012-forbattra-170895,
	title        = {Hur kan vi förbättra skriftligt informations- och utbildningsmaterial för patienter som opereras elektivt för kolorektal cancer?},
	abstract     = {Colorectal cancer (CRC) is the third most common cancer diagnosis in Sweden, affecting just over 5,500 people annually. The primary treatment is surgery, complemented by pre- and postoperative oncological treatment. Standardized concepts for accelerated care pathways with shorter hospital stays place considerable focus on physical rehabilitation, but less on the psychological strain of undergoing surgery for a cancer diagnosis. Patients are expected to take substantial responsibility for their own rehabilitation, both in hospital and at home. To be prepared, they need both written and oral information.
The aim of the study was to survey and characterize the written information and education material (IOU) used for patients undergoing elective surgery for CRC. A further aim was to describe patients’ views on the structure and content of the IOU.
IOU from 28 clinics that operate on patients with CRC were collected (220 documents in total). To provide a measure of the difficulty of the texts, a language technology analysis was performed on all IOU, measuring among other things word length, sentence structure, and comparisons with other types of literature. For 117 of the documents, a suitability analysis was carried out with the SAM+CAM instrument, assessing domains such as content, readability, illustrations, layout, and stimulation and motivation for learning. Five focus groups were conducted in which patients were asked to describe what they think characterizes good and bad IOU, what content they find missing, and when and in what way they want the material handed out.
The results of the language technology and suitability analyses show that most IOU were rated as ”adequate”, but the spread was large. The patients wished for more level-differentiated material, allowing them to choose how much information to receive at a given moment. Several topics were missing, or were described too vaguely for the patients to feel secure at discharge.
The results of the three analysis methods should be usable for developing a ”toolbox” for designing better targeted IOU for this patient group in the future.
},
	booktitle    = {Nationella konferensen i Cancervård, 24-25 maj 2012, Stockholm},
	author       = {Smith, Frida and Öhlén, Joakim and Carlsson, Eva and Forsberg, Markus and Kokkinakis, Dimitrios and Friberg, Febe},
	year         = {2012},
}

@article{smith-etal-2012-studie-170897,
	title        = {Ny studie visar hur information till patienter med kolorektal cancer kan förbättras},
	abstract     = {Written information material is often written at too high a level and places high demands on the intended reader (the patient). Besides readability, there are several other factors to evaluate in order to judge whether the material is suitable: content, structure, layout and typeface, illustrations, and learning and motivation should all be taken into account. More suitable, better adapted material can help people with illness ask better questions in conversations with healthcare staff, and it can make the person less uncertain and anxious about the unknown that awaits. A new study within the research project PINCORE (person-centred information and communication in colorectal cancer care) aims to improve information and communication in colorectal cancer care.},
	journal      = {Cancervården},
	author       = {Smith, Frida and Öhlén, Joakim and Carlsson, Eva and Friberg, Febe and Forsberg, Markus and Kokkinakis, Dimitrios},
	year         = {2012},
	number       = {5},
	pages        = {18--21},
}

@inProceedings{sundqvist-etal-2012-acoustic-162452,
	title        = {Acoustic and perceptual characteristics of speech in 22q11 deletion syndrome: Measures of voice onset time and syllable durations related to articulation and prosody.},
	booktitle    = {Proceedings of ICPLA2012},
	author       = {Sundqvist, Maria and Lindh, Jonas and Hartelius, Lena and Persson, Christina},
	year         = {2012},
	volume       = {14},
}

@article{sundqvist-etal-2016-syllable-227628,
	title        = {Syllable Repetition vs. Finger Tapping: Aspects of Motor Timing in 100 Healthy Adults.},
	abstract     = {In this study we systematically compared syllable repetition and finger tapping in healthy adults, and explored the possible impacts of tempo, metronome, musical experience, and age on motor timing ability. One hundred healthy adults used finger tapping and syllable repetition to perform an isochronous pulse in three different tempi, with and without a metronome. Results showed that motor timing was more accurate with finger tapping than with syllable repetition in the slowest tempo, and that motor timing ability was better with the metronome than without. Persons with musical experience showed better motor timing accuracy than persons without such experience, and timing asynchrony increased with increasing age. The slowest tempo, 90 bpm, posed extra challenges to the participants. We speculate that this pattern reflects the fact that the slow tempo lies outside the 3-8 Hz syllable rate of natural speech, which in turn has been linked to theta-based oscillations in the brain.},
	journal      = {Motor control},
	author       = {Sundqvist, Maria and Åsberg Johnels, Jakob and Lindh, Jonas and Laakso, Katja and Hartelius, Lena},
	year         = {2016},
	volume       = {20},
	number       = {3},
	pages        = {233--254},
}

@inProceedings{tahmasebi-2018-study-264722,
	title        = {A Study on Word2Vec on a Historical Swedish Newspaper Corpus},
	abstract     = {Detecting word sense changes can be of great interest in the field of digital humanities. Thus far, most investigations and automatic methods have been developed and carried out on English text, and most recent methods make use of word embeddings. This paper presents a study on using Word2Vec, a neural word embedding method, on a Swedish historical newspaper collection. Our study includes a set of 11 words, and our focus is the quality and stability of the word vectors over time. We investigate whether a word embedding method like Word2Vec can be used effectively on texts where the volume and quality are limited.},
	booktitle    = {CEUR Workshop Proceedings. Vol. 2084. Proceedings of the Digital Humanities in the Nordic Countries 3rd Conference, Helsinki, Finland, March 7-9, 2018. Edited by Eetu Mäkelä, Mikko Tolonen, Jouni Tuominen},
	author       = {Tahmasebi, Nina},
	year         = {2018},
	publisher    = {University of Helsinki, Faculty of Arts},
	address      = {Helsinki},
}
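
As an informal illustration of the approach described in the abstract above, the following minimal Python sketch (using the gensim library) trains one Word2Vec model per time slice of a diachronic corpus and inspects a word's nearest neighbours over time. The file names, time slices and query word are hypothetical placeholders, not the paper's actual setup.

    # Minimal sketch: one Word2Vec model per time slice of a diachronic
    # corpus; file names and tokenization are hypothetical placeholders.
    from gensim.models import Word2Vec

    def load_sentences(path):
        """Read one tokenized sentence per line (whitespace-separated)."""
        with open(path, encoding="utf-8") as f:
            return [line.split() for line in f]

    slices = {"1880s": "news_1880s.txt", "1900s": "news_1900s.txt"}
    models = {
        label: Word2Vec(
            sentences=load_sentences(path),
            vector_size=100,  # dimensionality of the word vectors
            window=5,         # context window size
            min_count=5,      # ignore words rarer than this
            sg=1,             # skip-gram architecture
            epochs=5,
        )
        for label, path in slices.items()
    }

    # The stability of a word's neighbourhood across slices is one way
    # to probe vector quality on limited historical data.
    for label, model in models.items():
        if "telefon" in model.wv:
            print(label, model.wv.most_similar("telefon", topn=5))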

@article{tahmasebi-etal-2015-visions-212969,
	title        = {Visions and open challenges for a knowledge-based culturomics},
	abstract     = {The concept of culturomics was born out of the availability of massive amounts of textual data and the interest in making sense of cultural and language phenomena over time. Thus far, however, culturomics has only made use of, and shown the great potential of, statistical methods. In this paper, we present a vision for a knowledge-based culturomics that complements traditional culturomics. We discuss the possibilities and challenges of combining knowledge-based methods with statistical methods and address major challenges that arise due to the nature of the data: diversity of sources, changes in language over time, as well as the temporal dynamics of information in general. We address all layers needed for knowledge-based culturomics, from natural language processing and relations to summaries and opinions.},
	journal      = {International Journal on Digital Libraries},
	author       = {Tahmasebi, Nina and Borin, Lars and Capannini, Gabriele and Dubhashi, Devdatt and Exner, Peter and Forsberg, Markus and Gossen, Gerhard and Johansson, Fredrik and Johansson, Richard and Kågebäck, Mikael and Mogren, Olof and Nugues, Pierre and Risse, Thomas},
	year         = {2015},
	volume       = {15},
	number       = {2-4},
	pages        = {169--187},
}

@misc{tahmasebi-etal-2019-proceedings-285886,
	title        = {Proceedings of the 1st International Workshop on Computational Approaches to Historical Language Change, August 2, 2019, Florence, Italy},
	author       = {Tahmasebi, Nina and Borin, Lars and Jatowt, Adam  and Xu, Yang},
	year         = {2019},
	publisher    = {Association for Computational Linguistics},
	address      = {Stroudsburg, PA},
	ISBN         = {978-1-950737-31-4},
}

@inProceedings{tahmasebi-etal-2016-clarin-233899,
	title        = {SWE-CLARIN – the Swedish CLARIN project – aims and activities},
	booktitle    = {Digital Humanities in the Nordic countries, Oslo, March 15-17 2016},
	author       = {Tahmasebi, Nina and Borin, Lars and Jordan, Caspar and Ekman, Stefan},
	year         = {2016},
	pages        = {122--123},
}

@inProceedings{tahmasebi-etal-2019-convergence-280684,
	title        = {A Convergence of Methodologies: Notes on Data-Intensive Humanities Research},
	abstract     = {In this paper, we discuss a data-intensive research methodology for the digital humanities. We highlight the differences and commonalities between quantitative and qualitative research methodologies in relation to a data-intensive research process. We argue that issues of representativeness and reduction must be in focus for all phases of the process: from the status of texts as such, over their digitization, to pre-processing and methodological exploration.},
	booktitle    = {CEUR workshop proceedings ; 2364. Proceedings of the 4th Conference on Digital Humanities in the Nordic Countries, Copenhagen, Denmark, March 5-8, 2019},
	editor       = {Costanza Navarretta and Manex Agirrezabal and Bente Maegaard},
	author       = {Tahmasebi, Nina and Hagen, Niclas and Brodén, Daniel and Malm, Mats},
	year         = {2019},
	publisher    = {CEUR workshop proceedings},
	address      = {Aachen},
}

@article{tahmasebi-hengchen-2019-strengths-291189,
	title        = {The Strengths and Pitfalls of Large-Scale Text Mining for Literary Studies},
	abstract     = {This paper is an overview of the opportunities and challenges of using large-scale text mining to answer research questions that stem from the humanities in general and literature specifically. In this paper, we will discuss a data-intensive research methodology and how different views of digital text affect answers to research questions. We will discuss results derived from text mining, how these results can be evaluated, and their relation to hypotheses and research questions. Finally, we will discuss some pitfalls of computational literary analysis and give some pointers as to how these can be avoided.},
	journal      = {Samlaren : tidskrift för svensk litteraturvetenskaplig forskning},
	author       = {Tahmasebi, Nina and Hengchen, Simon},
	year         = {2019},
	volume       = {140},
	pages        = {198--227},
}

@inProceedings{tahmasebi-risse-2017-uses-256649,
	title        = {On the Uses of Word Sense Change for Research in the Digital Humanities},
	abstract     = {With advances in technology and culture, our language changes. We invent new words, add or change meanings of existing words and change names of existing things. Unfortunately, our language does not carry a memory; words, expressions and meanings used in the past are forgotten over time. When searching and interpreting content from archives, language changes pose a great challenge. In this paper, we present results of automatic word sense change detection and show the utility for archive users as well as digital humanities’ research. Our method is able to capture changes that relate to the usage and culture of a word that cannot easily be found using dictionaries or other resources.},
	booktitle    = {Research and Advanced Technology for Digital Libraries - 21st International Conference on Theory and Practice of Digital Libraries, TPDL 2017, Thessaloniki, Greece, September 18-21, 2017. Proceedings},
	editor       = {Jaap Kamps and Giannis Tsakonas and Yannis Manolopoulos and Lazaros Iliadis and Ioannis Karydis},
	author       = {Tahmasebi, Nina and Risse, Thomas},
	year         = {2017},
	publisher    = {Springer Verlag},
	address      = {Cham},
	ISBN         = {978-3-319-67007-2},
}

@inProceedings{tahmasebi-risse-2017-finding-256637,
	title        = {Finding Individual Word Sense Changes and their Delay in Appearance},
	abstract     = {We present a method for detecting word sense changes by utilizing automatically induced word senses. Our method works on the level of individual senses and allows a word to have, e.g., one stable sense and then add a novel sense that later experiences change. Senses are grouped based on polysemy to find linguistic concepts, and we can find broadening and narrowing as well as novel (polysemous and homonymic) senses. We evaluate on a test set and present recall and estimates of the time between expected and found change.},
	booktitle    = {Proceedings of Recent Advances in Natural Language Processing 2017. Varna, Bulgaria 2–8 September, 2017},
	editor       = {Galia Angelova and Kalina Bontcheva and Ruslan Mitkov and Ivelina Nikolova and Irina Temnikova},
	author       = {Tahmasebi, Nina and Risse, Thomas},
	year         = {2017},
	ISBN         = {978-954-452-048-9},
}

@inProceedings{theiler-bouma-2012-price-172733,
	title        = {Two for the price of one: an LFG treatment of sentence initial object es in German.},
	abstract     = {We present an analysis of sentence-initial object es ‘it’ in German. The weak pronoun es may only realize such an object under specific information-structural conditions. We follow recent work suggesting these conditions are exactly those that license the use of the presentational construction, marked by a sentence-initial dummy es. We propose that the initial objects are an example of function amalgamation, show that only objects that may also appear in the clause-internal postverbal domain can participate in this fusion, and make this precise in LFG. We end the paper with a contrastive discussion.},
	booktitle    = {Proceedings of LFG'12. Miriam Butt and Tracy Holloway King (Eds.)},
	author       = {Theiler, Nadine and Bouma, Gerlof},
	year         = {2012},
	pages        = {603--623},
}

@article{themistocleous-etal-2018-identification-273026,
	title        = {Identification of Mild Cognitive Impairment From Speech in Swedish Using Deep Sequential Neural Networks},
	abstract     = {While people with mild cognitive impairment (MCI) portray noticeably incipient memory difficulty in remembering events and situations, along with problems in decision making, planning, and finding their way in familiar environments, detailed neuropsychological assessments also indicate deficits in language performance. To this day, there is no cure for dementia, but early-stage treatment can delay the progression of MCI; thus, the development of valid tools for identifying early cognitive changes is of great importance. In this study, we provide an automated machine learning method, using Deep Neural Network Architectures, that aims to identify MCI. Speech materials were obtained using a reading task during evaluation sessions, as part of the Gothenburg MCI research study. Measures of vowel duration, vowel formants (F1 to F5), and fundamental frequency were calculated from the speech signals. To learn the acoustic characteristics associated with MCI vs. healthy controls, we trained and evaluated ten Deep Neural Network Architectures and measured how accurately they can diagnose participants that are unknown to the model. We evaluated the models using two evaluation tasks: a 5-fold cross-validation and a split of the data into a 90% training and a 10% evaluation set. The findings suggest, first, that the acoustic features provide significant information for the identification of MCI; second, that the best Deep Neural Network Architectures can classify MCI and healthy controls with high classification accuracy (M = 83%); and third, that the model has the potential to offer accuracy higher than 84% if trained with more data (cf. SD≈15%). The Deep Neural Network Architecture proposed here constitutes a method that contributes to the early diagnosis of cognitive decline, quantifies the progression of the condition, and enables suitable therapeutics.},
	journal      = {Frontiers in Neurology},
	author       = {Themistocleous, Charalambos and Eckerström, Marie and Kokkinakis, Dimitrios},
	year         = {2018},
	volume       = {9},
	pages        = {1--10},
}
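
As a rough sketch of the classification setup the abstract above describes (acoustic feature vectors such as vowel duration, fundamental frequency and formants F1–F5 fed to a neural classifier, evaluated with 5-fold cross-validation), the following Python example uses scikit-learn on synthetic data; it is an assumption-laden illustration, not the authors' architecture.

    # Sketch: acoustic features -> neural classifier, 5-fold CV.
    # Synthetic data; not the network architectures used in the paper.
    import numpy as np
    from sklearn.model_selection import cross_val_score
    from sklearn.neural_network import MLPClassifier
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler

    rng = np.random.default_rng(0)
    n = 200
    # Columns: vowel duration, F0, and formants F1..F5 (7 features).
    X = rng.normal(size=(n, 7))
    y = rng.integers(0, 2, size=n)  # 0 = healthy control, 1 = MCI

    clf = make_pipeline(
        StandardScaler(),
        MLPClassifier(hidden_layer_sizes=(32, 16), max_iter=1000,
                      random_state=0),
    )
    scores = cross_val_score(clf, X, y, cv=5)
    print("5-fold accuracy: %.2f +/- %.2f" % (scores.mean(), scores.std()))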

@inProceedings{themistocleous-kokkinakis-2018-themis-265112,
	title        = {THEMIS-SV: Automatic classification of language disorders from speech signals},
	abstract     = {Background and Aims:
Brain injuries resulting from stroke can affect the production of speech, resulting in different types of language impairments, such as aphasia. Studying these productions manually is an extremely cumbersome and time-consuming process. The aim of this paper is to present THEMIS-SV: a system that enables the automatic transcription of speech signals and the segmentation of vowels and consonants in Swedish.

Method:
The input to the system consists of speech recordings. The system processes the recordings and returns an output with three tiers: the utterance tier, the word tier, and the vowels/consonants tier.

Results:
The output of the system is a fast and reliable transcription and segmentation of speech, which is very close to transcriptions and segmentations performed manually. The automatic segmentation of speech enables targeted acoustic measurements, such as measurements of consonant spectra, formant frequencies of vowels, fundamental frequency, pauses, and speech rate, as well as other acoustic measurements known to differentiate between the different types of language disorders.

Conclusion:
The method proposed here can be employed for the analysis of the speech of individuals with post-stroke aphasia and other language disorders and constitutes a promising step towards a fully automated differential diagnostic tool for language disorders.},
	booktitle    = {Abstracts of the 4th European Stroke Organisation Conference (ESOC 2018). Gothenburg, Sweden, 16-18 May, 2018},
	author       = {Themistocleous, Charalambos and Kokkinakis, Dimitrios},
	year         = {2018},
}

@inProceedings{themistocleous-kokkinakis-2019-speech-289021,
	title        = {Speech and Mild Cognitive Impairment detection},
	abstract     = {It is of great importance to detect objective markers that can enable the early and fast identification of individuals with Mild Cognitive Impairment (MCI) from healthy individuals, to inform patient care, family and treatment planning. Connected speech productions can offer such markers. This study analyses recordings from picture description tasks by Swedish individuals with MCI and healthy control individuals (HC) and shows that voice quality, periodicity, and speech rate distinguish individuals with MCI from HC.},
	booktitle    = {Proceedings of the 10th International Conference of Experimental Linguistics, 25-27 September 2019, Lisbon, Portugal},
	editor       = {Antonis Botinis},
	author       = {Themistocleous, Charalambos and Kokkinakis, Dimitrios},
	year         = {2019},
	publisher    = {ExLing Society},
	ISBN         = {978-618-84585-0-5},
}

@inProceedings{themistocleous-etal-2018-effects-270215,
	title        = {Effects of Mild Cognitive Impairment on vowel duration},
	abstract     = {Mild cognitive impairment (MCI) is a neurological condition characterized by a noticeable decline of cognitive abilities, including communicative and linguistic skills. In this study, we measured the duration of vowels produced in a reading task by 55 speakers (30 healthy controls and 25 with MCI). The main results showed that MCI speakers differed significantly from HC in vowel duration, as MCI speakers produced overall longer vowels. Also, we found that gender effects on vowel duration differed between MCI and HC. One significant aspect of these findings is that they highlight the contribution of vowel acoustic features as markers of MCI.},
	booktitle    = {Proceedings of the 9th Tutorial & Research Workshop on Experimental Linguistics, 28 - 30 August 2018, Paris, France},
	editor       = {Antonis Botinis},
	author       = {Themistocleous, Charalambos and Kokkinakis, Dimitrios and Eckerström, Marie and Fraser, Kathleen and Lundholm Fors, Kristina},
	year         = {2018},
	ISBN         = {978-960-466-162-6},
}

@misc{tidemann-tahmasebi-2017-proceedings-264302,
	title        = {Proceedings of the 21st Nordic Conference on Computational Linguistics, NODALIDA 2017, Gothenburg, Sweden, May 22-24, 2017},
	author       = {Tiedemann, Jörg and Tahmasebi, Nina},
	year         = {2017},
	publisher    = {Association for Computational Linguistics},
	ISBN         = {978-91-7685-601-7},
}

@article{toporowskagronostaj-2005-elektroniska-34035,
	title        = {Elektroniska ordböcker i Sverige: nutid och framtid},
	abstract     = {The article Swedish electronic dictionaries: the present and the future aims to survey the state of the art of electronic dictionaries in Sweden. The focus is on CD-ROM dictionaries, mainly defining ones, and their functionalities of a technical and conceptual character. It is assumed here that the content, structure and functionalities of the electronic dictionaries of today can make a relevant contribution to designing the truly electronic dictionaries of the future. Some proposals for extending the content of next-generation dictionaries are put forward. The question of providing more sophisticated structural and functional access to the information is also discussed.},
	journal      = {LexicoNordica},
	author       = {Toporowska Gronostaj, Maria},
	year         = {2005},
	volume       = {12-2005},
	pages        = {87--107},
}

@article{toporowskagronostaj-skoldberg-2008-betydelseindikatorer-70495,
	title        = {Betydelseindikatorer och tematiska slingor. Om jakten på rätt betydelse i framtida svenska digitala ordböcker},
	journal      = {Nog ordat? Festskrift till Sven-Göran Malmgren},
	author       = {Toporowska Gronostaj, Maria and Sköldberg, Emma},
	year         = {2008},
	pages        = {364--372},
}

@incollection{toporowskagronostaj-skoldberg-2010-swedish-121119,
	title        = {Swedish Medical Collocations: A Lexicographic Approach},
	booktitle    = {Korpora, Web und Datenbanken. Computergestützte Methoden in der modernen Phraseologie und Lexikographie  (Phraseologie und Parömiologie 25)},
	author       = {Toporowska Gronostaj, Maria and Sköldberg, Emma},
	year         = {2010},
	publisher    = {Schneider Verlag Hohengehren GmbH},
	address      = {Baltmannsweiler, Germany},
	ISBN         = {978-3-8340-0733-9},
	pages        = {181--195},
}

@misc{torrent-etal-2018-proceedings-267405,
	title        = {Proceedings of the LREC 2018 Workshop International FrameNet Workshop 2018: Multilingual Framenets and Constructicons. 12 May 2018 – Miyazaki, Japan},
	abstract     = {The International FrameNet Workshop 2018 brought together researchers in Frame Semantics and Construction Grammar, two areas which have traditionally been interrelated but which have been developing somewhat independently in recent years. It was also aimed at language technology researchers working with language resources based on Frame Semantics or Construction Grammar. The workshop follows on from similar joint meetings in Berkeley, California in 2013 (IFNW 2013, sponsored by the Swedish FrameNet group) and in Juiz de Fora, Brazil in 2016 (IFNW 2016, sponsored by FrameNet Brasil), and covered the rapidly unfolding developments in both areas and recent research on their interconnections.},
	author       = {Torrent, Tiago Timponi and Borin, Lars and Baker, Collin},
	year         = {2018},
	publisher    = {ELRA},
	address      = {Miyazaki},
	ISBN         = {979-10-95546-04-7},
}

@inProceedings{vancoppenolle-etal-2011-german-154315,
	title        = {A German Grammar for Generation in OpenCCG},
	abstract     = {We present a freely available CCG fragment for German that is being developed for natural language generation tasks in the domain of share price statistics. It is implemented in OpenCCG, an open-source Java implementation of the computationally attractive CCG formalism. Since generation requires lexical categories to have semantic representations, so that possible realizations can be produced, the underlying grammar needs to define semantics. Hybrid Logic Dependency Semantics, a logic calculus especially suited for encoding linguistic meaning, is used to declare the semantics layer. To our knowledge, related work on German CCG development has not yet focused on the semantics layer. In terms of syntax, we concentrate on aspects of German as a partially free constituent order language. Special attention is paid to scrambling, where we employ CCG's type-changing mechanism in a manner that is somewhat unusual but allows us to a) minimize the number of syntactic categories needed to model scrambling, compared to providing categories for all possible argument orders, and b) retain enough control to impose restrictions on scrambling.},
	booktitle    = {H. Hedeland, T. Schmidt, K. Wörner (eds.): Multilingual Resources and Multilingual Applications. Proc. of the Conference of the German Society for Computational Linguistics and Language Technology (GSCL), Hamburg, 2011. Working Papers in Multilingualism, Series B },
	author       = {Vancoppenolle, Jean and Tabbert, Eric and Bouma, Gerlof and Stede, Manfred},
	year         = {2011},
	number       = {96},
	pages        = {145--150},
}

@inProceedings{vasiljevs-etal-2012-creation-156083,
	title        = {Creation of an Open Shared Language Resource Repository in the Nordic and Baltic Countries},
	abstract     = {The META-NORD project has contributed to an open infrastructure for language resources (data and tools) under the META-NET umbrella. This paper presents the key objectives of META-NORD and reports on the results achieved in the first year of the project. META-NORD has mapped and described the national language technology landscape in the Nordic and Baltic countries in terms of language use, language technology and resources, main actors in the academy, industry, government and society; identified and collected the first batch of language resources in the Nordic and Baltic countries; documented, processed, linked, and upgraded the identified language resources to agreed standards and guidelines. The three horizontal multilingual actions in META-NORD are overviewed in this paper: linking and validating Nordic and Baltic wordnets, the harmonisation of multilingual Nordic and Baltic treebanks, and consolidating multilingual terminology resources across European countries. This paper also touches upon intellectual property rights for the sharing of language resources.
},
	booktitle    = {Proceedings of LREC 2012},
	author       = {Vasiļjevs, Andrejs and Forsberg, Markus and Gornostay, Tatiana and Hansen, Dorte H. and Jóhannsdóttir, Kristín M. and Lindén, Krister and Lyse, Gunn I. and Offersgaard, Lene and Oksanen, Ville and Olsen, Sussi and Pedersen, Bolette S. and Rögnvaldsson, Eiríkur and Rozis, Roberts and Skadiņa, Inguna and Smedt, Koenraad De},
	year         = {2012},
	ISBN         = {978-2-9517408-7-7},
}

@inProceedings{vasljevs-etal-2011-meta-140690,
	title        = {META-NORD: Baltic and Nordic Branch of the European Open Linguistic Infrastructure},
	booktitle    = {Proceedings of the Nodalida 2011 Workshop on visibility and availability of LT resources},
	author       = {Vasiļjevs, Andrejs and Pedersen, Bolette Sandford and De Smedt, Koenraad and Borin, Lars and Skadiņa, Inguna},
	year         = {2011},
}

@inProceedings{viklund-borin-2016-data-236738,
	title        = {How can big data help us study rhetorical history?},
	abstract     = {Rhetorical history is traditionally studied through rhetorical treatises or selected rhetorical practices, for example the speeches of major orators. Although valuable sources, these do not give us the answers to all our questions. Indeed, focusing on a few canonical works or the major historical key figures might even lead us to reproduce cultural self-identifications and false generalizations. However, thanks to the increasing availability of relevant digitized texts, we are now at a point where new research questions can be formulated – and old research questions can be addressed from a new angle, or established results verified – on the basis of exhaustive collections of data rather than small samples, although a methodology for this has not yet established itself. The aim of this paper is twofold: (1) we wish to demonstrate the usefulness of large-scale corpus studies (“text mining”) in the field of rhetorical history, and hopefully point to some interesting research problems and how they can be analyzed using “big-data” methods; (2) in doing this, we also aim to contribute to method development in e-science for the humanities and social sciences, in particular within the framework of CLARIN.},
	booktitle    = {Linköping Electronic Conference Proceedings, No. 123. Edited by Koenraad De Smedt. Selected Papers from the CLARIN Annual Conference 2015. October 14–16, 2015, Wroclaw, Poland},
	author       = {Viklund, Jon and Borin, Lars},
	year         = {2016},
	volume       = {123},
	ISBN         = {978-91-7685-765-6},
	pages        = {79--93},
}

@inProceedings{virk-etal-2019-exploiting-290903,
	title        = {Exploiting frame semantics and frame-semantic parsing for automatic extraction of typological information from descriptive grammars of natural languages},
	abstract     = {We describe a novel system for the automatic extraction of typological linguistic information from descriptive grammars of natural languages, applying the theory of frame semantics in the form of frame-semantic parsing. The current proof-of-concept system covers a few selected linguistic features, but the methodology is general and can be extended not only to other typological features but also to descriptive grammars written in languages other than English. Such a system is expected to be a useful aid for the automatic curation of typological databases, which are otherwise built manually – a very labor- and time-consuming as well as cognitively taxing enterprise.},
	booktitle    = {12th International Conference on Recent Advances in Natural Language Processing, RANLP 2019, Varna, Bulgaria, 2-4 September 2019},
	author       = {Virk, Shafqat and Muhammad, Azam Sheikh and Borin, Lars and Aslam, Muhammad Irfan and Iqbal, Saania and Khurram, Nazia},
	year         = {2019},
	publisher    = {INCOMA Ltd.},
	address      = {Shoumen, Bulgaria},
	ISBN         = {978-954-452-055-7},
}

@inProceedings{virk-etal-2017-automatic-261789,
	title        = {Automatic extraction of typological linguistic features from descriptive grammars},
	abstract     = {The present paper describes experiments on automatically extracting typological linguistic features of natural languages from traditional written descriptive grammars. The feature-extraction task has high potential value in typological, genealogical, historical, and other related areas of linguistics that make use of databases of structural features of languages. Until now, extraction of such features from grammars has been done manually, which is highly time- and labor-consuming and becomes prohibitive when extended to the thousands of languages for which linguistic descriptions are available. The system we describe here starts from semantically parsed text over which a set of rules is applied in order to extract feature values. We evaluate the system's performance using the manually curated Grambank database as the gold standard and report the first measures of precision and recall for this problem.},
	booktitle    = {Text, Speech, and Dialogue 20th International Conference, TSD 2017, Prague, Czech Republic, August 27-31, 2017, Proceedings},
	editor       = {Kamil Ekštein and Václav Matoušek},
	author       = {Virk, Shafqat and Borin, Lars and Saxena, Anju and Hammarström, Harald},
	year         = {2017},
	publisher    = {Springer International Publishing},
	address      = {Cham},
	ISBN         = {978-3-319-64205-5},
}

@article{vivekanand-etal-2014-identification-201958,
	title        = {Identification and qualitative characterization of high and low lignin lines from an oat TILLING population},
	abstract     = {To identify differences in seed lignin content, 520 randomly chosen independent lines were screened in a mutagenized oat population, and lines with seed lignin levels ranging from 20 to 63 g kg(-1) were identified. In the commercial variety Belinda, from which the mutated population was developed, the seed lignin level was determined to be 41 g kg(-1). In Assiniboia, a Canadian low-lignin variety, it was found to be 21 g kg(-1). To investigate whether extracted lignin from the mutated lines was also qualitatively different from Belinda, two lines with the lowest and highest lignin levels were selected for structural analyses using XRD, UV and FT-IR spectroscopy. The results showed significant qualitative differences between seed lignin in the mutated lines and in Belinda, and based on this we predict that lignin from the mutated seeds will be more digestible in ruminant animals than that from Belinda seeds. This prediction was confirmed in preliminary in vitro digestion experiments.},
	journal      = {Industrial crops and products},
	author       = {Vivekanand, Vivekanand and Chawade, Aakash and Larsson, Mikael and Larsson, Anette and Olsson, Olof},
	year         = {2014},
	volume       = {59},
	pages        = {1--8},
}

@inProceedings{volodina-borin-2012-developing-168523,
	title        = {Developing an Open-Source Web-Based Exercise Generator for Swedish},
	abstract     = {This paper reports on the ongoing international project System architecture for ICALL and the progress made by the Swedish partner. The Swedish team is developing a web-based exercise generator reusing available annotated corpora and lexical resources. Apart from technical issues like implementation of the user interface and the underlying processing machinery, a number of interesting pedagogical questions need to be solved, e.g., adapting learner-oriented exercises to proficiency levels; selecting authentic examples of an appropriate difficulty level; automatically ranking corpus examples by their quality; providing feedback to the learner; and selecting vocabulary for training domain-specific, academic or general-purpose vocabulary. In this paper we describe what has been done so far, mention the exercise types that can be generated at the moment, and describe the tasks left for the future.},
	booktitle    = {CALL: Using, Learning, Knowing. EuroCALL Conference, Gothenburg, Sweden, 22-25 August 2012, Proceedings. Eds. Linda Bradley and Sylvie Thouësny. Research-publishing.net, Dublin, Ireland},
	author       = {Volodina, Elena and Borin, Lars},
	year         = {2012},
	volume       = {2012},
	ISBN         = {978-1-908416-03-2},
}

@edited_book{volodina-etal-2013-proceedings-188675,
	title        = {Proceedings of the second workshop on NLP for computer-assisted language learning at NODALIDA 2013 May 22-24, 2013, Oslo, Norway},
	editor       = {Volodina, Elena and Borin, Lars and Loftsson, Hrafn},
	year         = {2013},
	publisher    = {Linköping University Press},
	address      = {Linköping, Sweden},
	ISBN         = {978-91-7519-588-9},
}

@inProceedings{volodina-etal-2012-waste-165936,
	title        = {Waste not, want not: Towards a system architecture for ICALL based on NLP component re-use},
	booktitle    = {Proceedings of the SLTC 2012 workshop on NLP for CALL, Lund, 25th October, 2012},
	author       = {Volodina, Elena and Borin, Lars and Loftsson, Hrafn and Arnbjörnsdóttir, Birna and Leifsson, Guðmundur Örn},
	year         = {2012},
	pages        = {47--58},
}

@edited_book{volodina-etal-2014-proceedings-206135,
	title        = {Proceedings of the third workshop on NLP for computer-assisted language learning at SLTC 2014, Uppsala University},
	abstract     = {The workshop series on NLP for Computer-Assisted Language Learning (NLP4CALL) is a meeting place for researchers working on the integration of Natural Language Processing and Speech technologies in CALL systems and exploring the theoretical and methodological issues arising in this connection. The papers in the proceedings volume from the third NLP4CALL workshop cover three main topic areas: resources for development of ICALL applications (e.g., learner corpora and coursebook corpora), tools and algorithms for the analysis of learner language (e.g., focusing on collocations, reading tasks, cloze items, pronunciation, spelling, level classification of learner production), and the generation of learning materials (e.g., exercise generators).},
	editor       = {Volodina, Elena and Borin, Lars and Pilán, Ildikó},
	year         = {2014},
	publisher    = {Linköping University Press},
	address      = {Linköping},
	ISBN         = {978-91-7519-175-1},
}

@edited_book{volodina-etal-2015-proceedings-226574,
	title        = {Proceedings of the 4th workshop on NLP for computer assisted language learning at Nodalida 2015, Vilnius, 11th May, 2015},
	editor       = {Volodina, Elena and Borin, Lars and Pilán, Ildikó},
	year         = {2015},
	publisher    = {Linköping University Press},
	address      = {Linköping},
	ISBN         = {978-91-7519-036-5},
}

@inProceedings{volodina-etal-2017-svalex-262848,
	title        = {SVALex. En andraspråksordlista med CEFR-nivåer},
	abstract     = {When planning to develop a language course in a second or foreign language (L2), one faces the challenge of defining which vocabulary the learners need to acquire. Research in second language acquisition indicates that a reader needs to know 95–98% of the running words in a text in order to understand it (Laufer & Ravenhorst-Kalovski 2010). Such studies are useful for estimating the size of the vocabulary needed to take in the content of a text, but they provide no concrete methodological guidance for those who want to develop level-structured teaching materials or courses for second language instruction. This is especially evident in CALL, Computer-Assisted Language Learning, where learning materials (e.g. exercises) are generated automatically and electronic resources are needed as a source of knowledge.

One can instead approach the problem from the other direction. Given a collection of level-classified texts for second language learners, one can build word lists from them in which each word is placed on a proficiency scale. If the presumed proficiency level of the reader is known, one can simply assume that the text level at which a word first appears also indicates the word's level of difficulty. SVALex is a lexicon built according to this principle. The resource is intended to be usable by learners and teachers of Swedish as a second language, but also by lexicographers, course developers and test designers, as well as by those who, like ourselves, are engaged in developing language-technology-based computer aids for language learning and language testing.

SVALex constitutes a further development in relation to previous lexical resources for Swedish as a second language (see Section 2), in that it consistently relates its 15,681 lexical entries to a widely used proficiency scale for second and foreign language learning, the Council of Europe's Common European Framework of Reference for Languages (CEFR) (Council of Europe 2001; Skolverket 2009).

The level classification of the lexical entries in SVALex is based on their distribution in COCTAILL, a corpus of coursebook texts for Swedish as a second language in which teachers have assigned each text to one of the CEFR levels (Volodina et al. 2014).
},
	booktitle    = {Svenskans beskrivning. 35, Förhandlingar vid trettiofemte sammankomsten : Göteborg 11–13 maj 2016 / Redigerad av Emma Sköldberg, Maia Andréasson, Henrietta Adamsson Eryd, Filippa Lindahl, Sven Lindström, Julia Prentice & Malin Sandberg},
	author       = {Volodina, Elena and Borin, Lars and Pilán, Ildikó and François, Thomas and Tack, Annaïs},
	year         = {2017},
	publisher    = {Göteborgs universitet},
	address      = {Göteborg},
	ISBN         = {978-91-87850-64-6},
}
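
The first-occurrence principle described in the abstract above (a word is assigned the CEFR level of the first level-labelled text in which it appears) is simple enough to state in code. The following minimal Python sketch illustrates it; the toy corpus is a hypothetical stand-in for a level-labelled coursebook corpus like COCTAILL.

    # Sketch of the first-occurrence principle: each word gets the
    # lowest CEFR level at which it occurs in level-labelled texts.
    # The toy corpus is a hypothetical stand-in for COCTAILL.
    CEFR_ORDER = ["A1", "A2", "B1", "B2", "C1"]

    labelled_texts = [
        ("A1", "jag heter anna"),
        ("A2", "jag bor i en stor stad"),
        ("B1", "staden förändras ständigt"),
    ]

    word_level = {}
    for level, text in labelled_texts:
        for word in text.split():
            # Keep the earliest (lowest) level at which the word occurs.
            if (word not in word_level
                    or CEFR_ORDER.index(level) < CEFR_ORDER.index(word_level[word])):
                word_level[word] = level

    print(word_level["jag"])     # -> "A1"
    print(word_level["staden"])  # -> "B1"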

@article{volodina-etal-2019-swell-285609,
	title        = {The SweLL Language Learner Corpus: From Design to Annotation},
	abstract     = {The article presents a new language learner corpus for Swedish, SweLL, and the methodology behind it, from collection and pseudonymisation to protect the personal information of learners, to annotation adapted to second language learning. The main aim is to deliver a well-annotated corpus of essays written by second language learners of Swedish and make it available for research through a browsable environment. To that end, a new annotation tool and a new project management tool have been implemented, both with the main purpose of ensuring the reliability and quality of the final corpus. In the article we discuss the reasoning behind the metadata selection and the principles of gold corpus compilation, and argue for the separation of normalization from correction annotation.},
	journal      = {Northern European Journal of Language Technology},
	author       = {Volodina, Elena and Granstedt, Lena and Matsson, Arild and Megyesi, Beáta and Pilán, Ildikó and Prentice, Julia and Rosén, Dan and Rudebeck, Lisa and Schenström, Carl-Johan and Sundberg, Gunlög and Wirén, Mats},
	year         = {2019},
	volume       = {6},
	pages        = {67--104},
}

@inProceedings{volodina-etal-2018-annotation-275361,
	title        = {Annotation of learner corpora: first SweLL insights.},
	abstract     = {This is a concise description of experiences with learner corpus annotation performed within the SweLL project. The experiences include work with legal issues, anonymization, error annotation, normalization, and questions relating to the quality of annotation.},
	booktitle    = {Proceedings of SLTC 2018, Stockholm, October 7-9, 2018},
	author       = {Volodina, Elena and Granstedt, Lena and Megyesi, Beáta and Prentice, Julia and Rosén, Dan and Schenström, Carl-Johan and Sundberg, Gunlög  and Wirén, Mats},
	year         = {2018},
}

@misc{volodina-etal-2016-preface-248087,
	title        = {Preface. Proceedings of the joint workshop on NLP for Computer Assisted Language Learning and NLP for Language Acquisition at SLTC, Umeå, 16th November 2016},
	abstract     = {The joint workshop on Natural Language Processing (NLP) for Computer-Assisted Language Learning (CALL) & NLP for Language Acquisition (LA) – NLP4CALL&LA for short – is an effort to provide a space for debate and collaboration between two closely related areas. Both focus on language acquisition and on related resources and technologies that can support research on the language learning process, and both aim to bring an interdisciplinary advantage to the field. The individual workshop areas are outlined below.

The area of NLP4CALL is applied in essence, where tools, algorithms, and ready-to-use programs play an important role. It has a traditional focus on second or foreign language learning, with a target age group of school children or older. The intersection of Natural Language Processing and Speech Technology with Computer-Assisted Language Learning (CALL) brings “understanding” of language to CALL tools, thus making CALL intelligent. This fact has provided the name for this area of research – Intelligent CALL, ICALL. As the definition suggests, apart from having excellent knowledge of Natural Language Processing and/or Speech Technology, ICALL researchers need good insights into second language acquisition (SLA) theories and practices, second language assessment, as well as knowledge of L2 pedagogy and didactics.

The workshop on Language Processing for Research in Language Acquisition (NLP4LA) broadens the scope of the joint workshop to also include theoretical, empirical, and experimental investigation of first, second and bilingual language acquisition. NLP4LA aims to foster collaboration between the NLP, linguistics, psychology and cognitive science communities. The workshop is targeted at anyone interested in the relevance of computational techniques for first, second and bilingual language acquisition.

The joint workshop series on NLP4CALL&LA arose in 2016 and has become a meeting place for researchers working on the integration of Natural Language Processing and Speech Technologies in systems supporting language learning and research around it, and on exploring the theoretical and methodological issues arising during language acquisition.
},
	author       = {Volodina, Elena and Grigonytė, Gintarė and Pilán, Ildikó and Nilsson Björkenstam, Kristina and Borin, Lars},
	year         = {2016},
	number       = {130},
	pages        = {i--viii},
}

@misc{volodina-etal-2016-proceedings-248081,
	title        = {Proceedings of the joint workshop on NLP for Computer Assisted Language Learning and NLP for Language Acquisition at SLTC, Umeå, 16th November 2016},
	abstract     = {The joint workshop on Natural Language Processing (NLP) for Computer-Assisted Language Learning (CALL) & NLP for Language Acquisition (LA) – NLP4CALL&LA for short – is an effort to provide a space for debate and collaboration between two closely related areas. Both focus on language acquisition and on related resources and technologies that can support research on the language learning process, and both aim to bring an interdisciplinary advantage to the field. The individual workshop areas are outlined below.

The area of NLP4CALL is applied in essence, where tools, algorithms, and ready-to-use programs play an important role. It has a traditional focus on second or foreign language learning, with a target age group of school children or older. The intersection of Natural Language Processing and Speech Technology with Computer-Assisted Language Learning (CALL) brings “understanding” of language to CALL tools, thus making CALL intelligent. This fact has provided the name for this area of research – Intelligent CALL, ICALL. As the definition suggests, apart from having excellent knowledge of Natural Language Processing and/or Speech Technology, ICALL researchers need good insights into second language acquisition (SLA) theories and practices, second language assessment, as well as knowledge of L2 pedagogy and didactics.

The workshop on Language Processing for Research in Language Acquisition (NLP4LA) broadens the scope of the joint workshop to also include theoretical, empirical, and experimental investigation of first, second and bilingual language acquisition. NLP4LA aims to foster collaboration between the NLP, linguistics, psychology and cognitive science communities. The workshop is targeted at anyone interested in the relevance of computational techniques for first, second and bilingual language acquisition.

The joint workshop series on NLP4CALL&LA arose in 2016 and has become a meeting place for researchers working on the integration of Natural Language Processing and Speech Technologies in systems supporting language learning and research around it, and on exploring the theoretical and methodological issues arising during language acquisition.},
	author       = {Volodina, Elena and Grigonytė, Gintarė and Pilán, Ildikó and Nilsson Björkenstam, Kristina and Borin, Lars},
	year         = {2016},
	publisher    = {Linköping University Electronic Press},
	address      = {Linköping},
	ISBN         = {978-91-7685-633-8},
}

@inProceedings{volodina-etal-2012-towards-168516,
	title        = {Towards a system architecture for ICALL},
	abstract     = {In this paper, we present an on-going project whose overall aim is to develop an open-source system architecture for supporting ICALL systems that will facilitate the re-use of existing NLP tools and resources on a plug-and-play basis. We introduce the project, describe the approaches adopted by the two language teams, and present two applications being developed using the proposed architecture.},
	booktitle    = {In G. Biswas et al. (eds), Proceedings of the 20th International Conference on Computers in Education. Singapore: Asia-Pacific Society for Computers in Education},
	author       = {Volodina, Elena and Loftsson, Hrafn and Arnbjörnsdóttir, Birna and Borin, Lars and Leifsson, Guðmundur Örn},
	year         = {2012},
	volume       = {2012},
	ISBN         = {978-981-07-4649-0},
}

@inProceedings{volodina-etal-2012-semi-165961,
	title        = {Semi-automatic selection of best corpus examples for Swedish: Initial algorithm evaluation.},
	abstract     = {The study presented here describes the results of the initial evaluation of two sorting approaches to automatic ranking of corpus examples for Swedish. Representatives of two potential target user groups were asked to rate the top three hits per approach for sixty search items from the point of view of the needs of their professional target groups, namely second/foreign language (L2) teachers and lexicographers. This evaluation has shown, on the one hand, which of the two approaches to example rating (called algorithms #1 and #2 below) performs better in terms of finding good examples for each target user group, and on the other hand, which features evaluators associate with good examples. It has also facilitated statistical analysis of the “good” versus “bad” examples with reference to measurable features, such as sentence length, word length, lexical frequency profiles, PoS constitution, dependency structure, etc., with the potential of finding new reliable classifiers.},
	booktitle    = {Proceedings of the SLTC 2012 workshop on NLP for CALL, Lund, 25th October, 2012. },
	author       = {Volodina, Elena and Johansson, Richard and Johansson Kokkinakis, Sofie},
	year         = {2012},
	number       = {080},
	pages        = {59--70},
}
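
The measurable features named in the abstract above (sentence length, word length, lexical frequency profiles) suggest a simple linear scoring function for ranking candidate examples. The following Python sketch is a toy ranker with arbitrarily chosen weights; it is not either of the two algorithms evaluated in the paper.

    # Toy ranker over the kinds of features the abstract mentions.
    # Weights and the target sentence length are arbitrary assumptions.
    def score_example(sentence, frequent_words):
        words = sentence.split()
        n = len(words)
        avg_word_len = sum(len(w) for w in words) / n
        # Share of tokens that belong to a high-frequency word list.
        common = sum(w.lower().strip(".,") in frequent_words for w in words) / n
        length_penalty = abs(n - 12) * 0.1  # prefer mid-length sentences
        return common - 0.05 * avg_word_len - length_penalty

    frequent_words = {"det", "är", "en", "och", "att", "som", "har", "på"}
    candidates = [
        "Det är en bok som har många sidor.",
        "Epistemologiska överväganden präglar framställningen.",
    ]
    for s in sorted(candidates,
                    key=lambda s: score_example(s, frequent_words),
                    reverse=True):
        print(round(score_example(s, frequent_words), 2), s)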

@inProceedings{volodina-etal-2019-svala-285617,
	title        = {SVALA: an Annotation Tool for Learner Corpora generating parallel texts},
	abstract     = {Learner corpora are actively used for research on Language Acquisition and in Learner Corpus Research (LCR). The data is, however, very expensive to collect and manually annotate, and includes steps like anonymization, normalization, error annotation and linguistic annotation. In the past, projects often re-used tools from a number of different projects for the above steps. As a result, various input and output formats between the tools needed to be converted, which increased the complexity of the task. In the present project, we are developing a tool that handles all of the above-mentioned steps in one environment, maintaining a stable interpretable format between the steps. A distinguishing feature of the tool is that users work in a usual environment (plain text) while the tool visualizes all performed edits via a graph that links an original learner text with an edited one, token by token.},
	booktitle    = {Learner Corpus Research conference (LCR-2019), Warsaw, 12-14 September 2019, Book of abstracts},
	author       = {Volodina, Elena and Matsson, Arild and Rosén, Dan and Wirén, Mats},
	year         = {2019},
}

@inProceedings{volodina-etal-2013-towards-188549,
	title        = {Towards a gold standard for Swedish CEFR-based ICALL},
	abstract     = {In qualitative projects on ICALL (Intelligent Computer-Assisted Language Learning), research and development always go hand in hand: development both depends upon the research results and dictates the research agenda. Likewise, in the development of the Swedish ICALL platform Lärka, the practical issues of development have dictated its research agenda. With NLP approaches, sooner or later the necessity for reliable training data becomes unavoidable. At the moment, Lärka's research agenda cannot be addressed without access to reliable training data, a so-called “gold standard”. This paper gives an overview of the current state of the Swedish ICALL platform development and the related research agenda, and describes the first attempts to collect a reference corpus (“gold standard”) based on course books used in CEFR-based language teaching.},
	booktitle    = {Proceedings of the Second Workshop on NLP for Computer-Assisted Language Learning. NEALT Proceedings Series 17. Nodalida 2013, Oslo, Norway. },
	author       = {Volodina, Elena and Pijetlovic, Dijana and Pilán, Ildikó and Johansson Kokkinakis, Sofie},
	year         = {2013},
	ISBN         = {978-91-7519-588-9},
}

@inProceedings{volodina-pilan-2016-svalex-248116,
	title        = {SVALex: en andraspråksordlista graderad enligt CEFR nivåer.},
	booktitle    = {Svenskans Beskrivning 35, Göteborg 2016},
	author       = {Volodina, Elena and Pilán, Ildikó},
	year         = {2016},
}

@inProceedings{volodina-etal-2016-classification-246346,
	title        = {Classification of Swedish learner essays by CEFR levels},
	abstract     = {The paper describes initial efforts on creating a system for the automatic assessment of Swedish second language (L2) learner essays from two points of view: holistic evaluation of the reached level according to the Common European Framework of Reference (CEFR), and lexical analysis of texts for receptive and productive vocabulary per CEFR level. We describe the data and resources that our experiments were based on, provide a short introduction to the algorithm for essay classification and the experiment results, present the user interface we developed for testing new essays, and outline future work.},
	booktitle    = {Proceedings of EuroCALL 2016. 24-27th August 2016, Cyprus.},
	author       = {Volodina, Elena and Pilán, Ildikó and Alfter, David},
	year         = {2016},
	publisher    = {Research-publishing.net},
	ISBN         = {978-1-908416-44-5},
}

@misc{volodina-etal-2017-preface-262846,
	title        = {Preface. Proceedings of the Joint 6th Workshop on NLP for Computer Assisted Language Learning and 2nd Workshop on NLP for Research on Language Acquisition at NoDaLiDa 2017, Gothenburg, 22nd May 2017},
	abstract     = {For the second year in a row we brought the two related themes of NLP for Computer-Assisted Language Learning and NLP for Language Acquisition together. The goal of organizing joint workshops is to provide a meeting place for researchers working on language learning issues, including both empirical and experimental studies and NLP-based applications. The resulting volume covers a variety of topics from the two fields and – hopefully – showcases the challenges and achievements in the field.
The seven papers in this volume cover native language identification in learner writings; the use of syntactic complexity development in learner language to identify reading comprehension texts of an appropriate level; the potential of parallel corpora to predict mother-tongue-specific problem areas for learners of another language; tools for learning languages, both well-resourced ones such as English and endangered or under-resourced ones such as Yakut and Võro; and the potential of automatically identifying and correcting word-level errors in Swedish learner writing.},
	author       = {Volodina, Elena and Pilán, Ildikó and Borin, Lars and Grigonyte, Gintare and Nilsson Björkenstam, Kristina},
	year         = {2017},
	volume       = {30},
	pages        = {i--vi},
}

@misc{volodina-etal-2017-proceedings-262838,
	title        = {Proceedings of the Joint 6th Workshop on NLP for Computer Assisted Language Learning and 2nd Workshop on NLP for Research on Language Acquisition at NoDaLiDa 2017, Gothenburg, 22nd May 2017},
	abstract     = {For the second year in a row we have brought the two related themes of NLP for Computer-Assisted Language Learning and NLP for Language Acquisition together under one umbrella. The goal of organizing these joint workshops is to provide a meeting place for researchers working on language learning issues including both empirical and experimental studies and NLP-based applications.},
	author       = {Volodina, Elena and Pilán, Ildikó and Borin, Lars and Grigonyte, Gintare and Nilsson Björkenstam, Kristina},
	year         = {2017},
	publisher    = {Linköping University Press},
	address      = {Linköping, Sweden},
	ISBN         = {978-91-7685-502-7},
}

@inProceedings{volodina-etal-2014-flexible-201885,
	title        = {A flexible language learning platform based on language resources and web services. },
	abstract     = {We present Lärka, the language learning platform of Språkbanken (the Swedish Language Bank). It consists of an exercise generator which reuses resources available through Språkbanken: mainly Korp, the corpus infrastructure, and Karp, the lexical infrastructure. Through Lärka we reach new user groups – students and teachers of Linguistics as well as second language learners and their teachers – and in this way bring Språkbanken's resources to them in a relevant format. Lärka can therefore be viewed as a case of real-life language resource evaluation with end users. In this article we describe Lärka's architecture, its user interface, and the five exercise types that have been released for users so far. The first user evaluation, following in-class usage with students of linguistics, speech therapy and teacher candidates, is presented. An outline of future work concludes the paper.},
	booktitle    = {Proceedings of LREC, 26-31 May 2014, Reykjavik, Iceland},
	author       = {Volodina, Elena and Pilán, Ildikó and Borin, Lars and Tiedemann, Therese Lindström},
	year         = {2014},
	ISBN         = {978-2-9517408-8-4},
	pages        = {3973--3978},
}

@inProceedings{volodina-etal-2014-what-206132,
	title        = {You get what you annotate: a pedagogically annotated corpus of coursebooks for Swedish as a Second Language.},
	abstract     = {We present the COCTAILL corpus, containing over 700,000 tokens of Swedish texts from 12 coursebooks aimed at second/foreign language (L2) learning. Each text in the corpus is labelled with a proficiency level according to the CEFR proficiency scale. Genres, topics, associated activities, vocabulary lists and other types of information are annotated in the coursebooks to facilitate Second Language Acquisition (SLA)-aware studies and experiments aimed at Intelligent Computer-Assisted Language Learning (ICALL). Linguistic annotation in the form of parts-of-speech (POS; e.g. nouns, verbs), base forms (lemmas) and syntactic relations (e.g. subject, object) has also been added to the corpus. In the article we describe our annotation scheme and the editor we have developed for the content mark-up of the coursebooks, including the taxonomy of pedagogical activities and linguistic skills. Inter-annotator agreement has been computed and reported on a subset of the corpus. Surprisingly, we have not found any other examples of pedagogically marked-up corpora based on L2 coursebooks whose experiences we could draw on; hence, our work may be viewed as “groping in the darkness” and eventually a starting point for others. The paper also presents our first quantitative exploration of the corpus, where we focus on textually and pedagogically annotated features of the coursebooks to exemplify what types of studies can be performed using the presented annotation scheme. We explore trends in the use of topics and genres over proficiency levels and compare the pedagogical focus of exercises across levels. The final section of the paper summarises the potential this corpus holds for research within SLA and various ICALL tasks.},
	booktitle    = {NEALT Proceedings Series},
	author       = {Volodina, Elena and Pilán, Ildikó and Rødven-Eide, Stian and Heidarsson, Hannes},
	year         = {2014},
	volume       = {22},
	ISBN         = {978-91-7519-175-1},
	pages        = {128--144},
}

@inProceedings{volodina-etal-2016-swell-248141,
	title        = {SweLL on the rise: Swedish Learner Language corpus for European Reference Level studies.},
	abstract     = {We present a new resource for Swedish, SweLL, a corpus of Swedish learner essays linked to learners’ performance according to the Common European Framework of Reference (CEFR). SweLL consists of three subcorpora – SpIn, SW1203 and Tisus – collected from three different educational establishments. The common metadata for all subcorpora includes age, gender, native languages, time of residence in Sweden and type of written task. Depending on the subcorpus, learner texts may contain additional information, such as text genres, topics and grades. Five of the six CEFR levels are represented in the corpus – A1, A2, B1, B2 and C1 – comprising in total 339 essays. The C2 level is not included since courses at C2 level are not offered. The workflow consists of collection of essays and permits, essay digitization and registration, metadata annotation, and automatic linguistic annotation. Inter-rater agreement is presented on the basis of the SW1203 subcorpus. The work on SweLL is still ongoing, with more than 100 essays waiting in the pipeline. This article describes both the resource and the “how-to” behind the compilation of SweLL.},
	booktitle    = {Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC 2016), May 23-28, 2016, Portorož, Slovenia},
	author       = {Volodina, Elena and Pilán, Ildikó and Enström, Ingegerd and Llozhi, Lorena and Lundkvist, Peter and Sundberg, Gunlög and Sandell, Monica},
	year         = {2016},
	publisher    = {European Language Resources Association},
	address      = {Paris},
	ISBN         = {978-2-9517408-9-1},
}

@inProceedings{volodina-etal-2016-swell-248145,
	title        = {SweLL – en korpus med L2 uppsatser för CEFR studier.},
	booktitle    = {Svenskans Beskrivning 35, Göteborg 2016},
	author       = {Volodina, Elena and Pilán, Ildikó and Enström, Ingegerd and Lundkvist, Peter and Sundberg, Gunlög and Llozhi, Lorena and Sandell, Monica},
	year         = {2016},
}

@inProceedings{volodina-etal-2016-swellex-248090,
	title        = {SweLLex: second language learners' productive vocabulary.},
	abstract     = {This paper presents a new lexical resource for learners of Swedish as a second language, SweLLex, and the know-how behind its creation. We concentrate on L2 learners’ productive vocabulary, i.e. words that they are actively able to produce, rather than the lexica they comprehend (receptive vocabulary). The proposed list covers productive vocabulary used by L2 learners in their essays. Each lexical item on the list is connected to its frequency distribution over the six levels of proficiency defined by the Common European Framework of Reference (CEFR) (Council of Europe, 2001). To make this list a more reliable resource, we experiment with normalizing L2 word-level errors by replacing them with their correct equivalents. SweLLex has been tested in a prototype system for automatic CEFR level classification of essays as well as in a visualization tool aimed at exploring L2 vocabulary, contrasting receptive and productive vocabulary usage at different levels of language proficiency.},
	booktitle    = {Linköping Electronic Conference Proceedings. Proceedings of the joint workshop on NLP for Computer Assisted Language Learning and NLP for Language Acquisition at SLTC, Umeå, 16th November 2016},
	author       = {Volodina, Elena and Pilán, Ildikó and Llozhi, Lorena and Degryse, Baptiste and François, Thomas},
	year         = {2016},
	publisher    = {Linköping University Electronic Press},
	address      = {Linköping},
	ISBN         = {978-91-7685-633-8},
}

@inProceedings{wilhelmsson-2008-heuristic-79686,
	title        = {Heuristic Schema Parsing of Swedish Text},
	abstract     = {A method for identification of the primary (main clause) functional constituents of Swedish sentences is outlined. The method gives a robust analysis of the unbounded constituents (phrases which do not have an upper bound on their length: subjects, objects/predicatives and adverbials) by first identifying bounded constituents. Diderichsen’s sentence schema, chunking, syntactic valency data and heuristics are used for the delimitation of the constituents and labelling with grammatical functions.},
	booktitle    = {Proceedings of the Swedish Language Technology Conference (SLTC'08)},
	author       = {Wilhelmsson, Kenneth},
	year         = {2008},
	pages        = {41--42},
}

@article{wilhelmsson-2008-automatic-79714,
	title        = {Automatic Variation of Swedish Text by Syntactic Fronting},
	abstract     = {Ongoing work with a prototype implementation for automatic fronting of primary (main clause) constituents in Swedish input text is described. Linguistic constraints and some technical aspects are also discussed.},
	journal      = {Proceedings of the Workshop on NLP for Reading and Writing – Resources, Algorithms and Tools (SLTC 2008), November 20, 2008, Stockholm, Sweden. NEALT Proceedings Series},
	author       = {Wilhelmsson, Kenneth},
	year         = {2008},
	volume       = {3},
	pages        = {22--23},
}

@inProceedings{wilhelmsson-2010-automatisk-247440,
	title        = {Automatisk generering av frågor som svensk text besvarar: ett informationssystem},
	abstract     = {What information can a text be said to contain? A simple answer is “the questions that it answers”. To what extent is it then possible to automatically generate these questions, and thereby program a question-answering information system for Swedish text?},
	booktitle    = {Röster från Humanisten 2010},
	author       = {Wilhelmsson, Kenneth},
	year         = {2010},
}

@article{wilhelmsson-2010-automatisk-137859,
	title        = {Automatisk generering av frågor som svensk text besvarar: ett informationssystem},
	abstract     = {What information can a text be said to contain? A simple answer is “the questions that it answers”. To what extent is it then possible to automatically generate these questions, and thereby program a question-answering information system for Swedish text?

A prototype system for this task has been created as part of a doctoral project in language technology. It would, for example, be possible to develop the system shown here into a general technical service, e.g. web-based, that lets users search for information in natural language in any digital text of their choice.

This text discusses the general prerequisites for automatically generating the questions that a Swedish text answers. The theoretical task itself has properties that can be described as linguistic or information-theoretic. Creating the program described here has, of course, also required a programming effort, but this is not covered here; the purely practical side of the task can be solved in many ways.

http://www.hum.gu.se/samverkan/popularvetenskap/roster-fran-humanisten-2010/

http://hdl.handle.net/2320/7176
},
	journal      = {Röster från Humanisten, 2010},
	author       = {Wilhelmsson, Kenneth},
	year         = {2010},
	volume       = {2010},
}

@book{wilhelmsson-2010-heuristisk-132135,
	title        = {Heuristisk analys med Diderichsens satsschema – Tillämpningar för svensk text, 2 uppl},
	abstract     = {A heuristic method for parsing Swedish text, heuristic schema parsing, is described and implemented. Focusing on main clause (primary) analysis, a collection of licensing techniques for removing non-primary verb candidates is employed, leaving e.g. the primary verbs, particles and conjunctions (bounded key constituents) that delimit the content of the fields in Diderichsen’s sentence schema. Hereby, the constituents which do not have an upper bound on their length (subjects, objects/predicatives and adverbials) can subsequently be identified, relying to a lesser extent on explicit pattern matching and more on different heuristic rules. For phrase type identification and delimitation of these constituents, when adjacent to each other, a novel chunking technique, rank-based chunking, is applied. Following this, a series of further rules merge chunks into larger ones, aiming at a final number of nominal chunks compatible with the valency information of the main verb. The aim is to identify full nominal and adverbial constituents, including post-modifiers. The implementation uses the Stockholm Umeå Corpus 2.0, a corpus which is balanced for different genres of published Swedish text. SUC’s tagset is also used unmodified in part-of-speech tagging, which enables the program to deal with input text. The functional parsing, which includes no explicit language-defining grammar component, is carried out technically using an object-based representation of clause structure.

The thesis work also includes two prototype applications, both requiring high accuracy of the sort of functional syntactic analysis described here. The first one is an implementation of automatic syntactic fronting in the area of text editing for Swedish, where the user is presented with a syntactically analyzed copy of her writing, from which paraphrases can easily be generated. The second application is in the field of natural language query systems and produces questions with answers from an arbitrary declarative input text. This prototype incorporates a text database from Swedish Wikipedia, and primarily investigates the generation of WH-questions formed via fronting of unbounded primary constituents. The questions are generated as a text is opened, thus permitting users to pose only the available questions and aiming at a high precision value.
},
	author       = {Wilhelmsson, Kenneth},
	year         = {2010},
	publisher    = {University of Gothenburg},
	address      = {Göteborg},
	ISBN         = {978-91-977196-9-8},
}

@book{wilhelmsson-2010-heuristisk-126092,
	title        = {Heuristisk analys med Diderichsens satsschema - Tillämpningar för svensk text},
	abstract     = {A heuristic method for parsing Swedish text, heuristic schema parsing, is described and implemented. Focusing on main clause (primary) analysis, a collection of licensing techniques for removing non-primary verb candidates is employed, leaving e.g. the primary verbs, particles and conjunctions (bounded key constituents) that delimit the content of the fields in Diderichsen’s sentence schema. Hereby, the constituents which do not have an upper bound on their length (subjects, objects/predicatives and adverbials) can subsequently be identified, relying to a lesser extent on explicit pattern matching and more on different heuristic rules. For phrase type identification and delimitation of these constituents, when adjacent to each other, a novel chunking technique, rank-based chunking, is applied. Following this, a series of further rules merge chunks into larger ones, aiming at a final number of nominal chunks compatible with the valency information of the main verb. The aim is to identify full nominal and adverbial constituents, including post-modifiers. The implementation uses the Stockholm Umeå Corpus 2.0, a corpus which is balanced for different genres of published Swedish text. SUC’s tagset is also used unmodified in part-of-speech tagging, which enables the program to deal with input text. The functional parsing, which includes no explicit language-defining grammar component, is carried out technically using an object-based representation of clause structure.

Although output formats and types of evaluations of correctness are very different in parsers for Swedish text, it is claimed that the manual approach presented can provide high accuracy, which can be improved given more time for development.

The thesis work also includes two prototype applications, both requiring high accuracy of the sort of functional syntactic analysis described here. The first one is an implementation of automatic syntactic fronting in the area of text editing for Swedish, where the user is presented with a syntactically analyzed copy of her writing, from which paraphrases can easily be generated. The second application is in the field of natural language query systems and produces questions with answers from an arbitrary declarative input text. This prototype incorporates a text database from Swedish Wikipedia, and primarily investigates the generation of WH-questions formed via fronting of unbounded primary constituents. The questions are generated as a text is opened, thus permitting users to pose only the available questions and aiming at a high precision value.},
	author       = {Wilhelmsson, Kenneth},
	year         = {2010},
	publisher    = {University of Gothenburg},
	address      = {Göteborg},
}

@inProceedings{wilhelmsson-2011-automatic-259874,
	title        = {Automatic Question Generation from Swedish Documents as a Tool for Information Extraction},
	abstract     = {An implementation of automatic question generation (QG) from raw Swedish text is presented. QG is here chosen as an alternative to natural query systems, where any query can be posed and no indication is given of whether the current text database includes the information sought. The program builds on parsing with grammatical functions, from which corresponding questions are generated, and it incorporates the article database of Swedish Wikipedia. The pilot system is meant to work with a text shown in the GUI and auto-completes user input to help find available questions. The act of question generation is described here together with early test results regarding the currently produced questions.},
	booktitle    = {Proceedings of the 18th Nordic Conference of Computational Linguistics NODALIDA 2011, NEALT Proceedings Series Vol. 11},
	author       = {Wilhelmsson, Kenneth},
	year         = {2011},
	publisher    = {Northern European Association for Language Technology (NEALT)},
	address      = {Tartu},
}

@inProceedings{wilhelmsson-2012-automatic-165989,
	title        = {Automatic question generation for Swedish: The current state},
	abstract     = {The research area of question generation (QG), in its current form, has a relatively brief history within NLP. A description of the current question generation implementation for Swedish text, built on schema parsing, is presented and exemplified here. Underlying the current approach is the view of ‘all textual information as answers to questions’. This paper discusses strategies for enhanced functionality for arbitrary Swedish text through extended question generation. It also brings up some theoretical issues regarding the nature of the task, and addresses practical considerations in an area such as Intelligent CALL (ICALL), where this type of application has been considered for English.},
	issn         = {1650-3686 (print), 1650-3740 (online)},
	booktitle    = {Proceedings of the SLTC 2012 workshop on NLP for CALL, Lund, 25th October, 2012, Linköping Electronic Conference Proceedings},
	author       = {Wilhelmsson, Kenneth},
	year         = {2012},
	volume       = {80},
	pages        = {71--79},
}

@techreport{wilhelmsson-2012-adverbialkarakteristik-160440,
	title        = {Adverbialkarakteristik för praktisk informationsextraktion i svensk text - Projektrapport},
	abstract     = {This report describes a project that has primarily involved practical work aimed at creating an automated process which returns the interrogative phrase, e.g. varifrån ('from where'), for an adverbial phrase, e.g. inifrån rummet ('from inside the room'), in Swedish digital text. This substitution process is needed for purely practical reasons in the task of question generation, in which a collection of questions that a text answers is generated quickly and automatically. The process belongs in programs that in various ways aim to provide information access to arbitrary, unknown Swedish text. In this use case, the point is to somehow open up the large amount of information that, from a computer-science perspective, lies 'unstructured', i.e. in natural-language form.

Determining suitable interrogative phrases (often a wh-form) for clause constituents occurring in text probably has a more general relevance than its use in the aforementioned program type, however. Besides being needed in other, similar computational-linguistic applications, the question itself falls within the scope of basic research. The common semantically based adverbial categories (which differ between grammars) tend to define adverbial categories precisely by describing which kinds of questions they answer. Aiming, as here, to determine the interrogative phrase for an adverbial is a more detailed task than determining its adverbial category.

The practical method implemented in the project can be broken down into a number of steps that are assumed to be general and hard to avoid given the present aim. The input to the program is an essentially arbitrary adverbial phrase, which the user of the prototype program can type in. The subsequent steps are the following. 1) Each running word is first marked up with part-of-speech and other grammatical information, using a statistical trigram-based Hidden Markov model. 2/3) The structural type of the phrase (subordinate clause, PP, etc.) is determined from the running words together with the information from the previous step. Closely tied to this task is the determination of the head word and, for several phrase types, of other significant components such as the head of the prepositional complement. The solution to this substep is called rank-based chunking. 4) The steps that follow differ considerably depending on the structural type at hand. For prepositional phrases, for example, the preposition is examined and, depending on which preposition it is, the head of the complement, its base form and other contained text segments. In this work, SweFN (Borin, Dannélls, Forsberg, Toporowska Gronostaj, & Kokkinakis, 2010) has, for example, been partly examined as a way of possibly improving the determination of noun semantics, which is often relevant for PP adverbials.

The report shows how the task varies greatly in difficulty in practice: from cases where the adverbial consists of, e.g., participle phrases, adverb phrases or subordinate clauses, where a mapping to the corresponding interrogative phrase can often be made directly from the head word, to the most complicated cases of PPs and so-called som-phrases, where combinations of the head word, the head of the prepositional complement, its base form and other syntactic and semantic information are required to single out the particular interrogative counterparts of individual occurrences. A recurring theme in the practical work is exceptions that need to be recognized. For example, the category of sentence adverbials, which can take many different structural forms but nevertheless usually yields the result 'no interrogative counterpart', must be recognized explicitly (possibly together with other categories yielding the same result). The process as a whole is, however, also programmed in terms of base cases and exceptions. In many cases, as for PPs headed by i ('in'), there is a range of different counterparts, and which one gets to serve as the base case in the program becomes an empirical/heuristic question as rules are written against actual occurrences of adverbials in the Stockholm Umeå Corpus (henceforth SUC). That i, like other prepositions, can be said to have a prototypical directional meaning does not mean that var ('where') must necessarily serve as the default case. There are 'layers' of exceptions within the various structural types in the program, but also externally motivated ones based on the main verb, which through valency matching can establish that an adverbial is a 'prepositional object' and therefore has different questioning properties. The user interfaces created and used for rule writing based on actual examples have allowed immediate rule updating and re-checking upon seeing incorrect results. It is also through the addition of new exception rules, in some sense, that the program can reasonably be improved from its current level of quality. The accuracy achieved so far is not quantitatively convincing, but this work, which has no predecessors, enables continuous improvement through the program.

The project shows that the mapping task…},
	author       = {Wilhelmsson, Kenneth},
	year         = {2012},
	publisher    = {University of Gothenburg},
	address      = {Göteborg},
}

@misc{wilhelmsson-2015-autentiska-249227,
	title        = {Autentiska och artificiella frågor till svensk text. Automatisk frågegenerering jämfört med användares frågor för informationsåtkomst},
	abstract     = {Information retrieval from unstructured data sources such as free text is one of the areas for which user interfaces allowing free formulation in natural language have been developed. In such a system, possibly AI-flavoured, some fundamental difficulties from the user's perspective can be noted. One such difficulty is that a user does not know whether a question she intends to pose can actually be answered by the text at hand. This difficulty, together with others, such as the considerable possible variation in the form of a valid answer to a posed question, risks leaving users with a negative impression of this type of system.
The components that need to be part of such a question-based information system's operation must somehow include a mapping from the interrogative phrase of the question (e.g. när, 'when') to the form and grammatical function that the answer in the text must have (for när, normally a temporal adverbial). This observation, among others, invites the use of automatic question generation (QG). Question generation means that the questions a natural text answers are first extracted by a program that collects them in explicit form. The idea, for use in information retrieval, is that a user should only be able to pose exactly these questions in the interface, questions that the text actually answers.
This study concerns the questions that an automatic question generation system for Swedish can generate, and with further development could generate, for arbitrary digital Swedish text. Even though the set of automatically generated questions and question formulations can be very large, many times larger in size than the source text, it is clear that the described method of question generation for Swedish cannot, and probably never will, be made to produce all the questions and question formulations that an ordinary user would consider a given text to answer. But how well do automatically generated questions work in this setting?
This thesis centres on a user study in which participants were asked to formulate questions that texts answer and that are considered relevant questions. The resulting collection of questions was examined and categorized. The result for the study's main question shows that only 20-25% of the users' question formulations could be generated directly and automatically with the current approach, without certain information-technological improvements.
The thesis proposes some new terminology for this unexplored area, among other things to distinguish between the different degrees of processing demands that the generation of different kinds of questions from text entails.},
	author       = {Wilhelmsson, Kenneth},
	year         = {2015},
	address      = {Göteborgs universitet, Inst för tillämpad IT},
}

@techreport{wilhelmsson-2016-huvudansatser-247442,
	title        = {Huvudansatser för parsningsmetoder. Om programutvecklingens förutsättningar i en svensk kontext},
	abstract     = {The purpose of this text was to give an insight into the area of (syntactic) parsing. The idea was to give a picture of the development that was 1) free from overly technical details, since the area is technical in programming terms, and 2) described from a Swedish perspective. The background to the choice of topic for the text, which was intended to appear in the anthology Text och kontext, was that parsing is relatively unknown to many people working in neighbouring fields, while at the same time being an absolutely central concept for anyone engaged in computational linguistics or language technology.
The goal was thus to give a fairly general outside view of some central aspects of the development, while it is clearly the case that anyone who has worked on such development themselves may have strong opinions and preferences regarding the choice of methods, something which, in all honesty, this text is perhaps not free from either.
How should it be done? The art of developing automatic syntactic analysis of natural text can be taught from several perspectives. It can, for example, be done with a focus on the use of a particular grammar formalism, on computational speed, or on the resolution of possible ambiguities. Interpretation choices can be made using either hand-written rules or collected statistics.
A kind of main theme in this text is how parsing methods in recent years exhibit changes that can perhaps be explained by the programs having found other areas of use, with the methods adapted accordingly (another interpretation is that several later systems no longer do parsing in the strict sense).
When this intended “chapter” was finished, it received the comment that it was not suited to the anthology's target audience. A different chapter text had to be written, but at the same time came a suggestion to publish the text on parsing here, as this report.},
	author       = {Wilhelmsson, Kenneth},
	year         = {2016},
	publisher    = {Göteborgs universitet},
	address      = {Göteborg},
}

@incollection{wilhelmsson-2017-forutsattningarna-249467,
	title        = {Om förutsättningarna för språkligt datorstöd på ordnivån och uppåt},
	booktitle    = {Text och kontext - perspektiv på textanalys  / Karin Helgesson, Hans Lundqvist, Anna Lyngfelt, Andreas Nord & Åsa Wengelin (red.)},
	author       = {Wilhelmsson, Kenneth},
	year         = {2017},
	publisher    = {Gleerups},
	address      = {Malmö},
	ISBN         = {978-91-40-69364-8},
	pages        = {207--228},
}

@inProceedings{wiren-etal-2018-svala-285624,
	title        = {SVALA: Annotation of Second-Language Learner Text Based on Mostly Automatic Alignment of Parallel Corpora},
	abstract     = {Annotation of second-language learner text is a cumbersome manual task which in turn requires interpretation to postulate the intended meaning of the learner’s language. This paper describes SVALA, a tool which separates the logical steps in this process while providing rich visual support for each of them. The first step is to pseudonymize the learner text to fulfil the legal and ethical requirements for a distributable learner corpus. The second step is to correct the text, which is carried out in the simplest possible way by text editing. During the editing, SVALA automatically maintains a parallel corpus with alignments between words in the learner source text and corrected text, while the annotator may repair inconsistent word alignments. Finally, the actual labelling of the corrections (the postulated errors) is performed. We describe the objectives, design and workflow of SVALA, and our plans for further development.},
	booktitle    = {Selected papers from the CLARIN Annual Conference 2018, Pisa, 8-10 October 2018},
	editor       = {Inguna Skadina and Maria Eskevich},
	author       = {Wirén, Mats and Matsson, Arild and Rosén, Dan and Volodina, Elena},
	year         = {2018},
	publisher    = {Linköping University Electronic Press, Linköpings universitet},
	address      = {Linköping},
	ISBN         = {978-91-7685-034-3},
}

@inProceedings{wittenburg-etal-2010-resource-118909,
	title        = {Resource and service centres as the backbone for a sustainable service infrastructure},
	booktitle    = {Proceedings of LREC 2010},
	author       = {Wittenburg, Peter and Bel, Nuria and Borin, Lars and Budin, Gerhard and Calzolari, Nicoletta and Hajicova, Eva and Koskenniemi, Kimmo and Lemnitzer, Lothar and Mægaard, Bente and Piasecki, Maciej and Pierrel, Jean-Marie and Piperidis, Stelios and Skadina, Inguna and Tufis, Dan and van Veenendal, Remco and Váradi, Tamás and Wynne, Martin},
	year         = {2010},
}

@techreport{ahlfelt-etal-2006-literature-34047,
	title        = {Literature Review on Patient-Friendly Documentation Systems},
	author       = {Åhlfelt, Hans and Borin, Lars and Daumke, Philipp and Grabar, Natalia and Hallett, Catalina and Hardcastle, David and Kokkinakis, Dimitrios and Mancini, Clara and Marko, Kornel and Merkel, Magnus and Pietsch, Christian and Power, Richard and Scott, Donia and Silvervarg, Annika and Toporowska Gronostaj, Maria and Williams, Sandra and Willis, Alistair},
	year         = {2006},
	publisher    = {Göteborg University},
	address      = {Göteborg},
}

@inProceedings{akesson-lindh-2013-describing-188836,
	title        = {Describing a database collection procedure for studying ‘double filtering’ effects},
	booktitle    = {22nd conference of the International Association for Forensic Phonetics and Acoustics (IAFPA). July 21st-24th, 2013, Tampa, Florida, USA},
	author       = {Åkesson, Joel and Lindh, Jonas},
	year         = {2013},
}

@inProceedings{akesson-etal-2010-post-122323,
	title        = {Post surgery effects on VOT for Parkinson Disease STN/DBS patients},
	abstract     = {In this paper we discuss and analyse voice onset time (VOT) pre and post surgical treatment with deep brain stimulation (DBS) in 17 patients diagnosed with Parkinson’s disease (PD) at Sahlgrenska University Hospital in Gothenburg, Sweden. The patients were all at different stages of the disease, but with the common denominator that they had all undergone surgery to enhance synaptic responses through bilateral electrode implants in the subthalamic nucleus (STN) region of the brain, also known as Deep Brain Stimulation (DBS). The main focal point of the paper is to compare the pre and post surgery VOT data to see if there were any effects stemming from the STN surgery. Preliminary results for mean VOT, standard deviation of VOT and the percentage of unsuccessfully produced/unmeasurable diadochokinetic syllable repetitions are presented and discussed. We found that the standard deviation decreased significantly for the consonant /p/, and this is discussed from the perspective of the ease of articulation of the different plosives.},
	booktitle    = {Proceedings from FONETIK 2010, Working Papers},
	author       = {Åkesson, Joel and Lindh, Jonas and Hartelius, Lena},
	year         = {2010},
	volume       = {54},
	pages        = {119--124},
}

@inProceedings{akesson-etal-2012-voice-162453,
	title        = {Voice Onset Time before and after STN-surgery in patients with Parkinson’s disease},
	booktitle    = {ICPLA2012},
	author       = {Åkesson, Joel and Lindh, Jonas and Hartelius, Lena and Carlsson, Emilia},
	year         = {2012},
	volume       = {14},
}