@inproceedings{tahmasebi-risse-2017-uses-256649,
  title     = {On the Uses of Word Sense Change for Research in the Digital Humanities},
  abstract  = {With advances in technology and culture, our language changes. We invent new words, add or change meanings of existing words and change names of existing things. Unfortunately, our language does not carry a memory; words, expressions and meanings used in the past are forgotten over time. When searching and interpreting content from archives, language changes pose a great challenge. In this paper, we present results of automatic word sense change detection and show the utility for archive users as well as digital humanities’ research. Our method is able to capture changes that relate to the usage and culture of a word that cannot easily be found using dictionaries or other resources.},
  booktitle = {Research and Advanced Technology for Digital Libraries - 21st International Conference on Theory and Practice of Digital Libraries, TPDL 2017, Thessaloniki, Greece, September 18-21, 2017. Proceedings},
  editor    = {Kamps, Jaap and Tsakonas, Giannis and Manolopoulos, Yannis and Iliadis, Lazaros and Karydis, Ioannis},
  author    = {Tahmasebi, Nina and Risse, Thomas},
  year      = {2017},
  publisher = {Springer Verlag},
  address   = {Cham},
  isbn      = {978-3-319-67007-2},
}

@article{themistocleous-2017-nature-251205,
  title    = {The Nature of Phonetic Gradience across a Dialect Continuum: Evidence from {Modern Greek} Vowels},
  abstract = {This study investigates the acoustic properties of vowels in 2 Modern Greek varieties: Standard Modern Greek (SMG) and Cypriot Greek (CG). Both varieties contain in their phonetic inventories the same 5 vowels. Forty-five female speakers between 19 and 29 years old participated in this study: 20 SMG speakers and 25 CG speakers, born and raised in Athens and Nicosia, respectively. Stimuli consisted of a set of nonsense CVCV and VCV words, each containing 1 of the 5 Greek vowels in stressed and unstressed position.
Gaining insights from the controlled experimental design, the study sheds light on the gradient effects of vowel variation in Modern Greek. It shows that (1) stressed vowels are more peripheral than unstressed vowels, (2) SMG unstressed /i a u/ vowels are more raised than the corresponding CG vowels, (3) SMG unstressed vowels are shorter than CG unstressed vowels, and (4) SMG /i·u/ are more rounded than the corresponding CG vowels. Moreover, it shows that variation applies to specific subsystems, as it is the unstressed vowels that vary cross-varietally whereas the stressed vowels display only minor differences. The implications of these findings with respect to vowel raising and vowel reduction are discussed.},
  journal  = {Phonetica},
  author   = {Themistocleous, Charalambos},
  year     = {2017},
  volume   = {74},
  number   = {3},
  pages    = {157--172},
}

@inproceedings{pilan-etal-2017-larka-289884,
  title     = {Lärka: an online platform where language learning meets natural language processing},
  booktitle = {7th ISCA Workshop on Speech and Language Technology in Education, 25-26 August 2017, Stockholm, Sweden},
  author    = {Pilán, Ildikó and Alfter, David and Volodina, Elena},
  year      = {2017},
}

@article{alfter-agebjorn-2017-review-253359,
  title   = {Review of Developing, Modelling and Assessing Second Languages},
  journal = {Linguistlist},
  author  = {Alfter, David and Agebjörn, Anders},
  year    = {2017},
}

@inproceedings{hammarstrom-etal-2017-poor-261851,
  title    = {Poor man's {OCR} post-correction: Unsupervised recognition of variant spelling applied to a multilingual document collection},
  abstract = {© 2017 Copyright held by the owner/author(s). The accuracy of Optical Character Recognition (OCR) sets the limit for the success of subsequent applications used in text analyzing pipeline.
Recent models of OCR postprocessing significantly improve the quality of OCR-generated text but require engineering work or resources such as human-labeled data or a dictionary to perform with such accuracy on novel datasets. In the present paper we introduce a technique for OCR post-processing that runs off-the-shelf with no resources or parameter tuning required. In essence, words which are similar in form that are also distributionally more similar than expected at random are deemed OCR-variants. As such it can be applied to any language or genre (as long as the orthography segments the language at the word-level). The algorithm is illustrated and evaluated using a multilingual document collection and a benchmark English dataset.},
  booktitle = {DATeCH2017, Proceedings of the 2nd International Conference on Digital Access to Textual Cultural Heritage, Göttingen, Germany — June 01 - 02, 2017},
  author    = {Hammarström, Harald and Virk, Shafqat and Forsberg, Markus},
  year      = {2017},
  publisher = {Association for Computing Machinery (ACM)},
  address   = {New York},
  isbn      = {978-1-4503-5265-9},
}

@proceedings{tidemann-tahmasebi-2017-proceedings-264302,
  title     = {Proceedings of the 21st Nordic Conference on Computational Linguistics, NODALIDA 2017, Gothenburg, Sweden, May 22-24, 2017},
  editor    = {Tiedemann, Jörg and Tahmasebi, Nina},
  year      = {2017},
  publisher = {Association for Computational Linguistics},
  isbn      = {978-91-7685-601-7},
}

@inproceedings{tahmasebi-risse-2017-finding-256637,
  title    = {Finding Individual Word Sense Changes and their Delay in Appearance},
  abstract = {We present a method for detecting word sense changes by utilizing automatically induced word senses. Our method works on the level of individual senses and allows a word to have e.g. one stable sense and then add a novel sense that later experiences change. Senses are grouped based on polysemy to find linguistic concepts and we can find broadening and narrowing as well as novel (polysemous and homonymic) senses.
We evaluate on a testset, present recall and estimates of the time between expected and found change.},
  booktitle = {Proceedings of Recent Advances in Natural Language Processing 2017. Varna, Bulgaria 2–8 September, 2017},
  editor    = {Angelova, Galia and Bontcheva, Kalina and Mitkov, Ruslan and Nikolova, Ivelina and Temnikova, Irina},
  author    = {Tahmasebi, Nina and Risse, Thomas},
  year      = {2017},
  isbn      = {978-954-452-048-9},
}

@techreport{hammarstedt-etal-2017-korp-256056,
  title       = {Korp 6 - Användarmanual},
  author      = {Hammarstedt, Martin and Borin, Lars and Forsberg, Markus and Roxendal, Johan and Schumacher, Anne and Öhrman, Maria},
  year        = {2017},
  institution = {Institutionen för svenska språket, Göteborgs universitet},
}

@inproceedings{abualhajia-etal-2017-parameter-256642,
  title    = {Parameter Transfer across Domains for Word Sense Disambiguation},
  abstract = {Word sense disambiguation is defined as finding the corresponding sense for a target word in a given context, which comprises a major step in text applications. Recently, it has been addressed as an optimization problem. The idea behind is to find a sequence of senses that corresponds to the words in a given context with a maximum semantic similarity. Metaheuristics like simulated annealing and D-Bees provide approximate good-enough solutions, but are usually influenced by the starting parameters. In this paper, we study the parameter tuning for both algorithms within the word sense disambiguation problem. The experiments are conducted on different datasets to cover different disambiguation scenarios.
We show that D-Bees is robust and less sensitive towards the initial parameters compared to simulated annealing, hence, it is sufficient to tune the parameters once and reuse them for different datasets, domains or languages.},
  booktitle = {Proceedings of Recent Advances in Natural Language Processing Meet Deep Learning, Varna, Bulgaria 2–8 September 2017},
  editor    = {Angelova, Galia and Bontcheva, Kalina and Mitkov, Ruslan and Nikolova, Ivelina and Temnikova, Irina},
  author    = {Abualhajia, Sallam and Tahmasebi, Nina and Forin, Diane and Zimmermann, Karl-Heinz},
  year      = {2017},
  isbn      = {978-954-452-048-9},
}

@inproceedings{nietopina-johansson-2017-training-261938,
  title    = {Training Word Sense Embeddings With Lexicon-based Regularization},
  abstract = {We propose to improve word sense embeddings by enriching an automatic corpus-based method with lexicographic data. Information from a lexicon is introduced into the learning algorithm’s objective function through a regularizer. The incorporation of lexicographic data yields embeddings that are able to reflect expert-defined word senses, while retaining the robustness, high quality, and coverage of automatic corpus-based methods. These properties are observed in a manual inspection of the semantic clusters that different degrees of regularizer strength create in the vector space. Moreover, we evaluate the sense embeddings in two downstream applications: word sense disambiguation and semantic frame prediction, where they outperform simpler approaches.
Our results show that a corpus-based model balanced with lexicographic data learns better representations and improve their performance in downstream tasks.},
  booktitle = {Proceedings of the Eighth International Joint Conference on Natural Language Processing (Volume 1: Long Papers), Taipei, Taiwan, November 27 – December 1, 2017},
  author    = {Nieto Piña, Luis and Johansson, Richard},
  year      = {2017},
  publisher = {Asian Federation of Natural Language Processing},
  isbn      = {978-1-948087-00-1},
}

@techreport{hammarstedt-etal-2017-korp-256055,
  title       = {Korp 6 - Technical Report},
  author      = {Hammarstedt, Martin and Roxendal, Johan and Öhrman, Maria and Borin, Lars and Forsberg, Markus and Schumacher, Anne},
  year        = {2017},
  institution = {Institutionen för svenska språket, Göteborgs universitet},
}

@article{themistocleous-2017-effects-259668,
  title    = {Effects of Two Linguistically Proximal Varieties on the Spectral and Coarticulatory Properties of Fricatives: Evidence from {Athenian Greek} and {Cypriot Greek}},
  abstract = {Several studies have explored the acoustic structure of fricatives, yet there has been very little acoustic research on the effects of dialects on the production of fricatives. This article investigates the effects of two linguistically proximal Modern Greek dialects, Athenian Greek and Cypriot Greek on the temporal, spectral, and coarticulatory properties of fricatives and aims to determine the acoustic properties that convey information about these two dialects. Productions of voiced and voiceless labiodental, dental, alveolar, palatal, and velar fricatives were extracted from a speaking task from typically speaking female adult speakers (25 Cypriot Greek and 20 Athenian Greek speakers). Measures were made of spectral properties, using a spectral moments analysis. The formants of the following vowel were measured and second degree polynomials of the formant contours were calculated.
The findings showed that Athenian Greek and Cypriot Greek fricatives differ in all spectral properties across all places of articulation. Also, the co-articulatory effects of fricatives on following vowel were different depending on the dialect. Duration, spectral moments, and the starting frequencies of F1, F2, F3, and F4 contributed the most to the classification of dialect. These findings provide a solid evidence base for the manifestation of dialectal information in the acoustic structure of fricatives.},
  journal  = {Frontiers in Psychology},
  author   = {Themistocleous, Charalambos},
  year     = {2017},
  volume   = {8},
  number   = {1945},
  pages    = {1--19},
}

@article{grohmann-etal-2017-acquiring-252175,
  title    = {Acquiring Clitic Placement in Bilectal Settings: Interactions between Social Factors},
  abstract = {This paper examines the development of object clitic placement by children acquiring Cypriot Greek. Greek-speaking Cyprus is sociolinguistically characterized by diglossia between two varieties of Greek, the local Cypriot Greek and the official Standard Modern Greek. Arguably as a result of this situation, clitics may be placed post- (enclisis) or preverbally (proclisis) in the same syntactic environment; while the former is a property of Cypriot Greek, the latter is typically considered an effect of the standard language. The following issues are investigated here: (a) how such bilectal speakers distinguish between the two Greek varieties with respect to clitic placement; (b) how the acquisition of clitics develops over time; (c) how, and which, sociolinguistic factors determine clitic placement; and (d) how schooling may affect clitic placement. To address (a)–(d), a sentence completion task was used to elicit clitic productions, administered to 431 children around Cyprus ranging from 2;8 to 8;11. The C5.0 machine-learning algorithm was employed to model the interaction of (socio-)linguistic factors on the development of clitic placement.
The model shows that speakers acquire the relevant features very early, yet compartmentalization of form and function according to style emerges only as they engage in the larger speech community. In addition, the effects of sociolinguistic factors on clitic placement appear gradually.},
  journal  = {Frontiers in Communication},
  author   = {Grohmann, Kleanthes and Papadopoulou, Elena and Themistocleous, Charalambos},
  year     = {2017},
  volume   = {2},
}

@article{themistocleous-2017-classifying-254040,
  author   = {Themistocleous, Charalambos},
  title    = {Classifying linguistic and dialectal information from vowel acoustic parameters},
  journal  = {Speech Communication},
  year     = {2017},
  volume   = {92},
  pages    = {13--22},
  abstract = {This study provides a classification model of two Modern Greek dialects, namely Athenian Greek and Cypriot Greek, using information from formant dynamics of F1, F2, F3, F4 and vowel duration. To this purpose, a large corpus of vowels from 45 speakers of Athenian Greek and Cypriot Greek was collected. The first four formant frequencies were measured at multiple time points and modelled using second degree polynomials. The measurements were employed in classification experiments, using three classifiers: Linear Discriminant Analysis, Flexible Discriminant Analysis, and C5.0. The latter outperformed the other classification models, resulting in a higher classification accuracy of the dialect. C5.0 classification shows that duration and the zeroth coefficient of F2, F3 and F4 contribute more to the classification of the dialect than the other measurements; it also shows that formant dynamics are important for the classification of dialect.},
}

@inproceedings{nord-forsberg-2017-enklare-259902,
  title     = {Enklare efter klarspråk?
Myndighetstexter före och efter ett klarspråksprojekt},
  booktitle = {Språk och norm: Rapport från ASLA:s symposium, Uppsala universitet 21–22 april 2016},
  editor    = {Bendegard, Saga and Melander Marttala, Ulla and Westman, Maria},
  author    = {Nord, Andreas and Forsberg, Markus},
  year      = {2017},
  publisher = {ASLA},
  address   = {Uppsala},
  isbn      = {978-91-87884-26-9},
}

@article{nautsch-etal-2017-making-258734,
  title    = {Making Likelihood Ratios Digestible for Cross-Application Performance Assessment},
  abstract = {Performance estimation is crucial to the assessment of novel algorithms and systems. In detection error tradeoff (DET) diagrams, discrimination performance is solely assessed targeting one application, where cross-application performance considers risks resulting from decisions, depending on application constraints. For the purpose of interchangeability of research results across different application constraints, we propose to augment DET curves by depicting systems regarding their support of security and convenience levels. Therefore, application policies are aggregated into levels based on verbal likelihood ratio scales, providing an easy to use concept for business-to-business communication to denote operative thresholds. We supply a reference implementation in Python, an exemplary performance assessment on synthetic score distributions, and a fine-tuning scheme for Bayes decision thresholds, when decision policies are bounded rather than fix.},
  journal  = {IEEE Signal Processing Letters},
  author   = {Nautsch, A. and Meuwly, D. and Ramos, D.
and Lindh, Jonas and Busch, C.},
  year     = {2017},
  volume   = {24},
  number   = {10},
  pages    = {1552--1556},
}

@inproceedings{bjorkner-etal-2017-voice-256522,
  author    = {Björkner, Eva and Lundholm Fors, Kristina and Kokkinakis, Dimitrios and Nordlund, Arto},
  title     = {Voice acoustic parameters for detecting signs of early cognitive impairment},
  booktitle = {PEVOC (PanEuropean Voice Conference) 12, August 30th - September 1st 2017, Ghent, Belgium},
  year      = {2017},
  abstract  = {Aiding the detection of very early cognitive impairment in Alzheimer's disease (AD) and assessing the disease progression are essential foundations for effective psychological assessment, diagnosis and planning. Efficient tools for routine dementia screening in primary health care, particularly non-invasive and cost-effective methods, are desirable. The aim of this study is to find out if voice acoustic analysis can be a useful tool for detecting signs of early cognitive impairment.},
}

@book{lindh-2017-forensic-261214,
  title    = {Forensic comparison of voices, speech and speakers : tools and methods in forensic phonetics},
  abstract = {This thesis has three main objectives. The first objective (A) includes Study I, which investigates the parameter fundamental frequency (F0) and its robustness in different acoustic contexts by using different measures. The outcome concludes that using the alternative baseline as a measure will diminish the effect of low-quality recordings or varying speaking liveliness. However, both creaky voice and raised vocal effort induce intra-variation problems that are yet to be solved. The second objective (B) includes study II, III and IV. Study II investigates the differences between the results from an ear witness line-up experiment and the pairwise perceptual judgments of voice similarity performed by a large group of listeners.
The study shows that humans seem to be much more focused on similarities of speech style than features connected to voice quality, even when recordings are played backwards. Study III investigates the differences between an automatic voice comparison system and humans’ perceptual judgments of voice similarity. The experiments’ results show that it is possible to see a correlation between how speakers were judged as more or less different using multidimensional scaling of similarity ranks compared to both the automatic system and the listeners. However, there are also differences due to the fact that human listeners include information about speech style and have difficulties weighting the parameters, i.e. ignoring them when they are contradictory. Study IV successfully investigates a new functional method for how to convert the perceptual similarity judgments made by humans and then compare those to the automatic system results within the likelihood ratio framework. It was discovered that the automatic system outperformed the naïve human listeners in this task (using a very small dataset). The third objective (C) includes study V. Study V investigates several statistical modelling techniques to calculate relevant likelihood ratios using simulations based on existing reference data in an authentic forensic case of a disputed utterance. The study presents several problems with modelling small datasets and develops methods to take into account the lack of data within the likelihood ratio framework. In summary, the thesis contains a larger historical background to forensic speaker comparison to guide the reader into the current research situation within forensic phonetics. The work further seeks to build a bridge between forensic phonetics and automatic voice recognition. 
Practical casework implications have been considered throughout the work on the basis of own experience as a forensic caseworker and through collaborative interaction with other parties working in the field, both in research and in forensic practice and law enforcement. Since 2005, the author has been involved in over 400 forensic cases and given testimony in several countries.},
  author    = {Lindh, Jonas},
  year      = {2017},
  publisher = {Department of Philosophy, Linguistics, and Theory of Science, University of Gothenburg},
  address   = {Gothenburg},
  isbn      = {978-91-629-0141-7},
}

@inproceedings{kokkinakis-etal-2017-data-256955,
  title    = {Data Collection from Persons with Mild Forms of Cognitive Impairment and Healthy Controls - Infrastructure for Classification and Prediction of Dementia},
  abstract = {Cognitive and mental deterioration, such as difficulties with memory and language, are some of the typical phenotypes for most neurodegenerative diseases including Alzheimer’s disease and other dementia forms. This paper describes the first phases of a project that aims at collecting various types of cognitive data, acquired from human subjects in order to study relationships among linguistic and extra-linguistic observations. The project’s aim is to identify, extract, process, correlate, evaluate, and disseminate various linguistic phenotypes and measurements and thus contribute with complementary knowledge in early diagnosis, monitor progression, or predict individuals at risk. In the near future, automatic analysis of these data will be used to extract various types of features for training, testing and evaluating automatic classifiers that could be used to differentiate individuals with mild symptoms of cognitive impairment from healthy, age-matched controls and identify possible indicators for the early detection of mild forms of cognitive impairment.
Features will be extracted from audio recordings (speech signal), the transcription of the audio signals (text) and the raw eye-tracking data.},
  booktitle = {Proceedings of the 21st Nordic Conference on Computational Linguistics, NoDaLiDa, 22-24 May 2017, Gothenburg, Sweden},
  author    = {Kokkinakis, Dimitrios and Lundholm Fors, Kristina and Björkner, Eva and Nordlund, Arto},
  year      = {2017},
  publisher = {Linköping University Electronic Press, Linköpings universitet},
  address   = {Linköping},
  isbn      = {978-91-7685-601-7},
}

@misc{volodina-etal-2017-preface-262846,
  title    = {Preface. Proceedings of the Joint 6th Workshop on {NLP} for Computer Assisted Language Learning and 2nd Workshop on {NLP} for Research on Language Acquisition at {NoDaLiDa} 2017, {Gothenburg}, 22nd {May} 2017},
  abstract = {For the second year in a row we brought two related themes of NLP for Computer-Assisted Language Learning and NLP for Language Acquisition together. The goal of organizing joint workshops is to provide a meeting place for researchers working on language learning issues including both empirical and experimental studies and NLP-based applications. The resulting volume covers a variety of topics from the two fields and - hopefully - showcases the challenges and achievements in the field.
The seven papers in this volume cover native language identification in learner writings, using syntactic complexity development in language learner language to identify reading comprehension texts of appropriate level, exploring the potential of parallel corpora to predict mother-language specific problem areas for learners of another language, tools for learning languages - both well-resourced ones such as English as well as endangered or under-resourced ones such as Yakut and Võro, as well as exploring the potential of automatically identifying and correcting word-level errors in Swedish learner writing.},
  author   = {Volodina, Elena and Pilán, Ildikó and Borin, Lars and Grigonyte, Gintare and Nilsson Björkenstam, Kristina},
  year     = {2017},
  volume   = {30},
  pages    = {i--vi},
}

@inproceedings{volodina-etal-2017-svalex-262848,
  title    = {{SVALex}. En andraspråksordlista med {CEFR}-nivåer},
  abstract = {När man planerar att utveckla en språkkurs i ett andra- eller främmandespråk (L2) ställs man inför utmaningen att definiera vilket ordförråd inlärarna behöver tillägna sig. Forskning inom andraspråksinlärning tyder på att läsaren behöver kunna 95–98 \% av löporden i en text för att förstå den (Laufer \& Ravenhorst-Kalovski 2010). Sådana studier är användbara för att uppskatta storleken på det ordförråd som behövs för att tillägna sig innehållet i en text, men de ger ingen närmare metodologisk vägledning för den som vill utveckla nivåstrukturerade läromedel eller kurser för andraspråksundervisning. Speciellt tydligt är detta inom CALL, Computer-Assisted Language Learning, där läromaterial (t.ex. övningar) genereras automatiskt, och behöver elektroniska resurser som kunskapskälla. Man kan istället angripa problemet från andra hållet. Om man har en samling nivåklassificerade texter för andraspråksinlärare kan man utifrån dem bygga ordlistor där varje ord är placerat på en färdighetsskala.
Om man känner till den förutsatta färdighetsnivån hos läsaren, kan man helt enkelt anta att den textnivå där ett ord dyker upp första gången också anger ordets svårighetsgrad. SVALex är ett lexikon som har byggts enligt den principen. Resursen ska kunna användas av inlärare och lärare i svenska som andraspråk, men även av lexikografer, av kursutvecklare och provkonstruktörer samt av dem som likt oss själva ägnar sig åt utveckling av språkteknologibaserade datorstöd för språkinlärning och språktestning. SVALex utgör en vidareutveckling i förhållande till tidigare lexikonresurser för svenska som andraspråk (se avsnitt 2), genom att den konsekvent relaterar de 15 681 lexikoningångarna till en vida använd färdighetsskala för andra- och främmandespråksinlärning, Europarådets gemensamma europeiska referensram för språk (Common European Framework of Reference, i fortsättningen refererad till som CEFR) (Council of Europe 2001; Skolverket 2009). Nivåklassningen av lexikonenheterna i SVALex görs på basis av deras distribution i COCTAILL, en korpus innehållande lärobokstexter i svenska som andraspråk, där lärare har placerat in varje text i någon av CEFR-nivåerna (Volodina et al. 2014).},
  booktitle = {Svenskans beskrivning. 35, Förhandlingar vid trettiofemte sammankomsten : Göteborg 11–13 maj 2016},
  editor    = {Sköldberg, Emma and Andréasson, Maia and Adamsson Eryd, Henrietta and Lindahl, Filippa and Lindström, Sven and Prentice, Julia and Sandberg, Malin},
  author    = {Volodina, Elena and Borin, Lars and Pilán, Ildikó and François, Thomas and Tack, Annaïs},
  year      = {2017},
  publisher = {Göteborgs universitet},
  address   = {Göteborg},
  isbn      = {978-91-87850-64-6},
}

@inproceedings{bernardy-themistocleous-2017-modelling-258661,
  title    = {Modelling prosodic structure using Artificial Neural Networks},
  abstract = {The ability to accurately perceive whether a speaker is asking a question or is making a statement is crucial for any successful interaction.
However, learning and classifying tonal patterns has been a challenging task for automatic speech recognition and for models of tonal representation, as tonal contours are characterized by significant variation. This paper provides a classification model of Cypriot Greek questions and statements. We evaluate two state-of-the-art network architectures: a Long Short-Term Memory (LSTM) network and a convolutional network (ConvNet). The ConvNet outperforms the LSTM in the classification task and exhibited an excellent performance with 95\% classification accuracy.},
  booktitle = {ExLing 2017. Proceedings of 8th Tutorial and Research Workshop on Experimental Linguistics, 19-22 June 2017, Heraklion, Crete, Greece},
  editor    = {Botinis, Antonis},
  author    = {Bernardy, Jean-Philippe and Themistocleous, Charalambos},
  year      = {2017},
  publisher = {University of Athens},
  address   = {Athens},
  isbn      = {978-960-466-162-6},
}

@proceedings{bouma-adesam-2017-proceedings-254435,
  title     = {Proceedings of the NoDaLiDa 2017 Workshop on Processing Historical Language},
  editor    = {Bouma, Gerlof and Adesam, Yvonne},
  year      = {2017},
  publisher = {Linköping University Electronic Press, Linköpings universitet},
  address   = {Linköping},
  isbn      = {978-91-7685-503-4},
}

@inproceedings{fyndanis-etal-2017-time-260585,
  title    = {Time reference and aspect in agrammatic aphasia: Evidence from {Greek}},
  abstract = {Time reference, which has been found to be selectively impaired in agrammatic aphasia (e.g., Bastiaanse et al., 2011), is often interwoven with grammatical aspect. Dragoy and Bastiaanse (2013) investigated the relationship between time reference/tense and aspect focusing on Russian aphasia and found that the two interact: past reference was less impaired when tested within perfective aspect (compared to when tested within imperfective aspect), and reference to the nonpast was less impaired when tested within imperfective aspect (compared to when tested within perfective aspect).
To account for this pattern, Dragoy and Bastiaanse (2013: 114) claimed that “perfectives primarily refer to completed, past events while imperfectives prototypically describe ongoing, non-past events”. This study explores the relationship between time reference and aspect focusing on Greek aphasia. In Greek, verb forms referring to the past and future encode the perfective-imperfective contrast. Dragoy and Bastiaanse (2013) would make predictions PR1–PR4 for Greek. (PR1) past reference within perfective aspect > past reference within imperfective aspect; (PR2) future reference within perfective aspect < future reference within imperfective aspect; (PR3) perfective aspect within past reference > imperfective aspect within past reference; (PR4) perfective aspect within future reference < imperfective aspect within future reference. Methods Eight Greek-speaking persons with agrammatic aphasia (PWA) and eight controls were administered a sentence completion task consisting of 128 experimental source sentence (SS)-target sentence (TS) pairs. There were eight subconditions, each of which consisted of 16 items: past reference within perfective aspect; past reference within imperfective aspect; future reference within perfective aspect; future reference within imperfective aspect; perfective aspect within past reference; imperfective aspect within past reference; perfective aspect within future reference; imperfective aspect within future reference. Participants were auditorily presented with a SS and the beginning of the TS, and were asked to orally complete the TS producing the missing Verb Phrase. We fitted generalized linear mixed-effect models and employed Fisher’s exact tests to make within-participant comparisons. Results Overall, the aphasic group fared significantly worse than the control group (p < 0.001). At the group level, none of the four relevant comparisons (see PR1–PR4) yielded significant differences for PWA (Table 1). 
Four PWA (P1, P3, P7, P8) exhibited dissociations, with three of them making up a double dissociation: P1 performed better on imperfective aspect-future reference than on perfective aspect-future reference (p < 0.001), and P7 and P8 exhibited the opposite pattern (p = 0.016 and p < 0.001 for P7 and P8, respectively). Discussion Results are not consistent with Dragoy and Bastiaanse’s (2013) findings, which challenges the idea of prototypical and non-prototypical associations between time reference and aspect. The double dissociation that emerged in the aspect condition indicates that a given time reference-aspect combination may be relatively easy to process for some PWA but demanding for some others. Thus, studies investigating tense/time reference in aphasia should ensure that this grammatical/semantic category is not confounded by aspect.},
  booktitle = {Front. Hum. Neurosci. Conference Abstract: Academy of Aphasia, 55th Annual Meeting, Baltimore, United States, 5 Nov - 7 Nov, 2017.},
  author    = {Fyndanis, Valantis and Themistocleous, Charalambos and Christidou, Paraskevi},
  year      = {2017},
}

@inproceedings{borin-etal-2017-clarin-261157,
  title    = {{Swe-Clarin}: Language resources and technology for Digital Humanities},
  abstract = {CLARIN is a European Research Infrastructure Consortium (ERIC), which aims at (a) making extensive language-based materials available as primary research data to the humanities and social sciences (HSS); and (b) offering state-of-the-art language technology (LT) as an e-research tool for this purpose, positioning CLARIN centrally in what is often referred to as the digital humanities (DH). The Swedish CLARIN node Swe-Clarin was established in 2015 with funding from the Swedish Research Council.
In this paper, we describe the composition and activities of Swe-Clarin, aiming at meeting the requirements of all HSS and other researchers whose research involves using text and speech as primary research data, and spreading the awareness of what Swe-Clarin can offer these research communities. We focus on one of the central means for doing this: pilot projects conducted in collaboration between HSS researchers and Swe-Clarin, together formulating a research question, the addressing of which requires working with large language-based materials. Four such pilot projects are described in more detail, illustrating research on rhetorical history, second-language acquisition, literature, and political science. A common thread to these projects is an aspiration to meet the challenge of conducting research on the basis of very large amounts of textual data in a consistent way without losing sight of the individual cases making up the mass of data, i.e., to be able to move between Moretti’s “distant” and “close reading” modes. While the pilot projects clearly make substantial contributions to DH, they also reveal some needs for more development, and in particular a need for document-level access to the text materials. As a consequence of this, work has now been initiated in Swe-Clarin to meet this need, so that Swe-Clarin together with HSS scholars investigating intricate research questions can take on the methodological challenges of big-data language-based digital humanities.}, booktitle = {Digital Humanities 2016. Extended Papers of the International Symposium on Digital Humanities (DH 2016) Växjö, Sweden, November, 7-8, 2016. Edited by Koraljka Golub, Marcelo Milra. 
Vol-2021}, author = {Borin, Lars and Tahmasebi, Nina and Volodina, Elena and Ekman, Stefan and Jordan, Caspar and Viklund, Jon and Megyesi, Beáta and Näsman, Jesper and Palmér, Anne and Wirén, Mats and Björkenstam, Kristina and Grigonyte, Gintare and Gustafson Capková, Sofia and Kosiński, Tomasz}, year = {2017}, publisher = {M. Jeusfeld c/o Redaktion Sun SITE, Informatik V, RWTH Aachen.}, address = {Aachen}, } @incollection{wilhelmsson-2017-forutsattningarna-249467, title = {Om förutsättningarna för språkligt datorstöd på ordnivån och uppåt}, booktitle = {Text och kontext - perspektiv på textanalys / Karin Helgesson, Hans Lundqvist, Anna Lyngfelt, Andreas Nord & Åsa Wengelin (red.)}, author = {Wilhelmsson, Kenneth}, year = {2017}, publisher = {Gleerups}, address = {Malmö}, ISBN = {978-91-40-69364-8}, pages = {207--228}, } @inProceedings{fraser-etal-2017-analysis-257840, title = {An analysis of eye-movements during reading for the detection of mild cognitive impairment}, abstract = {We present a machine learning analysis of eye-tracking data for the detection of mild cognitive impairment, a decline in cognitive abilities that is associated with an increased risk of developing dementia. We compare two experimental configurations (reading aloud versus reading silently), as well as two methods of combining information from the two trials (concatenation and merging). Additionally, we annotate the words being read with information about their frequency and syntactic category, and use these annotations to generate new features. Ultimately, we are able to distinguish between participants with and without cognitive impairment with up to 86% accuracy.}, booktitle = {Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing. 
September 9-11, 2017 Copenhagen, Denmark / Editors Martha Palmer, Rebecca Hwa, Sebastian Riedel }, author = {Fraser, Kathleen and Lundholm Fors, Kristina and Kokkinakis, Dimitrios and Nordlund, Arto}, year = {2017}, publisher = {Association for Computational Linguistics }, ISBN = {978-1-945626-83-8}, } @article{pilan-etal-2017-candidate-260382, title = {Candidate sentence selection for language learning exercises: From a comprehensive framework to an empirical evaluation}, abstract = {We present a framework and its implementation relying on Natural Language Processing methods, which aims at the identification of exercise item candidates from corpora. The hybrid system combining heuristics and machine learning methods includes a number of relevant selection criteria. We focus on two fundamental aspects: linguistic complexity and the dependence of the extracted sentences on their original context. Previous work on exercise generation addressed these two criteria only to a limited extent, and a refined overall candidate sentence selection framework appears also to be lacking. In addition to a detailed description of the system, we present the results of an empirical evaluation conducted with language teachers and learners which indicate the usefulness of the system for educational purposes. We have integrated our system into a freely available online learning platform.}, journal = {Revue Traitement Automatique des Langues. Special issue on NLP for Learning and Teaching}, author = {Pilán, Ildikó and Volodina, Elena and Borin, Lars}, year = {2017}, volume = {57}, number = {3}, pages = {67--91}, } @inProceedings{virk-etal-2017-automatic-261789, title = {Automatic extraction of typological linguistic features from descriptive grammars}, abstract = {The present paper describes experiments on automatically extracting typological linguistic features of natural languages from traditional written descriptive grammars. 
The feature-extraction task has high potential value in typological, genealogical, historical, and other related areas of linguistics that make use of databases of structural features of languages. Until now, extraction of such features from grammars has been done manually, which is highly time and labor consuming and becomes prohibitive when extended to the thousands of languages for which linguistic descriptions are available. The system we describe here starts from semantically parsed text over which a set of rules are applied in order to extract feature values. We evaluate the system’s performance on the manually curated Grambank database as the gold standard and report the first measures of precision and recall for this problem.}, booktitle = {Text, Speech, and Dialogue 20th International Conference, TSD 2017, Prague, Czech Republic, August 27-31, 2017, Proceedings}, editor = {Kamil Ekštein and Václav Matoušek.}, author = {Virk, Shafqat and Borin, Lars and Saxena, Anju and Hammarström, Harald}, year = {2017}, publisher = {Springer International Publishing}, address = {Cham}, ISBN = {978-3-319-64205-5}, } @article{gruzitis-dannells-2017-multilingual-225789, title = {A multilingual FrameNet-based grammar and lexicon for controlled natural language}, abstract = {Berkeley FrameNet is a lexico-semantic resource for English based on the theory of frame semantics. It has been exploited in a range of natural language processing applications and has inspired the development of framenets for many languages. We present a methodological approach to the extraction and generation of a computational multilingual FrameNet-based grammar and lexicon. The approach leverages FrameNet-annotated corpora to automatically extract a set of cross-lingual semantico-syntactic valence patterns. 
Based on data from Berkeley FrameNet and Swedish FrameNet, the proposed approach has been implemented in Grammatical Framework (GF), a categorial grammar formalism specialized for multilingual grammars. The implementation of the grammar and lexicon is supported by the design of FrameNet, providing a frame semantic abstraction layer, an interlingual semantic application programming interface (API), over the interlingual syntactic API already provided by GF Resource Grammar Library. The evaluation of the acquired grammar and lexicon shows the feasibility of the approach. Additionally, we illustrate how the FrameNet-based grammar and lexicon are exploited in two distinct multilingual controlled natural language applications. The produced resources are available under an open source license.}, journal = {Language resources and evaluation}, author = {Gruzitis, Normunds and Dannélls, Dana}, year = {2017}, volume = {51}, number = {1}, pages = {37–66}, } @misc{volodina-etal-2017-proceedings-262838, title = {Proceedings of the Joint 6th Workshop on NLP for Computer Assisted Language Learning and 2nd Workshop on NLP for Research on Language Acquisition at NoDaLiDa 2017, Gothenburg, 22nd May 2017}, abstract = {For the second year in a row we have brought the two related themes of NLP for Computer-Assisted Language Learning and NLP for Language Acquisition together under one umbrella. The goal of organizing these joint workshops is to provide a meeting place for researchers working on language learning issues including both empirical and experimental studies and NLP-based applications.}, author = {Volodina, Elena and Pilán, Ildikó and Borin, Lars and Grigonyte, Gintare and Nilsson Björkenstam, Kristina}, year = {2017}, publisher = {Linköping University Press}, address = {Linköping, Sweden}, ISBN = { 978-91-7685-502-7}, }